Skip to content

Commit 44d49f3

Browse files
authored
Add a more detailed introduction for model compression (#772)
1 parent f326a86 commit 44d49f3

4 files changed

Lines changed: 70 additions & 36 deletions

File tree

deepmd/entrypoints/compress.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def compress(
2323
input: str,
2424
output: str,
2525
extrapolate: int,
26-
stride: float,
26+
step: float,
2727
frequency: str,
2828
checkpoint_folder: str,
2929
mpi_log: str,
@@ -34,9 +34,9 @@ def compress(
3434
"""Compress model.
3535
3636
The table is composed of fifth-order polynomial coefficients and is assembled from
37-
two sub-tables. The first table takes the stride(parameter) as it's uniform stride,
38-
while the second table takes 10 * stride as it's uniform stride. The range of the
39-
first table is automatically detected by deepmd-kit, while the second table ranges
37+
two sub-tables. The first table takes the step parameter as the domain's uniform step size,
38+
while the second table takes 10 * step as its uniform step size. The range of the
39+
first table is automatically detected by the code, while the second table ranges
4040
from the first table's upper boundary(upper) to the extrapolate(parameter) * upper.
4141
4242
Parameters
@@ -49,8 +49,8 @@ def compress(
4949
compressed model filename
5050
extrapolate : int
5151
scale of model extrapolation
52-
stride : float
53-
uniform stride of tabulation's first table
52+
step : float
53+
uniform step size of the tabulation's first table
5454
frequency : str
5555
frequency of tabulation overflow check
5656
checkpoint_folder : str
@@ -71,8 +71,8 @@ def compress(
7171
jdata["model"]["compress"]["model_file"] = input
7272
jdata["model"]["compress"]["table_config"] = [
7373
extrapolate,
74-
stride,
75-
10 * stride,
74+
step,
75+
10 * step,
7676
int(frequency),
7777
]
7878
# be careful here, if one wants to refine the model

deepmd/entrypoints/main.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,8 @@ def parse_args(args: Optional[List[str]] = None):
242242
# * compress model *****************************************************************
243243
# Compress a model, which includes tabulating the embedding-net.
244244
# The table is composed of fifth-order polynomial coefficients and is assembled
245-
# from two sub-tables. The first table takes the stride(parameter) as it's uniform
246-
# stride, while the second table takes 10 * stride as it\s uniform stride
245+
# from two sub-tables. The first table takes the step(parameter) as its uniform
246+
# step, while the second table takes 10 * step as its uniform step.
247247
#  The range of the first table is automatically detected by deepmd-kit, while the
248248
# second table ranges from the first table's upper boundary(upper) to the
249249
# extrapolate(parameter) * upper.
@@ -263,36 +263,43 @@ def parse_args(args: Optional[List[str]] = None):
263263
"--input",
264264
default="frozen_model.pb",
265265
type=str,
266-
help="The original frozen model, which will be compressed by the deepmd-kit",
266+
help="The original frozen model, which will be compressed by the code",
267267
)
268268
parser_compress.add_argument(
269269
"-o",
270270
"--output",
271-
default="frozen_model_compress.pb",
271+
default="frozen_model_compressed.pb",
272272
type=str,
273273
help="The compressed model",
274274
)
275+
parser_compress.add_argument(
276+
"-s",
277+
"--step",
278+
default=0.01,
279+
type=float,
280+
help="Model compression uses fifth-order polynomials to interpolate the embedding-net. "
281+
"It introduces two tables with different step size to store the parameters of the polynomials. "
282+
"The first table covers the range of the training data, while the second table is an extrapolation of the training data. "
283+
"The domain of each table is uniformly divided by a given step size. "
284+
"And the step(parameter) denotes the step size of the first table and the second table will "
285+
"use 10 * step as it's step size to save the memory. "
286+
"Usually the value ranges from 0.1 to 0.001. "
287+
"Smaller step means higher accuracy and bigger model size",
288+
)
275289
parser_compress.add_argument(
276290
"-e",
277291
"--extrapolate",
278292
default=5,
279293
type=int,
280-
help="The scale of model extrapolation",
281-
)
282-
parser_compress.add_argument(
283-
"-s",
284-
"--stride",
285-
default=0.01,
286-
type=float,
287-
help="The uniform stride of tabulation's first table, the second table will "
288-
"use 10 * stride as it's uniform stride",
294+
help="The domain range of the first table is automatically detected by the code: [d_low, d_up]. "
295+
"While the second table ranges from the first table's upper boundary(d_up) to the extrapolate(parameter) * d_up: [d_up, extrapolate * d_up]",
289296
)
290297
parser_compress.add_argument(
291298
"-f",
292299
"--frequency",
293300
default=-1,
294301
type=int,
295-
help="The frequency of tabulation overflow check(If the input environment "
302+
help="The frequency of tabulation overflow check(Whether the input environment "
296303
"matrix overflow the first or second table range). "
297304
"By default do not check the overflow",
298305
)

doc/getting-started.md

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -244,23 +244,50 @@ positional arguments:
244244

245245
optional arguments:
246246
-h, --help show this help message and exit
247+
-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}, --log-level {DEBUG,3,INFO,2,WARNING,1,ERROR,0}
248+
set verbosity level by string or number, 0=ERROR,
249+
1=WARNING, 2=INFO and 3=DEBUG (default: INFO)
250+
-l LOG_PATH, --log-path LOG_PATH
251+
set log file to log messages to disk, if not
252+
specified, the logs will only be output to console
253+
(default: None)
254+
-m {master,collect,workers}, --mpi-log {master,collect,workers}
255+
Set the manner of logging when running with MPI.
256+
'master' logs only on main process, 'collect'
257+
broadcasts logs from workers to master and 'workers'
258+
means each process will output its own log (default:
259+
master)
247260
-i INPUT, --input INPUT
248261
The original frozen model, which will be compressed by
249-
the deepmd-kit
262+
the code (default: frozen_model.pb)
250263
-o OUTPUT, --output OUTPUT
251-
The compressed model
264+
The compressed model (default:
265+
frozen_model_compressed.pb)
266+
-s STEP, --step STEP Model compression uses fifth-order polynomials to
267+
interpolate the embedding-net. It introduces two
268+
tables with different step size to store the
269+
parameters of the polynomials. The first table covers
270+
the range of the training data, while the second table
271+
is an extrapolation of the training data. The domain
272+
of each table is uniformly divided by a given step
273+
size. And the step(parameter) denotes the step size of
274+
the first table and the second table will use 10 *
275+
step as it's step size to save the memory. Usually the
276+
value ranges from 0.1 to 0.001. Smaller step means
277+
higher accuracy and bigger model size (default: 0.01)
252278
-e EXTRAPOLATE, --extrapolate EXTRAPOLATE
253-
The scale of model extrapolation
254-
-s STRIDE, --stride STRIDE
255-
The uniform stride of tabulation's first table, the
256-
second table will use 10 * stride as it's uniform
257-
stride
279+
The domain range of the first table is automatically
280+
detected by the code: [d_low, d_up]. While the second
281+
table ranges from the first table's upper
282+
boundary(d_up) to the extrapolate(parameter) * d_up:
283+
[d_up, extrapolate * d_up] (default: 5)
258284
-f FREQUENCY, --frequency FREQUENCY
259-
The frequency of tabulation overflow check(If the
285+
The frequency of tabulation overflow check(Whether the
260286
input environment matrix overflow the first or second
261287
table range). By default do not check the overflow
262-
-d FOLDER, --folder FOLDER
263-
path to checkpoint folder
288+
(default: -1)
289+
-c CHECKPOINT_FOLDER, --checkpoint-folder CHECKPOINT_FOLDER
290+
path to checkpoint folder (default: .)
264291
```
265292
**Parameter explanation**
266293

source/tests/test_argument_parser.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,10 +272,10 @@ def test_parser_compress(self):
272272
ARGS = {
273273
"INPUT": dict(type=str, value="INFILE"),
274274
"--output": dict(type=str, value="OUTFILE"),
275-
"--extrapolate": dict(type=int, value=10),
276-
"--stride": dict(type=float, value=0.1),
277-
"--frequency": dict(type=int, value=1),
278-
"--checkpoint-folder": dict(type=str, value="FOLDER"),
275+
"--extrapolate": dict(type=int, value=5),
276+
"--step": dict(type=float, value=0.1),
277+
"--frequency": dict(type=int, value=-1),
278+
"--checkpoint-folder": dict(type=str, value="."),
279279
}
280280

281281
self.run_test(command="compress", mapping=ARGS)

0 commit comments

Comments
 (0)