deepmodeling · amcadmus · Jul 9, 2021 · Jul 6, 2021 · Jul 9, 2021
diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py
@@ -5,7 +5,9 @@
 from .doc import doc_train_input
 from .freeze import freeze
 from .test import test
-from .train import train
+# Import the function `train` under the alias `train_dp` so that its name
+# does not conflict with the name of the module `train` it is defined in.
+from .train import train as train_dp
 from .transfer import transfer
 from ..infer.model_devi import make_model_devi
 from .convert import convert

diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
@@ -11,7 +11,7 @@
     doc_train_input,
     freeze,
     test,
-    train,
+    train_dp,
     transfer,
     make_model_devi,
     convert,
@@ -416,7 +416,7 @@ def main():
     dict_args = vars(args)
 
     if args.command == "train":
-        train(**dict_args)
+        train_dp(**dict_args)
     elif args.command == "freeze":
         freeze(**dict_args)
     elif args.command == "config":

diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py
@@ -19,6 +19,7 @@
 from deepmd.utils.compat import updata_deepmd_input
 from deepmd.utils.data_system import DeepmdDataSystem
 from deepmd.utils.sess import run_sess
+from deepmd.utils.neighbor_stat import NeighborStat
 
 if TYPE_CHECKING:
     from deepmd.run_options import TFServerV1
@@ -173,6 +174,9 @@ def train(
     jdata = updata_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
 
     jdata = normalize(jdata)
+
+    jdata = update_sel(jdata)
+
     with open(output, "w") as fp:
         json.dump(jdata, fp, indent=4)
 
@@ -327,3 +331,88 @@ def get_modifier(modi_data=None):
     else:
         modifier = None
     return modifier
+
+
+def get_rcut(jdata):
+    """Return the largest `rcut` found in the descriptor section of `jdata`."""
+    descrpt_data = jdata['model']['descriptor']
+    if descrpt_data['type'] == 'hybrid':
+        # a hybrid descriptor lists its sub-descriptors, each with its own `rcut`
+        rcuts = [sub['rcut'] for sub in descrpt_data['list']]
+    else:
+        rcuts = [descrpt_data['rcut']]
+    return max(rcuts)
+
+
+def get_type_map(jdata):
+    return jdata['model'].get('type_map')  # None when the model section has no `type_map`
+
+
+def get_sel(jdata, rcut):
+    """Return the maximal neighbor numbers reported by `NeighborStat` for `rcut`.
+
+    The training data is requested from `get_data` with the largest `rcut` of
+    the descriptor section; the statistics themselves are taken with `rcut`.
+    """
+    max_rcut = get_rcut(jdata)
+    # `type_map` is optional: a missing map (None) and an empty one are both
+    # treated as "no type map"; calling `len()` on None used to raise TypeError
+    type_map = get_type_map(jdata) or None
+    train_data = get_data(jdata["training"]["training_data"], max_rcut, type_map, None)
+    train_data.get_batch()
+    data_ntypes = train_data.get_ntypes()
+    map_ntypes = len(type_map) if type_map is not None else data_ntypes
+    ntypes = max(map_ntypes, data_ntypes)
+
+    neistat = NeighborStat(ntypes, rcut)
+    # only the neighbor numbers are needed here; the minimal distance is dropped
+    _, max_nbor_size = neistat.get_stat(train_data)
+    return max_nbor_size
+
+
+def parse_auto_sel(sel):
+    """Tell whether `sel` asks for an automatically determined selection.
+
+    True only for a string whose first ':'-separated word is 'auto',
+    e.g. "auto" or "auto:1.1"; every non-string value gives False.
+    """
+    # isinstance, unlike an exact type comparison, also accepts str subclasses
+    return isinstance(sel, str) and sel.split(':')[0] == 'auto'
+
+
+def parse_auto_sel_ratio(sel):
+    """Return the ratio encoded in an auto `sel` string ("auto" or "auto:ratio").
+
+    A bare "auto" gives 1.1; any other layout raises RuntimeError.
+    """
+    fields = sel.split(':') if parse_auto_sel(sel) else []
+    if len(fields) == 1:
+        return 1.1
+    if len(fields) == 2:
+        return float(fields[1])
+    # either not an auto string at all, or more than one ':' separator
+    raise RuntimeError(f'invalid auto sel format {sel}')
+
+
+def wrap_up_4(xx):
+    return -(-int(xx) // 4) * 4  # truncate to int, then round up to a multiple of 4
+
+
+def update_one_sel(jdata, descriptor):
+    """Replace an "auto[:ratio]" `sel` of `descriptor` by explicit numbers, in place."""
+    if parse_auto_sel(descriptor['sel']):
+        scale = parse_auto_sel_ratio(descriptor['sel'])
+        nbor_counts = get_sel(jdata, descriptor['rcut'])
+        descriptor['sel'] = [int(wrap_up_4(cnt * scale)) for cnt in nbor_counts]
+    return descriptor
+
+
+def update_sel(jdata):
+    """Resolve every automatic `sel` in the descriptor section of `jdata`."""
+    descrpt_data = jdata['model']['descriptor']
+    if descrpt_data['type'] != 'hybrid':
+        jdata['model']['descriptor'] = update_one_sel(jdata, descrpt_data)
+        return jdata
+    for idx, sub in enumerate(descrpt_data['list']):  # sub-descriptors, updated in place
+        descrpt_data['list'][idx] = update_one_sel(jdata, sub)
+    return jdata
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
@@ -58,7 +58,9 @@ def descrpt_local_frame_args ():
 
 
 def descrpt_se_a_args():
-    doc_sel = 'A list of integers. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.'
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
     doc_rcut = 'The cut-off radius.'
     doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`'
     doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
@@ -73,7 +75,7 @@ def descrpt_se_a_args():
     doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used'
 
     return [
-        Argument("sel", list, optional = False, doc = doc_sel),
+        Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel),
         Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut),
         Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth),
         Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),
@@ -90,7 +92,9 @@ def descrpt_se_a_args():
 
 
 def descrpt_se_t_args():
-    doc_sel = 'A list of integers. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.'
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
     doc_rcut = 'The cut-off radius.'
     doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`'
     doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
@@ -102,7 +106,7 @@ def descrpt_se_t_args():
     doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used'
 
     return [
-        Argument("sel", list, optional = False, doc = doc_sel),
+        Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel),
         Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut),
         Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth),
         Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),
@@ -129,7 +133,9 @@ def descrpt_se_a_tpe_args():
 
 
 def descrpt_se_r_args():
-    doc_sel = 'A list of integers. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.'
+    doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\
+    - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\
+    - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".'
     doc_rcut = 'The cut-off radius.'
     doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`'
     doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.'
@@ -143,7 +149,7 @@ def descrpt_se_r_args():
     doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used'
 
     return [
-        Argument("sel", list, optional = False, doc = doc_sel),
+        Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel),
         Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut),
         Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth),
         Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron),