feat(pt): consistent "frozen" model (#3450)

njzjz · web-flow · commit da9b526f34f3 · 2024-03-12T09:26:49.000Z
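This PR is based on #3449, because its test needs #3449 in order to pass. It adds a `frozen` model to the PyTorch (PT) backend, consistent with the existing TensorFlow (TF) one. The `frozen` model of both backends now accepts a model file in any supported format: a file that is not in the backend's native format (`.pth` for PT, `.pb` for TF) is first converted with `convert_backend`. --------- Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>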
diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py
@@ -230,6 +230,10 @@ def deserialize(cls, data: dict) -> "NativeLayer":
             variables.get("b", None),
             variables.get("idt", None),
         )
+        if obj.b is not None:
+            obj.b = obj.b.ravel()
+        if obj.idt is not None:
+            obj.idt = obj.idt.ravel()
         obj.check_shape_consistency()
         return obj
 
diff --git a/deepmd/pt/model/model/__init__.py b/deepmd/pt/model/model/__init__.py
@@ -37,6 +37,9 @@
 from .ener_model import (
     EnergyModel,
 )
+from .frozen import (
+    FrozenModel,
+)
 from .make_hessian_model import (
     make_hessian_model,
 )
@@ -173,6 +176,7 @@ def get_model(model_params):
     "get_model",
     "DPModel",
     "EnergyModel",
+    "FrozenModel",
     "SpinModel",
     "SpinEnergyModel",
     "DPZBLModel",
diff --git a/deepmd/pt/model/model/frozen.py b/deepmd/pt/model/model/frozen.py
@@ -0,0 +1,193 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import tempfile
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
+
+import torch
+
+from deepmd.dpmodel.output_def import (
+    FittingOutputDef,
+)
+from deepmd.entrypoints.convert_backend import (
+    convert_backend,
+)
+from deepmd.pt.model.model.model import (
+    BaseModel,
+)
+
+
+@BaseModel.register("frozen")
+class FrozenModel(BaseModel):
+    """Load model from a frozen model, which cannot be trained.
+
+    A file ending in ``.pth`` is loaded directly with ``torch.jit.load``;
+    any other file is first converted to ``.pth`` with ``convert_backend``.
+    The getters and ``forward`` delegate to the loaded module.
+
+    Parameters
+    ----------
+    model_file : str
+        The path to the frozen model
+    """
+
+    def __init__(self, model_file: str, **kwargs):
+        super().__init__(**kwargs)
+        self.model_file = model_file
+        if model_file.endswith(".pth"):
+            self.model = torch.jit.load(model_file)
+        else:
+            # try to convert from other formats
+            # A temporary directory is used rather than NamedTemporaryFile:
+            # a named temporary file that is still open cannot be reopened
+            # by name on every platform (e.g. Windows), yet both the
+            # converter and torch.jit.load are given the file by path.
+            with tempfile.TemporaryDirectory() as tmpdir:
+                converted = os.path.join(tmpdir, "model.pth")
+                convert_backend(INPUT=model_file, OUTPUT=converted)
+                self.model = torch.jit.load(converted)
+
+    @torch.jit.export
+    def fitting_output_def(self) -> FittingOutputDef:
+        """Get the output def of developer implemented atomic models."""
+        return self.model.fitting_output_def()
+
+    @torch.jit.export
+    def get_rcut(self) -> float:
+        """Get the cut-off radius."""
+        return self.model.get_rcut()
+
+    @torch.jit.export
+    def get_type_map(self) -> List[str]:
+        """Get the type map."""
+        return self.model.get_type_map()
+
+    @torch.jit.export
+    def get_sel(self) -> List[int]:
+        """Returns the number of selected atoms for each type."""
+        return self.model.get_sel()
+
+    @torch.jit.export
+    def get_dim_fparam(self) -> int:
+        """Get the number (dimension) of frame parameters of this atomic model."""
+        return self.model.get_dim_fparam()
+
+    @torch.jit.export
+    def get_dim_aparam(self) -> int:
+        """Get the number (dimension) of atomic parameters of this atomic model."""
+        return self.model.get_dim_aparam()
+
+    @torch.jit.export
+    def get_sel_type(self) -> List[int]:
+        """Get the selected atom types of this model.
+
+        Only atoms with selected atom types have atomic contribution
+        to the result of the model.
+        If returning an empty list, all atom types are selected.
+        """
+        return self.model.get_sel_type()
+
+    @torch.jit.export
+    def is_aparam_nall(self) -> bool:
+        """Check whether the shape of atomic parameters is (nframes, nall, ndim).
+
+        If False, the shape is (nframes, nloc, ndim).
+        """
+        return self.model.is_aparam_nall()
+
+    @torch.jit.export
+    def mixed_types(self) -> bool:
+        """If true, the model
+        1. assumes total number of atoms aligned across frames;
+        2. uses a neighbor list that does not distinguish different atomic types.
+
+        If false, the model
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. uses a neighbor list that distinguishes different atomic types.
+
+        """
+        return self.model.mixed_types()
+
+    @torch.jit.export
+    def forward(
+        self,
+        coord,
+        atype,
+        box: Optional[torch.Tensor] = None,
+        fparam: Optional[torch.Tensor] = None,
+        aparam: Optional[torch.Tensor] = None,
+        do_atomic_virial: bool = False,
+    ) -> Dict[str, torch.Tensor]:
+        """Run the loaded model; all arguments are passed through unchanged."""
+        return self.model.forward(
+            coord,
+            atype,
+            box=box,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+        )
+
+    @torch.jit.export
+    def get_model_def_script(self) -> str:
+        """Get the model definition script."""
+        # try to use the original script instead of "frozen model"
+        # Note: this cannot change the script of the parent model
+        # it may still try to load hard-coded filename, which might
+        # be a problem
+        return self.model.get_model_def_script()
+
+    def serialize(self) -> dict:
+        """Serialize by rebuilding the original model from its definition script.
+
+        The script stored in the frozen model is parsed as JSON, passed to
+        ``get_model``, and the frozen model's state dict is loaded into the
+        result, whose own ``serialize`` output is returned.
+        """
+        # local import: deepmd.pt.model.model itself imports this module
+        from deepmd.pt.model.model import (
+            get_model,
+        )
+
+        # try to recover the original model
+        model_def_script = json.loads(self.get_model_def_script())
+        model = get_model(model_def_script)
+        model.load_state_dict(self.model.state_dict())
+        return model.serialize()
+
+    @classmethod
+    def deserialize(cls, data: dict):
+        """Always raises ``RuntimeError``; ``data`` is not used."""
+        raise RuntimeError("Should not touch here.")
+
+    @torch.jit.export
+    def get_nnei(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        return self.model.get_nnei()
+
+    @torch.jit.export
+    def get_nsel(self) -> int:
+        """Returns the total number of selected neighboring atoms in the cut-off radius."""
+        return self.model.get_nsel()
+
+    @classmethod
+    def update_sel(cls, global_jdata: dict, local_jdata: dict):
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        global_jdata : dict
+            The global data, containing the training section
+        local_jdata : dict
+            The local data refer to the current class
+        """
+        return local_jdata
+
+    @torch.jit.export
+    def model_output_type(self) -> str:
+        """Get the output type for the model."""
+        return self.model.model_output_type()
diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py
@@ -868,7 +868,7 @@ def deserialize(cls, data: dict, suffix: str = ""):
             data["nets"],
             suffix=suffix,
         )
-        fitting.bias_atom_e = data["@variables"]["bias_atom_e"]
+        fitting.bias_atom_e = data["@variables"]["bias_atom_e"].ravel()
         if fitting.numb_fparam > 0:
             fitting.fparam_avg = data["@variables"]["fparam_avg"]
             fitting.fparam_inv_std = data["@variables"]["fparam_inv_std"]
@@ -922,7 +922,7 @@ def serialize(self, suffix: str = "") -> dict:
                 suffix=suffix,
             ),
             "@variables": {
-                "bias_atom_e": self.bias_atom_e,
+                "bias_atom_e": self.bias_atom_e.reshape(-1, 1),
                 "fparam_avg": self.fparam_avg,
                 "fparam_inv_std": self.fparam_inv_std,
                 "aparam_avg": self.aparam_avg,
diff --git a/deepmd/tf/model/frozen.py b/deepmd/tf/model/frozen.py
@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import tempfile
 from enum import (
     Enum,
 )
@@ -7,6 +10,9 @@
     Union,
 )
 
+from deepmd.entrypoints.convert_backend import (
+    convert_backend,
+)
 from deepmd.infer.deep_pot import (
     DeepPot,
 )
@@ -24,6 +30,10 @@
 from deepmd.tf.loss.loss import (
     Loss,
 )
+from deepmd.tf.utils.graph import (
+    get_tensor_by_name_from_graph,
+    load_graph_def,
+)
 
 from .model import (
     Model,
@@ -43,7 +53,14 @@ class FrozenModel(Model):
     def __init__(self, model_file: str, **kwargs):
         super().__init__(**kwargs)
         self.model_file = model_file
-        self.model = DeepPotential(model_file)
+        if not model_file.endswith(".pb"):
+            # try to convert from other formats
+            with tempfile.NamedTemporaryFile(
+                suffix=".pb", dir=os.curdir, delete=False
+            ) as f:
+                convert_backend(INPUT=model_file, OUTPUT=f.name)
+                self.model_file = f.name
+        self.model = DeepPotential(self.model_file)
         if isinstance(self.model, DeepPot):
             self.model_type = "ener"
         else:
@@ -228,3 +245,19 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict):
         """
         # we don't know how to compress it, so no neighbor statistics here
         return local_jdata
+
+    def serialize(self, suffix: str = "") -> dict:
+        # try to recover the original model
+        # the current graph contains a prefix "load",
+        # so it cannot be used to recover the original model
+        graph, graph_def = load_graph_def(self.model_file)
+        t_jdata = get_tensor_by_name_from_graph(graph, "train_attr/training_script")
+        jdata = json.loads(t_jdata)
+        model = Model(**jdata["model"])
+        # important! must be called before serialize
+        model.init_variables(graph=graph, graph_def=graph_def)
+        return model.serialize()  # NOTE(review): `suffix` is not forwarded here - confirm intended
+
+    @classmethod
+    def deserialize(cls, data: dict, suffix: str = ""):
+        raise RuntimeError("Should not touch here.")  # always raises; `data` and `suffix` are unused
diff --git a/deepmd/tf/model/model.py b/deepmd/tf/model/model.py
@@ -566,7 +566,8 @@ def deserialize(cls, data: dict, suffix: str = "") -> "Model":
         """
         if cls is Model:
             return Model.get_class_by_type(data.get("type", "standard")).deserialize(
-                data
+                data,
+                suffix=suffix,
             )
         raise NotImplementedError("Not implemented in class %s" % cls.__name__)
 
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
@@ -1461,7 +1461,6 @@ def frozen_model_args() -> Argument:
         [
             Argument("model_file", str, optional=False, doc=doc_model_file),
         ],
-        doc=doc_only_tf_supported,
     )
     return ca
 
diff --git a/source/tests/consistent/model/test_frozen.py b/source/tests/consistent/model/test_frozen.py
diff --git a/source/tests/infer/deeppot.dp b/source/tests/infer/deeppot.dp

Original file line number	Diff line number	Diff line change
`@@ -566,7 +566,8 @@ def deserialize(cls, data: dict, suffix: str = "") -> "Model":`
`566`	`566`	`"""`
`567`	`567`	`if cls is Model:`
`568`	`568`	`return Model.get_class_by_type(data.get("type", "standard")).deserialize(`
`569`		`- data`
	`569`	`+ data,`
	`570`	`+ suffix=suffix,`
`570`	`571`	`)`
`571`	`572`	`raise NotImplementedError("Not implemented in class %s" % cls.__name__)`
`572`	`573`
Original file line number	Diff line number	Diff line change
`@@ -1461,7 +1461,6 @@ def frozen_model_args() -> Argument:`
`1461`	`1461`	`[`
`1462`	`1462`	`Argument("model_file", str, optional=False, doc=doc_model_file),`
`1463`	`1463`	`],`
`1464`		`- doc=doc_only_tf_supported,`
`1465`	`1464`	`)`
`1466`	`1465`	`return ca`
`1467`	`1466`