7 changes: 7 additions & 0 deletions .github/workflows/ci.yaml
@@ -33,9 +33,16 @@ jobs:
yes | python nnsmith/cli/model_gen.py model.type=torch mgen.method=symbolic-cinit mgen.rank_choices="[4]" mgen.dtype_choices="[f32]" mgen.include="[core.NCHWConv2d, core.ReLU]" mgen.patch_requires=./tests/mock/requires_patch.py backend.type=torchjit
- name: Test ONNX + ONNXRuntime
run: |
pytest tests/onnxruntime
yes | python nnsmith/cli/model_gen.py model.type=onnx mgen.method=symbolic
yes | python nnsmith/cli/model_gen.py model.type=onnx backend.type=onnxruntime mgen.method=concolic
python nnsmith/cli/model_exec.py model.type=onnx backend.type=onnxruntime model.path=nnsmith_output/model.onnx
- name: Test ONNX + TVM
run: |
pytest tests/tvm
- name: Test ONNX + TRT
run: |
pytest tests/tensorrt
- name: Test TensorFlow
run: |
pip install -r requirements/sys/tensorflow.txt --pre --upgrade
17 changes: 10 additions & 7 deletions doc/CONTRIBUTING.md
@@ -37,7 +37,7 @@ Oftentimes not, rare cases yes (that's why it is suggested to submit an issue fo
**S-sized contributions** are oftentimes easy-to-accept, including bug/typo fixes, CI improvements, test-case improvements, etc.
as long as it is beneficial and satisfies the properties in the "General coding guidance" section.

**M-sized contributions** such as extending new frontends/backends/fuzzing strategies/etc. are welcome as well
**M-sized contributions** such as extending new front-ends/backends/fuzzing strategies/etc. are welcome as well
-- as long as it shows an edge in improvements.
However, for maintainability, it could be moved to the temporary "contrib" folder if it is non-trivial/unclear for being well-maintained.
For example, let's say we supported backend "X" in the "contrib" folder and started to submitting bug reports to the "X" community.
@@ -56,7 +56,7 @@ as long as we agree on that the benefits (over the efforts) are unquestionable.

### `pre-commit`

[`pre-commit`](https://pre-commit.com/) is a convenient tool to check and format your code while commiting codes.
[`pre-commit`](https://pre-commit.com/) is a convenient tool to check and format your code while committing codes.

To set-up pre-commit:

@@ -65,11 +65,11 @@ pip install -r requirements/dev.txt
pre-commit install
```

Now it will run checking and auto-formating while you commit:
Now it will run checking and auto-formatting while you commit:

```shell
git commit ...
# if [NOTHING HAPPENDS], you are good to go;
# if [NOTHING HAPPENS], you are good to go;
# if [IT FAILS], the auto-formatting is automatically applied;
# you just need to check, `git add` these changes and re-commit.
```
@@ -84,10 +84,13 @@ If appliable (e.g., adding a new backend), add a few tests to validate your impl
To run the Python tests:

```shell
# env of torch & tf will conflict so split their unit tests.
# env of torch & tf (and others) will conflict so split their unit tests.
pytest tests/core -s
pytest tests/torch -s
pytest tests/tensorflow -s
pytest tests/onnxruntime -s
pytest tests/tvm -s
pytest tests/tensorrt -s
```

### Simple code
@@ -96,8 +99,8 @@ pytest tests/tensorflow -s

Maintaining code is hard, esp. when
(i) initial code owners are not available; and
(ii) the code is too complicated to be understand/modify.
As a result, contributors are recommand to write simple code:
(ii) the code is too complicated to be understood/modified.
As a result, contributors are recommended to write simple code:
(i) easy-to-understand;
(ii) well-organized and easy-to-extend;
(iii) well-documented if the concept is tricky;
20 changes: 11 additions & 9 deletions nnsmith/materialize/__init__.py
@@ -185,23 +185,25 @@ def init(name, backend_target=None) -> Type["Model"]:
)

if name == "torch":
from nnsmith.materialize.torch import TorchModel
from nnsmith.materialize.torch import TorchModelCPU, TorchModelCUDA

# PyTorch CPU - GPU implementation are quite the same.
return TorchModel
if backend_target == "gpu" or backend_target == "cuda":
return TorchModelCUDA
return TorchModelCPU
elif name == "onnx":
# device agnoistic
from nnsmith.materialize.onnx import ONNXModel
from nnsmith.materialize.onnx import ONNXModelCPU, ONNXModelCUDA

return ONNXModel
if backend_target == "gpu" or backend_target == "cuda":
return ONNXModelCUDA
return ONNXModelCPU
elif name == "tensorflow":
from nnsmith.materialize.tensorflow import TFModelCPU, TFModelGPU
from nnsmith.materialize.tensorflow import TFModelCPU, TFModelCUDA

if backend_target == "gpu" or backend_target == "cuda":
# XLA must align device location of eager mode execution.
return TFModelGPU
else:
return TFModelCPU
return TFModelCUDA
return TFModelCPU

raise ValueError(
f"Unsupported: ModelType={name} for BackendTarget={backend_target}"
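
The dispatch this hunk introduces — mapping a model type plus an optional backend target to a concrete, device-bound model class — can be sketched as follows. This is a simplified, dependency-free sketch: the empty classes are hypothetical stand-ins for the real imports from `nnsmith.materialize.torch` and `nnsmith.materialize.onnx`, and the TensorFlow branch is omitted for brevity.

```python
# Simplified sketch of the name/backend_target -> model-class dispatch.
# The empty classes below are hypothetical placeholders for the real
# TorchModelCPU/TorchModelCUDA/ONNXModelCPU/ONNXModelCUDA classes.

class TorchModelCPU: ...
class TorchModelCUDA: ...
class ONNXModelCPU: ...
class ONNXModelCUDA: ...

def init(name, backend_target=None):
    """Pick the concrete model class for the requested device target."""
    if name == "torch":
        if backend_target in ("gpu", "cuda"):
            return TorchModelCUDA
        return TorchModelCPU
    elif name == "onnx":
        if backend_target in ("gpu", "cuda"):
            return ONNXModelCUDA
        return ONNXModelCPU
    raise ValueError(
        f"Unsupported: ModelType={name} for BackendTarget={backend_target}"
    )
```

Returning the class (not an instance) keeps the caller free to construct, load, or subclass it later, which matches how `Model.init` is used elsewhere in the codebase.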
42 changes: 32 additions & 10 deletions nnsmith/materialize/onnx/__init__.py
@@ -3,7 +3,7 @@
import warnings
from io import BytesIO
from os import PathLike
from typing import Dict, List, Optional, Tuple, Type, Union
from typing import Dict, Generic, List, Optional, Tuple, Type, TypeVar, Union

import onnx
import onnx.checker
@@ -17,7 +17,12 @@
from nnsmith.abstract.op import AbsTensor
from nnsmith.gir import GraphIR
from nnsmith.macro import onnx2external_data_dir
from nnsmith.materialize.torch import SymbolNet, TorchModel
from nnsmith.materialize.torch import (
SymbolNet,
TorchModel,
TorchModelCPU,
TorchModelCUDA,
)


def create_deadcode_onnx(onnx_model: onnx.ModelProto, name_mask) -> onnx.ModelProto:
@@ -52,7 +57,10 @@ def torch2onnx(
# Dummy inputs
if dummy_inputs is None:
dummy_inputs = [
torch.ones(size=svar.shape).uniform_(1, 2).to(dtype=svar.dtype.torch())
torch.ones(size=svar.shape)
.to(model.device)
.uniform_(1, 2)
.to(dtype=svar.dtype.torch())
for _, svar in model.input_like.items()
]

@@ -67,6 +75,7 @@
"default" if verbose else "ignore", category=UserWarning, append=True
)
model.eval()
# TODO: How to remove the annoying "Diagnostic Run ..." message?
torch.onnx.export(
model,
tuple(dummy_inputs),
@@ -145,6 +154,12 @@ def get_onnx_proto(model: Union[onnx.ModelProto, str]) -> onnx.ModelProto:


class ONNXModel(TorchModel):
PTType: Type[TorchModel] = None

@classmethod
def device(cls) -> torch.device:
return cls.PTType.device()

def __init__(self, with_torch=True):
"""Initialize a ONNXModel.

@@ -191,8 +206,7 @@ def _dce_prob() -> float: # \in [0, 1]
@classmethod
def from_gir(cls: Type["ONNXModel"], gir: GraphIR, **kwargs) -> "ONNXModel":
ret = cls()
ret.torch_model = SymbolNet(gir, **kwargs)

ret.torch_model = cls.PTType.from_gir(gir, **kwargs).torch_model
ret.full_input_like = ret.torch_model.input_like
ret.full_output_like = ret.torch_model.output_like
ret.masked_output_like = ret.full_output_like
@@ -203,7 +217,7 @@ def from_gir(cls: Type["ONNXModel"], gir: GraphIR, **kwargs) -> "ONNXModel":
return ret

def refine_weights(self) -> None:
TorchModel.refine_weights(self)
self.PTType.refine_weights(self)
# weights are set. let's save the model.
self.onnx_model = self.get_onnx_from_torch()
if set(self.masked_output_like.keys()) != set(self.full_output_like):
@@ -221,8 +235,8 @@ def output_like(self) -> Dict[str, AbsTensor]:

def dump(self, path: PathLike) -> None:
if self.with_torch:
TorchModel.dump(
self, path.replace(self.name_suffix(), TorchModel.name_suffix())
self.PTType.dump(
self, path.replace(self.name_suffix(), self.PTType.name_suffix())
)
if self.onnx_model is None:
self.onnx_model = self.get_onnx_from_torch()
@@ -235,7 +249,7 @@ def load(cls, path: PathLike) -> "ONNXModel":
ret = cls()
ret.onnx_model = onnx.load(path)

torch_path = path.replace(cls.name_suffix(), TorchModel.name_suffix())
torch_path = path.replace(cls.name_suffix(), cls.PTType.name_suffix())

ret.with_torch = False
full_input_like, full_output_like = analyze_onnx_io(ret.onnx_model)
@@ -246,7 +260,7 @@ def load(cls, path: PathLike) -> "ONNXModel":
# FIXME: missing key(s) in state_dict: "mlist.0.data", "mlist.1.data".
if os.path.exists(torch_path):
ret.with_torch = True
ret.torch_model = TorchModel.load(torch_path)
ret.torch_model = cls.PTType.load(torch_path)
ret.full_input_like = ret.torch_model.input_like
ret.full_output_like = ret.torch_model.output_like

@@ -278,3 +292,11 @@ def name_suffix() -> str:
@classmethod
def skip_dtypes(cls) -> List[DType]:
return DTYPE_GEN_COMPLEX


class ONNXModelCPU(ONNXModel):
PTType = TorchModelCPU


class ONNXModelCUDA(ONNXModel):
PTType = TorchModelCUDA
2 changes: 1 addition & 1 deletion nnsmith/materialize/tensorflow/__init__.py
@@ -210,7 +210,7 @@ def device(self) -> tf.device:
return tf.device(tf.config.list_logical_devices("CPU")[0].name)


class TFModelGPU(TFModel):
class TFModelCUDA(TFModel):
@property
def device(self) -> tf.device:
gpus = tf.config.list_logical_devices("GPU")
28 changes: 24 additions & 4 deletions nnsmith/materialize/torch/__init__.py
@@ -1,4 +1,5 @@
import pickle
from abc import ABC, abstractmethod
from os import PathLike
from typing import Dict, List, Type

@@ -13,20 +14,25 @@
from nnsmith.util import register_seed_setter


class TorchModel(Model):
class TorchModel(Model, ABC):
def __init__(self) -> None:
super().__init__()
self.torch_model: SymbolNet = None
self.sat_inputs = None

@classmethod
@abstractmethod
def device(cls) -> torch.device:
pass

@property
def version(self) -> str:
return torch.__version__

@classmethod
def from_gir(cls: Type["TorchModel"], ir: GraphIR, **kwargs) -> "TorchModel":
ret = cls()
ret.torch_model = SymbolNet(ir, **kwargs)
ret.torch_model = SymbolNet(ir, **kwargs).to(cls.device())
return ret

@staticmethod
@@ -35,7 +41,8 @@ def gir_name() -> str:

def refine_weights(self) -> None:
self.torch_model.enable_proxy_grad()
searcher = PracticalHybridSearch(self.torch_model)
use_cuda = self.device().type == "cuda"
searcher = PracticalHybridSearch(self.torch_model, use_cuda=use_cuda)
# TODO(@ganler): Can we directly get both inputs and outputs?
_, inputs = searcher.search(
max_time_ms=20,
@@ -53,6 +60,7 @@ def make_oracle(self) -> Oracle:
inputs = self.torch_model.get_random_inps()
else:
inputs = self.sat_inputs
inputs = {k: v.to(self.device()) for k, v in inputs.items()}
outputs = self.torch_model.forward(**inputs)

# numpyify
@@ -81,7 +89,7 @@ def load(cls, path: PathLike) -> "TorchModel":
)
with open(gir_path, "rb") as f:
ir = pickle.load(f)
torch_model = SymbolNet(ir)
torch_model = SymbolNet(ir).to(cls.device())
torch_model.load_state_dict(torch.load(path), strict=False)
ret.torch_model = torch_model
return ret
@@ -109,3 +117,15 @@ def operators() -> List[Type[AbsOpBase]]:
@staticmethod
def add_seed_setter() -> None:
register_seed_setter("torch", torch.manual_seed, overwrite=True)


class TorchModelCPU(TorchModel):
@classmethod
def device(cls) -> torch.device:
return torch.device("cpu")


class TorchModelCUDA(TorchModel):
@classmethod
def device(cls) -> torch.device:
return torch.device("cuda")
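
The `TorchModelCPU`/`TorchModelCUDA` pair above relies on an abstract `device()` classmethod that shared base-class logic resolves through `cls`. A torch-free sketch of that pattern, with plain strings standing in for `torch.device` and illustrative class names:

```python
from abc import ABC, abstractmethod

class Model(ABC):
    # Subclasses declare where their tensors live; shared helpers such as
    # from_gir() call cls.device() and get the concrete subclass's answer.
    @classmethod
    @abstractmethod
    def device(cls) -> str:
        ...

    @classmethod
    def build(cls) -> str:
        # analogous to TorchModel.from_gir moving SymbolNet to cls.device()
        return f"model on {cls.device()}"

class ModelCPU(Model):
    @classmethod
    def device(cls) -> str:
        return "cpu"

class ModelCUDA(Model):
    @classmethod
    def device(cls) -> str:
        return "cuda"
```

`ModelCUDA.build()` returns `"model on cuda"` without the base class ever hard-coding a device, and instantiating `Model` itself raises `TypeError` because `device` is abstract — the same guarantee the `ABC` change to `TorchModel` provides.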
11 changes: 9 additions & 2 deletions nnsmith/materialize/torch/symbolnet.py
@@ -62,11 +62,9 @@ def __init__(
ir: GraphIR,
record_intermediate=False,
use_gradient=False,
megabyte_lim=__MB_LIM__,
print_grad=0,
):
super(SymbolNet, self).__init__()
self.megabyte_lim = megabyte_lim
self.print_grad = print_grad
# <TorchFunc, <keys -> inputs>, <keys -> outputs>, original op>
self.instructions = []
@@ -78,6 +76,7 @@ def __init__(
self.mlist = nn.ModuleList()
# whether or not to register intermediate tensors as output tensors. Useful (at least) for checking nan
self.record_intermediate = record_intermediate
self._device = None

self.ir = ir

@@ -107,6 +106,14 @@ def __init__(
self.check_intermediate_numeric = False
self.invalid_found_last = None

def to(self, device):
self._device = device
return super().to(device)

@property
def device(self):
return self._device

@property
def input_like(self):
return self.input_map
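
The `to()` override in this hunk just records the destination device so `SymbolNet.device` can be queried later. A dependency-free sketch of the idea, with a hypothetical `ModuleBase` standing in for `torch.nn.Module`:

```python
class ModuleBase:
    # Hypothetical stand-in for torch.nn.Module: .to() moves parameters
    # and returns self for chaining.
    def to(self, device):
        return self

class Net(ModuleBase):
    def __init__(self):
        self._device = None  # unknown until .to() is called

    def to(self, device):
        self._device = device      # remember where we were moved
        return super().to(device)  # let the base class do the real move

    @property
    def device(self):
        return self._device
```

With this, `Net().to("cuda").device` yields `"cuda"`; callers no longer have to thread the device through separately from the module itself.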
File renamed without changes.
@@ -1,10 +1,7 @@
import GPUtil
import pytest

if not GPUtil.getAvailable():
pytest.skip(
"Skipping TensorRT tests due to no GPU detected.", allow_module_level=True
)
has_gpu = len(GPUtil.getGPUs()) > 0

from nnsmith.abstract.dtype import DType
from nnsmith.backends import BackendFactory
@@ -15,6 +12,7 @@
TestCase.__test__ = False # supress PyTest warning


@pytest.mark.skipif(not has_gpu, reason="Skipping TensorRT testing due to no GPU found")
def test_narrow_spec_cache_make_and_reload():
factory = BackendFactory.init("tensorrt", target="cuda", optmax=True)
ONNXModel = Model.init("onnx")
@@ -34,6 +32,7 @@ def test_narrow_spec_cache_make_and_reload():
)


@pytest.mark.skipif(not has_gpu, reason="Skipping TensorRT testing due to no GPU found")
def test_synthesized_onnx_model(tmp_path):
d = tmp_path / "test_trt_onnx"
d.mkdir()
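
The TensorRT test change above replaces a module-level `pytest.skip(..., allow_module_level=True)` with a boolean probe plus per-test `skipif` markers, so the module still imports (and collects) cleanly on GPU-less machines. A minimal sketch of that pattern, with a hard-coded probe standing in for the real `GPUtil.getGPUs()` call:

```python
import pytest

# Hypothetical stand-in for the GPU probe; the real tests use
# has_gpu = len(GPUtil.getGPUs()) > 0
has_gpu = False

@pytest.mark.skipif(not has_gpu, reason="Skipping TensorRT testing due to no GPU found")
def test_needs_gpu():
    # Runs only when a GPU is detected; otherwise reported as skipped,
    # not as a collection-time module skip.
    assert True
```

Compared with the module-level skip, this keeps imports below the probe (`nnsmith.backends`, `nnsmith.materialize`, ...) exercised on every machine and makes each skipped test visible individually in the report.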