Skip to content

Commit 386f3b2

Browse files
authored
feat(pt): add op library (#3620)
Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
1 parent 23f67a1 commit 386f3b2

12 files changed

Lines changed: 215 additions & 6 deletions

File tree

backend/find_pytorch.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# SPDX-License-Identifier: LGPL-3.0-or-later
2+
import os
3+
import site
4+
from functools import (
5+
lru_cache,
6+
)
7+
from importlib.machinery import (
8+
FileFinder,
9+
)
10+
from importlib.util import (
11+
find_spec,
12+
)
13+
from pathlib import (
14+
Path,
15+
)
16+
from sysconfig import (
17+
get_path,
18+
)
19+
from typing import (
20+
Optional,
21+
)
22+
23+
24+
@lru_cache
def find_pytorch() -> Optional[str]:
    """Find the PyTorch library.

    Tries to find PyTorch in the order of:

    1. Environment variable `PYTORCH_ROOT` if set
    2. The current Python environment.
    3. user site packages directory if enabled
    4. system site packages directory (purelib)

    Considering the default PyTorch wheel still uses the pre-CXX11 ABI,
    we cannot install it automatically.

    Returns
    -------
    str, optional
        PyTorch library path if found.
    """
    # Building against PyTorch is opt-in; any value other than "0" enables it.
    if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0":
        return None
    pt_spec = None

    if os.environ.get("PYTORCH_ROOT") is not None:
        # PYTORCH_ROOT is expected to point at the torch package directory,
        # so search its parent with FileFinder to resolve the "torch" spec.
        site_packages = Path(os.environ.get("PYTORCH_ROOT")).parent.absolute()
        pt_spec = FileFinder(str(site_packages)).find_spec("torch")

    # get pytorch spec from the current Python environment
    # note: isolated build will not work for backend
    if pt_spec is None:
        pt_spec = find_spec("torch")

    if pt_spec is None and site.ENABLE_USER_SITE:
        # first search PyTorch in user site-packages before global site-packages
        site_packages = site.getusersitepackages()
        if site_packages:
            pt_spec = FileFinder(site_packages).find_spec("torch")

    if pt_spec is None:
        # purelib gets site-packages path
        site_packages = get_path("purelib")
        if site_packages:
            pt_spec = FileFinder(site_packages).find_spec("torch")

    # get install dir from spec
    try:
        pt_install_dir = pt_spec.submodule_search_locations[0]  # type: ignore
        # AttributeError if pt_spec is None
        # TypeError if submodule_search_locations is None
        # IndexError if submodule_search_locations is an empty list
    except (AttributeError, TypeError, IndexError):
        pt_install_dir = None
    return pt_install_dir

backend/read_env.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
Version,
1414
)
1515

16+
from .find_pytorch import (
17+
find_pytorch,
18+
)
1619
from .find_tensorflow import (
1720
find_tensorflow,
1821
get_tf_version,
@@ -99,6 +102,19 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
99102
cmake_args.append("-DENABLE_TENSORFLOW=OFF")
100103
tf_version = None
101104

105+
if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1":
106+
pt_install_dir = find_pytorch()
107+
if pt_install_dir is None:
108+
raise RuntimeError("Cannot find installed PyTorch.")
109+
cmake_args.extend(
110+
[
111+
"-DENABLE_PYTORCH=ON",
112+
f"-DCMAKE_PREFIX_PATH={pt_install_dir}",
113+
]
114+
)
115+
else:
116+
cmake_args.append("-DENABLE_PYTORCH=OFF")
117+
102118
cmake_args = [
103119
"-DBUILD_PY_IF:BOOL=TRUE",
104120
*cmake_args,

deepmd/pt/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,10 @@
11
# SPDX-License-Identifier: LGPL-3.0-or-later
2+
3+
# import customized OPs globally
4+
from deepmd.pt.cxx_op import (
5+
ENABLE_CUSTOMIZED_OP,
6+
)
7+
8+
__all__ = [
9+
"ENABLE_CUSTOMIZED_OP",
10+
]

deepmd/pt/cxx_op.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# SPDX-License-Identifier: LGPL-3.0-or-later
2+
import platform
3+
4+
import torch
5+
6+
from deepmd.env import (
7+
SHARED_LIB_DIR,
8+
)
9+
10+
11+
def load_library(module_name: str) -> bool:
    """Load the customized OP library into PyTorch.

    Parameters
    ----------
    module_name : str
        Name of the module

    Returns
    -------
    bool
        Whether the library is loaded successfully
    """
    # Shared-library naming differs per platform: "<name>.dll" on Windows,
    # "lib<name>.so" elsewhere.
    # NOTE(review): macOS would conventionally use "lib<name>.dylib"; only
    # the Windows and ELF-style names are handled here — confirm whether
    # macOS is expected to fall through to the ".so" branch.
    if platform.system() == "Windows":
        ext = ".dll"
        prefix = ""
    else:
        ext = ".so"
        prefix = "lib"

    module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve()

    if module_file.is_file():
        # torch.ops.load_library documents a str path argument; pass str
        # explicitly rather than relying on Path being accepted by every
        # torch version.
        torch.ops.load_library(str(module_file))
        return True
    return False


# Load the library at import time so the customized OPs are registered
# globally; the flag records whether the library was found.
ENABLE_CUSTOMIZED_OP = load_library("deepmd_op_pt")

__all__ = [
    "ENABLE_CUSTOMIZED_OP",
]

deepmd/pt/entrypoints/main.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
from deepmd.main import (
3333
parse_args,
3434
)
35+
from deepmd.pt.cxx_op import (
36+
ENABLE_CUSTOMIZED_OP,
37+
)
3538
from deepmd.pt.infer import (
3639
inference,
3740
)
@@ -224,6 +227,7 @@ def get_backend_info(self) -> dict:
224227
return {
225228
"Backend": "PyTorch",
226229
"PT ver": f"v{torch.__version__}-g{torch.version.git_version[:11]}",
230+
"Enable custom OP": ENABLE_CUSTOMIZED_OP,
227231
}
228232

229233

doc/install/install-from-source.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,16 @@ Note that TensorFlow may have specific requirements for the compiler version to
118118

119119
:::
120120

121+
:::{tab-item} PyTorch {{ pytorch_icon }}
122+
123+
You can set the environment variable `export DP_ENABLE_PYTORCH=1` to enable customized C++ OPs in the PyTorch backend.
124+
Note that PyTorch may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by PyTorch.
125+
126+
The customized C++ OPs are not enabled by default because the TensorFlow and PyTorch packages from PyPI use different `_GLIBCXX_USE_CXX11_ABI` flags.
127+
We recommend conda-forge packages in this case.
128+
129+
:::
130+
121131
::::
122132

123133
Execute
@@ -135,6 +145,7 @@ One may set the following environment variables before executing `pip`:
135145
| CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
136146
| ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
137147
| DP_ENABLE_TENSORFLOW | 0, 1 | 1 | {{ tensorflow_icon }} Enable the TensorFlow backend. |
148+
| DP_ENABLE_PYTORCH | 0, 1 | 0 | {{ pytorch_icon }} Enable customized C++ OPs for the PyTorch backend. PyTorch can still run without customized C++ OPs, but features will be limited. |
138149
| TENSORFLOW_ROOT | Path | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against. |
139150
| DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
140151
| CMAKE_ARGS | str | - | Additional CMake arguments |

source/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,9 @@ if(NOT DEEPMD_C_ROOT)
305305
if(ENABLE_TENSORFLOW)
306306
add_subdirectory(op/)
307307
endif()
308+
if(ENABLE_PYTORCH)
309+
add_subdirectory(op/pt/)
310+
endif()
308311
add_subdirectory(lib/)
309312
endif()
310313
if(BUILD_PY_IF)

source/api_cc/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ set_target_properties(
3636
${libname}
3737
PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
3838
INSTALL_RPATH_USE_LINK_PATH TRUE
39-
BUILD_RPATH "$ORIGIN/../op")
39+
BUILD_RPATH "$ORIGIN/../op;$ORIGIN/../op/pt")
4040
target_compile_definitions(${libname} PRIVATE TF_PRIVATE)
4141
if(CMAKE_TESTING_ENABLED)
4242
target_link_libraries(${libname} PRIVATE coverage_config)

source/api_cc/src/DeepPotPT.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ void DeepPotPT::init(const std::string& model,
3737
<< std::endl;
3838
return;
3939
}
40+
deepmd::load_op_library();
4041
int gpu_num = torch::cuda::device_count();
4142
if (gpu_num > 0) {
4243
gpu_id = gpu_rank % gpu_num;

source/api_cc/src/common.cc

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -377,21 +377,27 @@ void deepmd::get_env_nthreads(int& num_intra_nthreads,
377377
}
378378
}
379379

380-
// Load one customized OP shared library given its base name, using the
// platform's library naming convention ("<name>.dll" on Windows,
// "lib<name>.so" elsewhere). Throws deepmd::deepmd_exception when the
// dynamic loader cannot resolve the library on its search path.
static inline void _load_single_op_library(std::string library_name) {
#if defined(_WIN32)
  std::string dso_path = library_name + ".dll";
  void* dso_handle = LoadLibrary(dso_path.c_str());
#else
  std::string dso_path = "lib" + library_name + ".so";
  void* dso_handle = dlopen(dso_path.c_str(), RTLD_NOW | RTLD_LOCAL);
#endif
  if (!dso_handle) {
    throw deepmd::deepmd_exception(
        dso_path +
        " is not found! You can add the library directory to LD_LIBRARY_PATH");
  }
}

// Load every customized OP library enabled at build time: the TensorFlow
// OP library and/or the PyTorch OP library, depending on the build flags.
void deepmd::load_op_library() {
#ifdef BUILD_TENSORFLOW
  _load_single_op_library("deepmd_op");
#endif
#ifdef BUILD_PYTORCH
  _load_single_op_library("deepmd_op_pt");
#endif
}
397403

0 commit comments

Comments
 (0)