This repository was archived by the owner on Jan 25, 2023. It is now read-only.

Commit e31c726

Author: etotmeni (committed)
Parent: 02b504f

    Add semantics 'with context' for gpu and cpu

File tree: 10 files changed, +263 -23 lines

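To make the new semantics concrete: with this commit, an @njit function declared without a target is dispatched per call, following the active dpctl device context. The following is a minimal sketch modeled on the example file added in this commit; it assumes the numba-dppl fork with dpctl available, and the variable names are illustrative.

# Minimal sketch of the 'with context' semantics this commit adds.
import numpy as np
import dpctl
from numba import njit, prange

@njit  # note: no target= keyword, so the dispatcher is chosen per call
def vecadd(a, b, c, N):
    for i in prange(N):
        c[i] = a[i] + b[i]

N = 10
a, b, c = np.ones(N), np.ones(N), np.zeros(N)

# Inside a device context the call is routed to the GPU offload dispatcher:
with dpctl.device_context(dpctl.device_type.gpu):
    vecadd(a, b, c, N)

# Outside any context the same function object uses the plain CPU target:
vecadd(a, b, c, N)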

numba/core/decorators.py

Lines changed: 31 additions & 17 deletions

@@ -147,8 +147,10 @@ def bar(x, y):
     if 'target' in options:
         target = options.pop('target')
         warnings.warn("The 'target' keyword argument is deprecated.", NumbaDeprecationWarning)
-    else:
+    elif '_target' in options:
         target = options.pop('_target', 'cpu')
+    else:
+        target = None
 
     parallel_option = options.get('parallel')
     if isinstance(parallel_option, dict) and parallel_option.get('offload') is True:

@@ -187,22 +189,8 @@ def bar(x, y):
 
 
 def _jit(sigs, locals, target, cache, targetoptions, **dispatcher_args):
-    dispatcher = registry.dispatcher_registry[target]
-
-    def wrapper(func):
-        if extending.is_jitted(func):
-            raise TypeError(
-                "A jit decorator was called on an already jitted function "
-                f"{func}. If trying to access the original python "
-                f"function, use the {func}.py_func attribute."
-            )
-
-        if not inspect.isfunction(func):
-            raise TypeError(
-                "The decorated object is not a function (got type "
-                f"{type(func)})."
-            )
 
+    def wrapper(func, dispatcher):
         if config.ENABLE_CUDASIM and target == 'cuda':
             from numba import cuda
             return cuda.jit(func)

@@ -226,7 +214,33 @@ def wrapper(func):
         disp.disable_compile()
         return disp
 
-    return wrapper
+    def __wrapper(func):
+        if extending.is_jitted(func):
+            raise TypeError(
+                "A jit decorator was called on an already jitted function "
+                f"{func}. If trying to access the original python "
+                f"function, use the {func}.py_func attribute."
+            )
+
+        if not inspect.isfunction(func):
+            raise TypeError(
+                "The decorated object is not a function (got type "
+                f"{type(func)})."
+            )
+
+        if (target == 'npyufunc' or targetoptions.get('no_cpython_wrapper')
+                or sigs or config.DISABLE_JIT or not targetoptions.get('nopython')):
+            target_ = target
+            if target_ is None:
+                target_ = 'cpu'
+            disp = registry.dispatcher_registry[target_]
+            return wrapper(func, disp)
+
+        from numba.dppl.target_dispatcher import TargetDispatcher
+        disp = TargetDispatcher(func, wrapper, target)
+        return disp
+
+    return __wrapper
 
 
 def generated_jit(function=None, target='cpu', cache=False,
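The restructuring above splits _jit decoration into two stages: __wrapper runs at decoration time and binds a dispatcher immediately only when the choice is forced (explicit target, signatures, DISABLE_JIT, and so on); otherwise it returns a TargetDispatcher proxy that later feeds wrapper(func, dispatcher) a concrete dispatcher per call. A standalone sketch of that deferral pattern, with stand-in names rather than Numba's real objects:

# Sketch of the two-stage decoration used above; DISPATCHERS and the
# returned tuples are stand-ins, not Numba's real objects.
DISPATCHERS = {'cpu': 'CPUDispatcher', 'gpu': 'OffloadDispatcher'}

def make_jit(sigs=None, target=None):
    def wrapper(func, dispatcher):
        # Late stage: compile func under a concrete dispatcher.
        return (dispatcher, func)

    def __wrapper(func):
        # Early stage: runs at decoration time. Bind a dispatcher now only
        # when the choice is forced; otherwise defer it to call time.
        if sigs or target is not None:
            return wrapper(func, DISPATCHERS[target or 'cpu'])
        # Mirrors: return TargetDispatcher(func, wrapper, target)
        return ('deferred-proxy', func, wrapper)

    return __wrapper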

numba/core/dispatcher.py

Lines changed: 8 additions & 1 deletion

@@ -673,7 +673,14 @@ def _set_uuid(self, u):
         self._recent.append(self)
 
 
-class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
+import abc
+
+class DispatcherMeta(abc.ABCMeta):
+    def __instancecheck__(self, other):
+        return type(type(other)) == DispatcherMeta
+
+
+class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase, metaclass=DispatcherMeta):
     """
     Implementation of user-facing dispatcher objects (i.e. created using
     the @jit decorator).
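TargetDispatcher (added below in numba/dppl/target_dispatcher.py) is not a Dispatcher subclass, yet existing isinstance(obj, Dispatcher) checks elsewhere in the codebase must keep accepting it. The __instancecheck__ override achieves that: any object whose class was built by DispatcherMeta counts as an instance. A self-contained illustration of the hook, with made-up class names:

import abc

class DispatcherMeta(abc.ABCMeta):
    def __instancecheck__(self, other):
        # An object counts as an instance if its class was built by this
        # metaclass, regardless of actual inheritance.
        return type(type(other)) == DispatcherMeta

class Dispatcher(metaclass=DispatcherMeta):
    pass

class Proxy(metaclass=DispatcherMeta):  # not a Dispatcher subclass
    pass

assert isinstance(Proxy(), Dispatcher)    # passes via the metaclass hook
assert not issubclass(Proxy, Dispatcher)  # yet there is no inheritance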

numba/core/registry.py

Lines changed: 6 additions & 0 deletions

@@ -2,6 +2,7 @@
 
 from numba.core.descriptors import TargetDescriptor
 from numba.core import utils, typing, dispatcher, cpu
+from numba.core.compiler_lock import global_compiler_lock
 
 # -----------------------------------------------------------------------------
 # Default CPU target descriptors

@@ -26,16 +27,19 @@ class CPUTarget(TargetDescriptor):
     _nested = _NestedContext()
 
     @utils.cached_property
+    @global_compiler_lock
    def _toplevel_target_context(self):
         # Lazily-initialized top-level target context, for all threads
         return cpu.CPUContext(self.typing_context)
 
     @utils.cached_property
+    @global_compiler_lock
     def _toplevel_typing_context(self):
         # Lazily-initialized top-level typing context, for all threads
         return typing.Context()
 
     @property
+    @global_compiler_lock
     def target_context(self):
         """
         The target context for CPU targets.

@@ -47,6 +51,7 @@ def target_context(self):
         return self._toplevel_target_context
 
     @property
+    @global_compiler_lock
     def typing_context(self):
         """
         The typing context for CPU targets.

@@ -57,6 +62,7 @@ def typing_context(self):
         else:
             return self._toplevel_typing_context
 
+    @global_compiler_lock
     def nested_context(self, typing_context, target_context):
         """
         A context manager temporarily replacing the contexts with the
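The additions above take global_compiler_lock around the lazily initialized typing and target contexts, so first-time construction is serialized with the rest of the compiler. A minimal sketch of the same decorator-stacking pattern using only the standard library (these names are stand-ins, not Numba's implementation):

import threading
from functools import cached_property

_compiler_lock = threading.RLock()  # stand-in for Numba's global_compiler_lock

def global_compiler_lock(fn):
    # Decorator form of the lock, as used in the diff above.
    def inner(*args, **kwargs):
        with _compiler_lock:
            return fn(*args, **kwargs)
    return inner

class CPUTargetSketch:
    @cached_property
    @global_compiler_lock
    def target_context(self):
        # First access builds the context; holding the compiler lock here
        # serializes construction with everything else using that lock.
        return object()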
(New file; filename not captured on this page)

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+import numpy as np
+from numba import dppl, njit, prange
+import dpctl
+import dpctl.ocldrv as ocldrv
+
+
+@njit
+def g(a):
+    return a + 1
+
+
+@njit
+def f(a, b, c, N):
+    for i in prange(N):
+        a[i] = b[i] + g(c[i])
+
+
+def main():
+    N = 10
+    a = np.ones(N)
+    b = np.ones(N)
+    c = np.ones(N)
+
+    if ocldrv.has_gpu_device:
+        with dpctl.device_context(dpctl.device_type.gpu):
+            f(a, b, c, N)
+    elif ocldrv.has_cpu_device:
+        with dpctl.device_context(dpctl.device_type.cpu):
+            f(a, b, c, N)
+    else:
+        print("No device found")
+
+
+if __name__ == '__main__':
+    main()

numba/dppl/target_dispatcher.py

Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
+from numba.core import registry, serialize, dispatcher
+from numba import types
+from numba.core.errors import UnsupportedError
+import dpctl
+import dpctl.ocldrv as ocldrv
+from numba.core.compiler_lock import global_compiler_lock
+
+
+class TargetDispatcher(serialize.ReduceMixin, metaclass=dispatcher.DispatcherMeta):
+    __numba__ = 'py_func'
+
+    def __init__(self, py_func, wrapper, target, compiled=None):
+
+        self.__py_func = py_func
+        self.__target = target
+        self.__wrapper = wrapper
+        self.__compiled = compiled if compiled is not None else {}
+        self.__doc__ = py_func.__doc__
+        self.__name__ = py_func.__name__
+        self.__module__ = py_func.__module__
+
+    def __call__(self, *args, **kwargs):
+        return self.get_compiled()(*args, **kwargs)
+
+    def __getattr__(self, name):
+        return getattr(self.get_compiled(), name)
+
+    def __get__(self, obj, objtype=None):
+        return self.get_compiled().__get__(obj, objtype)
+
+    def __repr__(self):
+        return self.get_compiled().__repr__()
+
+    @classmethod
+    def _rebuild(cls, py_func, wrapper, target, compiled):
+        self = cls(py_func, wrapper, target, compiled)
+        return self
+
+    def get_compiled(self, target=None):
+        if target is None:
+            target = self.__target
+
+        disp = self.get_current_disp()
+        if not disp in self.__compiled.keys():
+            with global_compiler_lock:
+                if not disp in self.__compiled.keys():
+                    self.__compiled[disp] = self.__wrapper(self.__py_func, disp)
+
+        return self.__compiled[disp]
+
+    def get_current_disp(self):
+        target = self.__target
+
+        if dpctl.is_in_device_context():
+            if self.__target is not None:
+                raise UnsupportedError("Unsupported defined 'target' with using context device")
+            if dpctl.get_current_device_type() == dpctl.device_type.gpu:
+                from numba.dppl import dppl_offload_dispatcher
+                return registry.dispatcher_registry['__dppl_offload_gpu__']
+
+        if target is None:
+            target = 'cpu'
+
+        return registry.dispatcher_registry[target]
+
+    def _reduce_states(self):
+        return dict(
+            py_func=self.__py_func,
+            wrapper=self.__wrapper,
+            target=self.__target,
+            compiled=self.__compiled
+        )
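Two details worth noting in get_compiled: the __compiled cache, keyed by dispatcher, is probed once without the lock and then re-checked under global_compiler_lock before compiling (double-checked locking), so repeat calls on the hot path stay lock-free. From user code the proxy is transparent; tests that need the concrete Dispatcher object unwrap it explicitly, as the updated tests below do. A small usage sketch, assuming this fork:

from numba import njit

@njit
def foo(x):
    return x + 1

foo(1)  # transparent: __call__ forwards to the current target's dispatcher

# Code that pokes Dispatcher internals (serialization, NRT stats in the
# updated tests) first unwraps the proxy:
disp = foo.get_compiled()  # concrete CPU dispatcher outside a device context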
(New test file; filename not captured on this page)

Lines changed: 102 additions & 0 deletions

@@ -0,0 +1,102 @@
+import numba
+import numpy as np
+from numba import dppl, njit
+from numba.core import errors
+from numba.tests.support import captured_stdout
+from numba.dppl.testing import DPPLTestCase, unittest
+import dpctl
+import dpctl.ocldrv as ocldrv
+
+
+@unittest.skipIf(not dpctl.has_gpu_queues(), "No GPU platforms available")
+@unittest.skipIf(not dpctl.has_cpu_queues(), "No CPU platforms available")
+class TestWithDPPLContext(DPPLTestCase):
+    def test_with_dppl_context_gpu(self):
+
+        @njit
+        def nested_func(a, b):
+            np.sin(a, b)
+
+        @njit
+        def func(b):
+            a = np.ones((64), dtype=np.float64)
+            nested_func(a, b)
+
+        numba.dppl.compiler.DEBUG = 1
+        expected = np.ones((64), dtype=np.float64)
+        got_gpu = np.ones((64), dtype=np.float64)
+
+        with captured_stdout() as got_gpu_message:
+            with dpctl.device_context(dpctl.device_type.gpu):
+                func(got_gpu)
+
+        func(expected)
+
+        np.testing.assert_array_equal(expected, got_gpu)
+        self.assertTrue('Parfor lowered on DPPL-device' in got_gpu_message.getvalue())
+
+
+    def test_with_dppl_context_cpu(self):
+
+        @njit
+        def nested_func(a, b):
+            np.sin(a, b)
+
+        @njit
+        def func(b):
+            a = np.ones((64), dtype=np.float64)
+            nested_func(a, b)
+
+        numba.dppl.compiler.DEBUG = 1
+        expected = np.ones((64), dtype=np.float64)
+        got_cpu = np.ones((64), dtype=np.float64)
+
+        with captured_stdout() as got_cpu_message:
+            with dpctl.device_context(dpctl.device_type.cpu):
+                func(got_cpu)
+
+        func(expected)
+
+        np.testing.assert_array_equal(expected, got_cpu)
+        self.assertTrue('Parfor lowered on DPPL-device' not in got_cpu_message.getvalue())
+
+
+    def test_with_dppl_context_target(self):
+
+        @njit(target='cpu')
+        def nested_func_target(a, b):
+            np.sin(a, b)
+
+        @njit(target='gpu')
+        def func_target(b):
+            a = np.ones((64), dtype=np.float64)
+            nested_func_target(a, b)
+
+        @njit
+        def func_no_target(b):
+            a = np.ones((64), dtype=np.float64)
+            nested_func_target(a, b)
+
+        a = np.ones((64), dtype=np.float64)
+        b = np.ones((64), dtype=np.float64)
+
+        with self.assertRaises(errors.UnsupportedError) as raises_1:
+            with dpctl.device_context(dpctl.device_type.gpu):
+                nested_func_target(a, b)
+
+        with self.assertRaises(errors.UnsupportedError) as raises_2:
+            with dpctl.device_context(dpctl.device_type.gpu):
+                func_target(a)
+
+        with self.assertRaises(errors.UnsupportedError) as raises_3:
+            with dpctl.device_context(dpctl.device_type.gpu):
+                func_no_target(a)
+
+        msg = "Unsupported defined 'target' with using context device"
+        self.assertTrue(msg in str(raises_1.exception))
+        self.assertTrue(msg in str(raises_2.exception))
+        self.assertTrue(msg in str(raises_3.exception))
+
+
+if __name__ == '__main__':
+    unittest.main()

numba/tests/test_dispatcher.py

Lines changed: 2 additions & 0 deletions

@@ -398,6 +398,8 @@ def test_serialization(self):
         def foo(x):
             return x + 1
 
+        foo = foo.get_compiled()
+
         self.assertEqual(foo(1), 2)
 
         # get serialization memo
numba/tests/test_nrt.py

Lines changed: 2 additions & 0 deletions

@@ -249,6 +249,8 @@ def alloc_nrt_memory():
             """
             return np.empty(N, dtype)
 
+        alloc_nrt_memory = alloc_nrt_memory.get_compiled()
+
         def keep_memory():
             return alloc_nrt_memory()
 
numba/tests/test_record_dtype.py

Lines changed: 2 additions & 2 deletions

@@ -803,8 +803,8 @@ def test_record_arg_transform(self):
         self.assertIn('Array', transformed)
         self.assertNotIn('first', transformed)
         self.assertNotIn('second', transformed)
-        # Length is usually 50 - 5 chars tolerance as above.
-        self.assertLess(len(transformed), 50)
+        # Length is usually 60 - 5 chars tolerance as above.
+        self.assertLess(len(transformed), 60)
 
     def test_record_two_arrays(self):
         """
numba/tests/test_serialize.py

Lines changed: 3 additions & 3 deletions

@@ -135,9 +135,9 @@ def test_reuse(self):
 
         Note that "same function" is intentionally under-specified.
         """
-        func = closure(5)
+        func = closure(5).get_compiled()
         pickled = pickle.dumps(func)
-        func2 = closure(6)
+        func2 = closure(6).get_compiled()
         pickled2 = pickle.dumps(func2)
 
         f = pickle.loads(pickled)

@@ -152,7 +152,7 @@ def test_reuse(self):
         self.assertEqual(h(2, 3), 11)
 
         # Now make sure the original object doesn't exist when deserializing
-        func = closure(7)
+        func = closure(7).get_compiled()
         func(42, 43)
         pickled = pickle.dumps(func)
         del func