Skip to content

Commit de6d223

Browse files
committed
Merge branch 'main' of github.com:getkeops/keops
2 parents 9a1aaaa + 65fd029 commit de6d223

File tree

12 files changed

+240
-144
lines changed

12 files changed

+240
-144
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
* v2.2 - Jan 18, 2024
2+
- added option to disable fast_math Cuda compiler option
3+
- added comparison operators between LazyTensor
4+
- added support for symbolic differentiation of symbolic operations: Grad, Diff, Laplacian, Divergence
5+
- added support for forward autodiff, compatible with PyTorch forward autodiff tools (torch.jvp, etc.)
6+
- added support for torch.vmap
7+
- added support for intermediate variables in formulas (factorize and auto_factorize methods of LazyTensor class)
8+
- changed cache folder name, which now includes the name of the local host
9+
- added torch.compile comparison to the benchmarks
10+
- fixed issues 294, 305, 310, 325, 342, ...
11+
112
* v2.1.2 - Apr 4, 2023
213
- Fixes memory leaks introduced in version v2.x (issues 284 and 300).
314
- The name of the cache folder now includes the hostname.

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# --no-cache .
1717

1818
# KeOps version - the most important parameter:
19-
ARG KEOPS_VERSION=2.1
19+
ARG KEOPS_VERSION=2.2
2020
# We also include all the libraries hosted on www.kernel-operations.io,
2121
# such as GeomLoss. This is convenient, and has negligible impact
2222
# on the size of the final image. Cuda and PyTorch weigh ~5Gb anyway,

docker-images.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
# Up to date as of Thursday, Jan. 18th, 2024:
99
PYTHON_VERSION=3.11
10-
KEOPS_VERSION=2.1.2
10+
KEOPS_VERSION=2.2
1111
GEOMLOSS_VERSION=0.2.6
1212
CUDA_VERSION=12.1
1313
CUDA_CHANNEL=nvidia/label/cuda-12.1.1

keopscore/keopscore/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
with open(os.path.join(here, "keops_version"), encoding="utf-8") as v:
1212
__version__ = v.read().rstrip()
1313

14-
from .config.config import set_build_folder, get_build_folder
15-
from .utils.code_gen_utils import clean_keops
14+
from keopscore.config.config import set_build_folder, get_build_folder
15+
from keopscore.utils.code_gen_utils import clean_keops
1616

1717
# flags for debugging :
1818
# prints information about atomic operations during code building
@@ -26,12 +26,12 @@
2626

2727
cuda_block_size = 192
2828

29-
from . import config as keopscoreconfig
29+
from keopscore import config as keopscoreconfig
3030

3131
if keopscoreconfig.config.use_cuda:
3232
keopscoreconfig.config.init_cudalibs()
33-
from .binders.nvrtc.Gpu_link_compile import Gpu_link_compile
34-
from .binders.nvrtc.Gpu_link_compile import jit_compile_dll
33+
from keopscore.binders.nvrtc.Gpu_link_compile import Gpu_link_compile
34+
from keopscore.binders.nvrtc.Gpu_link_compile import jit_compile_dll
3535

3636
if not os.path.exists(jit_compile_dll()):
3737
Gpu_link_compile.compile_jit_compile_dll()

keopscore/keopscore/formulas/maths/SoftDTW_SqDist.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,21 @@
44

55
from keopscore.formulas.Operation import Operation
66
from keopscore.utils.misc_utils import KeOps_Error
7-
from keopscore.utils.code_gen_utils import c_variable, pointer, c_array, c_for_loop, c_zero_float
7+
from keopscore.utils.code_gen_utils import (
8+
c_variable,
9+
pointer,
10+
c_array,
11+
c_for_loop,
12+
c_zero_float,
13+
)
814
from keopscore.utils.code_gen_utils import use_pragma_unroll
915
from keopscore.formulas.variables.Zero import Zero
1016
from keopscore.formulas.maths.Extract import Extract
1117

18+
1219
class SoftDTW_SqDist(Operation):
1320
string_id = "SoftDTW_SqDist"
21+
1422
def __init__(self, x, y, gamma, params=()):
1523
# x is vector of size n, y is vector of size m, gamma is scalar,
1624
# output is scalar
@@ -23,7 +31,7 @@ def __init__(self, x, y, gamma, params=()):
2331

2432
def Op(self, out, table, x, y, gamma):
2533
dtype = x.dtype
26-
n,m = self.n, self.m
34+
n, m = self.n, self.m
2735
code = f"""
2836
#define MIN2(a,b) fminf(a,b) //(((a)<(b))?(a):(b))
2937
#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
@@ -71,39 +79,39 @@ def Op(self, out, table, x, y, gamma):
7179
{out}[0] = rij;
7280
7381
"""
74-
82+
7583
return code
76-
84+
7785
def DiffT(self, v, gradin):
7886
x, y, gamma = self.children
79-
n,m = self.n, self.m
87+
n, m = self.n, self.m
8088
if v in gamma.Vars_:
81-
KeOps_Error("autograd wrt gamma in SoftDTW_SqDist operation not implemented.")
89+
KeOps_Error(
90+
"autograd wrt gamma in SoftDTW_SqDist operation not implemented."
91+
)
8292
grad = GradSoftDTW_SqDist(x, y, gamma) * gradin
83-
gradx = Extract(grad,0,n)
84-
grady = Extract(grad,n,m)
93+
gradx = Extract(grad, 0, n)
94+
grady = Extract(grad, n, m)
8595
return x.DiffT(v, gradx) + y.DiffT(v, grady)
86-
87-
88-
8996

9097

9198
class GradSoftDTW_SqDist(Operation):
9299
string_id = "GradSoftDTW_SqDist"
100+
93101
def __init__(self, x, y, gamma, params=()):
94102
# x is vector of size n, y is vector of size m, gamma is scalar,
95103
# output is of size n+m, corresponding to concatenation of grads wrt x and y
96104
if gamma.dim != 1:
97105
KeOps_Error("input gamma should be scalar")
98-
n,m = x.dim, y.dim
106+
n, m = x.dim, y.dim
99107
super().__init__(x, y, gamma, params=())
100108
self.n = n
101109
self.m = m
102-
self.dim = n+m
110+
self.dim = n + m
103111

104112
def Op(self, out, table, x, y, gamma):
105113
dtype = x.dtype
106-
n,m = self.n, self.m
114+
n, m = self.n, self.m
107115
code = f"""
108116
#define MIN2(a,b) fminf(a,b) //(((a)<(b))?(a):(b))
109117
#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
@@ -200,9 +208,9 @@ def Op(self, out, table, x, y, gamma):
200208
ejp1[0] = eij;
201209
}}
202210
"""
203-
211+
204212
return code
205-
213+
206214
def DiffT(self, v, gradin):
207215
KeOps_Error("autograd for GradSoftDTW_SqDist operation not implemented.")
208-
pass
216+
pass

pykeops/pykeops/common/lazy_tensor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1592,7 +1592,7 @@ def difference_matrix(self, other):
15921592
return self.binary(
15931593
other,
15941594
"DifferenceMatrix",
1595-
dimres=(other.ndim * self.ndim),
1595+
dimres=(other.ndim * self.ndim),
15961596
dimcheck=None,
15971597
)
15981598

@@ -1607,7 +1607,7 @@ def softdtw_sqdist(self, other, gamma):
16071607
other,
16081608
gamma,
16091609
"SoftDTW_SqDist",
1610-
dimres=1,
1610+
dimres=1,
16111611
dimcheck=None,
16121612
)
16131613

pykeops/pykeops/numpy/generic/generic_red.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,9 +327,9 @@ def __call__(self, *args, backend="auto", device_id=-1, ranges=None, out=None):
327327
)
328328
args = tuple(np.ascontiguousarray(arg) for arg in args)
329329

330-
# N.B.: KeOps C++ expects contiguous integer arrays as ranges
330+
# N.B.: KeOps C++ expects contiguous int64 arrays as ranges
331331
if ranges:
332-
ranges = tuple(np.ascontiguousarray(r) for r in ranges)
332+
ranges = tuple(r.astype("int64", order="C") for r in ranges)
333333

334334
nx, ny = get_sizes(self.aliases, *args)
335335
nout, nred = (nx, ny) if self.axis == 1 else (ny, nx)

pykeops/pykeops/sandbox/test_soft_dtw_kernel.py

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,68 +19,75 @@
1919
gamma = torch.tensor(0.1, device=device_id)
2020

2121
##################################
22-
# SoftDTW operation in pytorch
22+
# SoftDTW operation in pytorch
2323
##################################
2424

25+
2526
def softmin(args, gamma):
26-
minargs = reduce(lambda x,y:torch.min(x,y), args)
27-
if gamma>0:
28-
minargs -= gamma * sum(((minargs-arg)/gamma).exp() for arg in args).log()
27+
minargs = reduce(lambda x, y: torch.min(x, y), args)
28+
if gamma > 0:
29+
minargs -= gamma * sum(((minargs - arg) / gamma).exp() for arg in args).log()
2930
return minargs
3031

32+
3133
def SoftDTW_torch(x, y, gamma):
3234
n, m = x.shape[1], y.shape[1]
33-
x, y = x[:,None,:], y[None,:,:]
34-
rjm1 = [torch.tensor(torch.inf, device=device_id) for _ in range(n+1)]
35-
rjm1[0] = torch.tensor(0., device=device_id)
35+
x, y = x[:, None, :], y[None, :, :]
36+
rjm1 = [torch.tensor(torch.inf, device=device_id) for _ in range(n + 1)]
37+
rjm1[0] = torch.tensor(0.0, device=device_id)
3638
torchinf = torch.tensor(torch.inf, device=device_id)
37-
for j in range(1,m+1):
39+
for j in range(1, m + 1):
3840
rim1j = torchinf
39-
for i in range(1,n+1):
40-
rij = (x[:,:,i-1]-y[:,:,j-1])**2 + softmin((rjm1[i], rjm1[i-1], rim1j), gamma)
41-
rjm1[i-1] = rim1j
41+
for i in range(1, n + 1):
42+
rij = (x[:, :, i - 1] - y[:, :, j - 1]) ** 2 + softmin(
43+
(rjm1[i], rjm1[i - 1], rim1j), gamma
44+
)
45+
rjm1[i - 1] = rim1j
4246
rim1j = rij
4347
rjm1[i] = rij
4448
return rij
4549

4650

47-
4851
#########################################
4952
# reduction function with torch and keops
5053
#########################################
5154

55+
5256
def fun_torch(x, y, gamma):
53-
Sxy = SoftDTW_torch(x,y,gamma)
57+
Sxy = SoftDTW_torch(x, y, gamma)
5458
Kxy = (-Sxy).exp()
5559
return Kxy.sum(dim=1)
5660

61+
5762
def fun_keops(x, y, gamma):
58-
n,m = x.shape[1], y.shape[1]
63+
n, m = x.shape[1], y.shape[1]
5964
formula = "Exp(-SoftDTW_SqDist(x,y,gamma))"
6065
aliases = [f"x=Vi({n})", f"y=Vj({m})", "gamma=Pm(1)"]
6166
Kxy = Genred(formula, aliases, reduction_op="Sum", axis=1)
62-
return Kxy(x,y,gamma.view((1,1)))
67+
return Kxy(x, y, gamma.view((1, 1)))
68+
6369

6470
def fun_lazytensor(x, y, gamma):
65-
x = LazyTensor(x[:,None,:])
66-
y = LazyTensor(y[None,:,:])
67-
sdtw = x.softdtw_sqdist(y,gamma)
71+
x = LazyTensor(x[:, None, :])
72+
y = LazyTensor(y[None, :, :])
73+
sdtw = x.softdtw_sqdist(y, gamma)
6874
K = (-sdtw).exp()
6975
return K.sum(axis=1)
7076

77+
7178
##################################
7279
# test
7380
##################################
7481

75-
#funs = (fun_torch, fun_keops, fun_lazytensor)
82+
# funs = (fun_torch, fun_keops, fun_lazytensor)
7683
funs = (fun_torch, fun_lazytensor)
7784
out = []
7885
for fun in funs:
7986
print("**************************")
8087
print("Testing " + fun.__name__)
8188
if do_warmup:
82-
fun(x[:100,:], y[:100,:], gamma)
83-
fun(x[:100,:], y[:100,:], gamma)
89+
fun(x[:100, :], y[:100, :], gamma)
90+
fun(x[:100, :], y[:100, :], gamma)
8491
start = time.time()
8592
out.append(fun(x, y, gamma).squeeze())
8693
end = time.time()
@@ -89,8 +96,11 @@ def fun_lazytensor(x, y, gamma):
8996
print("******")
9097

9198
if len(out) > 1:
92-
for k in range(1,len(out)):
93-
print(f"relative error {funs[k].__name__} vs {funs[0].__name__}:", (torch.norm(out[0] - out[k]) / torch.norm(out[0])).item())
99+
for k in range(1, len(out)):
100+
print(
101+
f"relative error {funs[k].__name__} vs {funs[0].__name__}:",
102+
(torch.norm(out[0] - out[k]) / torch.norm(out[0])).item(),
103+
)
94104

95105

96106
if test_grad:
@@ -105,7 +115,8 @@ def fun_lazytensor(x, y, gamma):
105115
print("time for " + fun.__name__ + " (grad):", end - start)
106116

107117
if len(out_g) > 1:
108-
for k in range(1,len(out)):
109-
print(f"relative error grad {funs[k].__name__} vs {funs[0].__name__}:", (torch.norm(out_g[0] - out_g[k]) / torch.norm(out_g[0])).item())
110-
111-
118+
for k in range(1, len(out)):
119+
print(
120+
f"relative error grad {funs[k].__name__} vs {funs[0].__name__}:",
121+
(torch.norm(out_g[0] - out_g[k]) / torch.norm(out_g[0])).item(),
122+
)

0 commit comments

Comments
 (0)