Skip to content

Commit de6d223

Browse files
committed
Merge branch 'main' of github.com:getkeops/keops
2 parents 9a1aaaa + 65fd029 commit de6d223

File tree

12 files changed

+240
-144
lines changed

12 files changed

+240
-144
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
* v2.2 - Jan 18, 2024
2+
- added option to disable fast_math Cuda compiler option
3+
- added comparison operators between LazyTensor
4+
- added support for symbolic differentiation of symbolic operations: Grad, Diff, Laplacian, Divergence
5+
- added support for forward autodiff, compatible with PyTorch forward autodiff tools (torch.jvp, etc.)
6+
- added support for torch.vmap
7+
- added support for intermediate variables in formulas (factorize and auto_factorize methods of LazyTensor class)
8+
- changed cache folder name, which now includes the name of the local host
9+
- added torch.compile comparison to the benchmarks
10+
- fixed issues 294, 305, 310, 325, 342, ...
11+
112
* v2.1.2 - Apr 4, 2023
213
- Fixes memory leaks introduced in version v2.x (issues 284 and 300).
314
- The name of the cache folder now includes the hostname.

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# --no-cache .
1717

1818
# KeOps version - the most important parameter:
19-
ARG KEOPS_VERSION=2.1
19+
ARG KEOPS_VERSION=2.2
2020
# We also include all the libraries hosted on www.kernel-operations.io,
2121
# such as GeomLoss. This is convenient, and has negligible impact
2222
# on the size of the final image. Cuda and PyTorch weigh ~5Gb anyway,

docker-images.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
# Up to date as of Thursday, Jan. 18th, 2024:
99
PYTHON_VERSION=3.11
10-
KEOPS_VERSION=2.1.2
10+
KEOPS_VERSION=2.2
1111
GEOMLOSS_VERSION=0.2.6
1212
CUDA_VERSION=12.1
1313
CUDA_CHANNEL=nvidia/label/cuda-12.1.1

keopscore/keopscore/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
with open(os.path.join(here, "keops_version"), encoding="utf-8") as v:
1212
__version__ = v.read().rstrip()
1313

14-
from .config.config import set_build_folder, get_build_folder
15-
from .utils.code_gen_utils import clean_keops
14+
from keopscore.config.config import set_build_folder, get_build_folder
15+
from keopscore.utils.code_gen_utils import clean_keops
1616

1717
# flags for debugging :
1818
# prints information about atomic operations during code building
@@ -26,12 +26,12 @@
2626

2727
cuda_block_size = 192
2828

29-
from . import config as keopscoreconfig
29+
from keopscore import config as keopscoreconfig
3030

3131
if keopscoreconfig.config.use_cuda:
3232
keopscoreconfig.config.init_cudalibs()
33-
from .binders.nvrtc.Gpu_link_compile import Gpu_link_compile
34-
from .binders.nvrtc.Gpu_link_compile import jit_compile_dll
33+
from keopscore.binders.nvrtc.Gpu_link_compile import Gpu_link_compile
34+
from keopscore.binders.nvrtc.Gpu_link_compile import jit_compile_dll
3535

3636
if not os.path.exists(jit_compile_dll()):
3737
Gpu_link_compile.compile_jit_compile_dll()

keopscore/keopscore/formulas/maths/SoftDTW_SqDist.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,21 @@
44

55
from keopscore.formulas.Operation import Operation
66
from keopscore.utils.misc_utils import KeOps_Error
7-
from keopscore.utils.code_gen_utils import c_variable, pointer, c_array, c_for_loop, c_zero_float
7+
from keopscore.utils.code_gen_utils import (
8+
c_variable,
9+
pointer,
10+
c_array,
11+
c_for_loop,
12+
c_zero_float,
13+
)
814
from keopscore.utils.code_gen_utils import use_pragma_unroll
915
from keopscore.formulas.variables.Zero import Zero
1016
from keopscore.formulas.maths.Extract import Extract
1117

18+
1219
class SoftDTW_SqDist(Operation):
1320
string_id = "SoftDTW_SqDist"
21+
1422
def __init__(self, x, y, gamma, params=()):
1523
# x is vector of size n, y is vector of size m, gamma is scalar,
1624
# output is scalar
@@ -23,7 +31,7 @@ def __init__(self, x, y, gamma, params=()):
2331

2432
def Op(self, out, table, x, y, gamma):
2533
dtype = x.dtype
26-
n,m = self.n, self.m
34+
n, m = self.n, self.m
2735
code = f"""
2836
#define MIN2(a,b) fminf(a,b) //(((a)<(b))?(a):(b))
2937
#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
@@ -71,39 +79,39 @@ def Op(self, out, table, x, y, gamma):
7179
{out}[0] = rij;
7280
7381
"""
74-
82+
7583
return code
76-
84+
7785
def DiffT(self, v, gradin):
7886
x, y, gamma = self.children
79-
n,m = self.n, self.m
87+
n, m = self.n, self.m
8088
if v in gamma.Vars_:
81-
KeOps_Error("autograd wrt gamma in SoftDTW_SqDist operation not implemented.")
89+
KeOps_Error(
90+
"autograd wrt gamma in SoftDTW_SqDist operation not implemented."
91+
)
8292
grad = GradSoftDTW_SqDist(x, y, gamma) * gradin
83-
gradx = Extract(grad,0,n)
84-
grady = Extract(grad,n,m)
93+
gradx = Extract(grad, 0, n)
94+
grady = Extract(grad, n, m)
8595
return x.DiffT(v, gradx) + y.DiffT(v, grady)
86-
87-
88-
8996

9097

9198
class GradSoftDTW_SqDist(Operation):
9299
string_id = "GradSoftDTW_SqDist"
100+
93101
def __init__(self, x, y, gamma, params=()):
94102
# x is vector of size n, y is vector of size m, gamma is scalar,
95103
# output is of size n+m, corresponding to concatenation of grads wrt x and y
96104
if gamma.dim != 1:
97105
KeOps_Error("input gamma should be scalar")
98-
n,m = x.dim, y.dim
106+
n, m = x.dim, y.dim
99107
super().__init__(x, y, gamma, params=())
100108
self.n = n
101109
self.m = m
102-
self.dim = n+m
110+
self.dim = n + m
103111

104112
def Op(self, out, table, x, y, gamma):
105113
dtype = x.dtype
106-
n,m = self.n, self.m
114+
n, m = self.n, self.m
107115
code = f"""
108116
#define MIN2(a,b) fminf(a,b) //(((a)<(b))?(a):(b))
109117
#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
@@ -200,9 +208,9 @@ def Op(self, out, table, x, y, gamma):
200208
ejp1[0] = eij;
201209
}}
202210
"""
203-
211+
204212
return code
205-
213+
206214
def DiffT(self, v, gradin):
207215
KeOps_Error("autograd for GradSoftDTW_SqDist operation not implemented.")
208-
pass
216+
pass

pykeops/pykeops/common/lazy_tensor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1592,7 +1592,7 @@ def difference_matrix(self, other):
15921592
return self.binary(
15931593
other,
15941594
"DifferenceMatrix",
1595-
dimres=(other.ndim * self.ndim),
1595+
dimres=(other.ndim * self.ndim),
15961596
dimcheck=None,
15971597
)
15981598

@@ -1607,7 +1607,7 @@ def softdtw_sqdist(self, other, gamma):
16071607
other,
16081608
gamma,
16091609
"SoftDTW_SqDist",
1610-
dimres=1,
1610+
dimres=1,
16111611
dimcheck=None,
16121612
)
16131613

pykeops/pykeops/numpy/generic/generic_red.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,9 +327,9 @@ def __call__(self, *args, backend="auto", device_id=-1, ranges=None, out=None):
327327
)
328328
args = tuple(np.ascontiguousarray(arg) for arg in args)
329329

330-
# N.B.: KeOps C++ expects contiguous integer arrays as ranges
330+
# N.B.: KeOps C++ expects contiguous int64 arrays as ranges
331331
if ranges:
332-
ranges = tuple(np.ascontiguousarray(r) for r in ranges)
332+
ranges = tuple(r.astype("int64", order="C") for r in ranges)
333333

334334
nx, ny = get_sizes(self.aliases, *args)
335335
nout, nred = (nx, ny) if self.axis == 1 else (ny, nx)

pykeops/pykeops/sandbox/test_soft_dtw_kernel.py

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,68 +19,75 @@
1919
gamma = torch.tensor(0.1, device=device_id)
2020

2121
##################################
22-
# SoftDTW operation in pytorch
22+
# SoftDTW operation in pytorch
2323
##################################
2424

25+
2526
def softmin(args, gamma):
26-
minargs = reduce(lambda x,y:torch.min(x,y), args)
27-
if gamma>0:
28-
minargs -= gamma * sum(((minargs-arg)/gamma).exp() for arg in args).log()
27+
minargs = reduce(lambda x, y: torch.min(x, y), args)
28+
if gamma > 0:
29+
minargs -= gamma * sum(((minargs - arg) / gamma).exp() for arg in args).log()
2930
return minargs
3031

32+
3133
def SoftDTW_torch(x, y, gamma):
3234
n, m = x.shape[1], y.shape[1]
33-
x, y = x[:,None,:], y[None,:,:]
34-
rjm1 = [torch.tensor(torch.inf, device=device_id) for _ in range(n+1)]
35-
rjm1[0] = torch.tensor(0., device=device_id)
35+
x, y = x[:, None, :], y[None, :, :]
36+
rjm1 = [torch.tensor(torch.inf, device=device_id) for _ in range(n + 1)]
37+
rjm1[0] = torch.tensor(0.0, device=device_id)
3638
torchinf = torch.tensor(torch.inf, device=device_id)
37-
for j in range(1,m+1):
39+
for j in range(1, m + 1):
3840
rim1j = torchinf
39-
for i in range(1,n+1):
40-
rij = (x[:,:,i-1]-y[:,:,j-1])**2 + softmin((rjm1[i], rjm1[i-1], rim1j), gamma)
41-
rjm1[i-1] = rim1j
41+
for i in range(1, n + 1):
42+
rij = (x[:, :, i - 1] - y[:, :, j - 1]) ** 2 + softmin(
43+
(rjm1[i], rjm1[i - 1], rim1j), gamma
44+
)
45+
rjm1[i - 1] = rim1j
4246
rim1j = rij
4347
rjm1[i] = rij
4448
return rij
4549

4650

47-
4851
#########################################
4952
# reduction function with torch and keops
5053
#########################################
5154

55+
5256
def fun_torch(x, y, gamma):
53-
Sxy = SoftDTW_torch(x,y,gamma)
57+
Sxy = SoftDTW_torch(x, y, gamma)
5458
Kxy = (-Sxy).exp()
5559
return Kxy.sum(dim=1)
5660

61+
5762
def fun_keops(x, y, gamma):
58-
n,m = x.shape[1], y.shape[1]
63+
n, m = x.shape[1], y.shape[1]
5964
formula = "Exp(-SoftDTW_SqDist(x,y,gamma))"
6065
aliases = [f"x=Vi({n})", f"y=Vj({m})", "gamma=Pm(1)"]
6166
Kxy = Genred(formula, aliases, reduction_op="Sum", axis=1)
62-
return Kxy(x,y,gamma.view((1,1)))
67+
return Kxy(x, y, gamma.view((1, 1)))
68+
6369

6470
def fun_lazytensor(x, y, gamma):
65-
x = LazyTensor(x[:,None,:])
66-
y = LazyTensor(y[None,:,:])
67-
sdtw = x.softdtw_sqdist(y,gamma)
71+
x = LazyTensor(x[:, None, :])
72+
y = LazyTensor(y[None, :, :])
73+
sdtw = x.softdtw_sqdist(y, gamma)
6874
K = (-sdtw).exp()
6975
return K.sum(axis=1)
7076

77+
7178
##################################
7279
# test
7380
##################################
7481

75-
#funs = (fun_torch, fun_keops, fun_lazytensor)
82+
# funs = (fun_torch, fun_keops, fun_lazytensor)
7683
funs = (fun_torch, fun_lazytensor)
7784
out = []
7885
for fun in funs:
7986
print("**************************")
8087
print("Testing " + fun.__name__)
8188
if do_warmup:
82-
fun(x[:100,:], y[:100,:], gamma)
83-
fun(x[:100,:], y[:100,:], gamma)
89+
fun(x[:100, :], y[:100, :], gamma)
90+
fun(x[:100, :], y[:100, :], gamma)
8491
start = time.time()
8592
out.append(fun(x, y, gamma).squeeze())
8693
end = time.time()
@@ -89,8 +96,11 @@ def fun_lazytensor(x, y, gamma):
8996
print("******")
9097

9198
if len(out) > 1:
92-
for k in range(1,len(out)):
93-
print(f"relative error {funs[k].__name__} vs {funs[0].__name__}:", (torch.norm(out[0] - out[k]) / torch.norm(out[0])).item())
99+
for k in range(1, len(out)):
100+
print(
101+
f"relative error {funs[k].__name__} vs {funs[0].__name__}:",
102+
(torch.norm(out[0] - out[k]) / torch.norm(out[0])).item(),
103+
)
94104

95105

96106
if test_grad:
@@ -105,7 +115,8 @@ def fun_lazytensor(x, y, gamma):
105115
print("time for " + fun.__name__ + " (grad):", end - start)
106116

107117
if len(out_g) > 1:
108-
for k in range(1,len(out)):
109-
print(f"relative error grad {funs[k].__name__} vs {funs[0].__name__}:", (torch.norm(out_g[0] - out_g[k]) / torch.norm(out_g[0])).item())
110-
111-
118+
for k in range(1, len(out)):
119+
print(
120+
f"relative error grad {funs[k].__name__} vs {funs[0].__name__}:",
121+
(torch.norm(out_g[0] - out_g[k]) / torch.norm(out_g[0])).item(),
122+
)

0 commit comments

Comments
 (0)