Skip to content

Commit d9dab3f

Browse files
authored
Merge pull request #612 from dimensionalOS/matt-image-upgrades
Image upgrades! Impls for CUDA + numpy, along with an abstraction and full backwards compatibility Former-commit-id: cae9d1b [formerly 42f410f] Former-commit-id: 7b38a68
1 parent e214aab commit d9dab3f

8 files changed

Lines changed: 2890 additions & 546 deletions

File tree

dimos/msgs/sensor_msgs/Image.py

Lines changed: 417 additions & 406 deletions
Large diffs are not rendered by default.
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
# Copyright 2025 Dimensional Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import base64
18+
import os
19+
from abc import ABC, abstractmethod
20+
from enum import Enum
21+
from typing import Any
22+
23+
import cv2
24+
import numpy as np
25+
26+
try:
27+
import cupy as cp # type: ignore
28+
29+
HAS_CUDA = True
30+
except Exception: # pragma: no cover - optional dependency
31+
cp = None # type: ignore
32+
HAS_CUDA = False
33+
34+
# Optional nvImageCodec (preferred GPU codec).
# Opt-in only: the codec is probed when the USE_NVIMGCODEC environment
# variable is exactly "1" and CuPy was imported successfully above.
USE_NVIMGCODEC = os.environ.get("USE_NVIMGCODEC", "0") == "1"
# Flipped by AbstractImage.save()/to_base64() to record whether the GPU
# codec produced their output.
NVIMGCODEC_LAST_USED = False

# Start from "codec unavailable"; only a fully successful probe upgrades this.
nvimgcodec = None  # type: ignore
HAS_NVIMGCODEC = False
try:  # pragma: no cover - optional dependency
    if HAS_CUDA and USE_NVIMGCODEC:
        from nvidia import nvimgcodec  # type: ignore

        # Importing is not enough: building an encoder verifies the codec
        # is actually usable in this environment.
        _enc_probe = nvimgcodec.Encoder()  # type: ignore[attr-defined]
        HAS_NVIMGCODEC = True
except Exception:  # pragma: no cover - optional dependency
    # Any failure (import or encoder construction) disables the GPU codec.
    nvimgcodec = None  # type: ignore
    HAS_NVIMGCODEC = False
53+
54+
55+
class ImageFormat(Enum):
    """Closed set of pixel-format tags carried by an image.

    Every member's value is the same string as its name, so
    ``ImageFormat("BGR") is ImageFormat.BGR``.
    """

    # Three- and four-channel colour tags.
    BGR = "BGR"
    RGB = "RGB"
    RGBA = "RGBA"
    BGRA = "BGRA"

    # Single-channel intensity tags.
    GRAY = "GRAY"
    GRAY16 = "GRAY16"

    # Depth tags.
    # NOTE(review): the "16" suffix presumably means 16-bit samples - confirm
    # against the concrete image implementations.
    DEPTH = "DEPTH"
    DEPTH16 = "DEPTH16"
64+
65+
66+
def _is_cu(x) -> bool:
    """Return True only when CuPy is loaded and *x* is a ``cupy.ndarray``."""
    # Without a usable CuPy module nothing can be a GPU array.
    if not HAS_CUDA or cp is None:
        return False
    return isinstance(x, cp.ndarray)  # type: ignore
68+
69+
70+
def _ascontig(x):
    """Return *x* itself if it is C-contiguous, else a C-contiguous copy.

    The copy is made with CuPy for GPU arrays and NumPy otherwise, so the
    result stays on the same backend as the input.
    """
    on_gpu = _is_cu(x)
    if x.flags["C_CONTIGUOUS"]:
        return x
    xp = cp if on_gpu else np  # type: ignore
    return xp.ascontiguousarray(x)
74+
75+
76+
def _to_cpu(x):
    """Return a NumPy copy of a CuPy array; pass anything else through as-is."""
    if _is_cu(x):
        return cp.asnumpy(x)  # type: ignore
    return x
78+
79+
80+
def _to_cu(x):
    """Upload a NumPy array to the GPU when CuPy is available.

    Anything that is not a ``numpy.ndarray`` - or any input at all when CuPy
    is unavailable - is returned unchanged.
    """
    cuda_ready = HAS_CUDA and cp is not None
    if not (cuda_ready and isinstance(x, np.ndarray)):  # type: ignore
        return x
    return cp.asarray(x)  # type: ignore
84+
85+
86+
def _encode_nvimgcodec_cuda(bgr_cu, quality: int = 80) -> bytes:  # pragma: no cover - optional
    """Encode an HxWx3 uint8 image with nvImageCodec and return the bitstream.

    Args:
        bgr_cu: Array with ``ndim == 3``, three channels and CuPy ``uint8``
            dtype. It is made C-contiguous if it is not already.
        quality: Forwarded to ``nvimgcodec.EncodeParams`` when the installed
            module exposes that class; ignored otherwise.

    Returns:
        The first encoded bitstream as ``bytes``.

    Raises:
        RuntimeError: If nvImageCodec is unavailable, or the input has the
            wrong rank, channel count or dtype.
    """
    # --- validation (order matters: each check has its own error message) ---
    if nvimgcodec is None or not HAS_NVIMGCODEC:
        raise RuntimeError("nvimgcodec not available")
    if bgr_cu.ndim != 3 or bgr_cu.shape[2] != 3:
        raise RuntimeError("nvimgcodec expects HxWx3 image")
    if bgr_cu.dtype != cp.uint8:  # type: ignore[attr-defined]
        raise RuntimeError("nvimgcodec requires uint8 input")

    frame = bgr_cu
    if not frame.flags["C_CONTIGUOUS"]:
        frame = cp.ascontiguousarray(frame)  # type: ignore[attr-defined]

    encoder = nvimgcodec.Encoder()  # type: ignore[attr-defined]

    # NOTE(review): the Image / PixelFormat / encode([...]) call shapes below
    # are kept exactly as written; confirm them against the installed
    # nvImageCodec version.
    pixel_format = nvimgcodec.PixelFormat.BGR  # type: ignore[attr-defined]
    try:
        # Preferred: wrap the device array directly.
        img = nvimgcodec.Image(frame, pixel_format)  # type: ignore[attr-defined]
    except Exception:
        # Fallback: retry with a host (NumPy) copy of the same pixels.
        img = nvimgcodec.Image(cp.asnumpy(frame), pixel_format)  # type: ignore[attr-defined]

    if hasattr(nvimgcodec, "EncodeParams"):
        params = nvimgcodec.EncodeParams(quality=quality)  # type: ignore[attr-defined]
        bitstreams = encoder.encode([img], [params])
    else:
        bitstreams = encoder.encode([img])

    first = bitstreams[0]
    # Some result objects expose the raw bytes via ``.buf``; others are
    # directly convertible with ``bytes()``.
    payload = first.buf if hasattr(first, "buf") else first
    return bytes(payload)
109+
110+
111+
class AbstractImage(ABC):
    """Backend-agnostic base class for images.

    Subclasses supply the pixel buffer in ``data`` (anything exposing
    ``shape``, ``dtype``, ``ndim`` and ``copy()``) and implement the abstract
    conversion methods. The base class provides geometry accessors, copying,
    and JPEG/file export with an optional nvImageCodec fast path.
    """

    data: Any  # pixel buffer; indexed as (rows, cols[, channels]) below
    format: ImageFormat  # pixel-format tag for ``data``
    frame_id: str
    ts: float

    @property
    @abstractmethod
    def is_cuda(self) -> bool:  # pragma: no cover - abstract
        """Whether this image is CUDA-backed; gates the GPU encode path."""
        ...

    @property
    def height(self) -> int:
        """Size of the first axis of ``data``."""
        return int(self.data.shape[0])

    @property
    def width(self) -> int:
        """Size of the second axis of ``data``."""
        return int(self.data.shape[1])

    @property
    def channels(self) -> int:
        """Channel count: 1 for 2-D data, ``shape[2]`` for 3-D data.

        Raises:
            ValueError: If ``data`` is neither 2-D nor 3-D.
        """
        ndim = getattr(self.data, "ndim", 0)
        if ndim == 2:
            return 1
        if ndim == 3:
            return int(self.data.shape[2])
        raise ValueError("Invalid image dimensions")

    @property
    def shape(self):
        """Shape of ``data`` as a plain tuple."""
        return tuple(self.data.shape)

    @property
    def dtype(self):
        """Element dtype of ``data``."""
        return self.data.dtype

    @abstractmethod
    def to_opencv(self) -> np.ndarray:  # pragma: no cover - abstract
        """Return the image as a NumPy array suitable for ``cv2.imwrite``."""
        ...

    @abstractmethod
    def to_rgb(self) -> "AbstractImage":  # pragma: no cover - abstract
        """Return an RGB version of this image."""
        ...

    @abstractmethod
    def to_bgr(self) -> "AbstractImage":  # pragma: no cover - abstract
        """Return a BGR version of this image (used by the encoders below)."""
        ...

    @abstractmethod
    def to_grayscale(self) -> "AbstractImage":  # pragma: no cover - abstract
        """Return a grayscale version of this image."""
        ...

    @abstractmethod
    def resize(
        self, width: int, height: int, interpolation: int = cv2.INTER_LINEAR
    ) -> "AbstractImage":  # pragma: no cover - abstract
        """Return the image resized to ``width`` x ``height``."""
        ...

    @abstractmethod
    def sharpness(self) -> float:  # pragma: no cover - abstract
        """Return a sharpness score; the metric is defined by the subclass."""
        ...

    def copy(self) -> "AbstractImage":
        """Return a new instance of the same class with a copied ``data`` buffer.

        ``format``, ``frame_id`` and ``ts`` are passed through unchanged. The
        concrete class must accept these four keyword arguments.
        """
        return self.__class__(
            data=self.data.copy(), format=self.format, frame_id=self.frame_id, ts=self.ts
        )  # type: ignore

    def save(self, filepath: str) -> bool:
        """Write the image to ``filepath``.

        The nvImageCodec path is used only for CUDA images whose target
        extension is ``.jpg``/``.jpeg``: that encoder's output is handled as
        JPEG throughout this module, so writing it under any other extension
        would produce a mislabelled file. All other cases - and any GPU
        failure - go through ``cv2.imwrite``, which picks the format from the
        extension.

        ``NVIMGCODEC_LAST_USED`` is set to True after a successful GPU write
        and to False when a GPU attempt fails; it is left untouched when no
        GPU attempt is made.

        Returns:
            True after a successful GPU write, otherwise the result of
            ``cv2.imwrite``.
        """
        global NVIMGCODEC_LAST_USED
        wants_jpeg = os.path.splitext(filepath)[1].lower() in (".jpg", ".jpeg")
        if wants_jpeg and self.is_cuda and HAS_NVIMGCODEC and nvimgcodec is not None:
            try:
                bgr = self.to_bgr()
                if _is_cu(bgr.data):
                    jpeg = _encode_nvimgcodec_cuda(bgr.data)
                    with open(filepath, "wb") as f:
                        f.write(jpeg)
                    NVIMGCODEC_LAST_USED = True
                    return True
            except Exception:
                # Best effort: any GPU-side or file-write failure falls back
                # to the CPU writer below.
                NVIMGCODEC_LAST_USED = False
        arr = self.to_opencv()
        return cv2.imwrite(filepath, arr)

    def to_base64(self, quality: int = 80) -> str:
        """Return the image as a base64-encoded JPEG string.

        The BGR conversion is done once and shared by the GPU attempt and the
        CPU fallback, so a failed GPU encode does not trigger a second colour
        conversion.

        Args:
            quality: JPEG quality passed to whichever encoder is used.

        Raises:
            ValueError: If the CPU encoder (``cv2.imencode``) reports failure.
        """
        global NVIMGCODEC_LAST_USED
        bgr = self.to_bgr()
        gpu_ok = (
            self.is_cuda
            and HAS_NVIMGCODEC
            and nvimgcodec is not None
            and _is_cu(bgr.data)
        )
        if gpu_ok:
            try:
                jpeg = _encode_nvimgcodec_cuda(bgr.data, quality=quality)
            except Exception:
                # Best effort: fall through to the CPU encoder.
                NVIMGCODEC_LAST_USED = False
            else:
                NVIMGCODEC_LAST_USED = True
                return base64.b64encode(jpeg).decode("utf-8")
        success, buffer = cv2.imencode(
            ".jpg", _to_cpu(bgr.data), [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)]
        )
        if not success:
            raise ValueError("Failed to encode image as JPEG")
        return base64.b64encode(buffer.tobytes()).decode("utf-8")

0 commit comments

Comments
 (0)