Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ This release streamlines Datumaro by removing a number of lesser-used features,

### New features
- Experimental dataset class
(<https://github.com/open-edge-platform/datumaro/pull/1807>, <https://github.com/open-edge-platform/datumaro/pull/1810>, <https://github.com/open-edge-platform/datumaro/pull/1811>, <https://github.com/open-edge-platform/datumaro/pull/1834>, <https://github.com/open-edge-platform/datumaro/pull/1858>, <https://github.com/open-edge-platform/datumaro/pull/1845>, <https://github.com/open-edge-platform/datumaro/pull/1863>, <https://github.com/open-edge-platform/datumaro/pull/1868>, <https://github.com/open-edge-platform/datumaro/pull/1876>)
(<https://github.com/open-edge-platform/datumaro/pull/1807>, <https://github.com/open-edge-platform/datumaro/pull/1810>, <https://github.com/open-edge-platform/datumaro/pull/1811>, <https://github.com/open-edge-platform/datumaro/pull/1834>, <https://github.com/open-edge-platform/datumaro/pull/1858>, <https://github.com/open-edge-platform/datumaro/pull/1845>, <https://github.com/open-edge-platform/datumaro/pull/1863>, <https://github.com/open-edge-platform/datumaro/pull/1868>, <https://github.com/open-edge-platform/datumaro/pull/1876>, <https://github.com/open-edge-platform/datumaro/pull/1877>)

### Enhancements
- Mark several dependencies as optional
Expand Down
212 changes: 212 additions & 0 deletions src/datumaro/experimental/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
ImageInfo,
ImageInfoField,
ImagePathField,
InstanceMaskField,
LabelField,
MaskField,
PolygonField,
Expand Down Expand Up @@ -617,3 +618,214 @@ def apply_conversion_batch(batch_df: pl.DataFrame) -> pl.DataFrame:
mask_data.struct.field("shape").alias(output_shape_column_name),
]
)


@converter(lazy=True)
class PolygonToInstanceMaskConverter(Converter):
    """
    Converts polygon annotations to instance masks.

    Transforms polygon coordinates into binary instance masks of shape (N, H, W)
    where N is the number of instances. Each mask represents a single instance
    without category information.

    The mask tensor is written flattened into the output column; its
    (N, H, W) shape is written to a companion ``<output name>_shape`` column.
    """

    input_polygon: AttributeSpec[PolygonField]
    input_image_info: AttributeSpec[ImageInfoField]
    output_instance_mask: AttributeSpec[InstanceMaskField]

    def filter_output_spec(self) -> bool:
        """
        Configure output specification for instance mask format.

        The output keeps its requested name and dtype and takes over the
        semantic of the input polygon field.

        Returns:
            Always True.
        """
        self.output_instance_mask = AttributeSpec(
            name=self.output_instance_mask.name,
            field=InstanceMaskField(
                semantic=self.input_polygon.field.semantic,
                dtype=self.output_instance_mask.field.dtype,
            ),
        )
        return True

    def convert(self, df: pl.DataFrame) -> pl.DataFrame:
        """
        Rasterize polygon coordinates into instance masks.

        Each polygon of a row is filled into its own (H, W) mask, where H and W
        come from the row's image info (``"height"`` / ``"width"``). The masks
        of a row are stacked into an (N, H, W) tensor. A row without polygons
        yields an empty mask with shape ``[0, H, W]``.

        Args:
            df: DataFrame with polygon coordinates and image info

        Returns:
            DataFrame with the flattened instance mask data in the output
            column and the mask shape in the ``<output name>_shape`` column
        """
        input_column_name = self.input_polygon.name
        image_info_column_name = self.input_image_info.name
        output_column_name = self.output_instance_mask.name
        output_shape_column_name = self.output_instance_mask.name + "_shape"

        # These do not depend on the row being processed, so resolve them once
        # instead of once per row inside the rasterization helper.
        mask_dtype = self.output_instance_mask.field.dtype
        numpy_dtype = polars_to_numpy_dtype(mask_dtype)
        is_normalized = self.input_polygon.field.normalize

        def polygons_to_instance_masks(
            polygons_data: list, img_info: dict
        ) -> tuple[np.ndarray, list[int]]:
            """Rasterize the polygons of one row using OpenCV contour filling.

            Returns the flattened mask values and the [N, H, W] shape.
            """
            image_width = img_info["width"]
            image_height = img_info["height"]

            if len(polygons_data) == 0:
                # No polygons: empty mask with shape (0, H, W). A typed array is
                # returned (not a plain empty list) so that the element dtype is
                # carried along exactly as in the non-empty case.
                empty_mask = np.array([], dtype=numpy_dtype)
                return empty_mask, [0, image_height, image_width]

            instance_masks = []
            for polygon_data in polygons_data:
                coords = polygon_data.to_numpy()

                if is_normalized:
                    # Scale normalized coordinates to pixels; work on a copy so
                    # the array obtained from polars is never modified in place.
                    coords = coords.copy()
                    coords[:, 0] *= image_width
                    coords[:, 1] *= image_height

                # OpenCV expects integer contour points (fractions are dropped).
                contour = coords.astype(np.int32)

                # Draw on uint8 for OpenCV compatibility, then convert to the
                # target dtype (e.g., bool).
                mask = np.zeros((image_height, image_width), dtype=np.uint8)
                cv2.drawContours(
                    mask,
                    [contour],
                    0,
                    1,  # Fill with 1 for binary instance mask
                    thickness=cv2.FILLED,
                )
                instance_masks.append(mask.astype(numpy_dtype))

            # Stack into (N, H, W) tensor
            stacked_masks = np.stack(instance_masks, axis=0)
            return stacked_masks.reshape(-1), list(stacked_masks.shape)

        def apply_conversion_batch(batch: pl.Series, **kwargs) -> pl.Series:
            """Apply polygon-to-instance-mask conversion for a batch.

            ``batch`` is the struct built below with the fields "polygons" and
            "img_info"; the result is a struct with the fields "mask" and "shape".
            """
            batch_polygons = batch.struct["polygons"]
            batch_img_infos = batch.struct["img_info"]

            results_batch_mask = []
            results_batch_shape = []

            for polygons, img_info in zip(batch_polygons, batch_img_infos):
                mask_data, shape_data = polygons_to_instance_masks(polygons, img_info)
                results_batch_mask.append(pl.Series(mask_data))
                results_batch_shape.append(shape_data)

            return pl.struct(
                pl.Series(results_batch_mask).alias("mask"),
                pl.Series(results_batch_shape, dtype=pl.List(pl.Int32)).alias("shape"),
                eager=True,
            )

        # Bundle both inputs into one struct so the batch function sees the
        # polygons and the image info of the same row side by side.
        mask_data = pl.struct(
            [
                pl.col(input_column_name).alias("polygons"),
                pl.col(image_info_column_name).alias("img_info"),
            ]
        ).map_batches(
            apply_conversion_batch,
            return_dtype=pl.Struct({"mask": pl.List(mask_dtype), "shape": pl.List(pl.Int32)}),
        )

        return df.with_columns(
            [
                mask_data.struct.field("mask").alias(output_column_name),
                mask_data.struct.field("shape").alias(output_shape_column_name),
            ]
        )


@converter(lazy=True)
class PolygonToBBoxConverter(Converter):
    """
    Converts polygon annotations to bounding boxes.

    For every polygon, the smallest and largest x and y coordinates are taken
    to form the box that encloses it.
    """

    input_polygon: AttributeSpec[PolygonField]
    output_bbox: AttributeSpec[BBoxField]

    def filter_output_spec(self) -> bool:
        """
        Configure output specification for bounding box format.

        The output keeps its requested name and bbox format; semantic, dtype
        and normalization are inherited from the input polygon field.

        Returns:
            Always True.
        """
        polygon_field = self.input_polygon.field
        requested = self.output_bbox
        self.output_bbox = AttributeSpec(
            name=requested.name,
            field=BBoxField(
                semantic=polygon_field.semantic,
                dtype=polygon_field.dtype,
                format=requested.field.format,
                normalize=polygon_field.normalize,
            ),
        )
        return True

    def convert(self, df: pl.DataFrame) -> pl.DataFrame:
        """
        Extract bounding boxes from polygon coordinates.

        Args:
            df: DataFrame with polygon coordinates

        Returns:
            DataFrame with bounding box data in output column

        Raises:
            NotImplementedError: If the output bbox format is neither
                "x1y1x2y2" nor "xywh".
        """
        polygon_column = self.input_polygon.name
        bbox_column = self.output_bbox.name

        def coordinate(axis: int) -> pl.Expr:
            # All values of one coordinate (0 = x, 1 = y) over a polygon's points.
            return pl.element().list.eval(pl.element().arr.get(axis))

        def corner(index: int) -> pl.Expr:
            # One entry of an already computed [x1, y1, x2, y2] box.
            return pl.element().arr.get(index)

        # First pass: one [x_min, y_min, x_max, y_max] box per polygon.
        enclosing_box = pl.concat_arr(
            [
                coordinate(0).list.min(),
                coordinate(1).list.min(),
                coordinate(0).list.max(),
                coordinate(1).list.max(),
            ]
        )
        df = df.with_columns(pl.col(polygon_column).list.eval(enclosing_box).alias(bbox_column))

        # Second pass: reshape the box into the requested output format.
        if self.output_bbox.field.format == "x1y1x2y2":
            # Nothing to do, the first pass already produced this layout.
            return df

        if self.output_bbox.field.format == "xywh":
            # Keep the top-left corner and turn the far corner into width/height.
            as_xywh = pl.concat_arr(
                [
                    corner(0),
                    corner(1),
                    corner(2) - corner(0),
                    corner(3) - corner(1),
                ]
            )
            return df.with_columns(pl.col(bbox_column).list.eval(as_xywh))

        raise NotImplementedError(
            f"This conversion is not yet implemented "
            f"for the format {self.output_bbox.field.format}."
        )
51 changes: 51 additions & 0 deletions src/datumaro/experimental/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,3 +513,54 @@ def mask_field(dtype: Any = pl.UInt8(), semantic: Semantic = Semantic.Default) -
MaskField instance configured with the given parameters
"""
return MaskField(semantic=semantic, dtype=dtype)


@dataclass(frozen=True)
class InstanceMaskField(Field):
    """
    Represents an instance mask tensor field for instance segmentation masks.

    Handles 3D tensor data of shape (N, H, W) where N is the number of instances,
    H and W are the mask height and width. Each mask is a binary mask representing
    a single instance. Unlike MaskField, this does not contain category information.

    The tensor is stored in two columns: ``<name>`` holds the flattened values
    and ``<name>_shape`` holds the shape needed to restore the tensor.

    Attributes:
        semantic: Semantic tags describing the instance mask purpose
        dtype: Polars data type for mask values (defaults to bool for binary masks)
    """

    semantic: Semantic
    dtype: PolarsDataType = pl.Boolean()

    def to_polars_schema(self, name: str) -> dict[str, pl.DataType]:
        """Generate Polars schema with separate columns for data and shape."""
        return {name: pl.List(self.dtype), name + "_shape": pl.List(pl.Int32())}

    def to_polars(self, name: str, value: Any) -> dict[str, pl.Series]:
        """
        Convert instance mask tensor to flattened data and shape information.

        Args:
            name: Base column name
            value: Mask tensor, converted with ``to_numpy`` using this field's dtype

        Returns:
            One-row Series for the ``<name>`` and ``<name>_shape`` columns
        """
        numpy_value = to_numpy(value, self.dtype)
        shape_name = name + "_shape"
        return {
            name: pl.Series(name, [numpy_value.reshape(-1)]),
            # Pin the dtype to the one declared in to_polars_schema; without it
            # the integer type would be inferred from the Python ints.
            shape_name: pl.Series(
                shape_name, [list(numpy_value.shape)], dtype=pl.List(pl.Int32())
            ),
        }

    def from_polars(self, name: str, row_index: int, df: pl.DataFrame, target_type: type[T]) -> T:
        """
        Reconstruct instance mask tensor from flattened data using stored shape.

        Args:
            name: Base column name
            row_index: Row to read from ``df``
            df: DataFrame holding the ``<name>`` and ``<name>_shape`` columns
            target_type: Type the restored tensor is converted to

        Returns:
            The reshaped tensor converted via ``from_polars_data``
        """
        flat_data = df[name][row_index]
        shape = df[name + "_shape"][row_index]
        numpy_data = np.array(flat_data).reshape(shape)
        return from_polars_data(numpy_data, target_type)  # type: ignore


def instance_mask_field(dtype: Any = pl.Boolean(), semantic: Semantic = Semantic.Default) -> Any:
    """
    Build an InstanceMaskField from the given settings.

    Args:
        dtype: Polars data type of the mask values; pl.Boolean() when omitted
        semantic: Semantic tags for the instance mask; Semantic.Default when omitted

    Returns:
        The InstanceMaskField created from ``dtype`` and ``semantic``
    """
    field = InstanceMaskField(dtype=dtype, semantic=semantic)
    return field
4 changes: 2 additions & 2 deletions src/datumaro/experimental/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def create_from_categories(

bbox_labels_attribute = AttributeInfo(
type=np.ndarray,
annotation=label_field(dtype=pl.Int32, multi_label=False, is_list=True),
annotation=label_field(is_list=True),
categories=new_label_categories,
)

Expand Down Expand Up @@ -324,7 +324,7 @@ def create_from_categories(

polygon_labels_attribute = AttributeInfo(
type=np.ndarray,
annotation=label_field(dtype=pl.Int32, multi_label=False, is_list=True),
annotation=label_field(is_list=True),
categories=new_label_categories,
)

Expand Down
Loading