Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<https://github.com/openvinotoolkit/datumaro/pull/575>)
- Import for VoTT dataset format
(<https://github.com/openvinotoolkit/datumaro/pull/573>)
- Image resizing transform
(<https://github.com/openvinotoolkit/datumaro/pull/581>)

### Changed
- The following formats can now be detected unambiguously:
Expand Down
90 changes: 87 additions & 3 deletions datumaro/plugins/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,19 @@
import random
import re

import cv2
import numpy as np
import pycocotools.mask as mask_utils

from datumaro.components.annotation import (
AnnotationType, Bbox, Label, LabelCategories, MaskCategories,
PointsCategories, Polygon, RleMask,
AnnotationType, Bbox, Caption, Label, LabelCategories, Mask, MaskCategories,
Points, PointsCategories, Polygon, PolyLine, RleMask,
)
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.extractor import (
DEFAULT_SUBSET_NAME, IExtractor, ItemTransform, Transform,
)
from datumaro.util import NOTSET, parse_str_enum_value
from datumaro.util import NOTSET, parse_str_enum_value, take_by
from datumaro.util.annotation_util import find_group_leader, find_instances
import datumaro.util.mask_tools as mask_tools

Expand Down Expand Up @@ -734,3 +736,85 @@ def transform_item(self, item):
label=bbox.label, attributes=bbox.attributes))

return item.wrap(annotations=annotations)

class ResizeTransform(ItemTransform):
    """
    Resizes images and annotations in the dataset to the specified size.
    Supports upscaling, downscaling and mixed variants.|n
    |n
    Examples:|n
    - Resize all images to 256x256 size|n
    |s|s%(prog)s -dw 256 -dh 256
    """

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument('-dw', '--width', type=int,
            help="Destination image width")
        parser.add_argument('-dh', '--height', type=int,
            help="Destination image height")
        return parser

    def __init__(self, extractor: IExtractor, width: int, height: int) -> None:
        super().__init__(extractor)

        # Validate with an exception rather than assert, so the check
        # survives running Python with -O.
        if width <= 0 or height <= 0:
            raise ValueError("width and height must be positive, got "
                f"{width}x{height}")
        self._width = width
        self._height = height

    def transform_item(self, item):
        """Returns a copy of the item with the image and all spatial
        annotations rescaled to the target size.

        Raises:
            Exception: if the item carries no image info (the source
                size is required to compute the scale factors).
        """
        if not item.has_image:
            raise Exception("Image info is required for this transform")

        h, w = item.image.size
        xscale = self._width / float(w)
        yscale = self._height / float(h)

        # When the item has no pixel data, only annotations are rescaled.
        # Initializing here avoids a NameError in wrap_item() below.
        resized_image = None
        if item.image.has_data:
            # LANCZOS4 is preferable for upscaling, but it works quite slow
            method = cv2.INTER_AREA if (xscale * yscale) < 1 \
                else cv2.INTER_CUBIC
            # Normalize to [0, 1] for interpolation, then restore the
            # original value range - otherwise the output image would be
            # silently rescaled to [0, 1].
            resized_image = cv2.resize(item.image.data / 255.0,
                (self._width, self._height), interpolation=method)
            resized_image *= 255.0

        resized_annotations = []
        for ann in item.annotations:
            if isinstance(ann, Bbox):
                resized_annotations.append(ann.wrap(
                    x=ann.x * xscale,
                    y=ann.y * yscale,
                    w=ann.w * xscale,
                    h=ann.h * yscale,
                ))
            elif isinstance(ann, (Polygon, Points, PolyLine)):
                # Points are stored flat as [x0, y0, x1, y1, ...];
                # scale each coordinate pair and flatten back.
                resized_annotations.append(ann.wrap(
                    points=[p
                        for t in ((x * xscale, y * yscale)
                            for x, y in take_by(ann.points, 2)
                        )
                        for p in t
                    ]
                ))
            elif isinstance(ann, Mask):
                # Can use only NEAREST for masks,
                # because we can't have interpolated values
                rescaled_mask = cv2.resize(ann.image.astype(np.float32),
                    (self._width, self._height),
                    interpolation=cv2.INTER_NEAREST).astype(np.uint8)

                if isinstance(ann, RleMask):
                    rle = mask_tools.mask_to_rle(rescaled_mask)
                    resized_annotations.append(ann.wrap(
                        rle=mask_utils.frPyObjects(rle, *rle['size'])))
                else:
                    resized_annotations.append(ann.wrap(image=rescaled_mask))
            elif isinstance(ann, (Caption, Label)):
                # Non-spatial annotations are carried over unchanged.
                resized_annotations.append(ann)
            else:
                raise NotImplementedError(
                    f"Unexpected annotation type {type(ann)}")

        return self.wrap_item(item,
            image=resized_image,
            annotations=resized_annotations)
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Basic dataset item manipulations:
- `reindex` - Renames dataset items with numbers
- `ndr` - Removes duplicated images from dataset
- `sampler` - Runs inference and leaves only the most representative images
- `resize` - Resizes images and annotations in the dataset

Subset manipulations:
- `random_split` - Splits dataset into subsets randomly
Expand Down Expand Up @@ -206,3 +207,9 @@ datum transform -t ndr -- \
-e random \
-u uniform
```

- Resize dataset images and annotations. Supports upscaling, downscaling
and mixed variants.

  ```
  datum transform -t resize -- -dw 256 -dh 256
  ```
46 changes: 46 additions & 0 deletions tests/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,3 +566,49 @@ def test_bboxes_values_decrement_transform(self):
actual = transforms.BboxValuesDecrement(src_dataset)

compare_datasets(self, dst_dataset, actual)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_resize(self):
small_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((4, 4)), annotations=[
Label(1),
Bbox(1, 1, 2, 2, label=2),
Polygon([1, 1, 1, 2, 2, 2, 2, 1], label=1),
PolyLine([1, 1, 1, 2, 2, 2, 2, 1], label=2),
Points([1, 1, 1, 2, 2, 2, 2, 1], label=2),
Mask(np.array([
[0, 0, 1, 1],
[1, 0, 0, 1],
[0, 1, 1, 0],
[1, 1, 0, 0],
]))
])
], categories=['a', 'b', 'c'])

big_dataset = Dataset.from_iterable([
DatasetItem(id=1, image=np.zeros((8, 8)), annotations=[
Label(1),
Bbox(2, 2, 4, 4, label=2),
Polygon([2, 2, 2, 4, 4, 4, 4, 2], label=1),
PolyLine([2, 2, 2, 4, 4, 4, 4, 2], label=2),
Points([2, 2, 2, 4, 4, 4, 4, 2], label=2),
Mask(np.array([
[0, 0, 0, 0, 1, 1, 1, 1],
[0, 0, 0, 0, 1, 1, 1, 1],
[1, 1, 0, 0, 0, 0, 1, 1],
[1, 1, 0, 0, 0, 0, 1, 1],
[0, 0, 1, 1, 1, 1, 0, 0],
[0, 0, 1, 1, 1, 1, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0],
]))
])
], categories=['a', 'b', 'c'])

with self.subTest('upscale'):
actual = transforms.ResizeTransform(small_dataset, width=8, height=8)
compare_datasets(self, big_dataset, actual)

with self.subTest('downscale'):
actual = transforms.ResizeTransform(big_dataset, width=4, height=4)
compare_datasets(self, small_dataset, actual)