open-edge-platform · sooahleex · Oct 10, 2024 · Sep 9, 2024 · Sep 10, 2024 · Sep 30, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/1615>)
 - Update docs for transform plugins
   (<https://github.com/openvinotoolkit/datumaro/pull/1599>)
+- Update the OpenVINO IR model used by the explorer OpenVINO launcher to CLIP ViT-L/14@336px
+  (<https://github.com/openvinotoolkit/datumaro/pull/1603>)
 
 ### Bug fixes
 

diff --git a/src/datumaro/components/algorithms/hash_key_inference/base.py b/src/datumaro/components/algorithms/hash_key_inference/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -21,13 +21,13 @@ def __init__(self, *datasets: Sequence[Dataset]) -> None:
     @property
     def model(self):
         if self._model is None:
-            self._model = explorer.ExplorerLauncher(model_name="clip_visual_ViT-B_32")
+            self._model = explorer.ExplorerLauncher(model_name="clip_visual_vit_l_14_336px_int8")
         return self._model
 
     @property
     def text_model(self):
         if self._text_model is None:
-            self._text_model = explorer.ExplorerLauncher(model_name="clip_text_ViT-B_32")
+            self._text_model = explorer.ExplorerLauncher(model_name="clip_text_vit_l_14_336px_int8")
         return self._text_model
 
     def _compute_hash_key(self, datasets, datasets_to_infer):

diff --git a/src/datumaro/components/annotation.py b/src/datumaro/components/annotation.py
@@ -261,8 +261,8 @@ class HashKey(Annotation):
 
     @hash_key.validator
     def _validate(self, attribute, value: np.ndarray):
-        """Check whether value is a 1D Numpy array having 64 np.uint8 values"""
-        if value.ndim != 1 or value.shape[0] != 64 or value.dtype != np.uint8:
+        """Check whether value is a 1D Numpy array having 96 np.uint8 values"""
+        if value.ndim != 1 or value.shape[0] != 96 or value.dtype != np.uint8:
             raise ValueError(value)
 
     def __eq__(self, other):

diff --git a/src/datumaro/plugins/openvino_plugin/launcher.py b/src/datumaro/plugins/openvino_plugin/launcher.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 Intel Corporation
+# Copyright (C) 2019-2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -92,6 +92,8 @@ class BuiltinOpenvinoModelInfo(OpenvinoModelInfo):
     downloadable_models = {
         "clip_text_ViT-B_32",
         "clip_visual_ViT-B_32",
+        "clip_visual_vit_l_14_336px_int8",
+        "clip_text_vit_l_14_336px_int8",
         "googlenet-v4-tf",
     }
 

diff --git a/src/datumaro/plugins/openvino_plugin/samples/clip_text_vit_l_14_336px_int8_interp.py b/src/datumaro/plugins/openvino_plugin/samples/clip_text_vit_l_14_336px_int8_interp.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from typing import List, Tuple
+
+from datumaro.components.abstracts import IModelInterpreter
+from datumaro.components.abstracts.model_interpreter import LauncherInputType, ModelPred, PrepInfo
+from datumaro.components.annotation import Annotation, AnnotationType, LabelCategories
+from datumaro.components.dataset_base import DatasetItem
+from datumaro.components.errors import DatumaroError
+from datumaro.components.media import Image
+from datumaro.plugins.openvino_plugin.samples.utils import gen_hash_key
+
+
+class ClipTextViTL14ModelInterpreter(IModelInterpreter):
+    """Interpreter for the CLIP ViT-L/14@336px text model; wraps its output via gen_hash_key."""
+
+    def preprocess(self, inp: DatasetItem) -> Tuple[LauncherInputType, PrepInfo]:
+        # The item's image data is passed through unchanged; no prep info is recorded.
+        return inp.media_as(Image).data, None
+
+    def postprocess(self, pred: ModelPred, info: PrepInfo) -> List[Annotation]:
+        embedding = pred.get("output")
+        if embedding is None:
+            raise DatumaroError('"output" key should exist in the model prediction.')
+        return [gen_hash_key(embedding)]
+
+    def get_categories(self):
+        return {AnnotationType.label: LabelCategories()}
diff --git a/src/datumaro/plugins/openvino_plugin/samples/clip_visual_vit_l_14_336px_int8_interp.py b/src/datumaro/plugins/openvino_plugin/samples/clip_visual_vit_l_14_336px_int8_interp.py
@@ -0,0 +1,52 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os.path as osp
+from typing import List, Tuple
+
+import cv2
+import numpy as np
+
+from datumaro.components.abstracts import IModelInterpreter
+from datumaro.components.abstracts.model_interpreter import LauncherInputType, ModelPred, PrepInfo
+from datumaro.components.annotation import Annotation, AnnotationType, LabelCategories
+from datumaro.components.dataset_base import DatasetItem
+from datumaro.components.errors import DatumaroError
+from datumaro.components.media import Image
+from datumaro.plugins.openvino_plugin.samples.utils import gen_hash_key
+from datumaro.util.samples import get_samples_path
+
+
+class ClipViTL14ModelInterpreter(IModelInterpreter):
+    """Interpreter for the CLIP ViT-L/14@336px visual model; wraps its output via gen_hash_key."""
+
+    # Per-channel offset/scale on the 0-255 pixel range, shaped to broadcast over HxWx3.
+    # NOTE(review): these look like ImageNet statistics, not CLIP's published ones - confirm.
+    mean = (255 * np.array([0.485, 0.456, 0.406])).reshape(1, 1, 3)
+    std = (255 * np.array([0.229, 0.224, 0.225])).reshape(1, 1, 3)
+
+    def preprocess(self, inp: DatasetItem) -> Tuple[LauncherInputType, PrepInfo]:
+        """Resize to 336x336, convert BGR to RGB, normalize, and move channels first."""
+        resized = cv2.resize(inp.media_as(Image).data, (336, 336))
+        normalized = (cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) - self.mean) / self.std
+        if normalized.ndim == 3 and normalized.shape[2] in {3, 4}:
+            normalized = np.transpose(normalized, (2, 0, 1))
+        return normalized, None
+
+    def postprocess(self, pred: ModelPred, info: PrepInfo) -> List[Annotation]:
+        """Pass the prediction's "output" entry to gen_hash_key; raise if it is missing."""
+        embedding = pred.get("output")
+        if embedding is None:
+            raise DatumaroError('"output" key should exist in the model prediction.')
+        return [gen_hash_key(embedding)]
+
+    def get_categories(self):
+        """Return label categories read line by line from the samples' ``imagenet.class``."""
+        categories = LabelCategories()
+        class_list_path = osp.join(get_samples_path(), "imagenet.class")
+        with open(class_list_path, "r", encoding="utf-8") as class_file:
+            for name in [line.strip() for line in class_file]:
+                categories.add(name)
+
+        return {AnnotationType.label: categories}
diff --git a/tests/unit/test_annotation.py b/tests/unit/test_annotation.py
@@ -45,16 +45,16 @@ def test_get_points(self, fxt_ellipses: List[Ellipse]):
 class HashKeyTest:
     @pytest.fixture
     def fxt_hashkeys_same(self):
-        hash_key = np.random.randint(0, 256, size=(64,), dtype=np.uint8)
+        hash_key = np.random.randint(0, 256, size=(96,), dtype=np.uint8)
         hashkey1 = HashKey(hash_key=hash_key)
         hashkey2 = HashKey(hash_key=hash_key)
         return hashkey1, hashkey2
 
     @pytest.fixture
     def fxt_hashkeys_diff(self):
         np.random.seed(3003)
-        hashkey1 = HashKey(hash_key=np.random.randint(0, 256, size=(64,), dtype=np.uint8))
-        hashkey2 = HashKey(hash_key=np.random.randint(0, 256, size=(64,), dtype=np.uint8))
+        hashkey1 = HashKey(hash_key=np.random.randint(0, 256, size=(96,), dtype=np.uint8))
+        hashkey2 = HashKey(hash_key=np.random.randint(0, 256, size=(96,), dtype=np.uint8))
         return hashkey1, hashkey2
 
     @pytest.mark.parametrize(

diff --git a/tests/unit/test_explorer.py b/tests/unit/test_explorer.py
@@ -1,21 +1,17 @@
-import os.path as osp
-from copy import deepcopy
-from functools import partial
 from unittest import TestCase
+from unittest.mock import patch
 
 import numpy as np
 
 from datumaro.components.algorithms.hash_key_inference.explorer import Explorer
-from datumaro.components.annotation import AnnotationType, Caption, Label
+from datumaro.components.annotation import AnnotationType, HashKey
 from datumaro.components.dataset import Dataset
 from datumaro.components.dataset_base import DatasetItem
 from datumaro.components.errors import MediaTypeError
-from datumaro.components.media import Image
-from datumaro.plugins.data_formats.datumaro.exporter import DatumaroExporter
+from datumaro.util.meta_file_util import load_hash_key
 
 from tests.requirements import Requirements, mark_requirement
 from tests.utils.assets import get_test_asset_path
-from tests.utils.test_utils import TestDir
 
 
 class ExplorerTest(TestCase):
@@ -171,3 +167,71 @@ def test_pointcloud_assert(self):
         with self.assertRaises(MediaTypeError) as capture:
             Explorer(imported_dataset)
         self.assertIn("PointCloud", str(capture.exception))
+
+
+class MetaFileTest(TestCase):
+    """Unit tests for ``load_hash_key`` imported from ``datumaro.util.meta_file_util``."""
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_no_hashkey_dir(self):
+        """
+        The returned dataset must equal the input when the hashkey directory is missing.
+        """
+        items = [DatasetItem(id="000001", subset="test")]
+
+        # Every path is reported as "not a directory" while the call runs.
+        with patch("os.path.isdir", return_value=False):
+            loaded = load_hash_key("invalid_path", items)
+            self.assertEqual(loaded, items)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_no_hashkey_file(self):
+        """
+        The returned dataset must equal the input when the hashkey file is missing.
+        """
+        items = [DatasetItem(id="000001", subset="test")]
+        no_file = patch("datumaro.util.meta_file_util.has_hashkey_file", return_value=False)
+
+        # The directory check passes, but has_hashkey_file reports that no file exists.
+        with patch("os.path.isdir", return_value=True), no_file:
+            loaded = load_hash_key("hashkey_dir", items)
+            self.assertEqual(loaded, items)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_load_hash_key(self):
+        """
+        Each item must receive one HashKey annotation holding the key parsed for it.
+        """
+        items = [
+            DatasetItem(id="000001", subset="train", annotations=[]),
+            DatasetItem(id="000002", subset="val", annotations=[]),
+        ]
+        expected_keys = [
+            np.ones((96,), dtype=np.uint8),
+            np.zeros((96,), dtype=np.uint8),
+        ]
+
+        # Stand-in for the parsed hashkey file: "<subset>/<id>" mapped to the key as a list.
+        parsed = {
+            "train/000001": expected_keys[0].tolist(),
+            "val/000002": expected_keys[1].tolist(),
+        }
+        # Module whose helpers are replaced by mocks below.
+        module = "datumaro.util.meta_file_util"
+
+        with patch("os.path.isdir", return_value=True), patch(
+            module + ".has_hashkey_file", return_value=True
+        ), patch(module + ".parse_hashkey_file", return_value=parsed):
+            result = load_hash_key("hashkey_dir", items)
+
+            # The item count and the first item's id/subset must be preserved.
+            self.assertEqual(len(result), len(items))
+            self.assertEqual(result[0].id, items[0].id)
+            self.assertEqual(result[0].subset, items[0].subset)
+
+            # Every item must carry exactly one HashKey matching its expected key.
+            for index, expected in enumerate(expected_keys):
+                annotations = result[index].annotations
+                self.assertEqual(len(annotations), 1)
+                self.assertIsInstance(annotations[0], HashKey)
+                self.assertTrue(np.array_equal(annotations[0].hash_key, expected))
diff --git a/tests/unit/test_hashkey.py b/tests/unit/test_hashkey.py
@@ -46,7 +46,7 @@ def fxt_dataset_dir_with_hash_key(test_dir, fxt_data_format):
     test_asset_dir = test_asset_dir_map[fxt_data_format]
     dataset = Dataset.import_from(test_asset_dir, format=fxt_data_format)
     for item in dataset:
-        hash_key = HashKey(hash_key=np.random.randint(0, 256, size=(64,), dtype=np.uint8))
+        hash_key = HashKey(hash_key=np.random.randint(0, 256, size=(96,), dtype=np.uint8))
         item.annotations += [hash_key]
 
     if fxt_data_format == "wider_face":

diff --git a/tests/unit/test_visualizer.py b/tests/unit/test_visualizer.py
@@ -474,7 +474,7 @@ def setUpClass(cls):
         super().setUpClass()
 
         for item in cls.dataset:
-            item.annotations.append(HashKey(np.ones(64).astype(np.uint8)))
+            item.annotations.append(HashKey(np.ones(96).astype(np.uint8)))
 
     @mark_requirement(Requirements.DATUM_GENERAL_REQ)
     def test_vis_one_sample(self):