Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
- `Icdar13/15` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/96>)
- Laziness, source caching, tracking of changes and partial updating for `Dataset` (<https://github.com/openvinotoolkit/datumaro/pull/102>)
- `Market-1501` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/108>)

### Changed
- OpenVINO model launcher is updated for OpenVINO r2021.1 (<https://github.com/openvinotoolkit/datumaro/pull/100>)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ CVAT annotations ---> Publication, statistics etc.
- [CVAT](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- [LabelMe](http://labelme.csail.mit.edu/Release3.0)
- [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) (`word_recognition`, `text_localization`, `text_segmentation`)
- [Market-1501](https://www.aitribune.com/dataset/2018051063) (`person re-identification`)
- Dataset building
- Merging multiple datasets into one
- Dataset filtering by a custom criteria:
Expand Down
81 changes: 81 additions & 0 deletions datumaro/plugins/market1501_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os.path as osp
import re
from glob import glob

from datumaro.components.converter import Converter
from datumaro.components.extractor import (DatasetItem, Importer,
SourceExtractor)


class Market1501Path:
    """Layout constants of the Market-1501 dataset directory format."""

    # All images are stored as JPEG files.
    IMAGE_EXT = '.jpg'
    # Query images live in a flat 'query' directory.
    QUERY_DIR = 'query'
    # Gallery images live in 'bounding_box_<subset>' directories.
    BBOX_DIR = 'bounding_box_'
    # File names look like '0001_c2s3_000001_00':
    # person id, then 'c' + camera id.
    PATTERN = re.compile(r'([-\d]+)_c(\d)')

class Market1501Extractor(SourceExtractor):
    """Reads a Market-1501 dataset from a directory containing a
    'query' folder and/or a 'bounding_box_<subset>' folder of .jpg images.
    """

    def __init__(self, path):
        if not osp.isdir(path):
            raise NotADirectoryError(
                "Can't open folder with annotation files '%s'" % path)

        # Infer the subset name from the first 'bounding_box_*' directory
        # found, e.g. 'bounding_box_test' -> subset 'test'.
        subset = ''
        for dirname in glob(osp.join(path, '*')):
            if osp.basename(dirname).startswith(Market1501Path.BBOX_DIR):
                subset = osp.basename(dirname).replace(
                    Market1501Path.BBOX_DIR, '')
                break
        super().__init__(subset=subset)

        self._path = path
        self._items = list(self._load_items(path).values())

    def _load_items(self, path):
        """Collects dataset items from the 'query' and
        'bounding_box_<subset>' directories.

        Returns: dict of item id -> DatasetItem.
        """
        items = {}

        paths = glob(osp.join(path, Market1501Path.QUERY_DIR, '*'))
        paths += glob(osp.join(path,
            Market1501Path.BBOX_DIR + self._subset, '*'))

        for image_path in paths:
            if not osp.isfile(image_path) or \
                    osp.splitext(image_path)[-1] != Market1501Path.IMAGE_EXT:
                continue

            item_id = osp.splitext(osp.basename(image_path))[0]

            # Fix: the original code called .groups() on the result of
            # search() without a None check, so any .jpg file not matching
            # the Market-1501 naming pattern crashed with AttributeError.
            # Skip such files instead.
            match = Market1501Path.PATTERN.search(image_path)
            if match is None:
                continue
            pid, camid = map(int, match.groups())
            if pid == -1:  # person id -1 marks "junk" images in Market-1501
                continue

            attributes = {
                'person_id': pid,
                'camera_id': camid - 1,  # file names use 1-based camera ids
                # Items found under 'query' are query images.
                'query': osp.basename(osp.dirname(image_path)) ==
                    Market1501Path.QUERY_DIR,
            }
            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
                image=image_path, attributes=attributes)
        return items

class Market1501Importer(Importer):
    """Finds Market-1501 sources at the given path."""

    @classmethod
    def find_sources(cls, path):
        # A dataset directory is treated as a single Market-1501 source;
        # anything that is not a directory yields no sources.
        if osp.isdir(path):
            return [{ 'url': path, 'format': 'market1501' }]
        return []

class Market1501Converter(Converter):
    """Writes a dataset in the Market-1501 directory layout."""

    DEFAULT_IMAGE_EXT = '.jpg'

    def apply(self):
        for subset_name, subset in self._extractor.subsets().items():
            for item in subset:
                # Only images are written in this format; skip items
                # without an image or when image saving is disabled.
                if not (item.has_image and self._save_images):
                    continue

                # Items marked as queries go into the 'query' folder.
                # Fix: everything else — including items that lack the
                # 'query' attribute entirely, which the original code
                # silently dropped — goes into 'bounding_box_<subset>'.
                if item.attributes and item.attributes.get('query'):
                    dirname = Market1501Path.QUERY_DIR
                else:
                    dirname = Market1501Path.BBOX_DIR + subset_name

                self._save_image(item, osp.join(self._save_dir,
                    dirname, item.id + Market1501Path.IMAGE_EXT))
3 changes: 3 additions & 0 deletions docs/user_manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ List of supported formats:
- ICDAR13/15 (`word_recognition`, `text_localization`, `text_segmentation`)
- [Format specification](https://rrc.cvc.uab.es/?ch=2)
- [Dataset example](../tests/assets/icdar_dataset)
- Market-1501 (`person re-identification`)
- [Format specification](https://www.aitribune.com/dataset/2018051063)
- [Dataset example](../tests/assets/market1501_dataset)

List of supported annotation types:
- Labels
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
93 changes: 93 additions & 0 deletions tests/test_market1501_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import os.path as osp
from unittest import TestCase

import numpy as np
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import DatasetItem
from datumaro.plugins.market1501_format import (Market1501Converter,
Market1501Importer)
from datumaro.util.test_utils import TestDir, compare_datasets


class Market1501FormatTest(TestCase):
    @staticmethod
    def _make_item(item_id, person_id, camera_id, query, subset=None):
        # Builds a DatasetItem with the standard Market-1501 attributes.
        return DatasetItem(id=item_id, subset=subset,
            image=np.ones((2, 5, 3)),
            attributes={
                'camera_id': camera_id,
                'person_id': person_id,
                'query': query,
            })

    def test_can_save_and_load(self):
        source_dataset = Dataset.from_iterable([
            self._make_item('0001_c2s3_000001_00', 1, 1, True,
                subset='test'),
            self._make_item('0002_c4s2_000002_00', 2, 3, False,
                subset='test'),
            self._make_item('0001_c1s1_000003_00', 1, 0, False,
                subset='test'),
        ])

        with TestDir() as test_dir:
            # Round-trip: write with the converter, read back with
            # the importer, and compare.
            Market1501Converter.convert(source_dataset, test_dir,
                save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'market1501')

            compare_datasets(self, source_dataset, parsed_dataset)

    def test_can_save_dataset_with_no_subsets(self):
        source_dataset = Dataset.from_iterable([
            self._make_item('0001_c2s3_000001_00', 1, 1, True),
        ])

        with TestDir() as test_dir:
            Market1501Converter.convert(source_dataset, test_dir,
                save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'market1501')

            compare_datasets(self, source_dataset, parsed_dataset)

DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets',
    'market1501_dataset')

class Market1501ImporterTest(TestCase):
    def test_can_detect(self):
        self.assertTrue(Market1501Importer.detect(DUMMY_DATASET_DIR))

    def test_can_import(self):
        # (item id, attributes) pairs expected from the dummy dataset.
        expected_items = [
            ('0001_c2s3_000111_00',
                {'camera_id': 1, 'person_id': 1, 'query': True}),
            ('0001_c1s1_001051_00',
                {'camera_id': 0, 'person_id': 1, 'query': False}),
        ]
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id=item_id, subset='test',
                image=np.ones((2, 5, 3)), attributes=attrs)
            for item_id, attrs in expected_items
        ])

        dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'market1501')

        compare_datasets(self, expected_dataset, dataset)