Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
- `Icdar13/15` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/96>)
- Laziness, source caching, tracking of changes and partial updating for `Dataset` (<https://github.com/openvinotoolkit/datumaro/pull/102>)
- `Market-1501` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/108>)

### Changed
- OpenVINO model launcher is updated for OpenVINO r2021.1 (<https://github.com/openvinotoolkit/datumaro/pull/100>)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ CVAT annotations ---> Publication, statistics etc.
- [CVAT](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
- [LabelMe](http://labelme.csail.mit.edu/Release3.0)
- [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) (`word_recognition`, `text_localization`, `text_segmentation`)
- [Market-1501](https://www.aitribune.com/dataset/2018051063) (`person re-identification`)
- Dataset building
- Merging multiple datasets into one
- Dataset filtering by a custom criteria:
Expand Down
81 changes: 81 additions & 0 deletions datumaro/plugins/market1501_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (C) 2020 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os.path as osp
import re
from glob import glob

from datumaro.components.converter import Converter
from datumaro.components.extractor import (DatasetItem, Importer,
SourceExtractor)


class Market1501Path:
    """Layout constants of the Market-1501 dataset directory format."""

    # All images are stored as JPEG files.
    IMAGE_EXT = '.jpg'
    # Query images live in a flat 'query' directory.
    QUERY_DIR = 'query'
    # Gallery images live in 'bounding_box_<subset>' directories.
    BBOX_DIR = 'bounding_box_'
    # File names look like '0001_c2s3_000001_00':
    # person id, then 'c' + camera id.
    PATTERN = re.compile(r'([-\d]+)_c(\d)')

class Market1501Extractor(SourceExtractor):
    """Reads a Market-1501 dataset from a directory containing a
    'query' folder and/or a 'bounding_box_<subset>' folder of .jpg images.
    """

    def __init__(self, path):
        if not osp.isdir(path):
            raise NotADirectoryError(
                "Can't open folder with annotation files '%s'" % path)

        # Infer the subset name from the first 'bounding_box_*' directory
        # found, e.g. 'bounding_box_test' -> subset 'test'.
        subset = ''
        for dirname in glob(osp.join(path, '*')):
            if osp.basename(dirname).startswith(Market1501Path.BBOX_DIR):
                subset = osp.basename(dirname).replace(
                    Market1501Path.BBOX_DIR, '')
                break
        super().__init__(subset=subset)

        self._path = path
        self._items = list(self._load_items(path).values())

    def _load_items(self, path):
        """Collects dataset items from the 'query' and
        'bounding_box_<subset>' directories.

        Returns: dict of item id -> DatasetItem.
        """
        items = {}

        paths = glob(osp.join(path, Market1501Path.QUERY_DIR, '*'))
        paths += glob(osp.join(path,
            Market1501Path.BBOX_DIR + self._subset, '*'))

        for image_path in paths:
            if not osp.isfile(image_path) or \
                    osp.splitext(image_path)[-1] != Market1501Path.IMAGE_EXT:
                continue

            item_id = osp.splitext(osp.basename(image_path))[0]

            # Fix: the original code called .groups() on the result of
            # search() without a None check, so any .jpg file not matching
            # the Market-1501 naming pattern crashed with AttributeError.
            # Skip such files instead.
            match = Market1501Path.PATTERN.search(image_path)
            if match is None:
                continue
            pid, camid = map(int, match.groups())
            if pid == -1:  # person id -1 marks "junk" images in Market-1501
                continue

            attributes = {
                'person_id': pid,
                'camera_id': camid - 1,  # file names use 1-based camera ids
                # Items found under 'query' are query images.
                'query': osp.basename(osp.dirname(image_path)) ==
                    Market1501Path.QUERY_DIR,
            }
            items[item_id] = DatasetItem(id=item_id, subset=self._subset,
                image=image_path, attributes=attributes)
        return items

class Market1501Importer(Importer):
    """Finds Market-1501 sources at the given path."""

    @classmethod
    def find_sources(cls, path):
        # A dataset directory is treated as a single Market-1501 source;
        # anything that is not a directory yields no sources.
        if osp.isdir(path):
            return [{ 'url': path, 'format': 'market1501' }]
        return []

class Market1501Converter(Converter):
    """Writes a dataset in the Market-1501 directory layout."""

    DEFAULT_IMAGE_EXT = '.jpg'

    def apply(self):
        for subset_name, subset in self._extractor.subsets().items():
            for item in subset:
                # Only images are written in this format; skip items
                # without an image or when image saving is disabled.
                if not (item.has_image and self._save_images):
                    continue

                # Items marked as queries go into the 'query' folder.
                # Fix: everything else — including items that lack the
                # 'query' attribute entirely, which the original code
                # silently dropped — goes into 'bounding_box_<subset>'.
                if item.attributes and item.attributes.get('query'):
                    dirname = Market1501Path.QUERY_DIR
                else:
                    dirname = Market1501Path.BBOX_DIR + subset_name

                self._save_image(item, osp.join(self._save_dir,
                    dirname, item.id + Market1501Path.IMAGE_EXT))
3 changes: 3 additions & 0 deletions docs/user_manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ List of supported formats:
- ICDAR13/15 (`word_recognition`, `text_localization`, `text_segmentation`)
- [Format specification](https://rrc.cvc.uab.es/?ch=2)
- [Dataset example](../tests/assets/icdar_dataset)
- Market-1501 (`person re-identification`)
- [Format specification](https://www.aitribune.com/dataset/2018051063)
- [Dataset example](../tests/assets/market1501_dataset)

List of supported annotation types:
- Labels
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
93 changes: 93 additions & 0 deletions tests/test_market1501_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import os.path as osp
from unittest import TestCase

import numpy as np
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import DatasetItem
from datumaro.plugins.market1501_format import (Market1501Converter,
Market1501Importer)
from datumaro.util.test_utils import TestDir, compare_datasets


class Market1501FormatTest(TestCase):
    @staticmethod
    def _make_item(item_id, person_id, camera_id, query, subset=None):
        # Builds a DatasetItem with the standard Market-1501 attributes.
        return DatasetItem(id=item_id, subset=subset,
            image=np.ones((2, 5, 3)),
            attributes={
                'camera_id': camera_id,
                'person_id': person_id,
                'query': query,
            })

    def test_can_save_and_load(self):
        source_dataset = Dataset.from_iterable([
            self._make_item('0001_c2s3_000001_00', 1, 1, True,
                subset='test'),
            self._make_item('0002_c4s2_000002_00', 2, 3, False,
                subset='test'),
            self._make_item('0001_c1s1_000003_00', 1, 0, False,
                subset='test'),
        ])

        with TestDir() as test_dir:
            # Round-trip: write with the converter, read back with
            # the importer, and compare.
            Market1501Converter.convert(source_dataset, test_dir,
                save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'market1501')

            compare_datasets(self, source_dataset, parsed_dataset)

    def test_can_save_dataset_with_no_subsets(self):
        source_dataset = Dataset.from_iterable([
            self._make_item('0001_c2s3_000001_00', 1, 1, True),
        ])

        with TestDir() as test_dir:
            Market1501Converter.convert(source_dataset, test_dir,
                save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'market1501')

            compare_datasets(self, source_dataset, parsed_dataset)

DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets',
    'market1501_dataset')

class Market1501ImporterTest(TestCase):
    def test_can_detect(self):
        self.assertTrue(Market1501Importer.detect(DUMMY_DATASET_DIR))

    def test_can_import(self):
        # (item id, attributes) pairs expected from the dummy dataset.
        expected_items = [
            ('0001_c2s3_000111_00',
                {'camera_id': 1, 'person_id': 1, 'query': True}),
            ('0001_c1s1_001051_00',
                {'camera_id': 0, 'person_id': 1, 'query': False}),
        ]
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id=item_id, subset='test',
                image=np.ones((2, 5, 3)), attributes=attrs)
            for item_id, attrs in expected_items
        ])

        dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'market1501')

        compare_datasets(self, expected_dataset, dataset)