Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/537>)
- Datumaro can now be installed on Windows on Python 3.9
(<https://github.com/openvinotoolkit/datumaro/pull/547>)
- Import for SYNTHIA dataset format.
- Import for SYNTHIA dataset format
(<https://github.com/openvinotoolkit/datumaro/pull/532>)
- Support for Accuracy Checker dataset meta files in formats
(<https://github.com/openvinotoolkit/datumaro/pull/553>)

### Changed
- The following formats can now be detected unambiguously:
Expand Down
10 changes: 9 additions & 1 deletion datumaro/components/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from datumaro.components.dataset import DatasetPatch
from datumaro.components.extractor import DatasetItem
from datumaro.components.media import Image
from datumaro.util.meta_file_util import save_meta_file
from datumaro.util.os_util import rmtree
from datumaro.util.scope import on_error_do, scoped

Expand All @@ -28,6 +29,8 @@ def build_cmdline_parser(cls, **kwargs):
parser.add_argument('--image-ext', default=None,
help="Image extension (default: keep or use format default%s)" % \
(' ' + cls.DEFAULT_IMAGE_EXT if cls.DEFAULT_IMAGE_EXT else ''))
parser.add_argument('--save-dataset-meta', action='store_true',
help="Save dataset meta file (default: %(default)s)")

return parser

Expand Down Expand Up @@ -70,7 +73,7 @@ def apply(self):
raise NotImplementedError("Should be implemented in a subclass")

def __init__(self, extractor, save_dir, save_images=False,
image_ext=None, default_image_ext=None):
image_ext=None, default_image_ext=None, save_dataset_meta=False):
default_image_ext = default_image_ext or self.DEFAULT_IMAGE_EXT
assert default_image_ext
self._default_image_ext = default_image_ext
Expand All @@ -81,6 +84,8 @@ def __init__(self, extractor, save_dir, save_images=False,
self._extractor = extractor
self._save_dir = save_dir

self._save_dataset_meta = save_dataset_meta

# TODO: refactor this variable.
# Can be used by a subclass to store the current patch info
if isinstance(extractor, DatasetPatch.DatasetPatchWrapper):
Expand Down Expand Up @@ -144,3 +149,6 @@ def _save_point_cloud(self, item=None, path=None, *,
if item.point_cloud and osp.isfile(item.point_cloud):
if item.point_cloud != path:
shutil.copyfile(item.point_cloud, path)

def _save_meta_file(self, path):
    """Hand the wrapped extractor's categories to ``save_meta_file`` for ``path``."""
    categories = self._extractor.categories()
    save_meta_file(path, categories)
34 changes: 24 additions & 10 deletions datumaro/plugins/cityscapes_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import find_images, load_image, save_image
from datumaro.util.mask_tools import generate_colormap, paint_mask
from datumaro.util.meta_file_util import (
has_meta_file, is_meta_file, parse_meta_file,
)

CityscapesLabelMap = OrderedDict([
('unlabeled', (0, 0, 0)),
Expand Down Expand Up @@ -172,11 +175,15 @@ def __init__(self, path, subset=None):

def _load_categories(self, path):
label_map = None
label_map_path = osp.join(path, CityscapesPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
if has_meta_file(path):
label_map = parse_meta_file(path)
else:
label_map = CityscapesLabelMap
label_map_path = osp.join(path, CityscapesPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
else:
label_map = CityscapesLabelMap

self._labels = [label for label in label_map]
return make_cityscapes_categories(label_map)

Expand Down Expand Up @@ -335,11 +342,15 @@ def apply(self):
self.save_label_map()

def save_label_map(self):
path = osp.join(self._save_dir, CityscapesPath.LABELMAP_FILE)
labels = self._extractor.categories()[AnnotationType.label]
if len(self._label_map) > len(labels):
self._label_map.pop('background')
write_label_map(path, self._label_map)
if self._save_dataset_meta:
self._save_meta_file(self._save_dir)
else:
labels = self._extractor.categories()[AnnotationType.label]
if len(self._label_map) > len(labels):
self._label_map.pop('background')

path = osp.join(self._save_dir, CityscapesPath.LABELMAP_FILE)
write_label_map(path, self._label_map)

def _load_categories(self, label_map_source):
if label_map_source == LabelmapType.cityscapes.name:
Expand Down Expand Up @@ -370,7 +381,10 @@ def _load_categories(self, label_map_source):
sorted(label_map_source.items(), key=lambda e: e[0]))

elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
label_map = parse_label_map(label_map_source)
if is_meta_file(label_map_source):
label_map = parse_meta_file(label_map_source)
else:
label_map = parse_label_map(label_map_source)

else:
raise Exception("Wrong labelmap specified, "
Expand Down
26 changes: 17 additions & 9 deletions datumaro/plugins/imagenet_txt_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from datumaro.components.errors import DatasetImportError
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class ImagenetTxtPath:
Expand Down Expand Up @@ -57,9 +58,10 @@ def __init__(self, path: str, *,
subset = osp.splitext(osp.basename(path))[0]
super().__init__(subset=subset)

root_dir = osp.dirname(path)
if not image_dir:
image_dir = ImagenetTxtPath.IMAGE_DIR
self.image_dir = osp.join(osp.dirname(path), image_dir)
self.image_dir = osp.join(root_dir, image_dir)

self._generate_labels = False

Expand All @@ -70,8 +72,11 @@ def __init__(self, path: str, *,
labels = ()
self._generate_labels = True
elif labels_source == _LabelsSource.file:
labels = self._parse_labels(
osp.join(osp.dirname(path), labels_file))
if has_meta_file(root_dir):
labels = list(parse_meta_file(root_dir).keys())
else:
labels = self._parse_labels(
osp.join(root_dir, labels_file))
else:
assert False, "Unhandled labels source %s" % labels_source
else:
Expand Down Expand Up @@ -200,9 +205,12 @@ def apply(self):
with open(annotation_file, 'w', encoding='utf-8') as f:
f.write(annotation)

labels_file = osp.join(subset_dir, ImagenetTxtPath.LABELS_FILE)
with open(labels_file, 'w', encoding='utf-8') as f:
f.writelines(l.name + '\n'
for l in extractor.categories().get(
AnnotationType.label, LabelCategories())
)
if self._save_dataset_meta:
self._save_meta_file(subset_dir)
else:
labels_file = osp.join(subset_dir, ImagenetTxtPath.LABELS_FILE)
with open(labels_file, 'w', encoding='utf-8') as f:
f.writelines(l.name + '\n'
for l in extractor.categories().get(
AnnotationType.label, LabelCategories())
)
15 changes: 11 additions & 4 deletions datumaro/plugins/voc_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@
from datumaro.util.annotation_util import make_label_id_mapping
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask, remap_mask
from datumaro.util.meta_file_util import has_meta_file

from .format import (
VocInstColormap, VocPath, VocTask, make_voc_categories, make_voc_label_map,
parse_label_map, write_label_map,
parse_label_map, parse_meta_file, write_label_map, write_meta_file,
)


Expand Down Expand Up @@ -530,8 +531,11 @@ def save_segm(self, path, mask, colormap=None):
save_image(path, mask, create_dir=True)

def save_label_map(self):
path = osp.join(self._save_dir, VocPath.LABELMAP_FILE)
write_label_map(path, self._label_map)
if self._save_dataset_meta:
write_meta_file(self._save_dir, self._label_map)
else:
path = osp.join(self._save_dir, VocPath.LABELMAP_FILE)
write_label_map(path, self._label_map)

def _load_categories(self, label_map_source):
if label_map_source == LabelmapType.voc.name:
Expand Down Expand Up @@ -562,7 +566,10 @@ def _load_categories(self, label_map_source):
sorted(label_map_source.items(), key=lambda e: e[0]))

elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
label_map = parse_label_map(label_map_source)
if has_meta_file(label_map_source):
label_map = parse_meta_file(label_map_source)
else:
label_map = parse_label_map(label_map_source)

else:
raise Exception("Wrong labelmap specified: '%s', "
Expand Down
15 changes: 10 additions & 5 deletions datumaro/plugins/voc_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from datumaro.components.media import Image
from datumaro.util.image import find_images
from datumaro.util.mask_tools import invert_colormap, lazy_mask
from datumaro.util.meta_file_util import has_meta_file

from .format import (
VocInstColormap, VocPath, VocTask, make_voc_categories, parse_label_map,
parse_meta_file,
)

_inverse_inst_colormap = invert_colormap(VocInstColormap)
Expand Down Expand Up @@ -48,12 +50,15 @@ def _get_label_id(self, label):
assert label_id is not None, label
return label_id

@staticmethod
def _load_categories(dataset_path):
def _load_categories(self, dataset_path):
label_map = None
label_map_path = osp.join(dataset_path, VocPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)
if has_meta_file(dataset_path):
label_map = parse_meta_file(dataset_path)
else:
label_map_path = osp.join(dataset_path, VocPath.LABELMAP_FILE)
if osp.isfile(label_map_path):
label_map = parse_label_map(label_map_path)

return make_voc_categories(label_map)

def _load_subset_list(self, subset_path):
Expand Down
72 changes: 72 additions & 0 deletions datumaro/plugins/voc_format/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@
from collections import OrderedDict
from enum import Enum, auto
from itertools import chain
import json
import os.path as osp

import numpy as np

from datumaro.components.annotation import (
AnnotationType, LabelCategories, MaskCategories,
)
from datumaro.util import find
from datumaro.util.meta_file_util import get_meta_file


class VocTask(Enum):
Expand Down Expand Up @@ -157,6 +161,33 @@ def parse_label_map(path):
label_map[name] = [color, parts, actions]
return label_map

def parse_meta_file(path):
    """Read a VOC label map from a dataset meta JSON file.

    Args:
        path: Either the meta file itself or a directory. For a directory,
            the meta file location is resolved with ``get_meta_file``.

    Returns:
        An ``OrderedDict`` mapping each label name to a
        ``[color, parts, actions]`` list. ``color`` is an RGB tuple, or
        ``None`` when the file provides no color for that label.
    """
    meta_file = get_meta_file(path) if osp.isdir(path) else path

    # Explicit encoding keeps non-ASCII label names readable regardless of
    # the platform's default locale encoding.
    with open(meta_file, encoding='utf-8') as f:
        dataset_meta = json.load(f)

    parts = dataset_meta.get('parts', {})
    actions = dataset_meta.get('actions', {})

    label_map = OrderedDict()
    # 'parts' and 'actions' are keyed by the stringified label index.
    for i, label in enumerate(dataset_meta.get('labels', [])):
        label_map[label] = [None, parts.get(str(i), []), actions.get(str(i), [])]

    colors = dataset_meta.get('segmentation_colors', [])
    has_colors = any(colors)  # loop-invariant, computed once

    # Colors are matched to 'label_map' entries by position.
    for i, label in enumerate(dataset_meta.get('label_map', {}).values()):
        if label not in label_map:
            label_map[label] = [None, [], []]

        # A color list shorter than 'label_map' leaves the remaining labels
        # without a color instead of raising IndexError.
        if has_colors and i < len(colors) and colors[i] is not None:
            label_map[label][0] = tuple(colors[i])

    return label_map

def write_label_map(path, label_map):
with open(path, 'w', encoding='utf-8') as f:
f.write('# label:color_rgb:parts:actions\n')
Expand All @@ -171,6 +202,47 @@ def write_label_map(path, label_map):

f.write('%s\n' % ':'.join([label_name, color_rgb, parts, actions]))

def write_meta_file(path, label_map):
    """Save a VOC label map as a dataset meta JSON file.

    Args:
        path: Location handed to ``get_meta_file`` to obtain the output
            file path.
        label_map: Mapping of label name to a ``[color, parts, actions]``
            sequence, where ``color`` is an RGB triple or a falsy value
            when the label has no color.

    The written JSON always contains ``labels``. ``label_map`` and
    ``segmentation_colors`` are added when at least one label has a color,
    ``background_label`` when some label is colored ``(0, 0, 0)``, and
    ``parts`` / ``actions`` only when at least one label defines any.
    """
    labels = []
    labels_dict = {}
    segmentation_colors = []
    parts = {}
    actions = {}
    bg_index = None

    for i, (label_name, label_desc) in enumerate(label_map.items()):
        color = label_desc[0]
        labels.append(label_name)

        if color:
            rgb = [int(color[0]), int(color[1]), int(color[2])]
            labels_dict[str(i)] = label_name
            segmentation_colors.append(rgb)

            # The descriptor is [color, parts, actions], so the background
            # test has to look at the color, not at the whole descriptor.
            if bg_index is None and rgb == [0, 0, 0]:
                bg_index = i

        parts[str(i)] = label_desc[1]
        actions[str(i)] = label_desc[2]

    dataset_meta = {'labels': labels}

    if segmentation_colors:
        dataset_meta['label_map'] = labels_dict
        dataset_meta['segmentation_colors'] = segmentation_colors

    if bg_index is not None:
        # Stored as the stringified label index, the same keying used by
        # 'label_map', 'parts' and 'actions'.
        # NOTE(review): confirm consumers expect an index rather than a name.
        dataset_meta['background_label'] = str(bg_index)

    # Test the values: the keys ('0', '1', ...) are always truthy, which
    # would emit these sections even when every label has none.
    if any(parts.values()):
        dataset_meta['parts'] = parts

    if any(actions.values()):
        dataset_meta['actions'] = actions

    meta_file = get_meta_file(path)

    with open(meta_file, 'w', encoding='utf-8') as f:
        json.dump(dataset_meta, f)


def make_voc_categories(label_map=None):
if label_map is None:
label_map = make_voc_label_map()
Expand Down
19 changes: 14 additions & 5 deletions datumaro/plugins/widerface_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util import str_to_bool
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class WiderFacePath:
Expand Down Expand Up @@ -44,8 +45,13 @@ def __init__(self, path, subset=None):

def _load_categories(self):
label_cat = LabelCategories()
path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
if osp.isfile(path):
if has_meta_file(self._dataset_dir):
Comment thread
zhiltsov-max marked this conversation as resolved.
labels = parse_meta_file(self._dataset_dir).keys()
for label in labels:
label_cat.add(label)
elif osp.isfile(osp.join(self._dataset_dir,
WiderFacePath.LABELS_FILE)):
path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
with open(path, encoding='utf-8') as labels_file:
for line in labels_file:
label_cat.add(line.strip())
Expand Down Expand Up @@ -157,9 +163,12 @@ def apply(self):

label_categories = self._extractor.categories()[AnnotationType.label]

labels_path = osp.join(save_dir, WiderFacePath.LABELS_FILE)
with open(labels_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(label.name for label in label_categories))
if self._save_dataset_meta:
self._save_meta_file(save_dir)
else:
labels_path = osp.join(save_dir, WiderFacePath.LABELS_FILE)
with open(labels_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(label.name for label in label_categories))

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(save_dir,
Expand Down
Loading