Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions datumaro/components/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ def updated_subsets(self) -> Dict[str, ItemStatus]:
}
return self._updated_subsets

def as_dataset(self, parent: IDataset) -> IDataset:
    """
    Wraps the data of this object into a dataset view.

    The view is built from 'self.data' and the categories
    reported by 'parent'.
    """
    storage = self.data
    categories = parent.categories()
    return DatasetItemStorageDatasetView(storage, categories)


class DatasetSubset(IDataset): # non-owning view
def __init__(self, parent: 'Dataset', name: str):
Expand Down Expand Up @@ -388,6 +391,10 @@ def get_patch(self):
return DatasetPatch(self._storage, self._categories,
self._updated_items)

def flush_changes(self):
    """
    Forgets the recorded changes: clears the 'transformed' flag and
    replaces the updated items record with a new empty dict.
    """
    self._transformed = False
    self._updated_items = dict()


class Dataset(IDataset):
_global_eager = False
Expand Down Expand Up @@ -570,6 +577,9 @@ def bind(self, path: str, format: str = None):
self._source_path = path
self._format = format or DEFAULT_FORMAT

def flush_changes(self):
    """
    Forgets the recorded changes by delegating to
    the flush_changes() method of the underlying data object.
    """
    data = self._data
    data.flush_changes()

@error_rollback('on_error', implicit=True)
def export(self, save_dir: str, format, **kwargs):
inplace = (save_dir == self._source_path and format == self._format)
Expand All @@ -589,6 +599,7 @@ def export(self, save_dir: str, format, **kwargs):
converter.convert(self, save_dir=save_dir, **kwargs)
if not self.is_bound:
self.bind(save_dir, format)
self.flush_changes()
else:
converter.patch(self, self.patch, save_dir=save_dir, **kwargs)

Expand Down
193 changes: 141 additions & 52 deletions datumaro/plugins/voc_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
from lxml import etree as ET

from datumaro.components.converter import Converter
from datumaro.components.dataset import ItemStatus
from datumaro.components.extractor import (AnnotationType,
CompiledMask, LabelCategories)
CompiledMask, DatasetItem, LabelCategories)
from datumaro.util import find, str_to_bool
from datumaro.util.image import save_image
from datumaro.util.mask_tools import paint_mask, remap_mask
Expand Down Expand Up @@ -51,6 +52,7 @@ def _write_xml_bbox(bbox, parent_elem):

class VocConverter(Converter):
DEFAULT_IMAGE_EXT = VocPath.IMAGE_EXT
BUILTIN_ATTRS = {'difficult', 'pose', 'truncated', 'occluded' }

@staticmethod
def _split_tasks_string(s):
Expand Down Expand Up @@ -107,6 +109,8 @@ def __init__(self, extractor, save_dir,
assert isinstance(label_map, (str, dict)), label_map
self._load_categories(label_map)

self._patch = None

def apply(self):
self.make_dirs()
self.save_subsets()
Expand Down Expand Up @@ -158,6 +162,11 @@ def save_subsets(self):
action_list = OrderedDict()
layout_list = OrderedDict()
segm_list = OrderedDict()
has_classes = False
has_dets = False
has_actions = False
has_layouts = False
has_masks = False

for item in subset:
log.debug("Converting item '%s'", item.id)
Expand All @@ -181,9 +190,8 @@ def save_subsets(self):
elif a.type == AnnotationType.mask:
masks.append(a)

if self._tasks is None and bboxes or \
self._tasks & {VocTask.detection, VocTask.person_layout,
VocTask.action_classification}:
if self._tasks & {VocTask.detection, VocTask.person_layout,
VocTask.action_classification}:
root_elem = ET.Element('annotation')
if '_' in item.id:
folder = item.id[ : item.id.find('_')]
Expand Down Expand Up @@ -276,8 +284,7 @@ def save_subsets(self):
obj_elem.append(actions_elem)

if self._allow_attributes:
native_attrs = {'difficult', 'pose',
'truncated', 'occluded' }
native_attrs = set(self.BUILTIN_ATTRS)
native_attrs.update(label_actions)

attrs_elem = ET.Element('attributes')
Expand All @@ -301,6 +308,9 @@ def save_subsets(self):
clsdet_list[item.id] = True
layout_list[item.id] = objects_with_parts
action_list[item.id] = objects_with_actions
has_dets = True
has_layouts |= len(objects_with_parts) != 0
has_actions |= len(objects_with_actions) != 0

for label_ann in labels:
label = self.get_label(label_ann.label)
Expand All @@ -309,6 +319,7 @@ def save_subsets(self):
class_list = class_lists.get(item.id, set())
class_list.add(label_ann.label)
class_lists[item.id] = class_list
has_classes = True

clsdet_list[item.id] = True

Expand All @@ -326,106 +337,151 @@ def save_subsets(self):
colormap=VocInstColormap)

segm_list[item.id] = True
has_masks = True

if len(item.annotations) == 0:
clsdet_list[item.id] = None
layout_list[item.id] = None
action_list[item.id] = None
segm_list[item.id] = None

if self._tasks & {VocTask.classification, VocTask.detection,
if (has_classes or has_dets) and self._tasks & {
VocTask.classification, VocTask.detection,
VocTask.action_classification, VocTask.person_layout}:
self.save_clsdet_lists(subset_name, clsdet_list)
if self._tasks & {VocTask.classification}:
if has_classes and self._tasks & {VocTask.classification}:
self.save_class_lists(subset_name, class_lists)
if self._tasks & {VocTask.action_classification}:
if has_actions and self._tasks & {VocTask.action_classification}:
self.save_action_lists(subset_name, action_list)
if self._tasks & {VocTask.person_layout}:
if has_layouts and self._tasks & {VocTask.person_layout}:
self.save_layout_lists(subset_name, layout_list)
if self._tasks & {VocTask.segmentation}:
if has_masks and self._tasks & {VocTask.segmentation}:
self.save_segm_lists(subset_name, segm_list)

def save_action_lists(self, subset_name, action_list):
if not action_list:
return
@staticmethod
def _get_filtered_lines(path, patch, subset, items=None):
    """
    Reads a list file and groups its lines by the item id.

    The item id is the first whitespace-separated token of a line.
    Blank lines are skipped. If a patch is given, the lines of the items
    marked there as removed for this subset are dropped.

    Parameters:
        path - the text file to read
        patch - an object with the 'updated_items' mapping of
            (item id, subset) -> ItemStatus. If falsy, nothing is filtered
        subset - the subset name used to look the items up in the patch
        items - an optional dict. Every kept item id is added
            to it as a key with the value True

    Returns: a dict {item id: [complete lines of the item]}.
        The lines include the item id and the trailing newline,
        so they can be written back with writelines() as is.
    """
    lines = {}
    with open(path) as f:
        for line in f:
            # Only the first token matters here; the rest of the line
            # is kept verbatim. An empty result means a blank line.
            fields = line.split(maxsplit=1)
            if not fields:
                continue

            item = fields[0]
            if patch and patch.updated_items.get((item, subset)) == \
                    ItemStatus.removed:
                continue

            # The last line of a file can lack the line terminator
            if not line.endswith('\n'):
                line += '\n'
            lines.setdefault(item, []).append(line)

    if items is not None:
        items.update((k, True) for k in lines)
    return lines

def save_action_lists(self, subset_name, action_list):
    """
    Writes the action classification lists of a subset.

    Two kinds of files are written to the action subsets directory:
    - '<subset>.txt' with one item id per line
    - '<action>_<subset>.txt' for each action, with the lines
      '<item> <object number> <1 or -1>'

    In the patch mode (self._patch is set), the items of the existing
    files are preserved, unless they are marked as removed in the patch
    or are overwritten by the entries of 'action_list'.

    Parameters:
        subset_name - the name of the subset
        action_list - a dict {item id: {object index: {action: bool}}},
            items with a falsy value are only put to the common list
    """
    os.makedirs(self._action_subsets_dir, exist_ok=True)

    ann_file = osp.join(self._action_subsets_dir, subset_name + '.txt')
    # A dict is used as an ordered set of the item ids
    items = {k: True for k in action_list}
    if self._patch and osp.isfile(ann_file):
        # Extends 'items' with the remaining items of the existing file
        self._get_filtered_lines(ann_file, self._patch, subset_name, items)
    with open(ann_file, 'w') as f:
        f.writelines('%s\n' % item for item in items)

    if not items and not self._patch:
        return

    def _write_item(f, item, objs, action):
        if not objs:
            return
        for obj_id, obj_actions in objs.items():
            presented = obj_actions[action]
            f.write('%s %s % d\n' % \
                (item, 1 + obj_id, 1 if presented else -1))

    # Actions can repeat across the labels; dict keys deduplicate them
    all_actions = {
        act: osp.join(self._action_subsets_dir,
            '%s_%s.txt' % (act, subset_name))
        for act in chain.from_iterable(
            self._get_actions(l) for l in self._label_map)
    }
    for action, ann_file in all_actions.items():
        lines = {}
        if self._patch and osp.isfile(ann_file):
            # No patch is passed: 'items' is already filtered
            # and decides, which entries are written back
            lines = self._get_filtered_lines(ann_file, None, subset_name)

        with open(ann_file, 'w') as f:
            for item in items:
                if item in action_list:
                    _write_item(f, item, action_list[item], action)
                elif item in lines:
                    f.writelines(lines[item])

def save_class_lists(self, subset_name, class_lists):
    """
    Writes the per-label classification lists of a subset.

    For each label of the label map, the file '<label>_<subset>.txt'
    is written to the classification subsets directory. It contains
    the lines '<item> <1 or -1>', telling if the label is present
    on the item. Items without labels are not written.

    In the patch mode (self._patch is set), the entries of the existing
    files are preserved, unless they are marked as removed in the patch
    or are overwritten by the entries of 'class_lists'.

    Parameters:
        subset_name - the name of the subset
        class_lists - a dict {item id: collection of label indices}
    """
    if not class_lists and not self._patch:
        return

    # Label names do not depend on the file being written,
    # so they are resolved once per item
    item_label_names = {
        item: {self.get_label(l) for l in item_labels}
        for item, item_labels in class_lists.items() if item_labels
    }

    def _write_item(f, item, label):
        label_names = item_label_names.get(item)
        if not label_names:
            return
        presented = label in label_names
        f.write('%s % d\n' % (item, 1 if presented else -1))

    os.makedirs(self._cls_subsets_dir, exist_ok=True)

    for label in self._label_map:
        ann_file = osp.join(self._cls_subsets_dir,
            '%s_%s.txt' % (label, subset_name))
        # A dict is used as an ordered set of the item ids
        items = {k: True for k in class_lists}
        lines = {}
        if self._patch and osp.isfile(ann_file):
            lines = self._get_filtered_lines(ann_file, self._patch,
                subset_name, items)

        with open(ann_file, 'w') as f:
            for item in items:
                if item in class_lists:
                    _write_item(f, item, label)
                elif item in lines:
                    f.writelines(lines[item])

def save_clsdet_lists(self, subset_name, clsdet_list):
    """
    Writes the common item list of a subset, '<subset>.txt',
    to the classification subsets directory, one item id per line.

    In the patch mode (self._patch is set), the items of the existing
    file are preserved, unless they are marked as removed in the patch.

    Parameters:
        subset_name - the name of the subset
        clsdet_list - a dict, the keys are the item ids to be listed
    """
    if not clsdet_list and not self._patch:
        return

    os.makedirs(self._cls_subsets_dir, exist_ok=True)

    ann_file = osp.join(self._cls_subsets_dir, subset_name + '.txt')
    # A dict is used as an ordered set of the item ids
    items = {k: True for k in clsdet_list}
    if self._patch and osp.isfile(ann_file):
        # Extends 'items' with the remaining items of the existing file
        self._get_filtered_lines(ann_file, self._patch, subset_name, items)

    with open(ann_file, 'w') as f:
        f.writelines('%s\n' % item for item in items)

def save_segm_lists(self, subset_name, segm_list):
    """
    Writes the segmentation item list of a subset, '<subset>.txt',
    to the segmentation subsets directory, one item id per line.

    In the patch mode (self._patch is set), the items of the existing
    file are preserved, unless they are marked as removed in the patch.

    Parameters:
        subset_name - the name of the subset
        segm_list - a dict, the keys are the item ids to be listed
    """
    if not segm_list and not self._patch:
        return

    os.makedirs(self._segm_subsets_dir, exist_ok=True)

    ann_file = osp.join(self._segm_subsets_dir, subset_name + '.txt')
    # A dict is used as an ordered set of the item ids
    items = {k: True for k in segm_list}
    if self._patch and osp.isfile(ann_file):
        # Extends 'items' with the remaining items of the existing file
        self._get_filtered_lines(ann_file, self._patch, subset_name, items)

    with open(ann_file, 'w') as f:
        f.writelines('%s\n' % item for item in items)

def save_layout_lists(self, subset_name, layout_list):
    """
    Writes the person layout list of a subset, '<subset>.txt',
    to the layout subsets directory.

    An item with objects produces a line '<item> <object number>'
    per object, an item without objects produces the line '<item>'.

    In the patch mode (self._patch is set), the entries of the existing
    file are preserved, unless they are marked as removed in the patch
    or are overwritten by the entries of 'layout_list'. The preserved
    lines are written back exactly as _get_filtered_lines() returns them.

    Parameters:
        subset_name - the name of the subset
        layout_list - a dict {item id: collection of object indices}
    """
    if not layout_list and not self._patch:
        return

    def _write_item(f, item, item_layouts):
        if item_layouts:
            for obj_id in item_layouts:
                f.write('%s % d\n' % (item, 1 + obj_id))
        else:
            f.write('%s\n' % item)

    os.makedirs(self._layout_subsets_dir, exist_ok=True)

    ann_file = osp.join(self._layout_subsets_dir, subset_name + '.txt')
    # A dict is used as an ordered set of the item ids
    items = {k: True for k in layout_list}
    lines = {}
    if self._patch and osp.isfile(ann_file):
        # The result must be kept: it holds the entries of the items,
        # which are not in 'layout_list' and have to be written back
        lines = self._get_filtered_lines(ann_file, self._patch,
            subset_name, items)

    with open(ann_file, 'w') as f:
        for item in items:
            if item in layout_list:
                _write_item(f, item, layout_list[item])
            elif item in lines:
                f.writelines(lines[item])

def save_segm(self, path, mask, colormap=None):
if self._apply_colormap:
Expand Down Expand Up @@ -554,6 +610,39 @@ def map_id(src_id):
def _remap_mask(self, mask):
return remap_mask(mask, self._label_id_mapping)

@classmethod
def patch(cls, dataset, patch, save_dir, **kwargs):
    """
    Updates a saved dataset in 'save_dir' with the changes of a patch.

    First, the patch contents are converted in the patch mode, then
    the files of the updated items are cleaned up: for an item, which
    is removed or has no image, the image file is deleted, and, if
    the item has no mask annotations, its class and instance
    segmentation mask files are deleted too.

    Parameters:
        dataset - the dataset the patch belongs to
        patch - the changes to apply. Must provide as_dataset(),
            'updated_items' and 'data'
        save_dir - the directory of the saved dataset
        kwargs - the converter parameters
    """
    patch_conv = cls(patch.as_dataset(dataset), save_dir=save_dir, **kwargs)
    patch_conv._patch = patch
    patch_conv.apply()

    # This instance is only needed to compute the image file names
    conv = cls(dataset, save_dir=save_dir, **kwargs)
    images_dir = osp.join(save_dir, VocPath.IMAGES_DIR)
    for (item_id, subset), status in patch.updated_items.items():
        is_removed = status == ItemStatus.removed
        if is_removed:
            # Removed items are not available in the patch data,
            # a stub item is enough to compute the file names
            item = DatasetItem(item_id, subset=subset)
        else:
            item = patch.data.get(item_id, subset)

        # NOTE(review): items, which keep their image, are skipped
        # entirely, including the mask cleanup below - confirm
        # that this is intended
        if not is_removed and item.has_image:
            continue

        image_path = osp.join(images_dir, conv._make_image_filename(item))
        if osp.isfile(image_path):
            os.unlink(image_path)

        if any(a.type is AnnotationType.mask for a in item.annotations):
            continue

        for masks_dir in (VocPath.SEGMENTATION_DIR, VocPath.INSTANCES_DIR):
            mask_path = osp.join(save_dir, masks_dir,
                item.id + VocPath.SEGM_EXT)
            if osp.isfile(mask_path):
                os.unlink(mask_path)

class VocClassificationConverter(VocConverter):
def __init__(self, *args, **kwargs):
kwargs['tasks'] = VocTask.classification
Expand Down
26 changes: 25 additions & 1 deletion datumaro/plugins/yolo_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from collections import OrderedDict

from datumaro.components.converter import Converter
from datumaro.components.extractor import AnnotationType, DEFAULT_SUBSET_NAME
from datumaro.components.dataset import ItemStatus
from datumaro.components.extractor import (AnnotationType, DEFAULT_SUBSET_NAME,
DatasetItem)

from .format import YoloPath

Expand Down Expand Up @@ -103,3 +105,25 @@ def apply(self):

f.write('names = %s\n' % osp.join('data', 'obj.names'))
f.write('backup = backup/\n')

@classmethod
def patch(cls, dataset, patch, save_dir, **kwargs):
    """
    Updates a saved dataset in 'save_dir' with the changes of a patch.

    Every updated subset is converted again, then the image files
    of the items, which are removed or have no image, are deleted
    from the 'obj_<subset>_data' directories.

    Parameters:
        dataset - the dataset the patch belongs to
        patch - the changes to apply. Must provide 'updated_subsets',
            'updated_items' and 'data'
        save_dir - the directory of the saved dataset
        kwargs - the converter parameters
    """
    for updated_subset in patch.updated_subsets:
        cls.convert(dataset.get_subset(updated_subset),
            save_dir=save_dir, **kwargs)

    # This instance is only needed to compute the image file names
    conv = cls(dataset, save_dir=save_dir, **kwargs)
    for (item_id, subset), status in patch.updated_items.items():
        is_removed = status == ItemStatus.removed
        if is_removed:
            # Removed items are not available in the patch data,
            # a stub item is enough to compute the file name
            item = DatasetItem(item_id, subset=subset)
        else:
            item = patch.data.get(item_id, subset)

        if not is_removed and item.has_image:
            continue

        # The default subset has its own name in this format
        dir_subset = subset
        if dir_subset == DEFAULT_SUBSET_NAME:
            dir_subset = YoloPath.DEFAULT_SUBSET_NAME

        image_path = osp.join(save_dir, 'obj_%s_data' % dir_subset,
            conv._make_image_filename(item))
        if osp.isfile(image_path):
            os.unlink(image_path)
Loading