Skip to content
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `WiderFace` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/65>)
- Function to transform annotations to labels (<https://github.com/openvinotoolkit/datumaro/pull/66>)
- Task-specific Splitter (<https://github.com/openvinotoolkit/datumaro/pull/68>, <https://github.com/openvinotoolkit/datumaro/pull/81>)
- `VGGFace2` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/69>)
- Unique image count statistic (<https://github.com/openvinotoolkit/datumaro/pull/87)
- `VGGFace2` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/69>, <https://github.com/openvinotoolkit/datumaro/pull/82>)
- Unique image count statistic (<https://github.com/openvinotoolkit/datumaro/pull/87>)

### Changed
- `Dataset` class extended with new operations: `save`, `load`, `export`, `import_from`, `detect`, `run_model` (<https://github.com/openvinotoolkit/datumaro/pull/71>)
Expand Down
145 changes: 110 additions & 35 deletions datumaro/plugins/vgg_face2_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@

from datumaro.components.converter import Converter
from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem,
Importer, Points, LabelCategories, SourceExtractor)
Importer, Label, LabelCategories, Points, SourceExtractor)


# Names of the files and directories that make up a VGGFace2 dataset on disk;
# used by both the extractor and the converter in this module.
class VggFace2Path:
# Directory, directly under the dataset root, that holds the CSV annotations.
ANNOTATION_DIR = "bb_landmark"
# Extension appended to an item name to build its image file path.
IMAGE_EXT = '.jpg'
# Prefix of the bounding-box CSV; the full name is prefix + subset + '.csv'.
BBOXES_FILE = 'loose_bb_'
# Prefix of the landmarks CSV; the full name is prefix + subset + '.csv'.
LANDMARKS_FILE = 'loose_landmark_'
# Label list in the dataset root: one label name per line, optionally
# followed by the name of its parent.
LABELS_FILE = 'labels.txt'

class VggFace2Extractor(SourceExtractor):
def __init__(self, path):
Expand All @@ -29,11 +30,29 @@ def __init__(self, path):
subset = subset.split('_')[2]
super().__init__(subset=subset)

self._load_categories()
self._categories = self._load_categories()
self._items = list(self._load_items(path).values())

def _load_categories(self):
self._categories[AnnotationType.label] = LabelCategories()
label_cat = LabelCategories()
path = osp.join(self._dataset_dir, VggFace2Path.LABELS_FILE)
if osp.isfile(path):
with open(path, encoding='utf-8') as labels_file:
lines = [s.strip() for s in labels_file]
for line in lines:
objects = line.split()
label = objects[0]
class_name = None
if 1 < len(objects):
class_name = objects[1]
label_cat.add(label, parent=class_name)
else:
subset_path = osp.join(self._dataset_dir, self._subset)
if osp.isdir(subset_path):
for images_dir in sorted(os.listdir(subset_path)):
if osp.isdir(osp.join(subset_path, images_dir)):
label_cat.add(images_dir)
return { AnnotationType.label: label_cat }

def _load_items(self, path):
items = {}
Expand All @@ -42,30 +61,47 @@ def _load_items(self, path):

for row in landmarks_table:
item_id = row['NAME_ID']
image_path = osp.join(self._dataset_dir, self._subset,
item_id + VggFace2Path.IMAGE_EXT)
annotations = []
label = None
if '/' in item_id:
label_name = item_id.split('/')[0]
label = self._categories[AnnotationType.label].find(label_name)[0]
if label is not None:
item_id = item_id[len(label_name) + 1:]
Copy link
Copy Markdown
Contributor

@zhiltsov-max zhiltsov-max Jan 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can there be no label directory prefix?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example, when item.id = a/1 and there is no label named `a`, the id has no label directory prefix to strip.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a folder for unlabeled items (like in ImageNet)?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is a good idea, because otherwise we require labels.txt.

if item_id not in items:
image_path = osp.join(self._dataset_dir, self._subset,
row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image_path)
annotations = items[item_id].annotations
if len([p for p in row if row[p] == '']) == 0 and len(row) == 11:
annotations.append(Points(
[float(row[p]) for p in row if p != 'NAME_ID']))
if item_id in items and 0 < len(annotations):
annotation = items[item_id].annotations
annotation.append(annotations[0])
else:
items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image_path, annotations=annotations)
[float(row[p]) for p in row if p != 'NAME_ID'], label=label,
group=1))
elif label is not None:
annotations.append(Label(label=label, group=1))

bboxes_path = osp.join(self._dataset_dir, VggFace2Path.ANNOTATION_DIR,
VggFace2Path.BBOXES_FILE + self._subset + '.csv')
if osp.isfile(bboxes_path):
with open(bboxes_path) as content:
bboxes_table = list(csv.DictReader(content))
for row in bboxes_table:
item_id = row['NAME_ID']
label = None
if '/' in item_id:
label_name = item_id.split('/')[0]
label = self._categories[AnnotationType.label].find(label_name)[0]
if label is not None:
item_id = item_id[len(label_name) + 1:]
if item_id not in items:
image_path = osp.join(self._dataset_dir, self._subset,
row['NAME_ID'] + VggFace2Path.IMAGE_EXT)
items[item_id] = DatasetItem(id=item_id, subset=self._subset,
image=image_path)
annotations = items[item_id].annotations
if len([p for p in row if row[p] == '']) == 0 and len(row) == 5:
item_id = row['NAME_ID']
annotations = items[item_id].annotations
annotations.append(Bbox(int(row['X']), int(row['Y']),
int(row['W']), int(row['H'])))
annotations.append(Bbox(float(row['X']), float(row['Y']),
float(row['W']), float(row['H']), label=label, group=1))
return items

class VggFace2Importer(Importer):
Expand All @@ -81,37 +117,76 @@ class VggFace2Converter(Converter):

def apply(self):
save_dir = self._save_dir

os.makedirs(save_dir, exist_ok=True)

labels_path = osp.join(save_dir, VggFace2Path.LABELS_FILE)
labels_file = ''
for label in self._extractor.categories()[AnnotationType.label]:
labels_file += '%s' % label.name
if label.parent:
labels_file += ' %s' % label.parent
labels_file += '\n'
with open(labels_path, 'w', encoding='utf-8') as f:
f.write(labels_file)

label_categories = self._extractor.categories()[AnnotationType.label]

for subset_name, subset in self._extractor.subsets().items():
subset_dir = osp.join(save_dir, subset_name)
bboxes_table = []
landmarks_table = []
for item in subset:
if item.has_image and self._save_images:
self._save_image(item, osp.join(save_dir, subset_dir,
item.id + VggFace2Path.IMAGE_EXT))
labels = set(p.label for p in item.annotations
if getattr(p, 'label') != None)
if labels:
for label in labels:
self._save_image(item, osp.join(subset_dir,
label_categories[label].name + '/' \
+ item.id + VggFace2Path.IMAGE_EXT))
else:
self._save_image(item, osp.join(subset_dir,
item.id + VggFace2Path.IMAGE_EXT))

landmarks = [a for a in item.annotations
if a.type == AnnotationType.points]
if landmarks:
for landmark in landmarks:
points = landmark.points
landmarks_table.append({'NAME_ID': item.id,
'P1X': points[0], 'P1Y': points[1],
'P2X': points[2], 'P2Y': points[3],
'P3X': points[4], 'P3Y': points[5],
'P4X': points[6], 'P4Y': points[7],
'P5X': points[8], 'P5Y': points[9]})
else:
landmarks_table.append({'NAME_ID': item.id})
for landmark in landmarks:
name_id = item.id
if landmark.label is not None \
and label_categories[landmark.label].name:
name_id = label_categories[landmark.label].name \
+ '/' + item.id
points = landmark.points
landmarks_table.append({'NAME_ID': name_id,
'P1X': points[0], 'P1Y': points[1],
'P2X': points[2], 'P2Y': points[3],
'P3X': points[4], 'P3Y': points[5],
'P4X': points[6], 'P4Y': points[7],
'P5X': points[8], 'P5Y': points[9]})

bboxes = [a for a in item.annotations
if a.type == AnnotationType.bbox]
if bboxes:
for bbox in bboxes:
bboxes_table.append({'NAME_ID': item.id, 'X': int(bbox.x),
'Y': int(bbox.y), 'W': int(bbox.w), 'H': int(bbox.h)})
for bbox in bboxes:
name_id = item.id
if bbox.label is not None and \
label_categories[bbox.label].name:
name_id = label_categories[bbox.label].name \
+ '/' + item.id
bboxes_table.append({'NAME_ID': name_id, 'X': bbox.x,
'Y': bbox.y, 'W': bbox.w, 'H': bbox.h})

labels = [a for a in item.annotations
if a.type == AnnotationType.label]
for label in labels:
name_id = item.id
if label.label is not None and \
label_categories[label.label].name:
name_id = label_categories[label.label].name \
+ '/' + item.id
landmarks_table.append({'NAME_ID': name_id})

if not landmarks and not bboxes and not labels:
landmarks_table.append({'NAME_ID': item.id})

landmarks_path = osp.join(save_dir, VggFace2Path.ANNOTATION_DIR,
VggFace2Path.LANDMARKS_FILE + subset_name + '.csv')
Expand Down
2 changes: 2 additions & 0 deletions tests/assets/vgg_face2_dataset/labels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
n000001 car
n000002 person
78 changes: 51 additions & 27 deletions tests/test_vgg_face2_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from unittest import TestCase

import numpy as np
from datumaro.components.extractor import Bbox, DatasetItem, Points
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem,
Label, LabelCategories, Points)
from datumaro.plugins.vgg_face2_format import (VggFace2Converter,
VggFace2Importer)
from datumaro.util.test_utils import TestDir, compare_datasets
Expand All @@ -14,58 +15,78 @@ def test_can_save_and_load(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id='1', subset='train', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2),
Bbox(0, 2, 4, 2, label=0, group=1),
Points([3.2, 3.12, 4.11, 3.2, 2.11,
2.5, 3.5, 2.11, 3.8, 2.13]),
2.5, 3.5, 2.11, 3.8, 2.13], label=0, group=1),
]
),
DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)),
annotations=[
Points([4.23, 4.32, 5.34, 4.45, 3.54,
3.56, 4.52, 3.51, 4.78, 3.34]),
3.56, 4.52, 3.51, 4.78, 3.34], label=1, group=1),
]
),
DatasetItem(id='3', subset='val', image=np.ones((8, 8, 3))),
DatasetItem(id='4', subset='val', image=np.ones((10, 10, 3)),
DatasetItem(id='3', subset='train', image=np.ones((8, 8, 3)),
annotations=[Label(2, group=1)]
),
DatasetItem(id='4', subset='train', image=np.ones((10, 10, 3)),
annotations=[
Bbox(0, 2, 4, 2),
Bbox(0, 2, 4, 2, label=3, group=1),
Points([3.2, 3.12, 4.11, 3.2, 2.11,
2.5, 3.5, 2.11, 3.8, 2.13]),
Bbox(2, 2, 1, 2),
Points([2.787, 2.898, 2.965, 2.79, 2.8,
2.456, 2.81, 2.32, 2.89, 2.3]),
2.5, 3.5, 2.11, 3.8, 2.13], label=3, group=1),
]
),
DatasetItem(id='5', subset='val', image=np.ones((8, 8, 3)),
DatasetItem(id='a/5', subset='train', image=np.ones((8, 8, 3)),
annotations=[
Bbox(2, 2, 2, 2),
Bbox(2, 2, 2, 2, group=1),
]
),
], categories=[])
DatasetItem(id='label_0', subset='train', image=np.ones((8, 8, 3)),
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
[('label_%s' % i, 'class_%s' % i) for i in range(5)]),
})

with TestDir() as test_dir:
VggFace2Converter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = VggFace2Importer()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'vgg_face2')

compare_datasets(self, source_dataset, parsed_dataset)

def test_can_save_dataset_with_no_subsets(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)),
DatasetItem(id='b/1', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2),
Bbox(0, 2, 4, 2, label=0, group=1),
Points([4.23, 4.32, 5.34, 4.45, 3.54,
3.56, 4.52, 3.51, 4.78, 3.34]),
3.56, 4.52, 3.51, 4.78, 3.34], label=0, group=1),
]
),
], categories=[])
], categories=['a'])

with TestDir() as test_dir:
VggFace2Converter.convert(source_dataset, test_dir, save_images=True)
parsed_dataset = VggFace2Importer()(test_dir).make_dataset()
parsed_dataset = Dataset.import_from(test_dir, 'vgg_face2')

compare_datasets(self, source_dataset, parsed_dataset)

def test_can_save_dataset_with_no_save_images(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((8, 8, 3)),
annotations=[
Bbox(0, 2, 4, 2, label=0, group=1),
Points([4.23, 4.32, 5.34, 4.45, 3.54,
3.56, 4.52, 3.51, 4.78, 3.34], label=0, group=1),
]
),
], categories=['label_0'])

with TestDir() as test_dir:
VggFace2Converter.convert(source_dataset, test_dir, save_images=False)
parsed_dataset = Dataset.import_from(test_dir, 'vgg_face2')

compare_datasets(self, source_dataset, parsed_dataset)

DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'vgg_face2_dataset')

Expand All @@ -75,23 +96,26 @@ def test_can_detect(self):

def test_can_import(self):
expected_dataset = Dataset.from_iterable([
DatasetItem(id='n000001/0001_01', subset='train',
DatasetItem(id='0001_01', subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(2, 2, 1, 2),
Bbox(2, 2, 1, 2, label=0, group=1),
Points([2.787, 2.898, 2.965, 2.79, 2.8,
2.456, 2.81, 2.32, 2.89, 2.3]),
2.456, 2.81, 2.32, 2.89, 2.3], label=0, group=1),
]
),
DatasetItem(id='n000002/0002_01', subset='train',
DatasetItem(id='0002_01', subset='train',
image=np.ones((10, 15, 3)),
annotations=[
Bbox(1, 3, 1, 1),
Bbox(1, 3, 1, 1, label=1, group=1),
Points([1.2, 3.8, 1.8, 3.82, 1.51,
3.634, 1.43, 3.34, 1.65, 3.32])
3.634, 1.43, 3.34, 1.65, 3.32], label=1, group=1)
]
),
], categories=[])
], categories={
AnnotationType.label: LabelCategories.from_iterable(
[('n000001', 'car'), ('n000002', 'person')]),
})

dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'vgg_face2')

Expand Down