Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
media type (<https://github.com/openvinotoolkit/datumaro/pull/628>)
- Common Semantic Segmentation dataset format (import-only)
(<https://github.com/openvinotoolkit/datumaro/pull/685>)
- An option to disable `data/` prefix inclusion in YOLO export
(<https://github.com/openvinotoolkit/datumaro/pull/689>)
- New command `describe-downloads` to print information about downloadable datasets
(<https://github.com/openvinotoolkit/datumaro/pull/678>)

Expand Down
38 changes: 31 additions & 7 deletions datumaro/plugins/yolo_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
from datumaro.components.converter import Converter
from datumaro.components.dataset import ItemStatus
from datumaro.components.errors import MediaTypeError
from datumaro.components.extractor import DEFAULT_SUBSET_NAME, DatasetItem
from datumaro.components.extractor import DEFAULT_SUBSET_NAME, DatasetItem, IExtractor
from datumaro.components.media import Image
from datumaro.util import str_to_bool

from .format import YoloPath

Expand All @@ -32,6 +33,24 @@ class YoloConverter(Converter):
# https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects
DEFAULT_IMAGE_EXT = ".jpg"

@classmethod
def build_cmdline_parser(cls, **kwargs):
    """Extend the parent command-line parser with YOLO-specific options.

    Adds the ``--add-path-prefix`` flag (parsed with ``str_to_bool``,
    enabled by default) on top of whatever the parent class registers.

    Returns:
        The parser produced by the parent class, with the extra option added.
    """
    cmdline_parser = super().build_cmdline_parser(**kwargs)

    # Keyword arguments are collected first so the option definition reads as data.
    prefix_option = {
        "default": True,
        "type": str_to_bool,
        "help": "Add the 'data/' prefix for paths in the dataset info (default: %(default)s)",
    }
    cmdline_parser.add_argument("--add-path-prefix", **prefix_option)

    return cmdline_parser

def __init__(
    self, extractor: IExtractor, save_dir: str, *, add_path_prefix: bool = True, **kwargs
) -> None:
    """Set up the converter and remember which path prefix to use.

    Args:
        extractor: Dataset source, forwarded to the parent constructor.
        save_dir: Output directory, forwarded to the parent constructor.
        add_path_prefix: When true, ``"data"`` is stored as the path prefix;
            otherwise an empty prefix is stored.
        **kwargs: Remaining options, forwarded to the parent constructor.
    """
    super().__init__(extractor, save_dir, **kwargs)

    # An empty string keeps later ``osp.join(self._prefix, ...)`` calls valid
    # while contributing no leading path component.
    if add_path_prefix:
        self._prefix = "data"
    else:
        self._prefix = ""

def apply(self):
extractor = self._extractor
save_dir = self._save_dir
Expand Down Expand Up @@ -81,7 +100,9 @@ def apply(self):
self._save_image(item, osp.join(subset_dir, image_name))
else:
log.warning("Item '%s' has no image" % item.id)
image_paths[item.id] = osp.join("data", osp.basename(subset_dir), image_name)
image_paths[item.id] = osp.join(
self._prefix, osp.basename(subset_dir), image_name
)

yolo_annotation = self._export_item_annotation(item)
annotation_path = osp.join(subset_dir, "%s.txt" % item.id)
Expand All @@ -91,7 +112,7 @@ def apply(self):
except Exception as e:
self._report_item_error(e, item_id=(item.id, item.subset))

subset_list_name = "%s.txt" % subset_name
subset_list_name = f"{subset_name}.txt"
subset_list_path = osp.join(save_dir, subset_list_name)
if self._patch and subset_name in self._patch.updated_subsets and not image_paths:
if osp.isfile(subset_list_path):
Expand All @@ -100,15 +121,18 @@ def apply(self):

subset_lists[subset_name] = subset_list_name
with open(subset_list_path, "w", encoding="utf-8") as f:
f.writelines("%s\n" % s for s in image_paths.values())
f.writelines("%s\n" % s.replace("\\", "/") for s in image_paths.values())

with open(osp.join(save_dir, "obj.data"), "w", encoding="utf-8") as f:
f.write("classes = %s\n" % len(label_ids))
f.write(f"classes = {len(label_ids)}\n")

for subset_name, subset_list_name in subset_lists.items():
f.write("%s = %s\n" % (subset_name, osp.join("data", subset_list_name)))
f.write(
"%s = %s\n"
% (subset_name, osp.join(self._prefix, subset_list_name).replace("\\", "/"))
)

f.write("names = %s\n" % osp.join("data", "obj.names"))
f.write("names = %s\n" % osp.join(self._prefix, "obj.names"))
f.write("backup = backup/\n")

def _export_item_annotation(self, item):
Expand Down
2 changes: 2 additions & 0 deletions site/content/en/docs/formats/yolo.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ Extra options for exporting to YOLO format:
(default: `False`)
- `--image-ext <IMAGE_EXT>` allows specifying the image extension
for the exported dataset (default: use original or `.jpg`, if none)
- `--add-path-prefix` allows specifying whether to include the `data/` prefix
in the paths written to `obj.data` and the subset list files (default: `True`)

## Examples

Expand Down
1 change: 1 addition & 0 deletions tests/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Requirements:
DATUM_497 = "Support import for SYNTHIA dataset"
DATUM_542 = "Images missing after merging two datasets"
DATUM_580 = "Import for MPII Human Pose Dataset"
DATUM_609 = "Allow not to prepend data/ prefix in YOLO export"
DATUM_616 = "Import for BraTS dataset"
DATUM_673 = "Pickling for Dataset and Annotations"

Expand Down
30 changes: 30 additions & 0 deletions tests/test_yolo_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,36 @@ def test_can_save_and_load_with_meta_file(self):
self.assertTrue(osp.isfile(osp.join(test_dir, "dataset_meta.json")))
compare_datasets(self, source_dataset, parsed_dataset)

@mark_requirement(Requirements.DATUM_609)
def test_can_save_and_load_without_path_prefix(self):
    """Export to YOLO with ``add_path_prefix=False`` and verify the result.

    Checks that the paths written to ``obj.data`` and the subset list file
    carry no ``data/`` prefix, and that the exported dataset can still be
    imported back and compares equal to the source.
    """
    source_dataset = Dataset.from_iterable(
        [
            DatasetItem(
                id=3,
                subset="valid",
                media=Image(data=np.ones((8, 8, 3))),
                annotations=[
                    # The label index must refer to one of the declared
                    # categories below (valid indices are 0 and 1).
                    Bbox(0, 1, 5, 2, label=1),
                ],
            ),
        ],
        categories=["a", "b"],
    )

    with TestDir() as test_dir:
        YoloConverter.convert(source_dataset, test_dir, save_media=True, add_path_prefix=False)
        parsed_dataset = Dataset.import_from(test_dir, "yolo")

        # The converter writes these files as UTF-8; read them back the same
        # way instead of relying on the platform default encoding.
        with open(osp.join(test_dir, "obj.data"), "r", encoding="utf-8") as f:
            lines = f.readlines()
            self.assertIn("valid = valid.txt\n", lines)

        with open(osp.join(test_dir, "valid.txt"), "r", encoding="utf-8") as f:
            lines = f.readlines()
            self.assertIn("obj_valid_data/3.jpg\n", lines)

        compare_datasets(self, source_dataset, parsed_dataset)


DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), "assets", "yolo_dataset")

Expand Down