open-edge-platform · zhiltsov-max · Mar 28, 2022 · Mar 22, 2022 · Mar 22, 2022 · Mar 22, 2022
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   media type (<https://github.com/openvinotoolkit/datumaro/pull/628>)
 - Common Semantic Segmentation dataset format (import-only)
   (<https://github.com/openvinotoolkit/datumaro/pull/685>)
+- An option to disable `data/` prefix inclusion in YOLO export
+  (<https://github.com/openvinotoolkit/datumaro/pull/689>)
 - New command `describe-downloads` to print information about downloadable datasets
   (<https://github.com/openvinotoolkit/datumaro/pull/678>)
 

@@ -11,8 +11,9 @@
 from datumaro.components.converter import Converter
 from datumaro.components.dataset import ItemStatus
 from datumaro.components.errors import MediaTypeError
-from datumaro.components.extractor import DEFAULT_SUBSET_NAME, DatasetItem
+from datumaro.components.extractor import DEFAULT_SUBSET_NAME, DatasetItem, IExtractor
 from datumaro.components.media import Image
+from datumaro.util import str_to_bool
 
 from .format import YoloPath
 
@@ -32,6 +33,24 @@ class YoloConverter(Converter):
     # https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects
     DEFAULT_IMAGE_EXT = ".jpg"
 
+    @classmethod
+    def build_cmdline_parser(cls, **kwargs):
+        """Return the parent class parser extended with the ``--add-path-prefix`` option."""
+        help_text = "Add the 'data/' prefix for paths in the dataset info (default: %(default)s)"
+        cmdline_parser = super().build_cmdline_parser(**kwargs)
+        # argparse passes the raw command-line string through str_to_bool
+        cmdline_parser.add_argument(
+            "--add-path-prefix", type=str_to_bool, default=True, help=help_text
+        )
+        return cmdline_parser
+
+    def __init__(
+        self, extractor: IExtractor, save_dir: str, *, add_path_prefix: bool = True, **kwargs
+    ) -> None:
+        """Set up the converter; ``add_path_prefix=False`` leaves the path prefix empty."""
+        super().__init__(extractor, save_dir, **kwargs)
+        self._prefix = "" if not add_path_prefix else "data"
+
     def apply(self):
         extractor = self._extractor
         save_dir = self._save_dir
@@ -81,7 +100,9 @@ def apply(self):
                             self._save_image(item, osp.join(subset_dir, image_name))
                         else:
                             log.warning("Item '%s' has no image" % item.id)
-                    image_paths[item.id] = osp.join("data", osp.basename(subset_dir), image_name)
+                    image_paths[item.id] = osp.join(
+                        self._prefix, osp.basename(subset_dir), image_name
+                    )
 
                     yolo_annotation = self._export_item_annotation(item)
                     annotation_path = osp.join(subset_dir, "%s.txt" % item.id)
@@ -91,7 +112,7 @@ def apply(self):
                 except Exception as e:
                     self._report_item_error(e, item_id=(item.id, item.subset))
 
-            subset_list_name = "%s.txt" % subset_name
+            subset_list_name = f"{subset_name}.txt"
             subset_list_path = osp.join(save_dir, subset_list_name)
             if self._patch and subset_name in self._patch.updated_subsets and not image_paths:
                 if osp.isfile(subset_list_path):
@@ -100,15 +121,18 @@ def apply(self):
 
             subset_lists[subset_name] = subset_list_name
             with open(subset_list_path, "w", encoding="utf-8") as f:
-                f.writelines("%s\n" % s for s in image_paths.values())
+                f.writelines("%s\n" % s.replace("\\", "/") for s in image_paths.values())
 
         with open(osp.join(save_dir, "obj.data"), "w", encoding="utf-8") as f:
-            f.write("classes = %s\n" % len(label_ids))
+            f.write(f"classes = {len(label_ids)}\n")
 
             for subset_name, subset_list_name in subset_lists.items():
-                f.write("%s = %s\n" % (subset_name, osp.join("data", subset_list_name)))
+                f.write(
+                    "%s = %s\n"
+                    % (subset_name, osp.join(self._prefix, subset_list_name).replace("\\", "/"))
+                )
 
-            f.write("names = %s\n" % osp.join("data", "obj.names"))
+            f.write("names = %s\n" % osp.join(self._prefix, "obj.names"))
             f.write("backup = backup/\n")
 
     def _export_item_annotation(self, item):

@@ -147,6 +147,8 @@ Extra options for exporting to YOLO format:
   (default: `False`)
 - `--image-ext <IMAGE_EXT>` allow to specify image extension
   for exporting dataset (default: use original or `.jpg`, if none)
+- `--add-path-prefix` specifies whether to include the
+  `data/` path prefix in the annotation files (default: `True`)
 
 ## Examples
 

@@ -36,6 +36,7 @@ class Requirements:
     DATUM_497 = "Support import for SYNTHIA dataset"
     DATUM_542 = "Images missing after merging two datasets"
     DATUM_580 = "Import for MPII Human Pose Dataset"
+    DATUM_609 = "Allow not to prepend data/ prefix in YOLO export"
     DATUM_616 = "Import for BraTS dataset"
     DATUM_673 = "Pickling for Dataset and Annotations"
 

@@ -275,6 +275,36 @@ def test_can_save_and_load_with_meta_file(self):
             self.assertTrue(osp.isfile(osp.join(test_dir, "dataset_meta.json")))
             compare_datasets(self, source_dataset, parsed_dataset)
 
+    @mark_requirement(Requirements.DATUM_609)
+    def test_can_save_and_load_without_path_prefix(self):
+        """Check that export with ``add_path_prefix=False`` omits the ``data/`` prefix."""
+        source_dataset = Dataset.from_iterable(
+            [
+                DatasetItem(
+                    id=3,
+                    subset="valid",
+                    media=Image(data=np.ones((8, 8, 3))),
+                    # Label indices are 0-based, so 1 is the last of the two declared labels
+                    annotations=[Bbox(0, 1, 5, 2, label=1)],
+                ),
+            ],
+            categories=["a", "b"],
+        )
+
+        with TestDir() as test_dir:
+            YoloConverter.convert(source_dataset, test_dir, save_media=True, add_path_prefix=False)
+            parsed_dataset = Dataset.import_from(test_dir, "yolo")
+
+            with open(osp.join(test_dir, "obj.data"), "r", encoding="utf-8") as f:
+                lines = f.readlines()
+                self.assertIn("valid = valid.txt\n", lines)
+
+            with open(osp.join(test_dir, "valid.txt"), "r", encoding="utf-8") as f:
+                lines = f.readlines()
+                self.assertIn("obj_valid_data/3.jpg\n", lines)
+
+            compare_datasets(self, source_dataset, parsed_dataset)
+
 
 DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), "assets", "yolo_dataset")