open-edge-platform · sooahleex · Aug 26, 2024 · Aug 26, 2024
@@ -9,8 +9,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### New features
 - Add a new CLI command: datum format
   (<https://github.com/openvinotoolkit/datumaro/pull/1570>)
-- Support language dataset for DmTorchDataset
-  (<https://github.com/openvinotoolkit/datumaro/pull/1588>)
 
 ### Enhancements
 - Change _Shape to Shape and add comments for subclasses of Shape

@@ -64,6 +64,3 @@ json-stream
 
 # TabularValidator
 nltk
-
-# torch converter for language
-portalocker
@@ -85,7 +85,7 @@ def parse_requirements(filename=CORE_REQUIREMENTS_FILE):
     extras_require={
         "tf": ["tensorflow"],
         "tfds": ["tensorflow-datasets<4.9.3"],
-        "torch": ["torch", "torchvision", "torchtext==0.16.0"],
+        "torch": ["torch", "torchvision"],
         "default": DEFAULT_REQUIREMENTS,
     },
     ext_modules=ext_modules,

@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -17,7 +17,6 @@
     "detection": AnnotationType.bbox,
     "instance_segmentation": AnnotationType.polygon,
     "semantic_segmentation": AnnotationType.mask,
-    "tabular": [AnnotationType.label, AnnotationType.caption],
 }
 
 
@@ -89,10 +88,7 @@ def _gen_item(self, idx: int):
                 if ann.type == TASK_ANN_TYPE[self.task]
             ]
             label = mask_tools.merge_masks((mask, label_id) for mask, label_id in masks)
-        elif self.task == "tabular":
-            label = [
-                ann.as_dict() for ann in item.annotations if ann.type in TASK_ANN_TYPE[self.task]
-            ]
+
         return image, label
 
 
@@ -107,58 +103,15 @@ def __init__(
             task: str,
             transform: Optional[Callable] = None,
             target_transform: Optional[Callable] = None,
-            target: Optional[str] = None,
-            tokenizer: Optional[tuple[Callable, Callable]] = None,
-            vocab: Optional[tuple[Callable, Callable]] = None,
         ):
             super().__init__(dataset=dataset, subset=subset, task=task)
 
             self.transform = transform
             self.target_transform = target_transform
 
-            if self.task == "tabular":
-                if not isinstance(target, dict):
-                    raise ValueError(
-                        "Target should be a dictionary with 'input' and 'output' keys."
-                    )
-                self.input_target = target.get("input")
-                self.output_target = target.get("output")
-                if not self.input_target:
-                    raise ValueError(
-                        "Please provide target column for tabular task which is used for input"
-                    )
-
-                if not (tokenizer and vocab):
-                    raise ValueError("Both tokenizer and vocab must be provided for tabular task")
-                self.tokenizer = tokenizer
-                self.vocab = vocab
-
         def __getitem__(self, idx):
             image, label = self._gen_item(idx)
 
-            if self.task == "tabular":
-                text = image()[self.input_target]
-
-                if self.output_target:
-                    src_tokenizer, tgt_tokenizer = self.tokenizer
-                    src_vocab, tgt_vocab = self.vocab
-                    src_tokens = src_tokenizer(text)
-                    src_token_ids = src_vocab(src_tokens)
-
-                    label_text = label[0]["caption"].split(f"{self.output_target}:")[-1]
-                    tgt_tokens = tgt_tokenizer(label_text)
-                    tgt_token_ids = tgt_vocab(tgt_tokens)
-
-                    return torch.tensor(src_token_ids, dtype=torch.long), torch.tensor(
-                        tgt_token_ids, dtype=torch.long
-                    )
-                else:
-                    tokens = self.tokenizer(text)
-                    token_ids = self.vocab(tokens)
-                    return torch.tensor(token_ids, dtype=torch.long), torch.tensor(
-                        label[0]["label"], dtype=torch.long
-                    )
-
             if len(image.shape) == 2:
                 image = np.expand_dims(image, axis=-1)