open-edge-platform · goodsong81 · Jan 9, 2024 · Jan 8, 2024 · Jan 8, 2024 · Jan 8, 2024
@@ -7,17 +7,17 @@ on:
         type: choice
         description: Model type to run benchmark
         options:
-        - default  # speed, balance, accuracy models only
-        - all      # default + other models
+          - default # speed, balance, accuracy models only
+          - all # default + other models
         default: default
       data-size:
         type: choice
         description: Dataset size to run benchmark
         options:
-        - small
-        - medium
-        - large
-        - all
+          - small
+          - medium
+          - large
+          - all
         default: all
       num-repeat:
         description: Overrides default per-data-size number of repeat setting
@@ -29,9 +29,9 @@ on:
         type: choice
         description: The last operation to evaluate. 'optimize' means all.
         options:
-        - train
-        - export
-        - optimize
+          - train
+          - export
+          - optimize
         default: optimize
 
 jobs:

@@ -7,17 +7,17 @@ on:
         type: choice
         description: Model type to run benchmark
         options:
-        - default  # speed, balance, accuracy models only
-        - all      # default + other models
+          - default # speed, balance, accuracy models only
+          - all # default + other models
         default: default
       data-size:
         type: choice
         description: Dataset size to run benchmark
         options:
-        - small
-        - medium
-        - large
-        - all
+          - small
+          - medium
+          - large
+          - all
         default: large
       num-repeat:
         description: Overrides default per-data-size number of repeat setting
@@ -29,9 +29,9 @@ on:
         type: choice
         description: The last operation to evaluate. 'optimize' means all.
         options:
-        - train
-        - export
-        - optimize
+          - train
+          - export
+          - optimize
         default: optimize
 
 jobs:

@@ -286,15 +286,19 @@ def train(exit_stack: Optional[ExitStack] = None):  # pylint: disable=too-many-b
 
     resource_tracker = None
     if args.track_resource_usage and not is_multigpu_child_process():
-        resource_tracker = ResourceTracker(args.track_resource_usage, args.gpus)
+        resource_tracker = ResourceTracker(
+            config_manager.output_path / "resource_usage.yaml", args.track_resource_usage, args.gpus
+        )
         resource_tracker.start()
+        if exit_stack is not None:
+            exit_stack.callback(resource_tracker.stop)
 
     task.train(
         dataset, output_model, train_parameters=TrainParameters(), seed=args.seed, deterministic=args.deterministic
     )
 
-    if resource_tracker is not None:
-        resource_tracker.stop(config_manager.output_path / "resource_usage.yaml")
+    if resource_tracker is not None and exit_stack is None:
+        resource_tracker.stop()
 
     model_path = config_manager.output_path / "models"
     save_model_data(output_model, str(model_path))

@@ -30,12 +30,16 @@ class ResourceTracker:
     """Class to track resources usage.
 
     Args:
+        output_path (Union[str, Path]): Output file path to save CPU & GPU utilization and max memory usage values.
         resource_type (str, optional): Which resource to track. Available values are cpu, gpu or all now.
                                         Defaults to "all".
         gpu_ids (Optional[str]): GPU indices to record.
     """
 
-    def __init__(self, resource_type: str = "all", gpu_ids: Optional[str] = None):
+    def __init__(self, output_path: Union[str, Path], resource_type: str = "all", gpu_ids: Optional[str] = None):
+        if isinstance(output_path, str):
+            output_path = Path(output_path)
+        self.output_path = output_path
         if resource_type == "all":
             self._resource_type = AVAILABLE_RESOURCE_TYPE
         else:
@@ -62,19 +66,12 @@ def start(self):
         )
         self._mem_check_proc.start()
 
-    def stop(self, output_path: Union[str, Path]):
-        """Terminate a process to record resources usage.
-
-        Args:
-            output_path (Union[str, Path]): Output file path to save CPU & GPU utilization and max meory usage values.
-        """
+    def stop(self):
+        """Terminate a process to record resources usage."""
         if self._mem_check_proc is None or not self._mem_check_proc.is_alive():
             return
 
-        if isinstance(output_path, str):
-            output_path = Path(output_path)
-
-        self._queue.put(output_path)
+        self._queue.put(self.output_path)
         self._mem_check_proc.join(10)
         if self._mem_check_proc.exitcode is None:
             self._mem_check_proc.terminate()

@@ -22,8 +22,9 @@ def _set_up(self, mocker):
     @e2e_pytest_unit
     @pytest.mark.parametrize("resource_type", ("cpu", "gpu", "all", "cpu,gpu"))
     @pytest.mark.parametrize("gpu_ids", (None, "0", "0,3"))
-    def test_init(self, resource_type, gpu_ids):
-        ResourceTracker(resource_type, gpu_ids)
+    @pytest.mark.parametrize("output_path", ("fake", Path("fake")))
+    def test_init(self, output_path, resource_type, gpu_ids):
+        ResourceTracker(output_path, resource_type, gpu_ids)
 
     @e2e_pytest_unit
     @pytest.mark.parametrize("resource_type", ("cpu", "gpu", "all", "cpu,gpu"))
@@ -41,7 +42,7 @@ def test_start(self, resource_type, gpu_ids):
             expected_gpu_ids[0] = 0
 
         # run
-        resource_tracker = ResourceTracker(resource_type, gpu_ids)
+        resource_tracker = ResourceTracker("fake_output", resource_type, gpu_ids)
         resource_tracker.start()
 
         self.mock_proc.start.assert_called_once()  # check that a process to track resource usages starts
@@ -51,7 +52,7 @@ def test_start(self, resource_type, gpu_ids):
 
     @e2e_pytest_unit
     def test_start_multiple_times(self):
-        resource_tracker = ResourceTracker()
+        resource_tracker = ResourceTracker("fake_output")
 
         # run multiple times
         resource_tracker.start()
@@ -63,9 +64,9 @@ def test_start_multiple_times(self):
     def test_stop(self):
         output_path = Path("fake")
 
-        resource_tracker = ResourceTracker()
+        resource_tracker = ResourceTracker(output_path)
         resource_tracker.start()
-        resource_tracker.stop(output_path)
+        resource_tracker.stop()
 
         # check that code to terminate a process is executed properly
         self.mock_queue.put.assert_called_once_with(output_path)
@@ -77,9 +78,9 @@ def test_stop_not_exit_normally(self):
         output_path = Path("fake")
         self.mock_proc.exitcode = None
 
-        resource_tracker = ResourceTracker()
+        resource_tracker = ResourceTracker(output_path)
         resource_tracker.start()
-        resource_tracker.stop(output_path)
+        resource_tracker.stop()
 
         # check that code to terminate a process is executed properly
         self.mock_queue.put.assert_called_once_with(output_path)
@@ -90,8 +91,8 @@ def test_stop_not_exit_normally(self):
 
     @e2e_pytest_unit
     def test_stop_before_start(self):
-        resource_tracker = ResourceTracker()
-        resource_tracker.stop("fake")
+        resource_tracker = ResourceTracker("fake")
+        resource_tracker.stop()
 
         # check that code to make a process done isn't called
         self.mock_queue.put.assert_not_called()