Skip to content

Commit 8e8fb2a

Browse files
Several changes to wrap the call to the experiment runner:
- New class to handle the location and status of experiments.
- New method of PayuManager to run experiments.
- Added an implementation of the method to generate experiments. It currently only registers a list of existing experiments, instead of actually generating them.
1 parent 9eb6871 commit 8e8fb2a

7 files changed

Lines changed: 253 additions & 31 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ dependencies = [
2121
"pint-xarray",
2222
"matplotlib",
2323
"access-config-utils",
24+
"experiment-runner",
25+
"payu", # Extra dependency of experiment-runner, not handled correctly in conda
2426
]
2527

2628
[build-system]

src/access/profiling/access_models.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,15 @@
1616

1717

1818
class ESM16Profiling(PayuManager):
19-
"""Handles profiling of ACCESS-ESM1.6 configurations."""
19+
"""Handles profiling of ACCESS-ESM1.6 configurations.
20+
21+
Args:
22+
work_dir (Path): Path to directory used to generate and run profiling experiments.
23+
config_name (str): User supplied name. It is used to build some internal paths, but has no other effect.
24+
"""
25+
26+
def __init__(self, work_dir: Path, config_name: str | None = None) -> None:
27+
super().__init__(work_dir=work_dir, config_name=config_name)
2028

2129
def get_component_logs(self, path: Path) -> dict[str, ProfilingLog]:
2230
"""Returns available profiling logs for the components in ACCESS-ESM1.6.

src/access/profiling/manager.py

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
from abc import ABC, abstractmethod
5+
from enum import Enum
56
from pathlib import Path
67

78
import xarray as xr
@@ -49,15 +50,46 @@ def parse(self) -> xr.Dataset:
4950
)
5051

5152

53+
class ProfilingExperimentStatus(Enum):
    """Enumeration representing the status of a profiling experiment."""

    NEW = 1  # Initial status of every experiment.
    RUNNING = 2
    DONE = 3


class ProfilingExperiment:
    """Represents a profiling experiment.

    Args:
        path (Path): Path to the experiment directory.
        status (ProfilingExperimentStatus): Initial status of the experiment. Defaults to
            ProfilingExperimentStatus.NEW.
    """

    path: Path  # Path to the experiment directory
    status: ProfilingExperimentStatus = ProfilingExperimentStatus.NEW  # Status of the experiment

    def __init__(self, path: Path, status: ProfilingExperimentStatus = ProfilingExperimentStatus.NEW) -> None:
        self.path = path
        # Always stored on the instance, so that an experiment can be created directly in a given state
        # instead of being mutated right after construction.
        self.status = status

    def __repr__(self) -> str:
        """Returns a readable representation, useful when logging or debugging experiments."""
        return f"{type(self).__name__}(path={self.path!r}, status={self.status.name})"
73+
74+
5275
class ProfilingManager(ABC):
5376
"""Abstract base class to handle profiling data and workflows.
5477
5578
This high-level class defines methods to parse different types of profiling data. Currently,
5679
it supports parsing and plotting scaling data.
80+
81+
Args:
82+
work_dir (Path): Path to directory used to generate and run profiling experiments.
5783
"""
5884

85+
work_dir: Path # Path to directory used to generate and run profiling experiments.
86+
experiments: dict[str, ProfilingExperiment] = {} # Dictionary storing ProfilingExperiment instances.
5987
data: dict[str, xr.Dataset] = {} # Dictionary mapping component names to their profiling datasets.
6088

89+
def __init__(self, work_dir: Path) -> None:
    """Initialises the manager with its working directory and empty experiment/data registries.

    Args:
        work_dir (Path): Path to directory used to generate and run profiling experiments.
    """
    super().__init__()
    self.work_dir = work_dir
    # The class-level defaults of `experiments` and `data` are mutable dictionaries shared by every
    # instance. Rebinding them here gives each manager its own registries, so experiments added to one
    # manager do not show up in another.
    self.experiments = {}
    self.data = {}
92+
6193
@abstractmethod
6294
def parse_profiling_data(self, path: Path) -> dict[str, xr.Dataset]:
6395
"""Parses profiling data from the specified path.
@@ -80,27 +112,23 @@ def parse_ncpus(self, path: Path) -> int:
80112
int: Number of CPUs used in the experiment.
81113
"""
82114

83-
def parse_scaling_data(self, paths: list[Path]):
84-
"""Parses profiling data from a list of experiment directories.
85-
86-
Args:
87-
paths (list[Path]): List of paths to experiment directories.
88-
"""
115+
def parse_scaling_data(self):
116+
"""Parses profiling data from the experiments."""
89117
self.data = {}
90-
for path in paths:
91-
# Parse data
92-
datasets = self.parse_profiling_data(path)
93-
94-
# Find number of cpus used
95-
ncpus = self.parse_ncpus(path)
96-
97-
# Add ncpus dimension and concatenate with existing data
98-
for name, ds in datasets.items():
99-
ds = ds.expand_dims({"ncpus": 1}).assign_coords({"ncpus": [ncpus]})
100-
if name in self.data:
101-
self.data[name] = xr.concat([self.data[name], ds], dim="ncpus", join="outer")
102-
else:
103-
self.data[name] = ds
118+
for exp in self.experiments.values():
119+
if exp.status == ProfilingExperimentStatus.DONE:
120+
datasets = self.parse_profiling_data(exp.path)
121+
122+
# Find number of cpus used
123+
ncpus = self.parse_ncpus(exp.path)
124+
125+
# Add ncpus dimension and concatenate with existing data
126+
for name, ds in datasets.items():
127+
ds = ds.expand_dims({"ncpus": 1}).assign_coords({"ncpus": [ncpus]})
128+
if name in self.data:
129+
self.data[name] = xr.concat([self.data[name], ds], dim="ncpus", join="outer")
130+
else:
131+
self.data[name] = ds
104132

105133
def plot_scaling_data(
106134
self,

src/access/profiling/payu_manager.py

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,32 @@
77

88
import xarray as xr
99
from access.config import YAMLParser
10+
from experiment_runner.experiment_runner import ExperimentRunner
1011

11-
from access.profiling.manager import ProfilingLog, ProfilingManager
12+
from access.profiling.manager import ProfilingExperiment, ProfilingExperimentStatus, ProfilingLog, ProfilingManager
1213
from access.profiling.payujson_parser import PayuJSONProfilingParser
1314

1415
logger = logging.getLogger(__name__)
1516

1617

1718
class PayuManager(ProfilingManager, ABC):
18-
"""Abstract base class to handle profiling of Payu configurations."""
19+
"""Abstract base class to handle profiling of Payu configurations.
20+
21+
Args:
22+
work_dir (Path): Path to directory used to generate and run profiling experiments.
23+
config_name (str): User supplied name. It is used to build some internal paths, but has no other effect.
24+
"""
25+
26+
config_name: str # User supplied name. It is used to build some internal paths, but has no other effect.
27+
_nruns: int = 1 # Number of repetitions for the Payu experiments.
28+
_startfrom_restart: str = "cold" # Restart option for the Payu experiments.
29+
30+
def __init__(self, work_dir: Path, config_name: str | None = None) -> None:
    """Initialises the Payu manager.

    Args:
        work_dir (Path): Path to directory used to generate and run profiling experiments.
        config_name (str | None): User-supplied name. When omitted, "config" is used.
    """
    super().__init__(work_dir=work_dir)
    # Fall back to a generic name when the caller does not supply one.
    self.config_name = "config" if config_name is None else config_name
1936

2037
@abstractmethod
2138
def get_component_logs(self, path: Path) -> dict[str, ProfilingLog]:
@@ -27,6 +44,86 @@ def get_component_logs(self, path: Path) -> dict[str, ProfilingLog]:
2744
dict[str, ProfilingLog]: Dictionary mapping component names to their ProfilingLog instances.
2845
"""
2946

47+
@property
def nruns(self) -> int:
    """Number of times each Payu experiment is repeated.

    Returns:
        int: Number of repetitions.
    """
    return self._nruns

@nruns.setter
def nruns(self, value: int) -> None:
    """Updates the number of repetitions for the Payu experiments.

    Args:
        value (int): Number of repetitions.

    Raises:
        ValueError: If the value is smaller than 1.
    """
    if value < 1:
        raise ValueError("Number of runs must be at least 1.")
    self._nruns = value
66+
67+
@property
def startfrom_restart(self) -> str:
    """Restart option used for the Payu experiments.

    Returns:
        str: Restart option.
    """
    return self._startfrom_restart

@startfrom_restart.setter
def startfrom_restart(self, value: str) -> None:
    """Updates the restart option for the Payu experiments.

    The value is stored as given; no validation is performed here.

    Args:
        value (str): Restart option.
    """
    self._startfrom_restart = value
84+
85+
def generate_experiments(self, branches: list[str]) -> None:
    """Registers a Payu experiment for each of the given branches.

    Nothing is created on disk by this method: each branch is only recorded in ``self.experiments``, with an
    experiment path of ``work_dir / branch / config_name``. Branches that are already registered are left
    untouched.

    Args:
        branches (list[str]): List of branches to generate experiments for.
    """
    for branch in branches:
        if branch in self.experiments:
            # Lazy %-style arguments: the message is only formatted if the record is actually emitted.
            logger.info("Experiment for branch %s already exists. Skipping addition.", branch)
            continue
        self.experiments[branch] = ProfilingExperiment(self.work_dir / branch / self.config_name)
97+
98+
def run_experiments(self) -> None:
    """Runs the Payu experiments that have not been run yet.

    Every experiment whose status is NEW is added to the experiment runner configuration, together with the
    current values of ``nruns`` and ``startfrom_restart``, and is marked as RUNNING. The runner is only
    invoked when at least one such experiment exists. Afterwards, all experiments marked as RUNNING are marked
    as DONE.
    """
    runner_config = {
        "test_path": self.work_dir,
        "repository_directory": self.config_name,
        "running_branches": [],
        "keep_uuid": True,
        "nruns": [],
        "startfrom_restart": [],
    }

    # The keys of `self.experiments` are branch names; one entry is appended to each list per branch to run.
    for branch, exp in self.experiments.items():
        logger.debug("Experiment %s has status %s.", branch, exp.status)
        if exp.status == ProfilingExperimentStatus.NEW:
            runner_config["running_branches"].append(branch)
            runner_config["nruns"].append(self.nruns)
            runner_config["startfrom_restart"].append(self.startfrom_restart)
            exp.status = ProfilingExperimentStatus.RUNNING

    if runner_config["running_branches"]:
        # Run the experiment runner
        ExperimentRunner(runner_config).run()
    else:
        # Do not invoke the runner with an empty list of branches.
        logger.info("No new experiments to run.")

    # We are marking all running experiments as done here, but later this should be implemented properly
    # so that an actual check is performed, probably somewhere else.
    # NOTE(review): if the runner raises, the experiments selected above stay in the RUNNING state and are
    # not picked up again by a later call - confirm the intended recovery behaviour.
    for exp in self.experiments.values():
        if exp.status == ProfilingExperimentStatus.RUNNING:
            exp.status = ProfilingExperimentStatus.DONE
126+
30127
def parse_ncpus(self, path: Path) -> int:
31128
"""Parses the number of CPUs used in a given Payu experiment.
32129

tests/test_access_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def test_esm16_config_profiling(mock_is_file, mock_yaml_parse, mock_path_read_te
1919
"""Test the ESM16ConfigProfiling class."""
2020

2121
# Instantiate ESM16ConfigProfiling
22-
config_profiling = ESM16Profiling()
22+
config_profiling = ESM16Profiling(Path("/fake/test_path"))
2323

2424
# Mock the presence of all log files
2525
mock_is_file.side_effect = [True, True, True]

tests/test_manager.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88
import xarray as xr
99

10-
from access.profiling.manager import ProfilingLog, ProfilingManager
10+
from access.profiling.manager import ProfilingExperiment, ProfilingExperimentStatus, ProfilingLog, ProfilingManager
1111
from access.profiling.metrics import count, tavg, tmax
1212

1313

@@ -43,6 +43,18 @@ def test_profiling_log():
4343
mock_path.read_text.assert_called_once()
4444

4545

46+
def test_profiling_experiment():
    """Test the ProfilingExperiment class."""
    fake_path = Path("/fake/path")
    experiment = ProfilingExperiment(path=fake_path)

    # A freshly created experiment keeps its path and starts in the NEW state.
    assert experiment.path == fake_path
    assert experiment.status == ProfilingExperimentStatus.NEW

    # The status can be changed after construction.
    experiment.status = ProfilingExperimentStatus.RUNNING
    assert experiment.status == ProfilingExperimentStatus.RUNNING
56+
57+
4658
class MockProfilingManager(ProfilingManager):
4759
"""Test class inheriting from ProfilingManager to test its methods.
4860
@@ -55,11 +67,16 @@ class MockProfilingManager(ProfilingManager):
5567
"""
5668

5769
def __init__(self, paths, ncpus, datasets) -> None:
58-
super().__init__()
70+
super().__init__(Path("/fake/work_dir"))
5971

6072
self._mock_ncpus = dict(zip([path.name for path in paths], ncpus, strict=True))
6173
self._mock_datasets = dict(zip([path.name for path in paths], datasets, strict=True))
6274

75+
# Pre-generate experiments
76+
for path in paths:
77+
self.experiments[path] = ProfilingExperiment(path)
78+
self.experiments[path].status = ProfilingExperimentStatus.DONE
79+
6380
def parse_ncpus(self, path):
6481
"""Simulate parsing number of CPUs for a given path."""
6582
return self._mock_ncpus[path.name]
@@ -104,7 +121,7 @@ def test_scaling_data(mock_plot, scaling_data):
104121
paths, ncpus, datasets = scaling_data
105122
config_prof = MockProfilingManager(paths, ncpus, datasets)
106123

107-
config_prof.parse_scaling_data(paths)
124+
config_prof.parse_scaling_data()
108125

109126
assert set(config_prof.data.keys()) == {"component"}
110127
assert set(config_prof.data["component"].dims) == {"ncpus", "region"}, (

0 commit comments

Comments
 (0)