Skip to content

Commit 9eb6871

Browse files
Merge pull request #13 from ACCESS-NRI/payu_config_profiling
Add classes to handle profiling of Payu configurations.
2 parents 580517e + be1b051 commit 9eb6871

10 files changed

Lines changed: 543 additions & 73 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies = [
2020
"pint",
2121
"pint-xarray",
2222
"matplotlib",
23+
"access-config-utils",
2324
]
2425

2526
[build-system]

src/access/profiling/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
with suppress(PackageNotFoundError):
1010
__version__ = version("access-profiling")
1111

12+
from access.profiling.access_models import ESM16Profiling
1213
from access.profiling.cice5_parser import CICE5ProfilingParser
1314
from access.profiling.esmf_parser import ESMFSummaryProfilingParser
1415
from access.profiling.fms_parser import FMSProfilingParser
@@ -23,4 +24,5 @@
2324
"CICE5ProfilingParser",
2425
"PayuJSONProfilingParser",
2526
"ESMFSummaryProfilingParser",
27+
"ESM16Profiling",
2628
]
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import logging
5+
from pathlib import Path
6+
7+
from access.config import YAMLParser
8+
9+
from access.profiling.cice5_parser import CICE5ProfilingParser
10+
from access.profiling.fms_parser import FMSProfilingParser
11+
from access.profiling.manager import ProfilingLog
12+
from access.profiling.payu_manager import PayuManager
13+
from access.profiling.um_parser import UMProfilingParser
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
class ESM16Profiling(PayuManager):
    """Handles profiling of ACCESS-ESM1.6 configurations."""

    def get_component_logs(self, path: Path) -> dict[str, ProfilingLog]:
        """Returns available profiling logs for the components in ACCESS-ESM1.6.

        Args:
            path (Path): Path to the output directory.
        Returns:
            dict[str, ProfilingLog]: Dictionary mapping component names to their ProfilingLog instances.
        """
        component_logs: dict[str, ProfilingLog] = {}
        yaml_parser = YAMLParser()

        # UM: the log file name is recorded in um_env.yaml, with rank "0" appended.
        um_env_path = path / "atmosphere" / "um_env.yaml"
        um_env = yaml_parser.parse(um_env_path.read_text())
        um_logfile = path / "atmosphere" / f"{um_env['UM_STDOUT_FILE']}0"
        if um_logfile.is_file():
            logger.debug(f"Found UM log file: {um_logfile}")
            component_logs["UM"] = ProfilingLog(um_logfile, UMProfilingParser())

        # MOM5: the log file is named after the "model" entry of the payu config.
        config_path = path / "config.yaml"
        payu_config = yaml_parser.parse(config_path.read_text())
        mom5_logfile = path / f"{payu_config['model']}.out"
        if mom5_logfile.is_file():
            logger.debug(f"Found MOM5 log file: {mom5_logfile}")
            component_logs["MOM5"] = ProfilingLog(mom5_logfile, FMSProfilingParser(has_hits=False))

        # CICE5: fixed diagnostics file name.
        cice5_logfile = path / "ice" / "ice_diag.d"
        if cice5_logfile.is_file():
            logger.debug(f"Found CICE5 log file: {cice5_logfile}")
            component_logs["CICE5"] = ProfilingLog(cice5_logfile, CICE5ProfilingParser())

        return component_logs

src/access/profiling/manager.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from abc import ABC, abstractmethod
5+
from pathlib import Path
6+
7+
import xarray as xr
8+
9+
from access.profiling.metrics import ProfilingMetric
10+
from access.profiling.parser import ProfilingParser
11+
from access.profiling.scaling import plot_scaling_metrics
12+
13+
14+
class ProfilingLog:
    """Represents a profiling log file.

    Args:
        filepath (Path): Path to the log file.
        parser (ProfilingParser): Parser to use for this log file.
    """

    filepath: Path  # Path to the log file
    parser: ProfilingParser  # Parser to use for this log file

    def __init__(self, filepath: Path, parser: ProfilingParser):
        self.filepath = filepath
        self.parser = parser

    def parse(self) -> xr.Dataset:
        """Parses the log file and returns the profiling data as an xarray Dataset.

        Returns:
            xr.Dataset: Parsed profiling data."""
        raw = self.parser.read(self.filepath.read_text())
        # One quantified DataArray per metric, all sharing the "region" dimension.
        data_vars = {
            metric: xr.DataArray(raw[metric], dims=["region"]).pint.quantify(metric.units)
            for metric in self.parser.metrics
        }
        return xr.Dataset(data_vars=data_vars, coords={"region": raw["region"]})
50+
51+
52+
class ProfilingManager(ABC):
    """Abstract base class to handle profiling data and workflows.

    This high-level class defines methods to parse different types of profiling data. Currently,
    it supports parsing and plotting scaling data.
    """

    # Dictionary mapping component names to their profiling datasets.
    data: dict[str, xr.Dataset]

    def __init__(self):
        # A per-instance dictionary. A class-level mutable default ({}) would be
        # shared by every ProfilingManager instance until parse_scaling_data
        # rebinds it, letting one instance's data leak into another's plots.
        self.data = {}

    @abstractmethod
    def parse_profiling_data(self, path: Path) -> dict[str, xr.Dataset]:
        """Parses profiling data from the specified path.

        Args:
            path (Path): Path to the experiment directory.

        Returns:
            dict[str, xr.Dataset]: Dictionary mapping component names to their profiling datasets.
        """

    @abstractmethod
    def parse_ncpus(self, path: Path) -> int:
        """Parses the number of CPUs used in a given experiment in the specified path.

        Args:
            path (Path): Path to the experiment directory.

        Returns:
            int: Number of CPUs used in the experiment.
        """

    def parse_scaling_data(self, paths: list[Path]):
        """Parses profiling data from a list of experiment directories.

        Args:
            paths (list[Path]): List of paths to experiment directories.
        """
        self.data = {}
        for path in paths:
            # Parse data
            datasets = self.parse_profiling_data(path)

            # Find number of cpus used
            ncpus = self.parse_ncpus(path)

            # Add ncpus dimension and concatenate with existing data
            for name, ds in datasets.items():
                ds = ds.expand_dims({"ncpus": 1}).assign_coords({"ncpus": [ncpus]})
                if name in self.data:
                    self.data[name] = xr.concat([self.data[name], ds], dim="ncpus", join="outer")
                else:
                    self.data[name] = ds

    def plot_scaling_data(
        self,
        components: list[str],
        regions: list[list[str]],
        metric: ProfilingMetric,
        region_relabel_map: dict | None = None,
    ):
        """Plots scaling data for the specified components, regions and metric.

        Args:
            components (list[str]): List of component names to plot.
            regions (list[list[str]]): List of regions to plot for each component.
            metric (ProfilingMetric): Metric to use for the scaling plots.
            region_relabel_map (dict | None): Optional mapping to relabel regions in the plots.
        """
        plot_scaling_metrics([self.data[c] for c in components], regions, metric, region_relabel_map=region_relabel_map)

src/access/profiling/parser.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44
"""Classes and utilities to build profiling parsers for reading profiling data."""
55

66
from abc import ABC, abstractmethod
7-
from collections.abc import Iterable
87
from typing import Any
98

109
# Next import is required to register pint with xarray
1110
import pint_xarray # noqa: F401
12-
import xarray as xr
1311

1412
from access.profiling.metrics import ProfilingMetric
1513

@@ -51,51 +49,6 @@ def read(self, stream: str) -> dict:
5149
dict: profiling data.
5250
"""
5351

54-
def parse_data_series(self, logs: list[str], varname: str, vars: Iterable) -> xr.Dataset:
55-
"""Given a list of logs containing profiling data, parse the data and return it as a xarray dataset.
56-
57-
For example, if the logs correspond to different runs of the same application with different number of CPUs,
58-
then varname should be "ncpus" and vars could be a list with core counts:
59-
60-
log_1cpu = open("log_1cpu.txt").read()
61-
log_2cpu = open("log_2cpu.txt").read()
62-
log_4cpu = open("log_4cpu.txt").read()
63-
scaling_data = parser.parse_data_series(
64-
logs= [log_1cpu, log_2cpu, log_4cpu],
65-
varname="ncpus",
66-
vars=[1, 2, 4]
67-
)
68-
69-
Args:
70-
Logs (list[str]): Logs to parse.
71-
varname (str): Name of the variable that changes between logs.
72-
vars (Iterable): An iterable returning the value of the variable that changes between logs.
73-
74-
Returns:
75-
Dataset: Series profiling data.
76-
"""
77-
datasets = []
78-
for var, log in zip(vars, logs, strict=True):
79-
data = self.read(log)
80-
datasets.append(
81-
xr.Dataset(
82-
data_vars=dict(
83-
zip(
84-
self.metrics,
85-
[
86-
xr.DataArray([data[metric]], dims=[varname, "region"]).pint.quantify(metric.units)
87-
for metric in self.metrics
88-
],
89-
strict=True,
90-
)
91-
),
92-
coords={varname: [var], "region": data["region"]},
93-
)
94-
)
95-
96-
# Create dataset with all the data
97-
return xr.concat(datasets, dim=varname)
98-
9952

10053
def _convert_from_string(value: str) -> Any:
10154
"""Tries to convert a string to the most appropriate numeric type. Leaves it unchanged if conversion does not
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import logging
5+
from abc import ABC, abstractmethod
6+
from pathlib import Path
7+
8+
import xarray as xr
9+
from access.config import YAMLParser
10+
11+
from access.profiling.manager import ProfilingLog, ProfilingManager
12+
from access.profiling.payujson_parser import PayuJSONProfilingParser
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
class PayuManager(ProfilingManager, ABC):
    """Abstract base class to handle profiling of Payu configurations."""

    @abstractmethod
    def get_component_logs(self, path: Path) -> dict[str, ProfilingLog]:
        """Returns available profiling logs for the components in the configuration.

        Args:
            path (Path): Path to the output directory.
        Returns:
            dict[str, ProfilingLog]: Dictionary mapping component names to their ProfilingLog instances.
        """

    def parse_ncpus(self, path: Path) -> int:
        """Parses the number of CPUs used in a given Payu experiment.

        Args:
            path (Path): Path to the Payu experiment directory. Must contain a config.yaml file.
        Returns:
            int: Number of CPUs used in the experiment. If multiple submodels are defined, returns the sum of their
            ncpus.
        """
        config_path = path / "config.yaml"
        payu_config = YAMLParser().parse(config_path.read_text())
        if "submodels" in payu_config:
            return sum(submodel["ncpus"] for submodel in payu_config["submodels"])
        else:
            return payu_config["ncpus"]

    def parse_profiling_data(self, path: Path) -> dict[str, xr.Dataset]:
        """Parses profiling data from a Payu experiment directory.

        Args:
            path (Path): Path to the Payu experiment directory.
        Returns:
            dict[str, xr.Dataset]: Dictionary mapping component names to their profiling datasets.
        Raises:
            FileNotFoundError: If the archive or output directories are missing.
        """
        datasets = {}
        logs = {}

        # Check archive directory exists
        archive = path / "archive"
        if not archive.is_dir():
            raise FileNotFoundError(f"Directory {archive} does not exist!")

        # Parse payu json profiling data if available
        matches = sorted(archive.glob("payu_jobs/*/run/*.json"))
        if len(matches) > 1:
            logger.warning(f"Multiple payu json logs found in {path}! Using the first one found.")
        if len(matches) >= 1:
            logs["payu"] = ProfilingLog(matches[0], PayuJSONProfilingParser())

        # Find how many output directories are available and get logs from each component
        matches = sorted(archive.glob("output*"))
        if len(matches) == 0:
            raise FileNotFoundError(f"No output files found in {path}!")
        elif len(matches) > 1:
            logger.warning(f"Multiple output directories found in {path}! Using the first one found.")
        # Only use the first output directory, consistent with the warning above.
        # (Iterating over all matches would silently let components from the
        # LAST directory overwrite earlier ones via dict.update.)
        logs.update(self.get_component_logs(matches[0]))

        # Parse all logs
        for name, log in logs.items():
            logger.info(f"Parsing {name} profiling log: {log.filepath}. ")
            datasets[name] = log.parse()
            logger.info(" Done.")

        return datasets

0 commit comments

Comments
 (0)