From 939654662870df4186e79b0a4cd7d3ddf74d5ef2 Mon Sep 17 00:00:00 2001
From: ShixuanZhang <shixuan.zhang@pnnl.gov>
Date: Wed, 8 Oct 2025 13:07:52 -0500
Subject: [PATCH 1/2] Add bug fix to improve robustness and performance

---
 .../synthetic_metrics_plotter.py              | 159 +++++++++++++-----
 zppy_interfaces/pcmdi_diags/viewer.py         | 133 ++++++++++-----
 2 files changed, 215 insertions(+), 77 deletions(-)

diff --git a/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py b/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py
index fce5645..86803d1 100644
--- a/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py
+++ b/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py
@@ -712,18 +712,57 @@ def portrait_metric_plot(
     shrink = 0.8 * fscale
     legend_fontsize = fontsize * 0.8
 
+    # --- SMALL GUARD A: basic inputs present ---
+    if not var_list:
+        logger.warning("[Portrait]: No variables to plot (var_list empty); returning.")
+        return
+    if not model_list:
+        logger.warning("[Portrait]: No models to plot (model_list empty); returning.")
+        return
+
     if group == "mean_climate":
-        # data for final plot
-        data_all_nor = np.stack(
-            [data_dict["djf"], data_dict["mam"], data_dict["jja"], data_dict["son"]]
-        )
+        # --- SMALL GUARD B: seasonal arrays exist & stack cleanly ---
+        required = ["djf", "mam", "jja", "son"]
+        missing = [k for k in required if (k not in data_dict or data_dict[k] is None)]
+        if missing:
+            logger.warning(
+                "[Portrait]: Missing seasonal arrays for %s; returning. Missing=%s",
+                group,
+                missing,
+            )
+            return
+        try:
+            arrs = [np.asarray(data_dict[k]) for k in required]
+            if any(a.size == 0 for a in arrs):
+                logger.warning(
+                    "[Portrait]: One or more seasonal arrays are empty; returning."
+                )
+                return
+            data_all_nor = np.stack(arrs)
+        except Exception as e:
+            logger.warning(
+                "[Portrait]: Failed to stack seasonal arrays: %s; returning.", e
+            )
+            return
+
         legend_on = True
         legend_labels = ["DJF", "MAM", "JJA", "SON"]
     else:
-        data_all_nor = data_dict
+        # --- SMALL GUARD C: non-seasonal data present ---
+        data_all_nor = np.asarray(data_dict)
+        if data_all_nor.size == 0:
+            logger.warning("[Portrait]: Input data array is empty; returning.")
+            return
         legend_on = False
         legend_labels = []
 
+    # --- SMALL GUARD D: minimal shape sanity (avoid cryptic errors downstream) ---
+    if data_all_nor.ndim < 2:
+        logger.warning(
+            "[Portrait]: Data has ndim=%d (<2); returning.", data_all_nor.ndim
+        )
+        return
+
     highlight_models = get_highlight_models(model_list, model_name)
     lable_colors = []
     for model in model_list:
@@ -742,7 +781,6 @@ def portrait_metric_plot(
     elif stat in ["stdv_pc_ratio_to_obs"]:
         var_range = (0.5, 1.5)
         cmap_color = "jet"
-        cmap_bounds = [0.5, 0.7, 0.9, 1.1, 1.3, 1.5]
         cmap_bounds = [r / 10 for r in range(5, 16, 1)]
     else:
         var_range = (-0.5, 0.5)
@@ -782,7 +820,9 @@ def portrait_metric_plot(
 
     # Add title
     fig.suptitle(
-        f"{region} — {group} ({stat_name})", fontsize=fontsize * 1.1, fontweight="bold"
+        f"{region} — {group} ({stat_name})",
+        fontsize=fontsize * 1.1,
+        fontweight="bold",
     )
     fig.tight_layout(rect=[0, 0, 1, 0.95])  # leave top 5 % free for title
 
@@ -888,29 +928,54 @@ def parcoord_metric_plot(
             "#377eb8",
             "#dede00",
         ]
-
-    # ensemble mean for E3SM group
+    # --- SMALL GUARD 1: highlight models may be missing ---
     highlight_model1 = get_highlight_models(data_dict.get("model", []), model_name)
-    irow_str = data_dict[data_dict["model"] == highlight_model1[0]].index[0]
-    irow_end = data_dict[data_dict["model"] == highlight_model1[-1]].index[0] + 1
-    data_dict.loc[mean2_name] = data_dict[irow_str:irow_end].mean(
-        numeric_only=True, skipna=True
-    )
-    data_dict.at[mean2_name, "model"] = mean2_name
+    if not highlight_model1:
+        # No highlightable models → skip means/highlights later
+        highlight_model1 = []
+        have_highlights = False
+    else:
+        have_highlights = True
 
-    # ensemble mean for CMIP group
-    irow_sub = data_dict[data_dict["model"] == highlight_model1[0]].index[0]
-    data_dict.loc[mean1_name] = data_dict[:irow_sub].mean(
-        numeric_only=True, skipna=True
-    )
-    data_dict.at[mean1_name, "model"] = mean1_name
-    data_dict.loc[mean2_name] = data_dict[irow_sub:].mean(
-        numeric_only=True, skipna=True
-    )
-    data_dict.at[mean2_name, "model"] = mean2_name
+    # Only compute E3SM mean if we found highlight rows in the DF
+    if have_highlights:
+        if (highlight_model1[0] in set(data_dict["model"])) and (
+            highlight_model1[-1] in set(data_dict["model"])
+        ):
+            irow_str = data_dict.index[data_dict["model"] == highlight_model1[0]][0]
+            irow_end = (
+                data_dict.index[data_dict["model"] == highlight_model1[-1]][0] + 1
+            )
+            data_dict.loc[mean2_name] = data_dict.iloc[irow_str:irow_end].mean(
+                numeric_only=True, skipna=True
+            )
+            data_dict.at[mean2_name, "model"] = mean2_name
+        else:
+            have_highlights = False  # fallback if names weren’t found
+
+    # CMIP/E3SM means only if we can split reliably
+    if have_highlights:
+        irow_sub = data_dict.index[data_dict["model"] == highlight_model1[0]][0]
+        data_dict.loc[mean1_name] = data_dict.iloc[:irow_sub].mean(
+            numeric_only=True, skipna=True
+        )
+        data_dict.at[mean1_name, "model"] = mean1_name
+        data_dict.loc[mean2_name] = data_dict.iloc[irow_sub:].mean(
+            numeric_only=True, skipna=True
+        )
+        data_dict.at[mean2_name, "model"] = mean2_name
+
+    # --- SMALL GUARD 1: highlights models
+    if not have_highlights:
+        logger.warning(
+            f"[ParCoord]: No highlightable models found for model_name={model_name}; "
+            f"Skipping highlight and mean calculations."
+        )
 
-    model_list = data_dict["model"].to_list()
-    highlight_model2 = highlight_model1 + [mean1_name, mean2_name]
+    model_list = data_dict["model"].astype(str).to_list()
+    highlight_model2 = highlight_model1 + (
+        [mean1_name, mean2_name] if have_highlights else []
+    )
 
     # colors for highlight lines
     lncolors = []
@@ -922,30 +987,48 @@ def parcoord_metric_plot(
         else:
             lncolors.append(xcolors[i % len(xcolors)])
 
-    var_name1 = sorted(var_names.copy())
+    # --- SMALL GUARD 2: keep only existing, non-empty vars ---
+    var_name1 = sorted(
+        v for v in var_names if (v in data_dict.columns) and data_dict[v].notna().any()
+    )
+    if not var_name1:
+        logger.warning(
+            f"[ParCoord]: Nothing to plot for group={group}, region={region}, stat={stat}. "
+            f"No valid variables found in metrics data (columns checked={len(var_names)})."
+        )
+        return
+
     # label information
     var_labels = []
-    for i, var in enumerate(var_name1):
-        index = var_names.index(var)
-        if var_units is not None:
-            var_labels.append(var_names[index] + "\n" + var_units[index])
+    for v in var_name1:
+        idx = var_names.index(v)
+        if var_units is not None and idx < len(var_units):
+            var_labels.append(var_names[idx] + "\n" + var_units[idx])
         else:
-            var_labels.append(var_names[index])
+            var_labels.append(var_names[idx])
 
     # final plot data
     data_var = data_dict[var_name1].to_numpy()
 
+    # --- SMALL GUARD 3: ensure at least 1 column for parallel-coords ---
+    if data_var.ndim != 2 or data_var.shape[1] == 0:
+        logger.warning(
+            f"[ParCoord]: Not enough data to process parallel coordinate plots "
+            f"(shape={data_var.shape}); returning without plot."
+        )
+        return
+
     xlabel = "Metric"
     ylabel = "{} ({})".format(stat_name, stat.upper())
 
     if "mean_climate" in [group, region]:
-        title = "Model Performance of Annual Climatology ({}, {})".format(
-            stat.upper(), region.upper()
-        )
+        title = f"Model Performance of Annual Climatology ({stat.upper()}, {region.upper()})"
     elif "variability_modes" in [group, region]:
-        title = "Model Performance of Modes Variability ({})".format(stat.upper())
+        title = f"Model Performance of Modes Variability ({stat.upper()})"
     elif "enso" in [group, region]:
-        title = "Model Performance of ENSO ({})".format(stat.upper())
+        title = f"Model Performance of ENSO ({stat.upper()})"
+    else:
+        title = f"Model Performance ({stat.upper()}, {region.upper()})"
 
     fig, ax = parallel_coordinate_plot(
         data_var,
diff --git a/zppy_interfaces/pcmdi_diags/viewer.py b/zppy_interfaces/pcmdi_diags/viewer.py
index 9d80c85..98b7034 100644
--- a/zppy_interfaces/pcmdi_diags/viewer.py
+++ b/zppy_interfaces/pcmdi_diags/viewer.py
@@ -691,23 +691,44 @@ def __init__(self, diag_dir, fig_dir):
             ("Pattern Corr.", "cor_xy", "Portrait"),
             ("RMSE", "rms_xy", "Portrait", "rms_xyt", "ParCoord"),
         ]
+        # 1 section column + 1 region column + 3 slots per metric group
+        self.COLS_PER_METRIC = 4
+        self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * len(self.metrics)
+
+    def set_layout_from_metrics(self, metrics):
+        """Update metrics & recompute TOTAL_COLS so all rows align."""
+        if metrics is not None:
+            self.metrics = metrics
+        self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * len(self.metrics)
+
+    # --- helper to keep rows aligned ---
+    def _pad_or_check(self, row):
+        """Keep rows from exceeding TOTAL_COLS; never add filler cells."""
+        w = sum(int(c.get("colspan", 1)) for c in row)
+
+        # exact or short → leave as-is (no empty filler appended)
+        if w <= self.TOTAL_COLS:
+            return row
+
+        # too wide → shrink from the last colspan cell
+        over = w - self.TOTAL_COLS
+        for c in reversed(row):
+            if "colspan" in c:
+                c["colspan"] = max(1, int(c["colspan"]) - over)
+                break
+        return row
 
     def build_summary_table(self, regions=None, metrics=None):
         clim_path = safe_join(str(self.fig_dir), "ERROR_metric/mean_climate")
         metric_table = []
 
-        if regions is None:
-            regions = self.regions
-
-        if metrics is None:
-            metrics = self.metrics
+        regions = self.regions if regions is None else regions
+        metrics = self.metrics if metrics is None else metrics
 
         for i, region in enumerate(regions):
             row = []
             if i == 0:
-                row.append(
-                    {"content": "<b>Mean Climate</b>", "rowspan": len(self.regions)}
-                )
+                row.append({"content": "<b>Mean Climate</b>", "rowspan": len(regions)})
             row.append({"content": region.upper()})
 
             for metric in metrics:
@@ -721,11 +742,15 @@ def build_summary_table(self, regions=None, metrics=None):
                         filename_pattern=filename_pattern,
                         label=mode,
                     )
-                    row.append({"colspan": 4, "content": f"{name}<br>{link}"})
+                    row.append(
+                        {
+                            "colspan": self.COLS_PER_METRIC,
+                            "content": f"{name}<br>{link}",
+                        }
+                    )
 
                 elif len(metric) == 5:
                     name, prefix1, mode1, prefix2, mode2 = metric
-
                     link1 = create_image_link(
                         fig_dir=clim_path,
                         diag_dir=self.diag_dir,
@@ -740,19 +765,24 @@ def build_summary_table(self, regions=None, metrics=None):
                         filename_pattern=f"{prefix2}_{region}_parcoord_mean_climate.png",
                         label=mode2,
                     )
+                    row.append(
+                        {
+                            "colspan": self.COLS_PER_METRIC,
+                            "content": f"{name}<br>{link1} {link2}",
+                        }
+                    )
 
-                    row.append({"colspan": 4, "content": f"{name}<br>{link1} {link2}"})
-
-            metric_table.append(row)
-
+            metric_table.append(self._pad_or_check(row))
         return metric_table
 
     def build_enso_row(self):
-        row: List[Dict[str, object]] = []
+        saved_total = self.TOTAL_COLS
+        self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * len(self.metrics)
+
+        row = []
         row.append({"content": "<b>ENSO</b>"})
         row.append({"content": "TROPICS"})
         enso_path = safe_join(str(self.fig_dir), "ERROR_metric/enso_metric")
-
         link = create_image_link(
             fig_dir=enso_path,
             diag_dir=self.diag_dir,
@@ -760,14 +790,28 @@ def build_enso_row(self):
             filename_pattern="enso_metric_skill_portrait.png",
             label="Portrait",
         )
+        row.append(
+            {
+                "colspan": str(self.TOTAL_COLS - 2),
+                "content": f"Performance Skill<br>{link}",
+            }
+        )
 
-        row.append({"colspan": 12, "content": f"Performance Skill<br>{link}"})
+        row = self._pad_or_check(row)
+        self.TOTAL_COLS = saved_total
         return row
 
     def build_emov_row(self):
-        row: List[Dict[str, object]] = []
-        row.append({"content": "<b>EMoVs</b>"})
-        row.append({"content": "Extra-TROPICS"})
+        # --- Section-independent layout ---
+        # Always reserve first 2 columns: section + region
+        saved_total = self.TOTAL_COLS
+        self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * 3  # 3 EMoV metrics
+
+        row = []
+        # Add section name (same column position as "Mean Climate")
+        row.append({"content": "<b>EMoVs</b>"})  # section column
+        row.append({"content": "Extra-TROPICS"})  # region column
+
         emov_path = safe_join(str(self.fig_dir), "ERROR_metric/variability_modes")
 
         modes_metrics = [
@@ -797,7 +841,16 @@ def build_emov_row(self):
         for name, f1, mode1, f2, mode2 in modes_metrics:
             link1 = create_image_link(emov_path, self.diag_dir, [], f1, mode1)
             link2 = create_image_link(emov_path, self.diag_dir, [], f2, mode2)
-            row.append({"colspan": 4, "content": f"{name}<br>{link1} {link2}"})
+            row.append(
+                {
+                    "colspan": str(self.COLS_PER_METRIC),
+                    "content": f"{name}<br>{link1} {link2}",
+                }
+            )
+
+        # Ensure correct total columns and restore TOTAL_COLS
+        row = self._pad_or_check(row)
+        self.TOTAL_COLS = saved_total
         return row
 
 
@@ -815,36 +868,38 @@ def generate_summary_table(
     Build the summary metrics table (Mean Climate, ENSO, EMoV).
     Returns a list of rows (each row is a list of cell dicts).
     """
-    # Coerce possible string flags
-    for name in ("clim_show", "mova_show", "movc_show", "enso_show"):
-        val = locals()[name]
-        if not isinstance(val, bool):
-            locals()[name] = str(val).strip().lower() in {
-                "1",
-                "true",
-                "t",
-                "yes",
-                "y",
-                "on",
-            }
-    clim_show, mova_show, movc_show, enso_show = (
-        clim_show,
-        mova_show,
-        movc_show,
-        enso_show,
-    )
+
+    # --- Coerce flags ---
+    truthy = {"1", "true", "t", "yes", "y", "on"}
+    if not isinstance(clim_show, bool):
+        clim_show = str(clim_show).strip().lower() in truthy
+    if not isinstance(mova_show, bool):
+        mova_show = str(mova_show).strip().lower() in truthy
+    if not isinstance(movc_show, bool):
+        movc_show = str(movc_show).strip().lower() in truthy
+    if not isinstance(enso_show, bool):
+        enso_show = str(enso_show).strip().lower() in truthy
+
+    # --- Normalize regions ---
+    if isinstance(clim_regions, str):
+        clim_regions = [r.strip() for r in clim_regions.split(",") if r.strip()]
 
     builder = SummaryTableBuilder(diag_dir, fig_dir)
+    builder.set_layout_from_metrics(clim_metrics)
+
     table: List[list] = []
 
+    # === Mean Climate ===
     if clim_show:
         table.extend(
             builder.build_summary_table(regions=clim_regions, metrics=clim_metrics)
         )
 
+    # === ENSO Section ===
     if enso_show:
         table.append(builder.build_enso_row())
 
+    # === EMoVs Section ===
     if mova_show or movc_show:
         table.append(builder.build_emov_row())
 

From d65ef170a49da98213b05eace6b2e001997778e0 Mon Sep 17 00:00:00 2001
From: Ryan Forsyth <forsyth2@llnl.gov>
Date: Fri, 10 Oct 2025 17:08:06 -0500
Subject: [PATCH 2/2] Improve error handling in pcmdi_setup

---
 zppy_interfaces/pcmdi_diags/pcmdi_setup.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/zppy_interfaces/pcmdi_diags/pcmdi_setup.py b/zppy_interfaces/pcmdi_diags/pcmdi_setup.py
index d88981f..57bbc86 100644
--- a/zppy_interfaces/pcmdi_diags/pcmdi_setup.py
+++ b/zppy_interfaces/pcmdi_diags/pcmdi_setup.py
@@ -377,12 +377,15 @@ def derive_missing_variable(varin, path, model_id):
     base_ds = None
     output_file = None
 
+    vars_without_files: List[str] = []
+
     for i, (src_var, scale) in enumerate(var_dic.items()):
         fpaths = sorted(glob.glob(os.path.join(path, f"*.{src_var}.*.nc")))
         if not fpaths:
-            raise FileNotFoundError(
+            vars_without_files.append(
                 f"No file found for source variable '{src_var}' in {path}"
             )
+            continue
         fpath = fpaths[0]
         ds = xcdat_open(fpath)
         data = ds[src_var] * scale
@@ -397,6 +400,9 @@ def derive_missing_variable(varin, path, model_id):
         else:
             derived_data = derived_data + data
 
+    if vars_without_files:
+        raise FileNotFoundError(" ; ".join(vars_without_files))
+
     if base_ds is not None and derived_data is not None:
         derived_da = xr.DataArray(
             data=derived_data.data,