From 939654662870df4186e79b0a4cd7d3ddf74d5ef2 Mon Sep 17 00:00:00 2001 From: ShixuanZhang Date: Wed, 8 Oct 2025 13:07:52 -0500 Subject: [PATCH 1/2] Add bug fix to improve robustness and performance --- .../synthetic_metrics_plotter.py | 159 +++++++++++++----- zppy_interfaces/pcmdi_diags/viewer.py | 133 ++++++++++----- 2 files changed, 215 insertions(+), 77 deletions(-) diff --git a/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py b/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py index fce5645..86803d1 100644 --- a/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py +++ b/zppy_interfaces/pcmdi_diags/synthetic_plots/synthetic_metrics_plotter.py @@ -712,18 +712,57 @@ def portrait_metric_plot( shrink = 0.8 * fscale legend_fontsize = fontsize * 0.8 + # --- SMALL GUARD A: basic inputs present --- + if not var_list: + logger.warning("[Portrait]: No variables to plot (var_list empty); returning.") + return + if not model_list: + logger.warning("[Portrait]: No models to plot (model_list empty); returning.") + return + if group == "mean_climate": - # data for final plot - data_all_nor = np.stack( - [data_dict["djf"], data_dict["mam"], data_dict["jja"], data_dict["son"]] - ) + # --- SMALL GUARD B: seasonal arrays exist & stack cleanly --- + required = ["djf", "mam", "jja", "son"] + missing = [k for k in required if (k not in data_dict or data_dict[k] is None)] + if missing: + logger.warning( + "[Portrait]: Missing seasonal arrays for %s; returning. Missing=%s", + group, + missing, + ) + return + try: + arrs = [np.asarray(data_dict[k]) for k in required] + if any(a.size == 0 for a in arrs): + logger.warning( + "[Portrait]: One or more seasonal arrays are empty; returning." + ) + return + data_all_nor = np.stack(arrs) + except Exception as e: + logger.warning( + "[Portrait]: Failed to stack seasonal arrays: %s; returning.", e + ) + return + legend_on = True legend_labels = ["DJF", "MAM", "JJA", "SON"] else: - data_all_nor = data_dict + # --- SMALL GUARD C: non-seasonal data present --- + data_all_nor = np.asarray(data_dict) + if data_all_nor.size == 0: + logger.warning("[Portrait]: Input data array is empty; returning.") + return legend_on = False legend_labels = [] + # --- SMALL GUARD D: minimal shape sanity (avoid cryptic errors downstream) --- + if data_all_nor.ndim < 2: + logger.warning( + "[Portrait]: Data has ndim=%d (<2); returning.", data_all_nor.ndim + ) + return + highlight_models = get_highlight_models(model_list, model_name) lable_colors = [] for model in model_list: @@ -742,7 +781,6 @@ def portrait_metric_plot( elif stat in ["stdv_pc_ratio_to_obs"]: var_range = (0.5, 1.5) cmap_color = "jet" - cmap_bounds = [0.5, 0.7, 0.9, 1.1, 1.3, 1.5] cmap_bounds = [r / 10 for r in range(5, 16, 1)] else: var_range = (-0.5, 0.5) @@ -782,7 +820,9 @@ def portrait_metric_plot( # Add title fig.suptitle( - f"{region} — {group} ({stat_name})", fontsize=fontsize * 1.1, fontweight="bold" + f"{region} — {group} ({stat_name})", + fontsize=fontsize * 1.1, + fontweight="bold", ) fig.tight_layout(rect=[0, 0, 1, 0.95]) # leave top 5 % free for title @@ -888,29 +928,54 @@ def parcoord_metric_plot( "#377eb8", "#dede00", ] - - # ensemble mean for E3SM group + # --- SMALL GUARD 1: highlight models may be missing --- highlight_model1 = get_highlight_models(data_dict.get("model", []), model_name) - irow_str = data_dict[data_dict["model"] == highlight_model1[0]].index[0] - irow_end = data_dict[data_dict["model"] == highlight_model1[-1]].index[0] + 1 - data_dict.loc[mean2_name] = data_dict[irow_str:irow_end].mean( - numeric_only=True, skipna=True - ) - data_dict.at[mean2_name, "model"] = mean2_name + if not highlight_model1: + # No highlightable models → skip means/highlights later + highlight_model1 = [] + have_highlights = False + else: + have_highlights = True - # ensemble mean for CMIP group - irow_sub = data_dict[data_dict["model"] == highlight_model1[0]].index[0] - data_dict.loc[mean1_name] = data_dict[:irow_sub].mean( - numeric_only=True, skipna=True - ) - data_dict.at[mean1_name, "model"] = mean1_name - data_dict.loc[mean2_name] = data_dict[irow_sub:].mean( - numeric_only=True, skipna=True - ) - data_dict.at[mean2_name, "model"] = mean2_name + # Only compute E3SM mean if we found highlight rows in the DF + if have_highlights: + if (highlight_model1[0] in set(data_dict["model"])) and ( + highlight_model1[-1] in set(data_dict["model"]) + ): + irow_str = data_dict.index[data_dict["model"] == highlight_model1[0]][0] + irow_end = ( + data_dict.index[data_dict["model"] == highlight_model1[-1]][0] + 1 + ) + data_dict.loc[mean2_name] = data_dict.iloc[irow_str:irow_end].mean( + numeric_only=True, skipna=True + ) + data_dict.at[mean2_name, "model"] = mean2_name + else: + have_highlights = False # fallback if names weren’t found + + # CMIP/E3SM means only if we can split reliably + if have_highlights: + irow_sub = data_dict.index[data_dict["model"] == highlight_model1[0]][0] + data_dict.loc[mean1_name] = data_dict.iloc[:irow_sub].mean( + numeric_only=True, skipna=True + ) + data_dict.at[mean1_name, "model"] = mean1_name + data_dict.loc[mean2_name] = data_dict.iloc[irow_sub:].mean( + numeric_only=True, skipna=True + ) + data_dict.at[mean2_name, "model"] = mean2_name + + # --- SMALL GUARD 1: highlights models + if not have_highlights: + logger.warning( + f"[ParCoord]: No highlightable models found for model_name={model_name}; " + f"Skipping highlight and mean calculations." + ) - model_list = data_dict["model"].to_list() - highlight_model2 = highlight_model1 + [mean1_name, mean2_name] + model_list = data_dict["model"].astype(str).to_list() + highlight_model2 = highlight_model1 + ( + [mean1_name, mean2_name] if have_highlights else [] + ) # colors for highlight lines lncolors = [] @@ -922,30 +987,48 @@ def parcoord_metric_plot( else: lncolors.append(xcolors[i % len(xcolors)]) - var_name1 = sorted(var_names.copy()) + # --- SMALL GUARD 2: keep only existing, non-empty vars --- + var_name1 = sorted( + v for v in var_names if (v in data_dict.columns) and data_dict[v].notna().any() + ) + if not var_name1: + logger.warning( + f"[ParCoord]: Nothing to plot for group={group}, region={region}, stat={stat}. " + f"No valid variables found in metrics data (columns checked={len(var_names)})." + ) + return + # label information var_labels = [] - for i, var in enumerate(var_name1): - index = var_names.index(var) - if var_units is not None: - var_labels.append(var_names[index] + "\n" + var_units[index]) + for v in var_name1: + idx = var_names.index(v) + if var_units is not None and idx < len(var_units): + var_labels.append(var_names[idx] + "\n" + var_units[idx]) else: - var_labels.append(var_names[index]) + var_labels.append(var_names[idx]) # final plot data data_var = data_dict[var_name1].to_numpy() + # --- SMALL GUARD 3: ensure at least 1 column for parallel-coords --- + if data_var.ndim != 2 or data_var.shape[1] == 0: + logger.warning( + f"[ParCoord]: Not enough data to process parallel coordinate plots " + f"(shape={data_var.shape}); returning without plot." + ) + return + xlabel = "Metric" ylabel = "{} ({})".format(stat_name, stat.upper()) if "mean_climate" in [group, region]: - title = "Model Performance of Annual Climatology ({}, {})".format( - stat.upper(), region.upper() - ) + title = f"Model Performance of Annual Climatology ({stat.upper()}, {region.upper()})" elif "variability_modes" in [group, region]: - title = "Model Performance of Modes Variability ({})".format(stat.upper()) + title = f"Model Performance of Modes Variability ({stat.upper()})" elif "enso" in [group, region]: - title = "Model Performance of ENSO ({})".format(stat.upper()) + title = f"Model Performance of ENSO ({stat.upper()})" + else: + title = f"Model Performance ({stat.upper()}, {region.upper()})" fig, ax = parallel_coordinate_plot( data_var, diff --git a/zppy_interfaces/pcmdi_diags/viewer.py b/zppy_interfaces/pcmdi_diags/viewer.py index 9d80c85..98b7034 100644 --- a/zppy_interfaces/pcmdi_diags/viewer.py +++ b/zppy_interfaces/pcmdi_diags/viewer.py @@ -691,23 +691,44 @@ def __init__(self, diag_dir, fig_dir): ("Pattern Corr.", "cor_xy", "Portrait"), ("RMSE", "rms_xy", "Portrait", "rms_xyt", "ParCoord"), ] + # 1 section column + 1 region column + 3 slots per metric group + self.COLS_PER_METRIC = 4 + self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * len(self.metrics) + + def set_layout_from_metrics(self, metrics): + """Update metrics & recompute TOTAL_COLS so all rows align.""" + if metrics is not None: + self.metrics = metrics + self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * len(self.metrics) + + # --- helper to keep rows aligned --- + def _pad_or_check(self, row): + """Keep rows from exceeding TOTAL_COLS; never add filler cells.""" + w = sum(int(c.get("colspan", 1)) for c in row) + + # exact or short → leave as-is (no empty filler appended) + if w <= self.TOTAL_COLS: + return row + + # too wide → shrink from the last colspan cell + over = w - self.TOTAL_COLS + for c in reversed(row): + if "colspan" in c: + c["colspan"] = max(1, int(c["colspan"]) - over) + break + return row def build_summary_table(self, regions=None, metrics=None): clim_path = safe_join(str(self.fig_dir), "ERROR_metric/mean_climate") metric_table = [] - if regions is None: - regions = self.regions - - if metrics is None: - metrics = self.metrics + regions = self.regions if regions is None else regions + metrics = self.metrics if metrics is None else metrics for i, region in enumerate(regions): row = [] if i == 0: - row.append( - {"content": "Mean Climate", "rowspan": len(self.regions)} - ) + row.append({"content": "Mean Climate", "rowspan": len(regions)}) row.append({"content": region.upper()}) for metric in metrics: @@ -721,11 +742,15 @@ def build_summary_table(self, regions=None, metrics=None): filename_pattern=filename_pattern, label=mode, ) - row.append({"colspan": 4, "content": f"{name}
{link}"}) + row.append( + { + "colspan": self.COLS_PER_METRIC, + "content": f"{name}
{link}", + } + ) elif len(metric) == 5: name, prefix1, mode1, prefix2, mode2 = metric - link1 = create_image_link( fig_dir=clim_path, diag_dir=self.diag_dir, @@ -740,19 +765,24 @@ def build_summary_table(self, regions=None, metrics=None): filename_pattern=f"{prefix2}_{region}_parcoord_mean_climate.png", label=mode2, ) + row.append( + { + "colspan": self.COLS_PER_METRIC, + "content": f"{name}
{link1} {link2}", + } + ) - row.append({"colspan": 4, "content": f"{name}
{link1} {link2}"}) - - metric_table.append(row) - + metric_table.append(self._pad_or_check(row)) return metric_table def build_enso_row(self): - row: List[Dict[str, object]] = [] + saved_total = self.TOTAL_COLS + self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * len(self.metrics) + + row = [] row.append({"content": "ENSO"}) row.append({"content": "TROPICS"}) enso_path = safe_join(str(self.fig_dir), "ERROR_metric/enso_metric") - link = create_image_link( fig_dir=enso_path, diag_dir=self.diag_dir, @@ -760,14 +790,28 @@ def build_enso_row(self): filename_pattern="enso_metric_skill_portrait.png", label="Portrait", ) + row.append( + { + "colspan": str(self.TOTAL_COLS - 2), + "content": f"Performance Skill
{link}", + } + ) - row.append({"colspan": 12, "content": f"Performance Skill
{link}"}) + row = self._pad_or_check(row) + self.TOTAL_COLS = saved_total return row def build_emov_row(self): - row: List[Dict[str, object]] = [] - row.append({"content": "EMoVs"}) - row.append({"content": "Extra-TROPICS"}) + # --- Section-independent layout --- + # Always reserve first 2 columns: section + region + saved_total = self.TOTAL_COLS + self.TOTAL_COLS = 2 + self.COLS_PER_METRIC * 3 # 3 EMoV metrics + + row = [] + # Add section name (same column position as "Mean Climate") + row.append({"content": "EMoVs"}) # section column + row.append({"content": "Extra-TROPICS"}) # region column + emov_path = safe_join(str(self.fig_dir), "ERROR_metric/variability_modes") modes_metrics = [ @@ -797,7 +841,16 @@ def build_emov_row(self): for name, f1, mode1, f2, mode2 in modes_metrics: link1 = create_image_link(emov_path, self.diag_dir, [], f1, mode1) link2 = create_image_link(emov_path, self.diag_dir, [], f2, mode2) - row.append({"colspan": 4, "content": f"{name}
{link1} {link2}"}) + row.append( + { + "colspan": str(self.COLS_PER_METRIC), + "content": f"{name}
{link1} {link2}", + } + ) + + # Ensure correct total columns and restore TOTAL_COLS + row = self._pad_or_check(row) + self.TOTAL_COLS = saved_total return row @@ -815,36 +868,38 @@ def generate_summary_table( Build the summary metrics table (Mean Climate, ENSO, EMoV). Returns a list of rows (each row is a list of cell dicts). """ - # Coerce possible string flags - for name in ("clim_show", "mova_show", "movc_show", "enso_show"): - val = locals()[name] - if not isinstance(val, bool): - locals()[name] = str(val).strip().lower() in { - "1", - "true", - "t", - "yes", - "y", - "on", - } - clim_show, mova_show, movc_show, enso_show = ( - clim_show, - mova_show, - movc_show, - enso_show, - ) + + # --- Coerce flags --- + truthy = {"1", "true", "t", "yes", "y", "on"} + if not isinstance(clim_show, bool): + clim_show = str(clim_show).strip().lower() in truthy + if not isinstance(mova_show, bool): + mova_show = str(mova_show).strip().lower() in truthy + if not isinstance(movc_show, bool): + movc_show = str(movc_show).strip().lower() in truthy + if not isinstance(enso_show, bool): + enso_show = str(enso_show).strip().lower() in truthy + + # --- Normalize regions --- + if isinstance(clim_regions, str): + clim_regions = [r.strip() for r in clim_regions.split(",") if r.strip()] builder = SummaryTableBuilder(diag_dir, fig_dir) + builder.set_layout_from_metrics(clim_metrics) + table: List[list] = [] + # === Mean Climate === if clim_show: table.extend( builder.build_summary_table(regions=clim_regions, metrics=clim_metrics) ) + # === ENSO Section === if enso_show: table.append(builder.build_enso_row()) + # === EMoVs Section === if mova_show or movc_show: table.append(builder.build_emov_row()) From d65ef170a49da98213b05eace6b2e001997778e0 Mon Sep 17 00:00:00 2001 From: Ryan Forsyth Date: Fri, 10 Oct 2025 17:08:06 -0500 Subject: [PATCH 2/2] Improve error handling in pcmdi_setup --- zppy_interfaces/pcmdi_diags/pcmdi_setup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/zppy_interfaces/pcmdi_diags/pcmdi_setup.py b/zppy_interfaces/pcmdi_diags/pcmdi_setup.py index d88981f..57bbc86 100644 --- a/zppy_interfaces/pcmdi_diags/pcmdi_setup.py +++ b/zppy_interfaces/pcmdi_diags/pcmdi_setup.py @@ -377,12 +377,15 @@ def derive_missing_variable(varin, path, model_id): base_ds = None output_file = None + vars_without_files: List[str] = [] + for i, (src_var, scale) in enumerate(var_dic.items()): fpaths = sorted(glob.glob(os.path.join(path, f"*.{src_var}.*.nc"))) if not fpaths: - raise FileNotFoundError( + vars_without_files.append( f"No file found for source variable '{src_var}' in {path}" ) + continue fpath = fpaths[0] ds = xcdat_open(fpath) data = ds[src_var] * scale @@ -397,6 +400,9 @@ def derive_missing_variable(varin, path, model_id): else: derived_data = derived_data + data + if vars_without_files: + raise FileNotFoundError(" ; ".join(vars_without_files)) + if base_ds is not None and derived_data is not None: derived_da = xr.DataArray( data=derived_data.data,