diff --git a/src/scores/probability/crps_impl.py b/src/scores/probability/crps_impl.py index ae370683f..6db11873d 100644 --- a/src/scores/probability/crps_impl.py +++ b/src/scores/probability/crps_impl.py @@ -199,17 +199,17 @@ def crps_cdf( - The usual CRPS is the threshold-weighted CRPS with :math:`\\text{threshold_weight}(x) = 1` for all x. This can be decomposed into an over-forecast penalty: - :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{fcst}(x) - + :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{fcst}(x) - \\text{obs_cdf}(x))^2]\\text{d}x}`, over all thresholds x where x >= obs and an under-forecast penalty: - :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{(fcst}(x) - + :math:`\\int_{-\\infty}^{\\infty}{[\\text{threshold_weight}(x) \\times \\text{(fcst}(x) - \\text{obs_cdf}(x)^2]\\text{d}x}`, over all thresholds x where x <= obs. - To obtain the components of the CRPS score, set ``include_components`` to ``True``. + To obtain the components of the CRPS score, set ``include_components`` to ``True``. Note that there are several ways to decompose the CRPS and this decomposition differs from - the one used in the :py:func:`scores.probability.crps_for_ensemble` function. + the one used in the :py:func:`scores.probability.crps_for_ensemble` function. Note that the function `crps_cdf` is designed so that the `obs` argument contains actual observed values. `crps_cdf` will convert `obs` into CDF form in order to @@ -223,12 +223,12 @@ def crps_cdf( - (with NaN values excluded) There are two methods of integration: - - "exact" gives the exact integral under that assumption that that `fcst` is - continuous and piecewise linear between its specified values, and that - `threshold_weight` (if supplied) is piecewise constant and right-continuous + - "exact" gives the exact integral under that assumption that that `fcst` is + continuous and piecewise linear between its specified values, and that + `threshold_weight` (if supplied) is piecewise constant and right-continuous between its specified values. - - "trapz" simply uses a trapezoidal rule using the specified values, and so is - an approximation of the CRPS. To get an accurate approximation, the density + - "trapz" simply uses a trapezoidal rule using the specified values, and so is + an approximation of the CRPS. To get an accurate approximation, the density of threshold values can be increased by supplying `additional_thresholds`. Both methods of calculating CRPS may require adding additional values to the @@ -245,25 +245,25 @@ def crps_cdf( fcst: array of forecast CDFs, with the threshold dimension given by `threshold_dim`. obs: array of observations, not in CDF form. threshold_dim: name of the dimension in `fcst` that indexes the thresholds. - threshold_weight: weight to be applied along `threshold_dim` to calculate - threshold-weighted CRPS. Must contain `threshold_dim` as a dimension, and may - also include other dimensions from `fcst`. If `weight=None`, a weight of 1 + threshold_weight: weight to be applied along `threshold_dim` to calculate + threshold-weighted CRPS. Must contain `threshold_dim` as a dimension, and may + also include other dimensions from `fcst`. If `weight=None`, a weight of 1 is applied everywhere, which gives the usual CRPS. - additional_thresholds: additional thresholds values to add to `fcst` and (if + additional_thresholds: additional thresholds values to add to `fcst` and (if applicable) `threshold_weight` prior to calculating CRPS. - propagate_nans: If `True`, propagate NaN values along `threshold_dim` in `fcst` - and `threshold_weight` prior to calculating CRPS. This will result in CRPS - being NaN for these cases. If `False`, NaN values in `fcst` and `weight` will - be replaced, wherever possible, with non-NaN values using the fill method + propagate_nans: If `True`, propagate NaN values along `threshold_dim` in `fcst` + and `threshold_weight` prior to calculating CRPS. This will result in CRPS + being NaN for these cases. If `False`, NaN values in `fcst` and `weight` will + be replaced, wherever possible, with non-NaN values using the fill method specified by `fcst_fill_method` and `threshold_weight_fill_method`. - fcst_fill_method: how to fill values in `fcst` when NaNs have been introduced - (by including additional thresholds) or are specified to be removed (by - setting `propagate_nans=False`). Select one of: + fcst_fill_method: how to fill values in `fcst` when NaNs have been introduced + (by including additional thresholds) or are specified to be removed (by + setting `propagate_nans=False`). Select one of: - "linear": use linear interpolation, then replace any leading or \ trailing NaNs using linear extrapolation. Afterwards, all values are \ clipped to the closed interval [0, 1]. - - "step": apply forward filling, then replace any leading NaNs with 0. + - "step": apply forward filling, then replace any leading NaNs with 0. - "forward": first apply forward filling, then remove any leading NaNs by \ back filling. - "backward": first apply back filling, then remove any trailing NaNs by \ @@ -272,9 +272,9 @@ def crps_cdf( threshold_weight_fill_method: how to fill values in `threshold_weight` when NaNs have been introduced (by including additional thresholds) or are specified - to be removed (by setting `propagate_nans=False`). Select one of "linear", - "step", "forward" or "backward". If the weight function is continuous, - "linear" is probably the best choice. If it is an increasing step function, + to be removed (by setting `propagate_nans=False`). Select one of "linear", + "step", "forward" or "backward". If the weight function is continuous, + "linear" is probably the best choice. If it is an increasing step function, "forward" may be best. integration_method (str): one of "exact" or "trapz". @@ -440,7 +440,6 @@ def crps_cdf_exact_slow( `cdf_fcst`, `cdf_obs` or `threshold_weight`. """ # identify where input arrays have no NaN, collapsing `threshold_dim` - # Mypy doesn't realise the isnan and any come from xarray not numpy inputs_without_nan = ( ~np.isnan(cdf_fcst).any(threshold_dim) & ~np.isnan(cdf_obs).any(threshold_dim) @@ -638,7 +637,7 @@ def adjust_fcst_for_crps( Whether a CDF is decreasing outside specified tolerance is determined as follows. For each CDF in `fcst`, the sum of incremental decreases along the threshold dimension - is calculated. For example, if the CDF values are + is calculated. For example, if the CDF values are `[0, 0.4, 0.3, 0.9, 0.88, 1]` @@ -1007,7 +1006,7 @@ def tw_crps_for_ensemble( ) -> xr.DataArray: """ Calculates the threshold weighted continuous ranked probability score (twCRPS) given - ensemble input using a chaining function ``chaining_func`` (see below). An ensemble of + ensemble input using a chaining function ``chaining_func`` (see below). An ensemble of forecasts can also be thought of as a random sample from the predictive distribution. The twCRPS is calculated by the formula @@ -1026,7 +1025,7 @@ def tw_crps_for_ensemble( where :math:`\\mathbb{1}` is the indicator function which returns a value of 1 if the condition is true and 0 otherwise. A chaining function would then be :math:`v(x) = \\text{max}(x, t)`. - There are currently two methods available for calculating the twCRPS: "ecdf" and "fair". + There are currently two methods available for calculating the twCRPS: "ecdf" and "fair". - If `method="ecdf"` then the twCRPS value returned is \ the exact twCRPS value for the empirical cumulative distribution function \ constructed using the ensemble values. @@ -1088,7 +1087,7 @@ def tw_crps_for_ensemble( References: - Allen, S., Ginsbourger, D., & Ziegel, J. (2023). Evaluating forecasts for high-impact \ events using transformed kernel scores. SIAM/ASA Journal on Uncertainty \ - Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184. + Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184. - Allen, S. (2024). Weighted scoringRules: Emphasizing Particular Outcomes \ When Evaluating Probabilistic Forecasts. Journal of Statistical Software, \ 110(8), 1-26. https://doi.org/10.18637/jss.v110.i08 @@ -1102,7 +1101,7 @@ def tw_crps_for_ensemble( Examples: - Calculate the twCRPS for an ensemble of forecasts where the chaining function is + Calculate the twCRPS for an ensemble of forecasts where the chaining function is derived from a weight function that assigns a weight of 1 to thresholds above 0.5 and a weight of 0 to thresholds below 0.5. @@ -1202,7 +1201,7 @@ def tail_tw_crps_for_ensemble( References: - Allen, S., Ginsbourger, D., & Ziegel, J. (2023). Evaluating forecasts for high-impact \ events using transformed kernel scores. SIAM/ASA Journal on Uncertainty \ - Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184. + Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184. - Allen, S. (2024). Weighted scoringRules: Emphasizing Particular Outcomes \ When Evaluating Probabilistic Forecasts. Journal of Statistical Software, \ 110(8), 1-26. https://doi.org/10.18637/jss.v110.i08 @@ -1316,7 +1315,7 @@ def interval_tw_crps_for_ensemble( References: - Allen, S., Ginsbourger, D., & Ziegel, J. (2023). Evaluating forecasts for high-impact \ events using transformed kernel scores. SIAM/ASA Journal on Uncertainty \ - Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184. + Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184. - Allen, S. (2024). Weighted scoringRules: Emphasizing Particular Outcomes \ When Evaluating Probabilistic Forecasts. Journal of Statistical Software, \ 110(8), 1-26. https://doi.org/10.18637/jss.v110.i08 diff --git a/src/scores/probability/crps_numba.py b/src/scores/probability/crps_numba.py index 2c7495e2c..aab050df6 100644 --- a/src/scores/probability/crps_numba.py +++ b/src/scores/probability/crps_numba.py @@ -150,7 +150,6 @@ def crps_cdf_exact_fast( `cdf_fcst`, `cdf_obs` or `threshold_weight`. """ # identify where input arrays have no NaN, collapsing `threshold_dim` - # Mypy doesn't realise the isnan and any come from xarray not numpy inputs_without_nan = ( ~np.isnan(cdf_fcst).any(threshold_dim) & ~np.isnan(obs) & ~np.isnan(threshold_weight).any(threshold_dim) ) diff --git a/src/scores/typing.py b/src/scores/typing.py index 9267b4718..3c8010156 100644 --- a/src/scores/typing.py +++ b/src/scores/typing.py @@ -108,7 +108,7 @@ def all_same_xarraylike( For internal use only: - - TypeGuards help mypy resolve complex static type checks - avoids + - TypeGuards help resolve complex static type checks - avoids the need to use "type: ignore" when types are ambiguous. - Often used in conjunction with, type-based control flow, asserts