Skip to content

Commit 60db76c

Browse files
Add SMAPE metric (#4220)
* add SMAPE metric * update comments * add smape metric tests * update num of objectives * bound to 200 and revise 0 targets * bound smape
1 parent c30c86b commit 60db76c

6 files changed

Lines changed: 90 additions & 9 deletions

File tree

docs/source/release_notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ Release Notes
33
**Future Releases**
44
* Enhancements
55
* Add run_feature_selection to AutoMLSearch and Default Algorithm :pr:`4210`
6+
* Added ``SMAPE`` to the standard metrics for time series problems :pr:`4220`
67
* Fixes
78
* `IDColumnsDataCheck` now works with Unknown data type :pr:`4203`
89
* Changes

evalml/objectives/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
F1,
1717
MAE,
1818
MAPE,
19+
SMAPE,
1920
MSE,
2021
MeanSquaredLogError,
2122
R2,

evalml/objectives/standard_metrics.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66
from sklearn import metrics
77
from sklearn.preprocessing import label_binarize
8+
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
89

910
from evalml.objectives.binary_classification_objective import (
1011
BinaryClassificationObjective,
@@ -850,6 +851,45 @@ def positive_only(self):
850851
return True
851852

852853

class SMAPE(TimeSeriesRegressionObjective):
    """Symmetric mean absolute percentage error for time series regression. Scaled by 100 to return a percentage.

    Only valid when the true and predicted targets are not both zero at the same index.
    Otherwise, will throw a ValueError.

    Example:
        >>> y_true = pd.Series([1.5, 2, 3, 1, 0.5, 1, 2.5, 2.5, 1, 0.5, 2])
        >>> y_pred = pd.Series([1.5, 2.5, 2, 1, 0.5, 1, 3, 2.25, 0.75, 0.25, 1.75])
        >>> np.testing.assert_almost_equal(SMAPE().objective_function(y_true, y_pred), 18.13652589)
    """

    name = "Symmetric Mean Absolute Percentage Error"
    greater_is_better = False
    score_needs_proba = False
    perfect_score = 0.0
    is_bounded_like_percentage = True  # Range [0, 200]
    expected_range = [0, 200]

    def objective_function(self, y_true, y_predicted, X=None, sample_weight=None):
        """Objective function for symmetric mean absolute percentage error for time series regression.

        Args:
            y_true (pd.Series or np.ndarray): True target values.
            y_predicted (pd.Series or np.ndarray): Predicted target values.
            X (pd.DataFrame): Ignored; accepted for objective API compatibility.
            sample_weight (pd.Series): Ignored; accepted for objective API compatibility.

        Returns:
            float: SMAPE value, scaled to a percentage in the range [0, 200].

        Raises:
            ValueError: If y_true and y_predicted are both zero at the same index, since
                the symmetric denominator (|y_true| + |y_predicted|) is zero there.
        """
        # SMAPE is undefined at any position where both targets are 0.
        if ((abs(y_true) + abs(y_predicted)) == 0).any():
            raise ValueError(
                "Symmetric Mean Absolute Percentage Error cannot be used when "
                "true and predicted targets both contain the value 0.",
            )
        # Compare positionally rather than by pandas index alignment, so inputs
        # with mismatched indexes still score element-by-element.
        if isinstance(y_true, pd.Series):
            y_true = y_true.to_numpy()
        if isinstance(y_predicted, pd.Series):
            y_predicted = y_predicted.to_numpy()

        smape = MeanAbsolutePercentageError(symmetric=True)
        return smape(y_true, y_predicted) * 100

    @classproperty
    def positive_only(self):
        """If True, this objective is only valid for positive data."""
        return True
853893
class MSE(RegressionObjective):
854894
"""Mean squared error for regression.
855895

evalml/tests/data_checks_tests/test_invalid_target_data_check.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
InvalidTargetDataCheck,
1616
)
1717
from evalml.exceptions import DataCheckInitError
18-
from evalml.objectives import MAPE, MeanSquaredLogError, RootMeanSquaredLogError
18+
from evalml.objectives import MAPE, SMAPE, MeanSquaredLogError, RootMeanSquaredLogError
1919
from evalml.problem_types import ProblemTypes, is_binary, is_multiclass, is_regression
2020
from evalml.utils.woodwork_utils import numeric_and_boolean_ww
2121

@@ -397,7 +397,7 @@ def test_invalid_target_data_check_invalid_labels_for_nonnegative_objective_name
397397

398398
@pytest.mark.parametrize(
399399
"objective",
400-
[RootMeanSquaredLogError(), MeanSquaredLogError(), MAPE()],
400+
[RootMeanSquaredLogError(), MeanSquaredLogError(), MAPE(), SMAPE()],
401401
)
402402
def test_invalid_target_data_check_invalid_labels_for_nonnegative_objective_instances(
403403
objective,

evalml/tests/objective_tests/test_objectives.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
)
2828
from evalml.objectives.fraud_cost import FraudCost
2929
from evalml.objectives.objective_base import ObjectiveBase
30-
from evalml.objectives.standard_metrics import MAPE
30+
from evalml.objectives.standard_metrics import MAPE, SMAPE
3131
from evalml.objectives.utils import _all_objectives_dict
3232
from evalml.problem_types import ProblemTypes
3333

@@ -111,21 +111,21 @@ def test_get_core_objectives_types():
111111
assert len(get_core_objectives(ProblemTypes.MULTICLASS)) == 13
112112
assert len(get_core_objectives(ProblemTypes.BINARY)) == 8
113113
assert len(get_core_objectives(ProblemTypes.REGRESSION)) == 7
114-
assert len(get_core_objectives(ProblemTypes.TIME_SERIES_REGRESSION)) == 7
114+
assert len(get_core_objectives(ProblemTypes.TIME_SERIES_REGRESSION)) == 8
115115

116116

117117
def test_get_optimization_objectives_types():
118118
assert len(get_optimization_objectives(ProblemTypes.MULTICLASS)) == 13
119119
assert len(get_optimization_objectives(ProblemTypes.BINARY)) == 8
120120
assert len(get_optimization_objectives(ProblemTypes.REGRESSION)) == 7
121-
assert len(get_optimization_objectives(ProblemTypes.TIME_SERIES_REGRESSION)) == 7
121+
assert len(get_optimization_objectives(ProblemTypes.TIME_SERIES_REGRESSION)) == 8
122122

123123

124124
def test_get_ranking_objectives_types():
125125
assert len(get_ranking_objectives(ProblemTypes.MULTICLASS)) == 16
126126
assert len(get_ranking_objectives(ProblemTypes.BINARY)) == 9
127127
assert len(get_ranking_objectives(ProblemTypes.REGRESSION)) == 9
128-
assert len(get_ranking_objectives(ProblemTypes.TIME_SERIES_REGRESSION)) == 10
128+
assert len(get_ranking_objectives(ProblemTypes.TIME_SERIES_REGRESSION)) == 11
129129

130130

131131
def test_optimization_excludes_ranking():
@@ -135,7 +135,7 @@ def test_optimization_excludes_ranking():
135135

136136

137137
def test_get_time_series_objectives_types(time_series_objectives):
138-
assert len(time_series_objectives) == 10
138+
assert len(time_series_objectives) == 11
139139

140140

141141
def test_objective_outputs(
@@ -229,9 +229,9 @@ def test_objectives_support_nullable_types(
229229
if isinstance(obj, FraudCost):
230230
# FraudCost needs an "amount" column
231231
X = pd.DataFrame({"amount": [100, 5, 250, 89] * 5})
232-
elif isinstance(obj, MAPE):
232+
elif isinstance(obj, (MAPE, SMAPE)):
233233
if isinstance(y_true.ww.logical_type, BooleanNullable):
234-
pytest.skip("MAPE doesn't support inputs containing 0")
234+
pytest.skip("MAPE and SMAPE don't support inputs containing 0")
235235
# Replace numeric inputs containing 0
236236
y_true = y_true.ww.replace({0: 10})
237237
y_pred = y_pred.replace({0: 10})

evalml/tests/objective_tests/test_standard_metrics.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
F1,
99
MAPE,
1010
MSE,
11+
SMAPE,
1112
AccuracyBinary,
1213
AccuracyMulticlass,
1314
BalancedAccuracyBinary,
@@ -711,6 +712,44 @@ def test_mape_time_series_model():
711712
) == pytest.approx(8 / 4 * 100)
712713

713714

def test_smape_time_series_model():
    """SMAPE raises on co-located zeros and scores positionally, ignoring pandas indexes."""
    objective = SMAPE()

    # Both series hold a 0 at the same position, where SMAPE is undefined.
    zero_overlap_actual = np.array([0, 0, 1, 1, 1, 1, 2, 0, 2])
    zero_overlap_predicted = np.array([0, 1, 0, 1, 1, 2, 1, 2, 0])

    # Each pair satisfies a == -p, so every term |a - p| / ((|a| + |p|) / 2) is exactly 2.
    mirrored_actual = np.array([-1, -2, 1, 3])
    mirrored_predicted = np.array([1, 2, -1, -3])

    positive_actual = np.array([1, 2, 4, 2, 1, 2])
    positive_predicted = np.array([0, 2, 2, 0, 3, 2])

    # Contains zeros, but never in both series at the same position.
    mixed_zero_actual = np.array([0, 2, 0, 2, 1, 2])
    mixed_zero_predicted = np.array([1, 2, 2, 1, 3, 2])

    with pytest.raises(
        ValueError,
        match="Symmetric Mean Absolute Percentage Error cannot be used when "
        "true and predicted targets both contain the value 0.",
    ):
        objective.score(zero_overlap_actual, zero_overlap_predicted)

    expected_mirrored = pytest.approx(8 / 4 * 100)
    expected_mixed = pytest.approx((17 / 6) / 3 * 100)

    assert objective.score(mirrored_actual, mirrored_predicted) == expected_mirrored
    assert objective.score(positive_actual, positive_predicted) == expected_mixed
    assert objective.score(mixed_zero_actual, mixed_zero_predicted) == expected_mixed

    # Index labels on one or both inputs must not change the positional result.
    assert (
        objective.score(
            pd.Series(positive_actual, index=range(-12, -6)),
            positive_predicted,
        )
        == expected_mixed
    )
    assert (
        objective.score(
            pd.Series(mirrored_actual, index=range(10, 14)),
            pd.Series(mirrored_predicted, index=range(20, 24)),
        )
        == expected_mirrored
    )
    assert (
        objective.score(
            pd.Series(mixed_zero_actual, index=range(-12, -6)),
            mixed_zero_predicted,
        )
        == expected_mixed
    )
714753
@pytest.mark.parametrize("objective_class", _all_objectives_dict().values())
715754
def test_calculate_percent_difference(objective_class):
716755
score = 5

0 commit comments

Comments
 (0)