ServiceNow · Dref360 · May 10, 2022 · May 9, 2022 · May 9, 2022 · May 10, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
   now)
 - New Smart Tags `pipeline_disagreement` and `incorrect_for_all_pipelines` as a first step for pipeline comparison.
 - Links on top words to filter utterances that contain it.
+- `/confusion_matrix` now supports a `normalized: bool` query parameter to return either raw counts or the normalized confusion matrix.
 
 ### Changed
 

diff --git a/azimuth/modules/model_performance/confusion_matrix.py b/azimuth/modules/model_performance/confusion_matrix.py
@@ -16,6 +16,8 @@
 class ConfusionMatrixModule(FilterableModule[ModelContractConfig]):
     """Computes the confusion matrix on the specified dataset split."""
 
+    allowed_mod_options = FilterableModule.allowed_mod_options | {"cf_normalized"}
+
     def compute_on_dataset_split(self) -> List[ConfusionMatrixResponse]:  # type: ignore
         """Computes confusion matrix from sklearn.
 
@@ -34,6 +36,6 @@ def compute_on_dataset_split(self) -> List[ConfusionMatrixResponse]:  # type: ig
             y_true=labels,
             y_pred=predictions,
             labels=class_ids,
-            normalize="true",
+            normalize="true" if self.mod_options.cf_normalized else None,
         )
         return [ConfusionMatrixResponse(confusion_matrix=cf)]
diff --git a/azimuth/routers/v1/model_performance/confusion_matrix.py b/azimuth/routers/v1/model_performance/confusion_matrix.py
@@ -2,7 +2,7 @@
 # This source code is licensed under the Apache 2.0 license found in the LICENSE file
 # in the root directory of this source tree.
 
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, Query
 
 from azimuth.app import get_dataset_split_manager, get_task_manager
 from azimuth.dataset_split_manager import DatasetSplitManager
@@ -38,12 +38,16 @@ def get_confusion_matrix(
     task_manager: TaskManager = Depends(get_task_manager),
     dataset_split_manager: DatasetSplitManager = Depends(get_dataset_split_manager),
     pipeline_index: int = Depends(require_pipeline_index),
-    without_postprocessing: bool = False,
+    without_postprocessing: bool = Query(
+        False, title="Without Postprocessing", alias="withoutPostprocessing"
+    ),
+    normalized: bool = Query(True, title="Normalized"),
 ) -> ConfusionMatrixResponse:
     mod_options = ModuleOptions(
         filters=named_filters.to_dataset_filters(dataset_split_manager.get_class_names()),
         pipeline_index=pipeline_index,
         without_postprocessing=without_postprocessing,
+        cf_normalized=normalized,
     )
 
     task_result: ConfusionMatrixResponse = get_standard_task_result(

diff --git a/azimuth/types/general/module_options.py b/azimuth/types/general/module_options.py
@@ -97,3 +97,4 @@ class ModuleOptions(AliasModel):
         20,
         title="Nb of bins to compute for different modules.",
     )
+    cf_normalized: bool = Field(True, title="Normalize the confusion matrix.")
diff --git a/tests/test_modules/test_model_performance/test_confusion_matrix.py b/tests/test_modules/test_model_performance/test_confusion_matrix.py
@@ -47,3 +47,12 @@ def test_confusion_matrix(tiny_text_config_postprocessors):
     # Last column (REJECTION_CLASS) should be not empty by default, but empty without postprocessing
     assert json_output.confusion_matrix.sum(0)[2] > 0
     assert json_output_without_postprocessing.confusion_matrix.sum(0)[2] == 0
+
+    # When not normalized, the matrix holds raw counts, so its entries sum to dm.num_rows.
+    mod_not_normalized = ConfusionMatrixModule(
+        DatasetSplitName.eval,
+        tiny_text_config_postprocessors,
+        mod_options=ModuleOptions(pipeline_index=0, cf_normalized=False),
+    )
+    [json_output_not_normalized] = mod_not_normalized.compute_on_dataset_split()
+    assert json_output_not_normalized.confusion_matrix.sum() == dm.num_rows
diff --git a/tests/test_routers/test_model_performance/test_confusion_matrix.py b/tests/test_routers/test_model_performance/test_confusion_matrix.py
@@ -18,3 +18,13 @@ def test_get_confusion_matrix(app: FastAPI) -> None:
         [[0.090, 0.136, 0.772], [0.05, 0.05, 0.9], [0.0, 0.0, 0.0]],
         atol=1e-2,
     )
+
+    # not normalized
+    resp = client.get("/dataset_splits/eval/confusion_matrix?pipelineIndex=0&normalized=false")
+    assert resp.status_code == HTTP_200_OK, resp.text
+    data = resp.json()
+    assert np.allclose(
+        data["confusionMatrix"],
+        [[2.0, 3.0, 17.0], [1.0, 1.0, 18.0], [0.0, 0.0, 0.0]],
+        atol=1e-2,
+    )
diff --git a/webapp/src/types/generated/generatedTypes.ts b/webapp/src/types/generated/generatedTypes.ts
@@ -586,6 +586,8 @@ export interface components {
       | "high_epistemic_uncertainty"
       | "correct_top_3"
       | "correct_low_conf"
+      | "incorrect_for_all_pipelines"
+      | "pipeline_disagreement"
       | "NO_SMART_TAGS";
     /**
      * This model should be used as the base for any model that defines aliases to ensure
@@ -1293,7 +1295,8 @@ export interface operations {
         dataset_split_name: components["schemas"]["DatasetSplitName"];
       };
       query: {
-        without_postprocessing?: boolean;
+        withoutPostprocessing?: boolean;
+        normalized?: boolean;
         pipelineIndex: number;
         confidenceMin?: number;
         confidenceMax?: number;
-Original file line number
+Diff line change
@@ -97,3 +97,4 @@ class ModuleOptions(AliasModel):
         20,
         title="Nb of bins to compute for different modules.",
     )
+    cf_normalized: bool = Field(True, title="Normalize the confusion matrix.")