diff --git a/CHANGELOG.md b/CHANGELOG.md index be354616..2048d2d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). now) - New Smart Tags `pipeline_disagreement` and `incorrect_for_all_pipelines` as a first step for pipeline comparison. - Links on top words to filter utterances that contain it. +- `/confusion_matrix` now supports a `normalized: bool` query parameter to toggle between raw counts and a normalized confusion matrix. ### Changed diff --git a/azimuth/modules/model_performance/confusion_matrix.py b/azimuth/modules/model_performance/confusion_matrix.py index 44ba7412..3e8043dc 100644 --- a/azimuth/modules/model_performance/confusion_matrix.py +++ b/azimuth/modules/model_performance/confusion_matrix.py @@ -16,6 +16,8 @@ class ConfusionMatrixModule(FilterableModule[ModelContractConfig]): """Computes the confusion matrix on the specified dataset split.""" + allowed_mod_options = FilterableModule.allowed_mod_options | {"cf_normalized"} + def compute_on_dataset_split(self) -> List[ConfusionMatrixResponse]: # type: ignore """Computes confusion matrix from sklearn. @@ -34,6 +36,6 @@ def compute_on_dataset_split(self) -> List[ConfusionMatrixResponse]: # type: ig y_true=labels, y_pred=predictions, labels=class_ids, - normalize="true", + normalize="true" if self.mod_options.cf_normalized else None, ) return [ConfusionMatrixResponse(confusion_matrix=cf)] diff --git a/azimuth/routers/v1/model_performance/confusion_matrix.py b/azimuth/routers/v1/model_performance/confusion_matrix.py index 8a0a1deb..28d7b6df 100644 --- a/azimuth/routers/v1/model_performance/confusion_matrix.py +++ b/azimuth/routers/v1/model_performance/confusion_matrix.py @@ -2,7 +2,7 @@ # This source code is licensed under the Apache 2.0 license found in the LICENSE file # in the root directory of this source tree. 
-from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Query from azimuth.app import get_dataset_split_manager, get_task_manager from azimuth.dataset_split_manager import DatasetSplitManager @@ -38,12 +38,16 @@ def get_confusion_matrix( task_manager: TaskManager = Depends(get_task_manager), dataset_split_manager: DatasetSplitManager = Depends(get_dataset_split_manager), pipeline_index: int = Depends(require_pipeline_index), - without_postprocessing: bool = False, + without_postprocessing: bool = Query( + False, title="Without Postprocessing", alias="withoutPostprocessing" + ), + normalized: bool = Query(True, title="Normalized"), ) -> ConfusionMatrixResponse: mod_options = ModuleOptions( filters=named_filters.to_dataset_filters(dataset_split_manager.get_class_names()), pipeline_index=pipeline_index, without_postprocessing=without_postprocessing, + cf_normalized=normalized, ) task_result: ConfusionMatrixResponse = get_standard_task_result( diff --git a/azimuth/types/general/module_options.py b/azimuth/types/general/module_options.py index cef6ec0b..598f9fa9 100644 --- a/azimuth/types/general/module_options.py +++ b/azimuth/types/general/module_options.py @@ -97,3 +97,4 @@ class ModuleOptions(AliasModel): 20, title="Nb of bins to compute for different modules.", ) + cf_normalized: bool = Field(True, title="Normalize the confusion matrix.") diff --git a/tests/test_modules/test_model_performance/test_confusion_matrix.py b/tests/test_modules/test_model_performance/test_confusion_matrix.py index 61c05020..577e2249 100644 --- a/tests/test_modules/test_model_performance/test_confusion_matrix.py +++ b/tests/test_modules/test_model_performance/test_confusion_matrix.py @@ -47,3 +47,12 @@ def test_confusion_matrix(tiny_text_config_postprocessors): # Last column (REJECTION_CLASS) should be not empty by default, but empty without postprocessing assert json_output.confusion_matrix.sum(0)[2] > 0 assert 
json_output_without_postprocessing.confusion_matrix.sum(0)[2] == 0 + + # When not normalized, we get the predictions. + mod_not_normalized = ConfusionMatrixModule( + DatasetSplitName.eval, + tiny_text_config_postprocessors, + mod_options=ModuleOptions(pipeline_index=0, cf_normalized=False), + ) + [json_output_not_normalized] = mod_not_normalized.compute_on_dataset_split() + assert json_output_not_normalized.confusion_matrix.sum() == dm.num_rows diff --git a/tests/test_routers/test_model_performance/test_confusion_matrix.py b/tests/test_routers/test_model_performance/test_confusion_matrix.py index e8da566e..fac5efe2 100644 --- a/tests/test_routers/test_model_performance/test_confusion_matrix.py +++ b/tests/test_routers/test_model_performance/test_confusion_matrix.py @@ -18,3 +18,13 @@ def test_get_confusion_matrix(app: FastAPI) -> None: [[0.090, 0.136, 0.772], [0.05, 0.05, 0.9], [0.0, 0.0, 0.0]], atol=1e-2, ) + + # not normalized + resp = client.get("/dataset_splits/eval/confusion_matrix?pipelineIndex=0&normalized=false") + assert resp.status_code == HTTP_200_OK, resp.text + data = resp.json() + assert np.allclose( + data["confusionMatrix"], + [[2.0, 3.0, 17.0], [1.0, 1.0, 18.0], [0.0, 0.0, 0.0]], + atol=1e-2, + ) diff --git a/webapp/src/types/generated/generatedTypes.ts b/webapp/src/types/generated/generatedTypes.ts index 0d39760f..4b0c593b 100644 --- a/webapp/src/types/generated/generatedTypes.ts +++ b/webapp/src/types/generated/generatedTypes.ts @@ -586,6 +586,8 @@ export interface components { | "high_epistemic_uncertainty" | "correct_top_3" | "correct_low_conf" + | "incorrect_for_all_pipelines" + | "pipeline_disagreement" | "NO_SMART_TAGS"; /** * This model should be used as the base for any model that defines aliases to ensure @@ -1293,7 +1295,8 @@ export interface operations { dataset_split_name: components["schemas"]["DatasetSplitName"]; }; query: { - without_postprocessing?: boolean; + withoutPostprocessing?: boolean; + normalized?: boolean; pipelineIndex: 
number; confidenceMin?: number; confidenceMax?: number;