Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
now)
- New Smart Tags `pipeline_disagreement` and `incorrect_for_all_pipelines` as a first step for pipeline comparison.
- Links on top words to filter utterances that contain it.
- `/confusion_matrix` now supports a `normalized: bool` query parameter to return either raw counts or the normalized confusion matrix.

### Changed

Expand Down
4 changes: 3 additions & 1 deletion azimuth/modules/model_performance/confusion_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
class ConfusionMatrixModule(FilterableModule[ModelContractConfig]):
"""Computes the confusion matrix on the specified dataset split."""

allowed_mod_options = FilterableModule.allowed_mod_options | {"cf_normalized"}

def compute_on_dataset_split(self) -> List[ConfusionMatrixResponse]: # type: ignore
"""Computes confusion matrix from sklearn.

Expand All @@ -34,6 +36,6 @@ def compute_on_dataset_split(self) -> List[ConfusionMatrixResponse]: # type: ig
y_true=labels,
y_pred=predictions,
labels=class_ids,
normalize="true",
normalize="true" if self.mod_options.cf_normalized else None,
)
return [ConfusionMatrixResponse(confusion_matrix=cf)]
8 changes: 6 additions & 2 deletions azimuth/routers/v1/model_performance/confusion_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This source code is licensed under the Apache 2.0 license found in the LICENSE file
# in the root directory of this source tree.

from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, Query

from azimuth.app import get_dataset_split_manager, get_task_manager
from azimuth.dataset_split_manager import DatasetSplitManager
Expand Down Expand Up @@ -38,12 +38,16 @@ def get_confusion_matrix(
task_manager: TaskManager = Depends(get_task_manager),
dataset_split_manager: DatasetSplitManager = Depends(get_dataset_split_manager),
pipeline_index: int = Depends(require_pipeline_index),
without_postprocessing: bool = False,
without_postprocessing: bool = Query(
False, title="Without Postprocessing", alias="withoutPostprocessing"
),
normalized: bool = Query(True, title="Normalized"),
) -> ConfusionMatrixResponse:
mod_options = ModuleOptions(
filters=named_filters.to_dataset_filters(dataset_split_manager.get_class_names()),
pipeline_index=pipeline_index,
without_postprocessing=without_postprocessing,
cf_normalized=normalized,
)

task_result: ConfusionMatrixResponse = get_standard_task_result(
Expand Down
1 change: 1 addition & 0 deletions azimuth/types/general/module_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,4 @@ class ModuleOptions(AliasModel):
20,
title="Nb of bins to compute for different modules.",
)
cf_normalized: bool = Field(True, title="Normalize the confusion matrix.")
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,12 @@ def test_confusion_matrix(tiny_text_config_postprocessors):
# Last column (REJECTION_CLASS) should be non-empty by default, but empty without postprocessing.
assert json_output.confusion_matrix.sum(0)[2] > 0
assert json_output_without_postprocessing.confusion_matrix.sum(0)[2] == 0

# When not normalized, the matrix holds raw counts, so its entries sum to the number of rows.
mod_not_normalized = ConfusionMatrixModule(
DatasetSplitName.eval,
tiny_text_config_postprocessors,
mod_options=ModuleOptions(pipeline_index=0, cf_normalized=False),
)
[json_output_not_normalized] = mod_not_normalized.compute_on_dataset_split()
assert json_output_not_normalized.confusion_matrix.sum() == dm.num_rows
10 changes: 10 additions & 0 deletions tests/test_routers/test_model_performance/test_confusion_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,13 @@ def test_get_confusion_matrix(app: FastAPI) -> None:
[[0.090, 0.136, 0.772], [0.05, 0.05, 0.9], [0.0, 0.0, 0.0]],
atol=1e-2,
)

# not normalized
resp = client.get("/dataset_splits/eval/confusion_matrix?pipelineIndex=0&normalized=false")
assert resp.status_code == HTTP_200_OK, resp.text
data = resp.json()
assert np.allclose(
data["confusionMatrix"],
[[2.0, 3.0, 17.0], [1.0, 1.0, 18.0], [0.0, 0.0, 0.0]],
atol=1e-2,
)
5 changes: 4 additions & 1 deletion webapp/src/types/generated/generatedTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,8 @@ export interface components {
| "high_epistemic_uncertainty"
| "correct_top_3"
| "correct_low_conf"
| "incorrect_for_all_pipelines"
| "pipeline_disagreement"
| "NO_SMART_TAGS";
/**
* This model should be used as the base for any model that defines aliases to ensure
Expand Down Expand Up @@ -1293,7 +1295,8 @@ export interface operations {
dataset_split_name: components["schemas"]["DatasetSplitName"];
};
query: {
without_postprocessing?: boolean;
withoutPostprocessing?: boolean;
normalized?: boolean;
pipelineIndex: number;
confidenceMin?: number;
confidenceMax?: number;
Expand Down