From 57ed3414d6fbfe34033d80debf7eee0975262aae Mon Sep 17 00:00:00 2001 From: Aftabbs Date: Sun, 1 Mar 2026 06:32:27 +0530 Subject: [PATCH] fix: handle mixed str/int label types in calculate_matrix When a DataFrame with dtype="string" contains numeric-looking label values (e.g. "101", "102"), newer NumPy can coerce those strings to integers during np.union1d / np.unique, producing a labels list that mixes Python str and int. Python 3 does not support '<' between str and int, so sorted(labels) raises TypeError inside calculate_matrix. The fix catches the TypeError and falls back to: - Converting all labels to str for consistent sorting. - Casting the target and prediction Series to str so that sklearn.metrics.confusion_matrix receives a homogeneous label set (sklearn calls np.sort internally, which also fails on mixed types). This restores the expected behaviour for string-typed columns with numeric-looking label names without changing the code path for all-string or all-integer label sets. 
Adds five tests to TestCalculateMatrixMixedTypeLabels: - all-string labels: regression guard - all-integer labels: regression guard - mixed str/int labels do not raise TypeError - confusion matrix has the correct shape for mixed labels - end-to-end test reproducing the exact scenario from issue #1085 Fixes #1085 --- .../classification_performance.py | 12 ++- .../test_classification_performance.py | 77 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/src/evidently/legacy/calculations/classification_performance.py b/src/evidently/legacy/calculations/classification_performance.py index 0c748aedd4..6b456c77c5 100644 --- a/src/evidently/legacy/calculations/classification_performance.py +++ b/src/evidently/legacy/calculations/classification_performance.py @@ -321,7 +321,17 @@ def calculate_lift_table(binded): def calculate_matrix(target: pd.Series, prediction: pd.Series, labels: List[Label]) -> ConfusionMatrix: - sorted_labels = sorted(labels) # type: ignore[type-var] + try: + sorted_labels = sorted(labels) # type: ignore[type-var] + except TypeError: + # Labels contain a mix of incomparable types (e.g. str and int produced when + # numeric-looking string labels such as "101" are coerced to integers by NumPy). + # Normalize every label to str so they can be sorted consistently, and align the + # target/prediction series to the same string representation so that + # sklearn.metrics.confusion_matrix receives a homogeneous label set. 
+ sorted_labels = sorted(str(label) for label in labels) # type: ignore[assignment] + target = target.astype(str) + prediction = prediction.astype(str) matrix = metrics.confusion_matrix(target, prediction, labels=sorted_labels) return ConfusionMatrix(labels=sorted_labels, values=[row.tolist() for row in matrix]) diff --git a/tests/calculations/test_classification_performance.py b/tests/calculations/test_classification_performance.py index 2dcda3c635..54c60b82dd 100644 --- a/tests/calculations/test_classification_performance.py +++ b/tests/calculations/test_classification_performance.py @@ -4,6 +4,7 @@ from sklearn import metrics from evidently.legacy.calculations.classification_performance import calculate_confusion_by_classes +from evidently.legacy.calculations.classification_performance import calculate_matrix from evidently.legacy.calculations.classification_performance import calculate_metrics from evidently.legacy.calculations.classification_performance import get_prediction_data from evidently.legacy.metric_results import ConfusionMatrix @@ -99,3 +100,79 @@ def test_get_prediction_data(dataframe, target, prediction, target_names, pos_la ) for label in target_names: assert np.allclose(data.prediction_probas[label], expected[label], atol=1e-6) + + +class TestCalculateMatrixMixedTypeLabels: + """calculate_matrix must not raise TypeError when labels contain both str and int values. + + Newer NumPy uses hash-based deduplication in np.unique (and therefore np.union1d), so + numeric-looking string labels such as "101" can be coerced to integers producing a + labels list with mixed types. Sorting such a list with plain sorted() fails in Python 3 + because '<' is not defined between str and int. The function must fall back to + converting every label to str in that case. 
+ """ + + def test_all_string_labels_return_correct_matrix(self): + target = pd.Series(["foo", "bar", "foo"]) + prediction = pd.Series(["foo", "foo", "bar"]) + labels = ["foo", "bar"] + result = calculate_matrix(target, prediction, labels) + assert set(result.labels) == {"foo", "bar"} + assert len(result.values) == 2 + + def test_all_integer_labels_return_correct_matrix(self): + target = pd.Series([0, 1, 0, 1]) + prediction = pd.Series([0, 0, 1, 1]) + labels = [0, 1] + result = calculate_matrix(target, prediction, labels) + assert set(result.labels) == {0, 1} + assert len(result.values) == 2 + + def test_mixed_str_int_labels_do_not_raise(self): + # Simulate the case where numeric-looking string labels like "101" have been + # coerced to int by NumPy, resulting in a labels list of mixed str and int. + target = pd.Series(["foo", "bar", 101, 102]) + prediction = pd.Series(["foo", 101, "bar", 102]) + labels = ["foo", "bar", 101, 102] # mixed types — the bug scenario + # Must not raise TypeError + result = calculate_matrix(target, prediction, labels) + assert len(result.labels) == 4 + assert len(result.values) == 4 + + def test_mixed_str_int_labels_confusion_matrix_shape(self): + # Reproduce the exact example from issue #1085 + label_target = ["foo", "bar", "fun", "foo", "fun", "foo"] + label_predict = ["foo", "bar", "fun", "bar", "fun", "fun"] + # Simulate numeric labels coerced to int by NumPy + mixed_labels = ["foo", "bar", "fun", 101, 102] + target = pd.Series(label_target + [101, 102]) + prediction = pd.Series(label_predict + [101, 101]) + result = calculate_matrix(target, prediction, mixed_labels) + assert len(result.labels) == len(mixed_labels) + + def test_string_dtype_dataframe_end_to_end(self): + # Exact reproduction from issue #1085: dtype="string" with numeric-looking labels + from evidently.legacy.metric_results import DatasetUtilityColumns + from evidently.legacy.utils.data_operations import DatasetColumns + + label_target = ["foo", 
"bar", "fun", "foo", "fun", "foo", "101", "102"] + label_predict = ["foo", "bar", "fun", "bar", "fun", "fun", "101", "101"] + data_df = pd.DataFrame( + {"target": label_target, "prediction": label_predict}, dtype="string" + ) + dataset_columns = DatasetColumns( + utility_columns=DatasetUtilityColumns(target="target", prediction="prediction"), + target_names=None, + num_feature_names=[], + cat_feature_names=[], + text_feature_names=[], + datetime_feature_names=[], + ) + pred_data = get_prediction_data(data_df, dataset_columns, pos_label=None) + # calculate_matrix must not raise TypeError + result = calculate_matrix( + data_df["target"].astype(object), + pred_data.predictions.astype(object), + pred_data.labels, + ) + assert len(result.labels) > 0