12 changes: 11 additions & 1 deletion src/evidently/legacy/calculations/classification_performance.py
@@ -321,7 +321,17 @@ def calculate_lift_table(binded):


 def calculate_matrix(target: pd.Series, prediction: pd.Series, labels: List[Label]) -> ConfusionMatrix:
-    sorted_labels = sorted(labels)  # type: ignore[type-var]
+    try:
+        sorted_labels = sorted(labels)  # type: ignore[type-var]
+    except TypeError:
+        # Labels contain a mix of incomparable types (e.g. str and int produced when
+        # numeric-looking string labels such as "101" are coerced to integers by NumPy).
+        # Normalize every label to str so they can be sorted consistently, and align the
+        # target/prediction series to the same string representation so that
+        # sklearn.metrics.confusion_matrix receives a homogeneous label set.
+        sorted_labels = sorted(str(label) for label in labels)  # type: ignore[assignment]
+        target = target.astype(str)
+        prediction = prediction.astype(str)
     matrix = metrics.confusion_matrix(target, prediction, labels=sorted_labels)
     return ConfusionMatrix(labels=sorted_labels, values=[row.tolist() for row in matrix])

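As a quick illustration of what the new try/except is guarding against, here is a minimal, self-contained sketch (plain Python, illustrative label values only) of the failure mode and of the string-normalization fallback the patch applies:

    # Python 3 refuses to order str and int, so sorting a mixed-type label list raises TypeError.
    labels = ["foo", "bar", 101, 102]
    try:
        sorted(labels)
    except TypeError as exc:
        print(exc)  # '<' not supported between instances of 'int' and 'str'

    # The fallback taken by the patch: cast every label to str before sorting, which gives a
    # consistent (lexicographic) ordering; the target and prediction series are cast to str
    # as well so sklearn.metrics.confusion_matrix sees a homogeneous label set.
    print(sorted(str(label) for label in labels))  # ['101', '102', 'bar', 'foo']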
77 changes: 77 additions & 0 deletions tests/calculations/test_classification_performance.py
@@ -4,6 +4,7 @@
 from sklearn import metrics
 
 from evidently.legacy.calculations.classification_performance import calculate_confusion_by_classes
+from evidently.legacy.calculations.classification_performance import calculate_matrix
 from evidently.legacy.calculations.classification_performance import calculate_metrics
 from evidently.legacy.calculations.classification_performance import get_prediction_data
 from evidently.legacy.metric_results import ConfusionMatrix
@@ -99,3 +100,79 @@ def test_get_prediction_data(dataframe, target, prediction, target_names, pos_la
     )
     for label in target_names:
         assert np.allclose(data.prediction_probas[label], expected[label], atol=1e-6)
+
+
+class TestCalculateMatrixMixedTypeLabels:
+    """calculate_matrix must not raise TypeError when labels contain both str and int values.
+
+    Newer NumPy uses hash-based deduplication in np.unique (and therefore np.union1d), so
+    numeric-looking string labels such as "101" can be coerced to integers, producing a
+    labels list with mixed types. Sorting such a list with plain sorted() fails in Python 3
+    because '<' is not defined between str and int. The function must fall back to
+    normalizing every label to str in that case.
+    """
+
+    def test_all_string_labels_return_correct_matrix(self):
+        target = pd.Series(["foo", "bar", "foo"])
+        prediction = pd.Series(["foo", "foo", "bar"])
+        labels = ["foo", "bar"]
+        result = calculate_matrix(target, prediction, labels)
+        assert set(result.labels) == {"foo", "bar"}
+        assert len(result.values) == 2
+
+    def test_all_integer_labels_return_correct_matrix(self):
+        target = pd.Series([0, 1, 0, 1])
+        prediction = pd.Series([0, 0, 1, 1])
+        labels = [0, 1]
+        result = calculate_matrix(target, prediction, labels)
+        assert set(result.labels) == {0, 1}
+        assert len(result.values) == 2
+
+    def test_mixed_str_int_labels_do_not_raise(self):
+        # Simulate the case where numeric-looking string labels like "101" have been
+        # coerced to int by NumPy, resulting in a labels list of mixed str and int.
+        target = pd.Series(["foo", "bar", 101, 102])
+        prediction = pd.Series(["foo", 101, "bar", 102])
+        labels = ["foo", "bar", 101, 102]  # mixed types: the bug scenario
+        # Must not raise TypeError
+        result = calculate_matrix(target, prediction, labels)
+        assert len(result.labels) == 4
+        assert len(result.values) == 4
+
+    def test_mixed_str_int_labels_confusion_matrix_shape(self):
+        # Based on the example from issue #1085, extended with numeric labels
+        label_target = ["foo", "bar", "fun", "foo", "fun", "foo"]
+        label_predict = ["foo", "bar", "fun", "bar", "fun", "fun"]
+        # Simulate numeric labels coerced to int by NumPy
+        mixed_labels = ["foo", "bar", "fun", 101, 102]
+        target = pd.Series(label_target + [101, 102])
+        prediction = pd.Series(label_predict + [101, 101])
+        result = calculate_matrix(target, prediction, mixed_labels)
+        assert len(result.labels) == len(mixed_labels)
+        assert len(result.values) == len(mixed_labels)
+
+    def test_string_dtype_dataframe_end_to_end(self):
+        # Exact reproduction from issue #1085: dtype="string" with numeric-looking labels.
+        # DatasetColumns is assumed to be exported alongside DatasetUtilityColumns.
+        from evidently.legacy.metric_results import DatasetColumns
+        from evidently.legacy.metric_results import DatasetUtilityColumns
+
+        label_target = ["foo", "bar", "fun", "foo", "fun", "foo", "101", "102"]
+        label_predict = ["foo", "bar", "fun", "bar", "fun", "fun", "101", "101"]
+        data_df = pd.DataFrame(
+            {"target": label_target, "prediction": label_predict}, dtype="string"
+        )
+        dataset_columns = DatasetColumns(
+            utility_columns=DatasetUtilityColumns(target="target", prediction="prediction"),
+            target_names=None,
+            num_feature_names=[],
+            cat_feature_names=[],
+            text_feature_names=[],
+            datetime_feature_names=[],
+        )
+        pred_data = get_prediction_data(data_df, dataset_columns, pos_label=None)
+        # calculate_matrix must not raise TypeError
+        result = calculate_matrix(
+            data_df["target"].astype(object),
+            pred_data.predictions.astype(object),
+            pred_data.labels,
+        )
+        assert len(result.labels) > 0
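
One practical note for reviewers: when the fallback triggers, the returned ConfusionMatrix carries the string forms of the labels rather than the original mixed-type values. A rough sketch of what a caller should observe with the patched calculate_matrix (data mirrors the mixed-label tests above):

    import pandas as pd

    from evidently.legacy.calculations.classification_performance import calculate_matrix

    # Mixed-type labels, as in the issue #1085 scenario after NumPy coercion.
    target = pd.Series(["foo", "bar", "fun", "foo", "fun", "foo", 101, 102])
    prediction = pd.Series(["foo", "bar", "fun", "bar", "fun", "fun", 101, 101])
    labels = ["foo", "bar", "fun", 101, 102]

    matrix = calculate_matrix(target, prediction, labels)
    # The fallback normalizes everything to str, so the labels come back sorted lexicographically.
    print(matrix.labels)       # expected: ['101', '102', 'bar', 'foo', 'fun']
    print(len(matrix.values))  # expected: 5, one row per label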