Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/evidently/legacy/pipeline/column_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ class ColumnMapping:
item_id: Optional[str] = "item_id"
recommendations_type: Union[RecomType, str] = RecomType.SCORE

def __post_init__(self):
    """Wrap bare-string feature-list values in a one-element list.

    The four feature-list attributes named below are inspected in order.
    Any of them that currently holds a plain ``str`` is replaced by
    ``[value]``; every other value (a list, ``None``, ...) is left as is.
    This keeps ``column_name in mapping.datetime_features`` a list
    membership test rather than a substring search on a string.
    """
    feature_list_fields = (
        "numerical_features",
        "categorical_features",
        "datetime_features",
        "text_features",
    )
    for attr_name in feature_list_fields:
        current = getattr(self, attr_name)
        if not isinstance(current, str):
            # Already a list (or None / something else): nothing to normalise.
            continue
        setattr(self, attr_name, [current])

@property
def recom_type(self) -> RecomType:
if isinstance(self.recommendations_type, str):
Expand Down
37 changes: 37 additions & 0 deletions tests/utils/test_data_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,40 @@ def test_create_data_definition(reference, current, mapping, target, id, datetim
assert definition.get_prediction_columns() == prediction
assert definition.get_columns() == columns
assert definition.embeddings == embeddings


def test_column_mapping_normalizes_string_feature_lists():
    """Non-regression test for gh-846.

    Feature-list fields (datetime_features, categorical_features, etc.) must
    accept a bare string and normalise it to a one-element list so that
    downstream ``column_name in mapping.datetime_features`` is always a list
    membership test and never an accidental substring search.

    The string values are passed through the constructor so the test covers
    the path a user actually takes; it therefore relies on ``__post_init__``
    being invoked as part of ``ColumnMapping(...)`` construction.
    """
    # String input given at construction time is normalised to a list
    cm = ColumnMapping(datetime_features="prediction_timestamp_utc")
    assert cm.datetime_features == ["prediction_timestamp_utc"]

    # Key regression: "prediction" IS a substring of "prediction_timestamp_utc",
    # so `in` on the raw string would be True. On the normalised list it is an
    # element test, and "prediction" is not an element.
    assert "prediction" not in cm.datetime_features
    assert "prediction_timestamp_utc" in cm.datetime_features

    # List input is left unchanged
    cm2 = ColumnMapping(datetime_features=["ts1", "ts2"])
    assert cm2.datetime_features == ["ts1", "ts2"]

    # None input is left unchanged
    cm3 = ColumnMapping(datetime_features=None)
    assert cm3.datetime_features is None

    # Same normalisation applies to the other feature-list fields
    cm4 = ColumnMapping(
        categorical_features="cat_col",
        numerical_features="num_col",
        text_features="txt_col",
    )
    assert cm4.categorical_features == ["cat_col"]
    assert cm4.numerical_features == ["num_col"]
    assert cm4.text_features == ["txt_col"]

    # Re-running the hook by hand normalises a value assigned after
    # construction, and is a no-op on a value that is already a list
    cm5 = ColumnMapping()
    cm5.datetime_features = "late_ts"
    cm5.__post_init__()
    assert cm5.datetime_features == ["late_ts"]
    cm5.__post_init__()
    assert cm5.datetime_features == ["late_ts"]