Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 66 additions & 3 deletions sidemantic/adapters/rill.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,28 @@ def _parse_file(self, file_path: Path) -> Model | None:
timeseries_column = data.get("timeseries")
smallest_time_grain = data.get("smallest_time_grain")

for i, dim_def in enumerate(data.get("dimensions") or []):
dimension = self._parse_dimension(dim_def, i, timeseries_column, smallest_time_grain)
# An unnamed expression dimension whose SQL is the timeseries column gets
# renamed to the timeseries name so the default time dimension resolves.
# That rename must not collide with a dimension that *already* owns the
# timeseries name through Rill's own derivation (an explicit `name`, or a
# `column`/key matching the timeseries column -- which can appear at any
# position), nor with an earlier expression dimension that already claimed
# it. Otherwise two dimensions share a name and validate_model rejects the
# duplicates. Pre-scan the natural names so the special-case only fires
# when nothing else claims the name, and gate repeats with a running flag.
dim_defs = list(data.get("dimensions") or [])
# A pure unnamed-expression dimension derives to `dimension_<i>`, so it is
# never a "natural" owner here -- only explicit names and column/key
# fallbacks that equal the timeseries column count as already claiming it.
timeseries_name_taken = bool(timeseries_column) and any(
self._natural_dimension_name(d, i) == timeseries_column for i, d in enumerate(dim_defs)
)
for i, dim_def in enumerate(dim_defs):
dimension = self._parse_dimension(dim_def, i, timeseries_column, smallest_time_grain, timeseries_name_taken)
if dimension:
dimensions.append(dimension)
if timeseries_column and dimension.name == timeseries_column:
timeseries_name_taken = True

# If timeseries is specified but not found in dimensions, create it
if timeseries_column:
Expand Down Expand Up @@ -315,12 +333,37 @@ def _parse_file(self, file_path: Path) -> Model | None:
meta=meta,
)

@staticmethod
def _natural_dimension_name(dim_def: dict[str, Any], index: int) -> str | None:
"""Derive the name Rill itself assigns, ignoring the timeseries special-case.

Mirrors Rill's `name -> column -> dimension_<i>` derivation
(`property:` is the deprecated alias for `column:`). Returns ``None`` for
dimensions Rill drops entirely (`ignore: true`, or no resolvable SQL).
Used to detect when another dimension already owns the timeseries name so
an unnamed expression dimension does not collide by also claiming it.
"""
if dim_def.get("ignore"):
return None

column = dim_def.get("column") or dim_def.get("property")
lookup_key_column = dim_def.get("lookup_key_column")
# Match `_parse_dimension`'s drop condition: no expression/column/key -> None.
if not (dim_def.get("expression") or column or lookup_key_column):
return None

name = dim_def.get("name")
if name:
return name
return column or lookup_key_column or f"dimension_{index}"

def _parse_dimension(
self,
dim_def: dict[str, Any],
index: int,
timeseries_column: str | None,
smallest_time_grain: str | None,
timeseries_name_taken: bool = False,
) -> Dimension | None:
"""Parse a Rill dimension into a Sidemantic Dimension.

Expand All @@ -339,6 +382,9 @@ def _parse_dimension(
index: Position of the dimension in the dimensions list (for name fallback)
timeseries_column: Name of the timeseries column
smallest_time_grain: Smallest time grain for time dimensions
timeseries_name_taken: Whether an earlier unnamed expression dimension
already claimed the timeseries name (so a repeated match keeps its
positional `dimension_<i>` name instead of colliding).

Returns:
Dimension or None if parsing fails
Expand All @@ -359,9 +405,26 @@ def _parse_dimension(
lookup_key_column = dim_def.get("lookup_key_column")

# Derive name following Rill's rules: name -> column -> dimension_<i>.
# When an unnamed dimension's SQL expression *is* the timeseries column
# (e.g. `dimensions: [{expression: order_date}]` alongside
# `timeseries: order_date`), name it after the timeseries column rather
# than the positional `dimension_<i>` fallback. Otherwise the generated
# time dimension stays addressable only as `dimension_<i>`, the later
# auto-create check sees the column already present and skips it, and
# `default_time_dimension` (set to the timeseries column) resolves to no
# dimension -- causing validate_model to reject the model.
#
# Only the *first* such match may claim the timeseries name. Rill keeps
# repeated unnamed expression dimensions distinct as `dimension_<i>`, so
# once the name is taken later matches fall back to their positional name
# to avoid colliding (validate_model rejects duplicate dimension names).
name = dim_def.get("name")
if not name:
name = column or lookup_key_column or f"dimension_{index}"
sql_expr = expression or column or lookup_key_column
if timeseries_column and sql_expr == timeseries_column and not timeseries_name_taken:
name = timeseries_column
Comment thread
nicosuave marked this conversation as resolved.
else:
name = column or lookup_key_column or f"dimension_{index}"

# `label` is the deprecated alias for `display_name`.
label = dim_def.get("display_name") or dim_def.get("label")
Expand Down
172 changes: 172 additions & 0 deletions tests/adapters/rill/test_modern_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,5 +691,177 @@ def test_time_comparison_type_records_metadata():
assert m.meta["rill_type"] == "time_comparison"


def test_unnamed_timeseries_expression_dimension_stays_addressable():
"""An unnamed expression dimension that IS the timeseries keeps the timeseries name.

Regression: with `timeseries: order_date` and an unnamed expression dimension
`{expression: order_date}`, the name fallback produced `dimension_0`. The
auto-create check then saw `sql == timeseries_column` and skipped creating an
`order_date` dimension, while `default_time_dimension` stayed `order_date` ->
validate_model rejected the model because the default time dimension referenced
no dimension. The dimension must be addressable under the timeseries name so the
default time dimension resolves.
"""
graph = _parse_inline(
{
"type": "metrics_view",
"name": "test",
"model": "m",
"timeseries": "order_date",
"smallest_time_grain": "day",
"dimensions": [{"expression": "order_date"}],
}
)
model = graph.models["test"]

# The unnamed expression dimension is named after the timeseries column, and
# exactly one time dimension exists (no duplicate dimension_0 + order_date).
time_dim = model.get_dimension("order_date")
assert time_dim is not None
assert time_dim.sql == "order_date"
assert time_dim.type == "time"
assert not any(d.name == "dimension_0" for d in model.dimensions)

# default_time_dimension now resolves, so the model validates.
assert model.default_time_dimension == "order_date"
assert validate_model(model) == []


def test_unnamed_timeseries_expression_dimension_importable_via_semantic_layer():
"""The CLI-first path (add_model -> validate_model) accepts the model."""
graph = _parse_inline(
{
"type": "metrics_view",
"name": "orders",
"model": "orders_tbl",
"timeseries": "order_date",
"smallest_time_grain": "day",
"dimensions": [{"expression": "order_date"}],
"measures": [{"name": "revenue", "expression": "SUM(amount)"}],
}
)
layer = SemanticLayer()
# add_model runs validate_model and raises on failure.
layer.add_model(graph.models["orders"])
assert layer.get_model("orders").default_time_dimension == "order_date"


def test_repeated_unnamed_timeseries_expression_dimensions_keep_unique_names():
"""Two unnamed expression dimensions matching the timeseries stay distinct.

Regression: the timeseries-name fallback renamed *every* unnamed expression
dimension whose expression equalled the timeseries column to the timeseries
name, so two such dimensions both became `order_date` -> validate_model
rejected the duplicate dimension names and add_model failed on an otherwise
parseable Rill project. Only the first match may claim the timeseries name;
later matches keep their positional `dimension_<i>` name (Rill's own
fallback).
"""
graph = _parse_inline(
{
"type": "metrics_view",
"name": "orders",
"model": "orders_tbl",
"timeseries": "order_date",
"smallest_time_grain": "day",
"dimensions": [
{"expression": "order_date"},
{"expression": "order_date"},
],
"measures": [{"name": "revenue", "expression": "SUM(amount)"}],
}
)
model = graph.models["orders"]

# Exactly one dimension claims the timeseries name; the repeat keeps its
# positional name. No duplicate dimension names.
names = [d.name for d in model.dimensions]
assert names.count("order_date") == 1
assert "dimension_1" in names
assert len(names) == len(set(names))

# The timeseries dimension is still the time dimension that backs the default.
assert model.get_dimension("order_date").type == "time"
assert model.default_time_dimension == "order_date"

# The CLI-first path (add_model -> validate_model) accepts the model.
assert validate_model(model) == []
layer = SemanticLayer()
layer.add_model(model)


def test_unnamed_timeseries_expression_dimension_yields_to_natural_owner():
"""An expression dimension yields the timeseries name to a column-named sibling.

Regression: with `timeseries: order_date` and
`dimensions: [{expression: order_date}, {column: order_date}]`, the
expression dimension was renamed to `order_date` while the column dimension
also naturally derived `order_date` (Rill's name->column fallback). Both
shared the name -> validate_model rejected the duplicates and add_model
failed on an otherwise parseable Rill project. Rill keeps the unnamed
expression dimension as `dimension_0`; only the column dimension owns the
timeseries name. The special-case rename must defer to that natural owner.
"""
graph = _parse_inline(
{
"type": "metrics_view",
"name": "orders",
"model": "orders_tbl",
"timeseries": "order_date",
"smallest_time_grain": "day",
"dimensions": [
{"expression": "order_date"},
{"column": "order_date"},
],
"measures": [{"name": "revenue", "expression": "SUM(amount)"}],
}
)
model = graph.models["orders"]

# The column dimension owns the timeseries name; the expression dimension
# keeps its positional name. No duplicate dimension names.
names = [d.name for d in model.dimensions]
assert names.count("order_date") == 1
assert "dimension_0" in names
assert len(names) == len(set(names))

# The timeseries dimension is the time dimension backing the default.
assert model.get_dimension("order_date").type == "time"
assert model.default_time_dimension == "order_date"

# The CLI-first path (add_model -> validate_model) accepts the model.
assert validate_model(model) == []
layer = SemanticLayer()
layer.add_model(model)


def test_unnamed_timeseries_expression_before_named_owner_stays_distinct():
"""The natural owner is honored even when it appears before the expression dim."""
graph = _parse_inline(
{
"type": "metrics_view",
"name": "orders",
"model": "orders_tbl",
"timeseries": "order_date",
"smallest_time_grain": "day",
"dimensions": [
{"column": "order_date"},
{"expression": "order_date"},
],
"measures": [{"name": "revenue", "expression": "SUM(amount)"}],
}
)
model = graph.models["orders"]

names = [d.name for d in model.dimensions]
assert names.count("order_date") == 1
assert "dimension_1" in names
assert len(names) == len(set(names))
assert model.get_dimension("order_date").type == "time"
assert model.default_time_dimension == "order_date"
assert validate_model(model) == []
SemanticLayer().add_model(model)


if __name__ == "__main__":
pytest.main([__file__, "-v"])
Loading