From 5494325ff13c5f5d97b9d4ec11953b7f435fcac5 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Tue, 16 Jun 2026 20:39:47 -0700 Subject: [PATCH 1/3] Keep unnamed Rill timeseries expression dimensions addressable When a Rill metrics view declares timeseries: order_date alongside an unnamed expression dimension whose expression is that same column ({expression: order_date}), the name fallback produced dimension_0. The auto-create check then saw the column already present and skipped creating an order_date dimension, while default_time_dimension stayed order_date, so validate_model rejected the model because the default time dimension referenced no dimension. Name such an unnamed dimension after the timeseries column so the generated time dimension stays addressable and default_time_dimension resolves. --- sidemantic/adapters/rill.py | 14 +++++- tests/adapters/rill/test_modern_features.py | 55 +++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/sidemantic/adapters/rill.py b/sidemantic/adapters/rill.py index a73dd6dd..3fe5c59c 100644 --- a/sidemantic/adapters/rill.py +++ b/sidemantic/adapters/rill.py @@ -359,9 +359,21 @@ def _parse_dimension( lookup_key_column = dim_def.get("lookup_key_column") # Derive name following Rill's rules: name -> column -> dimension_. + # When an unnamed dimension's SQL expression *is* the timeseries column + # (e.g. `dimensions: [{expression: order_date}]` alongside + # `timeseries: order_date`), name it after the timeseries column rather + # than the positional `dimension_` fallback. Otherwise the generated + # time dimension stays addressable only as `dimension_`, the later + # auto-create check sees the column already present and skips it, and + # `default_time_dimension` (set to the timeseries column) resolves to no + # dimension -- causing validate_model to reject the model. name = dim_def.get("name") if not name: - name = column or lookup_key_column or f"dimension_{index}" + sql_expr = expression or column or lookup_key_column + if timeseries_column and sql_expr == timeseries_column: + name = timeseries_column + else: + name = column or lookup_key_column or f"dimension_{index}" # `label` is the deprecated alias for `display_name`. label = dim_def.get("display_name") or dim_def.get("label") diff --git a/tests/adapters/rill/test_modern_features.py b/tests/adapters/rill/test_modern_features.py index a5410143..868d3c15 100644 --- a/tests/adapters/rill/test_modern_features.py +++ b/tests/adapters/rill/test_modern_features.py @@ -691,5 +691,60 @@ def test_time_comparison_type_records_metadata(): assert m.meta["rill_type"] == "time_comparison" +def test_unnamed_timeseries_expression_dimension_stays_addressable(): + """An unnamed expression dimension that IS the timeseries keeps the timeseries name. + + Regression: with `timeseries: order_date` and an unnamed expression dimension + `{expression: order_date}`, the name fallback produced `dimension_0`. The + auto-create check then saw `sql == timeseries_column` and skipped creating an + `order_date` dimension, while `default_time_dimension` stayed `order_date` -> + validate_model rejected the model because the default time dimension referenced + no dimension. The dimension must be addressable under the timeseries name so the + default time dimension resolves. + """ + graph = _parse_inline( + { + "type": "metrics_view", + "name": "test", + "model": "m", + "timeseries": "order_date", + "smallest_time_grain": "day", + "dimensions": [{"expression": "order_date"}], + } + ) + model = graph.models["test"] + + # The unnamed expression dimension is named after the timeseries column, and + # exactly one time dimension exists (no duplicate dimension_0 + order_date). + time_dim = model.get_dimension("order_date") + assert time_dim is not None + assert time_dim.sql == "order_date" + assert time_dim.type == "time" + assert not any(d.name == "dimension_0" for d in model.dimensions) + + # default_time_dimension now resolves, so the model validates. + assert model.default_time_dimension == "order_date" + assert validate_model(model) == [] + + +def test_unnamed_timeseries_expression_dimension_importable_via_semantic_layer(): + """The CLI-first path (add_model -> validate_model) accepts the model.""" + graph = _parse_inline( + { + "type": "metrics_view", + "name": "orders", + "model": "orders_tbl", + "timeseries": "order_date", + "smallest_time_grain": "day", + "dimensions": [{"expression": "order_date"}], + "measures": [{"name": "revenue", "expression": "SUM(amount)"}], + } + ) + layer = SemanticLayer() + # add_model runs validate_model and raises on failure. + layer.add_model(graph.models["orders"]) + assert layer.get_model("orders").default_time_dimension == "order_date" + + if __name__ == "__main__": pytest.main([__file__, "-v"]) From efbf83298c533cb385d4a0e7d309891b64549f67 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Thu, 18 Jun 2026 09:59:38 -0700 Subject: [PATCH 2/3] Preserve unique names for repeated timeseries expression dimensions The unnamed-expression-dimension fallback renamed every dimension whose expression equalled the timeseries column to the timeseries name. Two such dimensions both became the timeseries column name, so validate_model rejected the duplicate dimension names and add_model failed on an otherwise parseable Rill project. Only the first unnamed match now claims the timeseries name; later matches keep their positional dimension_ name, matching Rill's own fallback. --- sidemantic/adapters/rill.py | 21 +++++++++- tests/adapters/rill/test_modern_features.py | 44 +++++++++++++++++++++ 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/sidemantic/adapters/rill.py b/sidemantic/adapters/rill.py index 3fe5c59c..8c0f3db5 100644 --- a/sidemantic/adapters/rill.py +++ b/sidemantic/adapters/rill.py @@ -258,10 +258,18 @@ def _parse_file(self, file_path: Path) -> Model | None: timeseries_column = data.get("timeseries") smallest_time_grain = data.get("smallest_time_grain") + # Track whether an unnamed expression dimension has already claimed the + # timeseries name. Rill keeps repeated unnamed expression dimensions + # distinct as `dimension_`, so only the first match may take the + # timeseries name -- otherwise two of them collide and validate_model + # rejects the duplicate dimension names. + timeseries_name_taken = False for i, dim_def in enumerate(data.get("dimensions") or []): - dimension = self._parse_dimension(dim_def, i, timeseries_column, smallest_time_grain) + dimension = self._parse_dimension(dim_def, i, timeseries_column, smallest_time_grain, timeseries_name_taken) if dimension: dimensions.append(dimension) + if timeseries_column and dimension.name == timeseries_column: + timeseries_name_taken = True # If timeseries is specified but not found in dimensions, create it if timeseries_column: @@ -321,6 +329,7 @@ def _parse_dimension( index: int, timeseries_column: str | None, smallest_time_grain: str | None, + timeseries_name_taken: bool = False, ) -> Dimension | None: """Parse a Rill dimension into a Sidemantic Dimension. @@ -339,6 +348,9 @@ def _parse_dimension( index: Position of the dimension in the dimensions list (for name fallback) timeseries_column: Name of the timeseries column smallest_time_grain: Smallest time grain for time dimensions + timeseries_name_taken: Whether an earlier unnamed expression dimension + already claimed the timeseries name (so a repeated match keeps its + positional `dimension_` name instead of colliding). Returns: Dimension or None if parsing fails @@ -367,10 +379,15 @@ def _parse_dimension( # auto-create check sees the column already present and skips it, and # `default_time_dimension` (set to the timeseries column) resolves to no # dimension -- causing validate_model to reject the model. + # + # Only the *first* such match may claim the timeseries name. Rill keeps + # repeated unnamed expression dimensions distinct as `dimension_`, so + # once the name is taken later matches fall back to their positional name + # to avoid colliding (validate_model rejects duplicate dimension names). name = dim_def.get("name") if not name: sql_expr = expression or column or lookup_key_column - if timeseries_column and sql_expr == timeseries_column: + if timeseries_column and sql_expr == timeseries_column and not timeseries_name_taken: name = timeseries_column else: name = column or lookup_key_column or f"dimension_{index}" diff --git a/tests/adapters/rill/test_modern_features.py b/tests/adapters/rill/test_modern_features.py index 868d3c15..c1780ccd 100644 --- a/tests/adapters/rill/test_modern_features.py +++ b/tests/adapters/rill/test_modern_features.py @@ -746,5 +746,49 @@ def test_unnamed_timeseries_expression_dimension_importable_via_semantic_layer() assert layer.get_model("orders").default_time_dimension == "order_date" +def test_repeated_unnamed_timeseries_expression_dimensions_keep_unique_names(): + """Two unnamed expression dimensions matching the timeseries stay distinct. + + Regression: the timeseries-name fallback renamed *every* unnamed expression + dimension whose expression equalled the timeseries column to the timeseries + name, so two such dimensions both became `order_date` -> validate_model + rejected the duplicate dimension names and add_model failed on an otherwise + parseable Rill project. Only the first match may claim the timeseries name; + later matches keep their positional `dimension_` name (Rill's own + fallback). + """ + graph = _parse_inline( + { + "type": "metrics_view", + "name": "orders", + "model": "orders_tbl", + "timeseries": "order_date", + "smallest_time_grain": "day", + "dimensions": [ + {"expression": "order_date"}, + {"expression": "order_date"}, + ], + "measures": [{"name": "revenue", "expression": "SUM(amount)"}], + } + ) + model = graph.models["orders"] + + # Exactly one dimension claims the timeseries name; the repeat keeps its + # positional name. No duplicate dimension names. + names = [d.name for d in model.dimensions] + assert names.count("order_date") == 1 + assert "dimension_1" in names + assert len(names) == len(set(names)) + + # The timeseries dimension is still the time dimension that backs the default. + assert model.get_dimension("order_date").type == "time" + assert model.default_time_dimension == "order_date" + + # The CLI-first path (add_model -> validate_model) accepts the model. + assert validate_model(model) == [] + layer = SemanticLayer() + layer.add_model(model) + + if __name__ == "__main__": pytest.main([__file__, "-v"]) From 66099925fdbcb32c7712d3634d1b191ecd33f19f Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 21 Jun 2026 10:56:20 -0700 Subject: [PATCH 3/3] Defer Rill timeseries name to its natural owner An unnamed expression dimension whose SQL equals the timeseries column was renamed to the timeseries name even when another dimension already owned that name through Rill's name->column derivation (e.g. [{expression: order_date}, {column: order_date}]). Both dimensions then shared the name and validate_model rejected the duplicates, so add_model failed on an otherwise parseable project. Pre-scan the dimensions' natural Rill names; the expression dimension only claims the timeseries name when no sibling already owns it, regardless of position. Keeps the unnamed expression dimension as dimension_ otherwise. --- sidemantic/adapters/rill.py | 48 ++++++++++++-- tests/adapters/rill/test_modern_features.py | 73 +++++++++++++++++++++ 2 files changed, 114 insertions(+), 7 deletions(-) diff --git a/sidemantic/adapters/rill.py b/sidemantic/adapters/rill.py index 8c0f3db5..ccb7d361 100644 --- a/sidemantic/adapters/rill.py +++ b/sidemantic/adapters/rill.py @@ -258,13 +258,23 @@ def _parse_file(self, file_path: Path) -> Model | None: timeseries_column = data.get("timeseries") smallest_time_grain = data.get("smallest_time_grain") - # Track whether an unnamed expression dimension has already claimed the - # timeseries name. Rill keeps repeated unnamed expression dimensions - # distinct as `dimension_`, so only the first match may take the - # timeseries name -- otherwise two of them collide and validate_model - # rejects the duplicate dimension names. - timeseries_name_taken = False - for i, dim_def in enumerate(data.get("dimensions") or []): + # An unnamed expression dimension whose SQL is the timeseries column gets + # renamed to the timeseries name so the default time dimension resolves. + # That rename must not collide with a dimension that *already* owns the + # timeseries name through Rill's own derivation (an explicit `name`, or a + # `column`/key matching the timeseries column -- which can appear at any + # position), nor with an earlier expression dimension that already claimed + # it. Otherwise two dimensions share a name and validate_model rejects the + # duplicates. Pre-scan the natural names so the special-case only fires + # when nothing else claims the name, and gate repeats with a running flag. + dim_defs = list(data.get("dimensions") or []) + # A pure unnamed-expression dimension derives to `dimension_`, so it is + # never a "natural" owner here -- only explicit names and column/key + # fallbacks that equal the timeseries column count as already claiming it. + timeseries_name_taken = bool(timeseries_column) and any( + self._natural_dimension_name(d, i) == timeseries_column for i, d in enumerate(dim_defs) + ) + for i, dim_def in enumerate(dim_defs): dimension = self._parse_dimension(dim_def, i, timeseries_column, smallest_time_grain, timeseries_name_taken) if dimension: dimensions.append(dimension) @@ -323,6 +333,30 @@ def _parse_file(self, file_path: Path) -> Model | None: meta=meta, ) + @staticmethod + def _natural_dimension_name(dim_def: dict[str, Any], index: int) -> str | None: + """Derive the name Rill itself assigns, ignoring the timeseries special-case. + + Mirrors Rill's `name -> column -> dimension_` derivation + (`property:` is the deprecated alias for `column:`). Returns ``None`` for + dimensions Rill drops entirely (`ignore: true`, or no resolvable SQL). + Used to detect when another dimension already owns the timeseries name so + an unnamed expression dimension does not collide by also claiming it. + """ + if dim_def.get("ignore"): + return None + + column = dim_def.get("column") or dim_def.get("property") + lookup_key_column = dim_def.get("lookup_key_column") + # Match `_parse_dimension`'s drop condition: no expression/column/key -> None. + if not (dim_def.get("expression") or column or lookup_key_column): + return None + + name = dim_def.get("name") + if name: + return name + return column or lookup_key_column or f"dimension_{index}" + def _parse_dimension( self, dim_def: dict[str, Any], diff --git a/tests/adapters/rill/test_modern_features.py b/tests/adapters/rill/test_modern_features.py index c1780ccd..65d45f13 100644 --- a/tests/adapters/rill/test_modern_features.py +++ b/tests/adapters/rill/test_modern_features.py @@ -790,5 +790,78 @@ def test_repeated_unnamed_timeseries_expression_dimensions_keep_unique_names(): layer.add_model(model) +def test_unnamed_timeseries_expression_dimension_yields_to_natural_owner(): + """An expression dimension yields the timeseries name to a column-named sibling. + + Regression: with `timeseries: order_date` and + `dimensions: [{expression: order_date}, {column: order_date}]`, the + expression dimension was renamed to `order_date` while the column dimension + also naturally derived `order_date` (Rill's name->column fallback). Both + shared the name -> validate_model rejected the duplicates and add_model + failed on an otherwise parseable Rill project. Rill keeps the unnamed + expression dimension as `dimension_0`; only the column dimension owns the + timeseries name. The special-case rename must defer to that natural owner. + """ + graph = _parse_inline( + { + "type": "metrics_view", + "name": "orders", + "model": "orders_tbl", + "timeseries": "order_date", + "smallest_time_grain": "day", + "dimensions": [ + {"expression": "order_date"}, + {"column": "order_date"}, + ], + "measures": [{"name": "revenue", "expression": "SUM(amount)"}], + } + ) + model = graph.models["orders"] + + # The column dimension owns the timeseries name; the expression dimension + # keeps its positional name. No duplicate dimension names. + names = [d.name for d in model.dimensions] + assert names.count("order_date") == 1 + assert "dimension_0" in names + assert len(names) == len(set(names)) + + # The timeseries dimension is the time dimension backing the default. + assert model.get_dimension("order_date").type == "time" + assert model.default_time_dimension == "order_date" + + # The CLI-first path (add_model -> validate_model) accepts the model. + assert validate_model(model) == [] + layer = SemanticLayer() + layer.add_model(model) + + +def test_unnamed_timeseries_expression_before_named_owner_stays_distinct(): + """The natural owner is honored even when it appears before the expression dim.""" + graph = _parse_inline( + { + "type": "metrics_view", + "name": "orders", + "model": "orders_tbl", + "timeseries": "order_date", + "smallest_time_grain": "day", + "dimensions": [ + {"column": "order_date"}, + {"expression": "order_date"}, + ], + "measures": [{"name": "revenue", "expression": "SUM(amount)"}], + } + ) + model = graph.models["orders"] + + names = [d.name for d in model.dimensions] + assert names.count("order_date") == 1 + assert "dimension_1" in names + assert len(names) == len(set(names)) + assert model.get_dimension("order_date").type == "time" + assert model.default_time_dimension == "order_date" + assert validate_model(model) == [] + SemanticLayer().add_model(model) + + if __name__ == "__main__": pytest.main([__file__, "-v"])