From 8ca4bd1bde4401566b347dc57518dad15f1376a4 Mon Sep 17 00:00:00 2001 From: ellisandrews-toast Date: Tue, 26 May 2026 12:52:42 -0400 Subject: [PATCH 1/4] Implement nested sourced_from support --- config/schema/artifacts/datastore_config.yaml | 250 ++++++++++++++++-- config/schema/artifacts/runtime_metadata.yaml | 38 +-- .../datastore_config.yaml | 250 ++++++++++++++++-- .../runtime_metadata.yaml | 38 +-- .../shared_examples.rb | 2 +- .../indexer/datastore_indexing_router_spec.rb | 2 +- .../indexer/operation/update_spec.rb | 32 +-- .../runtime_metadata/update_target.rb | 28 +- .../runtime_metadata/update_target.rbs | 12 + .../runtime_metadata/schema_spec.rb | 9 + .../runtime_metadata/update_target_spec.rb | 7 +- .../schema_definition/factory.rb | 5 +- .../indexing/derived_indexed_type.rb | 5 +- .../schema_definition/indexing/index.rb | 7 + .../nested_relationship_chain_resolver.rb | 181 +++++++++++++ .../indexing/nested_update_target_resolver.rb | 190 +++++++++++++ .../indexing/update_target_factory.rb | 16 +- .../schema_definition/mixins/has_indices.rb | 9 +- .../schema_definition/results.rb | 107 ++++++-- .../schema_elements/relationship.rb | 73 ++++- .../schema_elements/type_with_subfields.rb | 3 +- .../scripts/update/index_data.painless | 207 +++++++++++++-- .../schema_definition/factory.rbs | 3 +- .../nested_relationship_chain_resolver.rbs | 34 +++ .../nested_update_target_resolver.rbs | 43 +++ .../indexing/update_target_factory.rbs | 5 +- .../schema_definition/mixins/has_indices.rbs | 2 +- .../schema_definition/results.rbs | 3 +- .../schema_elements/relationship.rbs | 9 +- .../index_mappings/miscellaneous_spec.rb | 2 +- .../lib/elastic_graph/constants.rb | 2 +- .../spec_support/runtime_metadata_support.rb | 6 + 32 files changed, 1408 insertions(+), 172 deletions(-) create mode 100644 elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb create mode 100644 
elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb create mode 100644 elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs create mode 100644 elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs diff --git a/config/schema/artifacts/datastore_config.yaml b/config/schema/artifacts/datastore_config.yaml index 61028816d..8475cda25 100644 --- a/config/schema/artifacts/datastore_config.yaml +++ b/config/schema/artifacts/datastore_config.yaml @@ -1236,6 +1236,9 @@ index_templates: type: integer nested_fields2|the_seasons: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1310,6 +1313,9 @@ index_templates: type: integer widget_options|colors: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1476,6 +1482,9 @@ index_templates: type: integer fees|amount_cents: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1527,6 +1536,9 @@ indices: type: integer shapes|coordinates: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1593,6 +1605,9 @@ indices: type: integer owner_ids: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1625,6 +1640,9 @@ indices: type: keyword __typename: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1654,6 +1672,9 @@ indices: type: integer manufacturer_id: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1690,6 +1711,9 @@ indices: type: keyword nationality: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: 
type: keyword __versions: @@ -1722,6 +1746,9 @@ indices: type: keyword manufacturer_id: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1753,6 +1780,9 @@ indices: type: keyword __typename: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1778,6 +1808,9 @@ indices: format: strict_date active: type: boolean + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1803,6 +1836,9 @@ indices: type: keyword name: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1835,6 +1871,9 @@ indices: created_at: type: date format: strict_date_time + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -2156,13 +2195,89 @@ scripts: // No timestamp values matched the params, so return `false`. return false; - update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d: + update_index_data_cd5bc011d0885b2dfd793c9f119f068d: context: update script: lang: painless source: |- - // --- Helper Functions --- // - void setup(Map source, String relationship, Map counts) { + // ============================================================ + // Helper Functions + // ============================================================ + + // Builds a composite nested element key from path segments. + // List segments contribute their matched identifier value; object segments contribute their field name. + String buildNestedElementKey(List pathSegments, Map pathIdentifiers) { + List parts = new ArrayList(); + for (Map segment : pathSegments) { + if (segment.containsKey("list")) { + parts.add(pathIdentifiers[segment.source_field]); + } else { + parts.add(segment.get("object")); + } + } + return String.join(":", parts); + } + + // Looks up path segments for the given relationship and builds the nested element key. 
+ // Returns "" if no path segments are configured (i.e., this is not a nested sourced event). + String buildNestedElementKeyForRelationship(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { + List pathSegments = (List) nestedSourcedPaths.get(relationship); + if (pathSegments == null) { + return ""; + } + return buildNestedElementKey(pathSegments, pathIdentifiers); + } + + // Splits a composite nested element key into a list of parts. + List splitNestedElementKey(String nestedElementKey) { + return Arrays.asList(nestedElementKey.splitOnToken(":")); + } + + // Finds an element in a list where element[matchField] equals matchValue. Returns null if not found. + def findInList(List elements, String matchField, String matchValue) { + for (Map element : elements) { + if (matchValue.equals(element[matchField])) { + return element; + } + } + return null; + } + + // Navigates from `source` through `pathSegments` to find the target nested element. + // Returns the matched element, or null if the path doesn't exist or no match is found. + def navigateToNestedElement(Map source, List pathSegments, List keyParts) { + Map current = source; + + for (int i = 0; i < pathSegments.size(); i++) { + Map segment = (Map) pathSegments.get(i); + boolean isList = segment.containsKey("list"); + String field = isList ? (String) segment.get("list") : (String) segment.get("object"); + + if (!current.containsKey(field)) { + return null; + } + + if (isList) { + current = (Map) findInList((List) current.get(field), (String) segment.get("match_field"), (String) keyParts.get(i)); + } else { + current = (Map) current.get(field); + } + + if (current == null) { + return null; + } + } + + return current; + } + + + // ============================================================ + // Main Functions + // ============================================================ + + // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). 
+ void setup(Map source, String relationship, String nestedElementKey, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -2175,38 +2290,66 @@ scripts: source.__versions[relationship] = [:]; } + if (!nestedElementKey.isEmpty() && source.__versions[relationship][nestedElementKey] == null) { + source.__versions[relationship][nestedElementKey] = [:]; + } + + if (!nestedElementKey.isEmpty()) { + if (source.__nested_sourced_data == null) { + source.__nested_sourced_data = [:]; + } + if (source.__nested_sourced_data[relationship] == null) { + source.__nested_sourced_data[relationship] = [:]; + } + } + if (counts != null && source.__counts == null) { source.__counts = [:]; } } - void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion) { - Map relationshipVersionsMap = source.__versions.get(relationship); - List previousSourceIdsForRelationship = relationshipVersionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); - - if (previousSourceIdsForRelationship.size() > 0) { - throw new IllegalArgumentException( - "Cannot update document " + id + " " + - "with data from related " + relationship + " " + sourceId + " " + - "because the related " + relationship + " has apparently changed (was: " + previousSourceIdsForRelationship + "), " + - "but mutations of relationships used with `sourced_from` are not supported because " + - "allowing it could break ElasticGraph's out-of-order processing guarantees." - ); - } + // Validates that this event is allowed: no relationship mutation and no stale version. + void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String nestedElementKey) { + // For nested events, validate per-element. For top-level events, validate per-relationship. + Map versionsMap = nestedElementKey.isEmpty() + ? 
source.__versions[relationship] + : source.__versions[relationship][nestedElementKey]; - Number maybeDocVersion = relationshipVersionsMap.get(sourceId); + // Check that no other source ID has previously written to this target. + List previousSourceIds = versionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); + if (previousSourceIds.size() > 0) { + if (nestedElementKey.isEmpty()) { + throw new IllegalArgumentException( + "Cannot update document " + id + " " + + "with data from related " + relationship + " " + sourceId + " " + + "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + + "but mutations of relationships used with `sourced_from` are not supported because " + + "allowing it could break ElasticGraph's out-of-order processing guarantees." + ); + } else { + throw new IllegalArgumentException( + "Cannot update nested element [" + nestedElementKey + "] on document " + id + " " + + "with data from " + relationship + " " + sourceId + " " + + "because this element was previously sourced from a different event (" + previousSourceIds + "). " + + "Each nested element can only be sourced from one source document." + ); + } + } - // Our JSON schema requires event versions to be non-negative, so we can safely use Long.MIN_VALUE as a stand-in when the value is null. + // Check that the event version is newer than what we've already seen. + Number maybeDocVersion = versionsMap.get(sourceId); long docVersion = maybeDocVersion == null ? Long.MIN_VALUE : maybeDocVersion.longValue(); if (docVersion >= eventVersion) { + String target = nestedElementKey.isEmpty() ? 
id : id + "/" + nestedElementKey; throw new IllegalArgumentException("ElasticGraph update was a no-op: [" + - id + "]: version conflict, current version [" + + target + "]: version conflict, current version [" + docVersion + "] is higher or equal to the one provided [" + eventVersion + "]"); } } + // Applies top-level fields to the document via putAll, and merges __counts. void applyTopLevelFields(Map source, String id, Map topLevelFields, Map counts) { source.id = id; source.putAll(topLevelFields); @@ -2216,8 +2359,53 @@ scripts: } } - void recordSource(Map source, String relationship, String sourceId, long eventVersion) { - source.__versions[relationship][sourceId] = eventVersion; + // Stores nested sourced fields in the __nested_sourced_data buffer for later application. + void storeNestedSourcedData(Map source, String relationship, Map nestedSourcedFields, String nestedElementKey) { + if (nestedSourcedFields.isEmpty()) { + return; + } + + source.__nested_sourced_data[relationship][nestedElementKey] = nestedSourcedFields; + } + + // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. + // Called after every event so that after a self-event's putAll overwrites nested arrays, + // the buffered data gets re-applied. 
+ void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { + if (source.__nested_sourced_data == null || nestedSourcedPaths.isEmpty()) { + return; + } + + for (sourcedEntry in source.__nested_sourced_data.entrySet()) { + String sourcedRelationship = sourcedEntry.getKey(); + Map elementDataByKey = (Map) sourcedEntry.getValue(); + + List pathSegments = (List) nestedSourcedPaths.get(sourcedRelationship); + if (pathSegments == null) { + continue; + } + + for (elementEntry in elementDataByKey.entrySet()) { + List keyParts = splitNestedElementKey((String) elementEntry.getKey()); + if (keyParts.size() != pathSegments.size()) { + continue; + } + + Map target = (Map) navigateToNestedElement(source, pathSegments, keyParts); + if (target != null) { + target.putAll((Map) elementEntry.getValue()); + } + } + } + } + + // Records the event version in __versions and adds the relationship to __sources. + void recordSource(Map source, String relationship, String sourceId, long eventVersion, String nestedElementKey) { + if (nestedElementKey.isEmpty()) { + source.__versions[relationship][sourceId] = eventVersion; + } else { + source.__versions[relationship][nestedElementKey][sourceId] = eventVersion; + } // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list. // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time. 
@@ -2234,15 +2422,25 @@ scripts: } } - // --- Main script body --- // + // ============================================================ + // Main Execution + // ============================================================ + Map source = ctx._source; String id = params.id; String relationship = params.relationship; String sourceId = params.sourceId; - long eventVersion = (long) params.version; // Cast to long since JSON parses numbers as doubles + long eventVersion = (long) params.version; Map counts = params.__counts; + Map nestedSourcedFields = params.nestedSourcedFields; + Map nestedSourcedPathIdentifiers = params.nestedSourcedPathIdentifiers; + Map nestedSourcedPaths = params.nestedSourcedPaths; + + String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); - setup(source, relationship, counts); - validateSource(source, id, relationship, sourceId, eventVersion); + setup(source, relationship, nestedElementKey, counts); + validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); applyTopLevelFields(source, id, params.topLevelFields, counts); - recordSource(source, relationship, sourceId, eventVersion); + storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); + applyNestedSourcedData(source, nestedSourcedPaths); + recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); diff --git a/config/schema/artifacts/runtime_metadata.yaml b/config/schema/artifacts/runtime_metadata.yaml index 19a30bf8f..49c610e3b 100644 --- a/config/schema/artifacts/runtime_metadata.yaml +++ b/config/schema/artifacts/runtime_metadata.yaml @@ -3085,7 +3085,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: full_address: cardinality: one @@ -3275,7 +3275,7 @@ 
object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -3314,7 +3314,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -3414,7 +3414,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: created_at: cardinality: one @@ -3724,7 +3724,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -3897,7 +3897,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: created_at: cardinality: one @@ -4382,7 +4382,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: ceo: cardinality: one @@ -4540,7 +4540,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: created_at: cardinality: one @@ 
-5501,7 +5501,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -5700,7 +5700,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -5767,7 +5767,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: active: cardinality: one @@ -6304,7 +6304,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: name: cardinality: one @@ -6654,7 +6654,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: formed_on routing_value_source: league - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: country_code: cardinality: one @@ -7754,7 +7754,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: created_at routing_value_source: workspace_id2 - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: amount_cents: cardinality: one @@ -7832,7 +7832,7 @@ object_types_by_name: version: cardinality: one relationship: widget - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: 
update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: widget_cost: cardinality: one @@ -8062,7 +8062,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: introduced_on routing_value_source: primary_continent - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: details: cardinality: one @@ -9107,7 +9107,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: name: cardinality: one @@ -9129,7 +9129,7 @@ object_types_by_name: relationship: workspace rollover_timestamp_value_source: widget.created_at routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: workspace_name: cardinality: one @@ -9364,4 +9364,4 @@ static_script_ids_by_scoped_name: field/as_day_of_week: field_as_day_of_week_f2b5c7d9e8f75bf2457b52412bfb6537 field/as_time_of_day: field_as_time_of_day_ed82aba44fc66bff5635bec4305c1c66 filter/by_time_of_day: filter_by_time_of_day_ea12d0561b24961789ab68ed38435612 - update/index_data: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + update/index_data: update_index_data_cd5bc011d0885b2dfd793c9f119f068d diff --git a/config/schema/artifacts_with_apollo/datastore_config.yaml b/config/schema/artifacts_with_apollo/datastore_config.yaml index 61028816d..8475cda25 100644 --- a/config/schema/artifacts_with_apollo/datastore_config.yaml +++ b/config/schema/artifacts_with_apollo/datastore_config.yaml @@ -1236,6 +1236,9 @@ index_templates: type: integer nested_fields2|the_seasons: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1310,6 +1313,9 @@ 
index_templates: type: integer widget_options|colors: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1476,6 +1482,9 @@ index_templates: type: integer fees|amount_cents: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1527,6 +1536,9 @@ indices: type: integer shapes|coordinates: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1593,6 +1605,9 @@ indices: type: integer owner_ids: type: integer + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1625,6 +1640,9 @@ indices: type: keyword __typename: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1654,6 +1672,9 @@ indices: type: integer manufacturer_id: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1690,6 +1711,9 @@ indices: type: keyword nationality: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1722,6 +1746,9 @@ indices: type: keyword manufacturer_id: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1753,6 +1780,9 @@ indices: type: keyword __typename: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1778,6 +1808,9 @@ indices: format: strict_date active: type: boolean + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1803,6 +1836,9 @@ indices: type: keyword name: type: keyword + __nested_sourced_data: + type: object + dynamic: 'false' __sources: type: keyword __versions: @@ -1835,6 +1871,9 @@ indices: created_at: type: date format: strict_date_time + __nested_sourced_data: + type: object + dynamic: 
'false' __sources: type: keyword __versions: @@ -2156,13 +2195,89 @@ scripts: // No timestamp values matched the params, so return `false`. return false; - update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d: + update_index_data_cd5bc011d0885b2dfd793c9f119f068d: context: update script: lang: painless source: |- - // --- Helper Functions --- // - void setup(Map source, String relationship, Map counts) { + // ============================================================ + // Helper Functions + // ============================================================ + + // Builds a composite nested element key from path segments. + // List segments contribute their matched identifier value; object segments contribute their field name. + String buildNestedElementKey(List pathSegments, Map pathIdentifiers) { + List parts = new ArrayList(); + for (Map segment : pathSegments) { + if (segment.containsKey("list")) { + parts.add(pathIdentifiers[segment.source_field]); + } else { + parts.add(segment.get("object")); + } + } + return String.join(":", parts); + } + + // Looks up path segments for the given relationship and builds the nested element key. + // Returns "" if no path segments are configured (i.e., this is not a nested sourced event). + String buildNestedElementKeyForRelationship(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { + List pathSegments = (List) nestedSourcedPaths.get(relationship); + if (pathSegments == null) { + return ""; + } + return buildNestedElementKey(pathSegments, pathIdentifiers); + } + + // Splits a composite nested element key into a list of parts. + List splitNestedElementKey(String nestedElementKey) { + return Arrays.asList(nestedElementKey.splitOnToken(":")); + } + + // Finds an element in a list where element[matchField] equals matchValue. Returns null if not found. 
+ def findInList(List elements, String matchField, String matchValue) { + for (Map element : elements) { + if (matchValue.equals(element[matchField])) { + return element; + } + } + return null; + } + + // Navigates from `source` through `pathSegments` to find the target nested element. + // Returns the matched element, or null if the path doesn't exist or no match is found. + def navigateToNestedElement(Map source, List pathSegments, List keyParts) { + Map current = source; + + for (int i = 0; i < pathSegments.size(); i++) { + Map segment = (Map) pathSegments.get(i); + boolean isList = segment.containsKey("list"); + String field = isList ? (String) segment.get("list") : (String) segment.get("object"); + + if (!current.containsKey(field)) { + return null; + } + + if (isList) { + current = (Map) findInList((List) current.get(field), (String) segment.get("match_field"), (String) keyParts.get(i)); + } else { + current = (Map) current.get(field); + } + + if (current == null) { + return null; + } + } + + return current; + } + + + // ============================================================ + // Main Functions + // ============================================================ + + // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). 
+ void setup(Map source, String relationship, String nestedElementKey, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -2175,38 +2290,66 @@ scripts: source.__versions[relationship] = [:]; } + if (!nestedElementKey.isEmpty() && source.__versions[relationship][nestedElementKey] == null) { + source.__versions[relationship][nestedElementKey] = [:]; + } + + if (!nestedElementKey.isEmpty()) { + if (source.__nested_sourced_data == null) { + source.__nested_sourced_data = [:]; + } + if (source.__nested_sourced_data[relationship] == null) { + source.__nested_sourced_data[relationship] = [:]; + } + } + if (counts != null && source.__counts == null) { source.__counts = [:]; } } - void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion) { - Map relationshipVersionsMap = source.__versions.get(relationship); - List previousSourceIdsForRelationship = relationshipVersionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); - - if (previousSourceIdsForRelationship.size() > 0) { - throw new IllegalArgumentException( - "Cannot update document " + id + " " + - "with data from related " + relationship + " " + sourceId + " " + - "because the related " + relationship + " has apparently changed (was: " + previousSourceIdsForRelationship + "), " + - "but mutations of relationships used with `sourced_from` are not supported because " + - "allowing it could break ElasticGraph's out-of-order processing guarantees." - ); - } + // Validates that this event is allowed: no relationship mutation and no stale version. + void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String nestedElementKey) { + // For nested events, validate per-element. For top-level events, validate per-relationship. + Map versionsMap = nestedElementKey.isEmpty() + ? 
source.__versions[relationship] + : source.__versions[relationship][nestedElementKey]; - Number maybeDocVersion = relationshipVersionsMap.get(sourceId); + // Check that no other source ID has previously written to this target. + List previousSourceIds = versionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); + if (previousSourceIds.size() > 0) { + if (nestedElementKey.isEmpty()) { + throw new IllegalArgumentException( + "Cannot update document " + id + " " + + "with data from related " + relationship + " " + sourceId + " " + + "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + + "but mutations of relationships used with `sourced_from` are not supported because " + + "allowing it could break ElasticGraph's out-of-order processing guarantees." + ); + } else { + throw new IllegalArgumentException( + "Cannot update nested element [" + nestedElementKey + "] on document " + id + " " + + "with data from " + relationship + " " + sourceId + " " + + "because this element was previously sourced from a different event (" + previousSourceIds + "). " + + "Each nested element can only be sourced from one source document." + ); + } + } - // Our JSON schema requires event versions to be non-negative, so we can safely use Long.MIN_VALUE as a stand-in when the value is null. + // Check that the event version is newer than what we've already seen. + Number maybeDocVersion = versionsMap.get(sourceId); long docVersion = maybeDocVersion == null ? Long.MIN_VALUE : maybeDocVersion.longValue(); if (docVersion >= eventVersion) { + String target = nestedElementKey.isEmpty() ? 
id : id + "/" + nestedElementKey; throw new IllegalArgumentException("ElasticGraph update was a no-op: [" + - id + "]: version conflict, current version [" + + target + "]: version conflict, current version [" + docVersion + "] is higher or equal to the one provided [" + eventVersion + "]"); } } + // Applies top-level fields to the document via putAll, and merges __counts. void applyTopLevelFields(Map source, String id, Map topLevelFields, Map counts) { source.id = id; source.putAll(topLevelFields); @@ -2216,8 +2359,53 @@ scripts: } } - void recordSource(Map source, String relationship, String sourceId, long eventVersion) { - source.__versions[relationship][sourceId] = eventVersion; + // Stores nested sourced fields in the __nested_sourced_data buffer for later application. + void storeNestedSourcedData(Map source, String relationship, Map nestedSourcedFields, String nestedElementKey) { + if (nestedSourcedFields.isEmpty()) { + return; + } + + source.__nested_sourced_data[relationship][nestedElementKey] = nestedSourcedFields; + } + + // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. + // Called after every event so that after a self-event's putAll overwrites nested arrays, + // the buffered data gets re-applied. 
+ void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { + if (source.__nested_sourced_data == null || nestedSourcedPaths.isEmpty()) { + return; + } + + for (sourcedEntry in source.__nested_sourced_data.entrySet()) { + String sourcedRelationship = sourcedEntry.getKey(); + Map elementDataByKey = (Map) sourcedEntry.getValue(); + + List pathSegments = (List) nestedSourcedPaths.get(sourcedRelationship); + if (pathSegments == null) { + continue; + } + + for (elementEntry in elementDataByKey.entrySet()) { + List keyParts = splitNestedElementKey((String) elementEntry.getKey()); + if (keyParts.size() != pathSegments.size()) { + continue; + } + + Map target = (Map) navigateToNestedElement(source, pathSegments, keyParts); + if (target != null) { + target.putAll((Map) elementEntry.getValue()); + } + } + } + } + + // Records the event version in __versions and adds the relationship to __sources. + void recordSource(Map source, String relationship, String sourceId, long eventVersion, String nestedElementKey) { + if (nestedElementKey.isEmpty()) { + source.__versions[relationship][sourceId] = eventVersion; + } else { + source.__versions[relationship][nestedElementKey][sourceId] = eventVersion; + } // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list. // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time. 
@@ -2234,15 +2422,25 @@ scripts: } } - // --- Main script body --- // + // ============================================================ + // Main Execution + // ============================================================ + Map source = ctx._source; String id = params.id; String relationship = params.relationship; String sourceId = params.sourceId; - long eventVersion = (long) params.version; // Cast to long since JSON parses numbers as doubles + long eventVersion = (long) params.version; Map counts = params.__counts; + Map nestedSourcedFields = params.nestedSourcedFields; + Map nestedSourcedPathIdentifiers = params.nestedSourcedPathIdentifiers; + Map nestedSourcedPaths = params.nestedSourcedPaths; + + String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); - setup(source, relationship, counts); - validateSource(source, id, relationship, sourceId, eventVersion); + setup(source, relationship, nestedElementKey, counts); + validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); applyTopLevelFields(source, id, params.topLevelFields, counts); - recordSource(source, relationship, sourceId, eventVersion); + storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); + applyNestedSourcedData(source, nestedSourcedPaths); + recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); diff --git a/config/schema/artifacts_with_apollo/runtime_metadata.yaml b/config/schema/artifacts_with_apollo/runtime_metadata.yaml index 8e7ff906c..c6ac4a47d 100644 --- a/config/schema/artifacts_with_apollo/runtime_metadata.yaml +++ b/config/schema/artifacts_with_apollo/runtime_metadata.yaml @@ -3114,7 +3114,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: 
full_address: cardinality: one @@ -3304,7 +3304,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -3343,7 +3343,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -3464,7 +3464,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: created_at: cardinality: one @@ -3826,7 +3826,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -3999,7 +3999,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: created_at: cardinality: one @@ -4484,7 +4484,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: ceo: cardinality: one @@ -4642,7 +4642,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d 
top_level_fields_params: created_at: cardinality: one @@ -5624,7 +5624,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -5823,7 +5823,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: __typename: cardinality: one @@ -5890,7 +5890,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: active: cardinality: one @@ -6433,7 +6433,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: name: cardinality: one @@ -6783,7 +6783,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: formed_on routing_value_source: league - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: country_code: cardinality: one @@ -7883,7 +7883,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: created_at routing_value_source: workspace_id2 - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: amount_cents: cardinality: one @@ -7961,7 +7961,7 @@ object_types_by_name: version: cardinality: one relationship: widget - script_id: 
update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: widget_cost: cardinality: one @@ -8191,7 +8191,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: introduced_on routing_value_source: primary_continent - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: details: cardinality: one @@ -9236,7 +9236,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: name: cardinality: one @@ -9258,7 +9258,7 @@ object_types_by_name: relationship: workspace rollover_timestamp_value_source: widget.created_at routing_value_source: id - script_id: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d top_level_fields_params: workspace_name: cardinality: one @@ -9536,4 +9536,4 @@ static_script_ids_by_scoped_name: field/as_day_of_week: field_as_day_of_week_f2b5c7d9e8f75bf2457b52412bfb6537 field/as_time_of_day: field_as_time_of_day_ed82aba44fc66bff5635bec4305c1c66 filter/by_time_of_day: filter_by_time_of_day_ea12d0561b24961789ab68ed38435612 - update/index_data: update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d + update/index_data: update_index_data_cd5bc011d0885b2dfd793c9f119f068d diff --git a/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/shared_examples.rb b/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/shared_examples.rb index 639e39234..a68cc4d7a 100644 --- a/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/shared_examples.rb +++ 
b/elasticgraph-admin/spec/integration/elastic_graph/admin/index_definition_configurator/shared_examples.rb @@ -42,7 +42,7 @@ def simulate_presence_of_extra_setting(admin, index_definition_name, name, value let(:output_io) { StringIO.new } let(:clock) { class_double(::Time, now: ::Time.utc(2024, 3, 20, 12, 0, 0)) } let(:mapping_removal_note_snippet) { "extra fields listed here will not actually get removed" } - let(:index_meta_fields) { ["__sources", "__typename", "__versions"] } + let(:index_meta_fields) { ["__nested_sourced_data", "__sources", "__typename", "__versions"] } it "idempotently creates an index or index template, avoiding unneeded datastore write calls" do expect { diff --git a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb index 6d77a53e5..bd58ef4de 100644 --- a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb +++ b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb @@ -265,7 +265,7 @@ def type_name_for_index(index_name) upsert: {}, script: a_hash_including( id: /WidgetCurrency_from_Widget_/, - params: {"topLevelFields" => {"name" => ["thing1"]}, "id" => "USD"} + params: {"nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "USD"} ) ) end diff --git a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb index 1a15a9e62..28471ec6f 100644 --- a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb +++ b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb @@ -73,7 +73,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: 
operations.first.update_target.script_id, params: { - "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "17" }}, scripted_upsert: true, @@ -102,7 +102,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: INDEX_DATA_UPDATE_SCRIPT_ID, params: { - "topLevelFields" => {"name" => "thing1"}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => "thing1"}, "id" => "17", "staticValue" => 47, "sourceType" => "Widget", @@ -156,7 +156,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "topLevelFields" => {"name" => []}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => []}, "id" => "17" }}, scripted_upsert: true, @@ -177,7 +177,7 @@ module Operation {update: {_id: "embedded_workspace_id", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "embedded_workspace_id" }}, scripted_upsert: true, @@ -202,7 +202,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "topLevelFields" => {"embedded_values.missing_field" => [], "name" => nil}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => 
{"embedded_values.missing_field" => [], "name" => nil}, "id" => "17" }}, scripted_upsert: true, @@ -229,13 +229,13 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "topLevelFields" => { - "embedded_values" => ["thing1"], - "name" => { - "name" => "embedded_name", - "workspace_id" => "embedded_workspace_id" - } - }, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => { + "embedded_values" => ["thing1"], + "name" => { + "name" => "embedded_name", + "workspace_id" => "embedded_workspace_id" + } + }, "id" => "17" }}, scripted_upsert: true, @@ -261,7 +261,7 @@ module Operation { script: {id: operations.first.update_target.script_id, params: { # Float-typed integer values are coerced to true ints before indexing - "topLevelFields" => {"size" => [an_instance_of(::Integer).and(eq_to(4))]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"size" => [an_instance_of(::Integer).and(eq_to(4))]}, "id" => "17" }}, scripted_upsert: true, @@ -282,7 +282,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "17" }}, scripted_upsert: true, @@ -291,7 +291,7 @@ module Operation {update: {_id: "18", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => 
{"name" => ["thing1"]}, "id" => "18" }}, scripted_upsert: true, @@ -300,7 +300,7 @@ module Operation {update: {_id: "19", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "19" }}, scripted_upsert: true, diff --git a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb index 37c021ae5..f8d18fcee 100644 --- a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb +++ b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb @@ -23,6 +23,9 @@ class UpdateTarget < ::Data.define( :routing_value_source, :rollover_timestamp_value_source, :top_level_fields_params, + :nested_sourced_fields_params, + :nested_sourced_path_identifiers_params, + :nested_sourced_paths, :metadata_params ) TYPE = "type" @@ -32,6 +35,9 @@ class UpdateTarget < ::Data.define( ROUTING_VALUE_SOURCE = "routing_value_source" ROLLOVER_TIMESTAMP_VALUE_SOURCE = "rollover_timestamp_value_source" TOP_LEVEL_FIELDS_PARAMS = "top_level_fields_params" + NESTED_SOURCED_FIELDS_PARAMS = "nested_sourced_fields_params" + NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS = "nested_sourced_path_identifiers_params" + NESTED_SOURCED_PATHS = "nested_sourced_paths" METADATA_PARAMS = "metadata_params" def self.from_hash(hash) @@ -43,6 +49,9 @@ def self.from_hash(hash) routing_value_source: hash[ROUTING_VALUE_SOURCE], rollover_timestamp_value_source: hash[ROLLOVER_TIMESTAMP_VALUE_SOURCE], top_level_fields_params: Param.load_params_hash(hash[TOP_LEVEL_FIELDS_PARAMS] || {}), + nested_sourced_fields_params: 
Param.load_params_hash(hash[NESTED_SOURCED_FIELDS_PARAMS] || {}), + nested_sourced_path_identifiers_params: Param.load_params_hash(hash[NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS] || {}), + nested_sourced_paths: hash[NESTED_SOURCED_PATHS] || {}, metadata_params: Param.load_params_hash(hash[METADATA_PARAMS] || {}) ) end @@ -52,6 +61,9 @@ def to_dumpable_hash # Keys here are ordered alphabetically; please keep them that way. ID_SOURCE => id_source, METADATA_PARAMS => Param.dump_params_hash(metadata_params), + NESTED_SOURCED_FIELDS_PARAMS => Param.dump_params_hash(nested_sourced_fields_params), + NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS => Param.dump_params_hash(nested_sourced_path_identifiers_params), + NESTED_SOURCED_PATHS => nested_sourced_paths, RELATIONSHIP => relationship, ROLLOVER_TIMESTAMP_VALUE_SOURCE => rollover_timestamp_value_source, ROUTING_VALUE_SOURCE => routing_value_source, @@ -74,7 +86,21 @@ def params_for(doc_id:, event:, prepared_record:) [name, param.value_for(event)] end - meta.merge({"id" => doc_id, "topLevelFields" => top_level_fields}) + nested_sourced_fields = nested_sourced_fields_params.to_h do |name, param| + [name, param.value_for(prepared_record)] + end + + nested_sourced_path_identifiers = nested_sourced_path_identifiers_params.to_h do |name, param| + [name, param.value_for(prepared_record)] + end + + meta.merge({ + "id" => doc_id, + "topLevelFields" => top_level_fields, + "nestedSourcedFields" => nested_sourced_fields, + "nestedSourcedPathIdentifiers" => nested_sourced_path_identifiers, + "nestedSourcedPaths" => nested_sourced_paths + }) end end end diff --git a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs index 4a43bfdb3..46d9ac97c 100644 --- a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs +++ 
b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs @@ -9,6 +9,9 @@ module ElasticGraph attr_reader routing_value_source: ::String? attr_reader rollover_timestamp_value_source: ::String? attr_reader top_level_fields_params: paramsHash + attr_reader nested_sourced_fields_params: paramsHash + attr_reader nested_sourced_path_identifiers_params: paramsHash + attr_reader nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]] attr_reader metadata_params: paramsHash def initialize: ( @@ -19,6 +22,9 @@ module ElasticGraph routing_value_source: ::String?, rollover_timestamp_value_source: ::String?, top_level_fields_params: paramsHash, + nested_sourced_fields_params: paramsHash, + nested_sourced_path_identifiers_params: paramsHash, + nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]], metadata_params: paramsHash ) -> void @@ -30,6 +36,9 @@ module ElasticGraph ?routing_value_source: ::String?, ?rollover_timestamp_value_source: ::String?, ?top_level_fields_params: paramsHash, + ?nested_sourced_fields_params: paramsHash, + ?nested_sourced_path_identifiers_params: paramsHash, + ?nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]], ?metadata_params: paramsHash ) -> UpdateTarget @@ -46,6 +55,9 @@ module ElasticGraph ROUTING_VALUE_SOURCE: "routing_value_source" ROLLOVER_TIMESTAMP_VALUE_SOURCE: "rollover_timestamp_value_source" TOP_LEVEL_FIELDS_PARAMS: "top_level_fields_params" + NESTED_SOURCED_FIELDS_PARAMS: "nested_sourced_fields_params" + NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS: "nested_sourced_path_identifiers_params" + NESTED_SOURCED_PATHS: "nested_sourced_paths" METADATA_PARAMS: "metadata_params" def self.from_hash: (::Hash[::String, untyped]) -> UpdateTarget diff --git a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb 
b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb index c9f3733eb..6cbdb2e2e 100644 --- a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb +++ b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb @@ -43,6 +43,9 @@ module RuntimeMetadata routing_value_source: "cost.currency_name", rollover_timestamp_value_source: "currency_introduced_on", top_level_fields_params: {"workspace_id" => DynamicParam.new(source_path: "wid", cardinality: :one)}, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {}, metadata_params: {"relationshipName" => StaticParam.new(value: "currency")} ), UpdateTarget.new( @@ -53,6 +56,9 @@ module RuntimeMetadata routing_value_source: nil, rollover_timestamp_value_source: nil, top_level_fields_params: {}, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {}, metadata_params: {} ) ], @@ -311,6 +317,9 @@ module RuntimeMetadata routing_value_source: nil, rollover_timestamp_value_source: nil, top_level_fields_params: {"workspace_id" => dynamic_param_with(cardinality: :many)}, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {}, metadata_params: {} )]), "IndexDefinitionNamesOnly" => object_type_with(index_definition_names: ["foo", "bar"]), diff --git a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb index d3ee76a61..95eb6a41b 100644 --- a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb +++ b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb @@ 
-26,6 +26,9 @@ module RuntimeMetadata routing_value_source: nil, rollover_timestamp_value_source: nil, top_level_fields_params: {}, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {}, metadata_params: {} ) end @@ -84,9 +87,9 @@ module RuntimeMetadata } ) - without_id_or_top_level_fields = params.except("id", "topLevelFields") + without_omitted_fields = params.except("id", "topLevelFields", "nestedSourcedFields", "nestedSourcedPathIdentifiers", "nestedSourcedPaths") - expect(without_id_or_top_level_fields).to eq( + expect(without_omitted_fields).to eq( "foo" => 43, "bar" => "hello", "bazz" => [12] diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb index cd1bf8a8f..a993b8ab3 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb @@ -273,13 +273,14 @@ def new_field_source(relationship_name:, field_path:) end @@field_source_new = prevent_non_factory_instantiation_of(SchemaElements::FieldSource) - def new_relationship(field, cardinality:, related_type:, foreign_key:, direction:) + def new_relationship(field, cardinality:, related_type:, foreign_key:, direction:, indexing_only: false) @@relationship_new.call( field, cardinality: cardinality, related_type: related_type, foreign_key: foreign_key, - direction: direction + direction: direction, + indexing_only: indexing_only ) end @@relationship_new = prevent_non_factory_instantiation_of(SchemaElements::Relationship) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb index a01df76c5..d524f7f02 100644 --- 
a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb @@ -272,7 +272,10 @@ def runtime_metadata_for_source_type metadata_params: {}, top_level_fields_params: fields.map(&:source_field).to_h do |f| [f, SchemaArtifacts::RuntimeMetadata::DynamicParam.new(source_path: f, cardinality: :many)] - end + end, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {} ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index a6400db64..07e4dee50 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -298,6 +298,13 @@ def mappings .then { |mapping| ListCountsMapping.merged_into(mapping, for_type: indexed_type) } .then do |fm| internal_fields = { + "__nested_sourced_data" => { + "type" => "object", + # __nested_sourced_data stores sourced data for nested sourced_from fields. Its keys are not + # statically known (they're relationship names and composite element keys), so we + # set dynamic to "false" to allow arbitrary keys in _source without indexing them. 
+ "dynamic" => "false" + }, "__sources" => {"type" => "keyword"}, "__versions" => { "type" => "object", diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb new file mode 100644 index 000000000..314f33510 --- /dev/null +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb @@ -0,0 +1,181 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/errors" + +module ElasticGraph + module SchemaDefinition + module Indexing + # Resolves a chain of `parent_relationship` links from a leaf embedded type up to the + # root indexed type. Produces a `ResolvedNestedChain` describing the nested path and + # match fields at each level. + # + # @private + class NestedRelationshipChainResolver + def initialize(schema_def_state:) + @schema_def_state = schema_def_state + end + + # Resolves the chain starting from `starting_relationship` (which must have a + # `parent_relationship_config`) on `starting_type`. + # + # Returns a tuple of [resolved_chain, errors]. + # If errors is non-empty, resolved_chain will be nil. + def resolve(starting_relationship, starting_type) + errors = [] # : ::Array[::String] + chain = [] # : ::Array[PathSegment] + current_rel = starting_relationship + current_type = starting_type + visited_types = ::Set.new([starting_type.name]) + + # Walk from leaf to root, building path segments in reverse. Each iteration validates + # the current relationship's parent_relationship link and advances up one level. 
+ while current_rel.parent_relationship_config + config = current_rel.parent_relationship_config + + # Validate that parent_relationship is used with indexing_only + unless current_rel.indexing_only + errors << "#{rel_description(current_type, current_rel)} uses `parent_relationship` but is not declared with " \ + "`indexing_only: true`. Relationships with `parent_relationship` must be indexing-only." + break + end + + # Detect circular chains + if visited_types.include?(config[:parent_type_name]) + errors << "#{rel_description(current_type, current_rel)} creates a circular `parent_relationship` chain " \ + "— `#{config[:parent_type_name]}` was already visited. The chain must terminate at a root indexed type." + break + end + + # Find the parent type + parent_type = @schema_def_state.object_types_by_name[config[:parent_type_name]] + unless parent_type + errors << "#{rel_description(current_type, current_rel)} references parent type " \ + "`#{config[:parent_type_name]}` via `parent_relationship`, but that type does not exist." + break + end + + # Find the parent relationship + parent_rel = parent_type.relationships_by_name[config[:parent_relationship_name]] + unless parent_rel + errors << "#{rel_description(current_type, current_rel)} references parent relationship " \ + "`#{parent_type.name}.#{config[:parent_relationship_name]}` via `parent_relationship`, " \ + "but that relationship does not exist. Is it misspelled?" + break + end + + # Validate both relationships target the same source type + current_source_type_name = current_rel.related_type.unwrap_non_null.name + parent_source_type_name = parent_rel.related_type.unwrap_non_null.name + unless current_source_type_name == parent_source_type_name + errors << "#{rel_description(current_type, current_rel)} relates to `#{current_source_type_name}`, " \ + "but its parent relationship `#{parent_type.name}.#{config[:parent_relationship_name]}` relates to " \ + "`#{parent_source_type_name}`. 
All relationships in a `parent_relationship` chain must relate to the same source type." + break + end + + # Find the embedding field (field on parent_type whose type is current_type) + embedding_field = find_embedding_field(parent_type, current_type, errors) + unless embedding_field + break if errors.any? + errors << "#{rel_description(current_type, current_rel)} declares `#{parent_type.name}` as its parent type " \ + "via `parent_relationship`, but `#{parent_type.name}` has no field of type `#{current_type.name}`." + break + end + + # For list segments, validate that the embedded type has an `id` field to match on. + if embedding_field.type.list? + unless current_type.indexing_fields_by_name_in_index["id"] + errors << "#{rel_description(current_type, current_rel)} requires an `id` field on `#{current_type.name}` " \ + "for nested element matching, but `#{current_type.name}` has no field named `id`." + break + end + end + + source_field_name = current_rel.foreign_key + + # We use "id" as the match field, consistent with how ElasticGraph relationships always join on `id` + # via foreign keys. In the future, it would be nice if this field name were configurable. Additionally, the + # composite key separator ":" in the Painless script assumes id values do not contain that character. + # It would be nice to explicitly guard against that somehow. + chain << PathSegment.new( + parent_type: parent_type, + embedding_field: embedding_field, + match_field: "id", + source_field: source_field_name + ) + + # Move up the chain + current_rel = parent_rel + current_type = parent_type + visited_types.add(parent_type.name) + end + + return [nil, errors] if errors.any? + + # The loop terminated because current_rel has no parent_relationship_config — + # this is the root relationship. Validate that current_type is indexed. + unless current_type.root_document_type? 
+ errors << "The `parent_relationship` chain from #{rel_description(starting_type, starting_relationship)} " \ + "terminates at `#{current_type.name}`, but `#{current_type.name}` is not an indexed type. " \ + "The chain must terminate at an indexed type." + return [nil, errors] + end + + resolved_chain = ResolvedNestedChain.new( + root_indexed_type: current_type, + path_segments: chain.reverse, # reverse so root-to-leaf order + root_relationship: current_rel + ) + + [resolved_chain, errors] + end + + private + + def find_embedding_field(parent_type, child_type, errors) + matches = parent_type.graphql_fields_by_name.values.select do |field| + field.type.fully_unwrapped.name == child_type.name + end + + if matches.size > 1 + field_names = matches.map(&:name).join(", ") + errors << "`#{parent_type.name}` has multiple fields of type `#{child_type.name}` (#{field_names}). " \ + "Ambiguous embedding path for `parent_relationship` — cannot determine which field to use." + nil + else + matches.first + end + end + + def rel_description(type, relationship) + "`#{type.name}.#{relationship.name}`" + end + end + + # The result of resolving a nested relationship chain. + # + # @private + ResolvedNestedChain = ::Data.define( + :root_indexed_type, # ObjectType - the indexed type at the root + :path_segments, # Array - ordered root-to-leaf + :root_relationship # Relationship - the root relationship (no parent_relationship) + ) + + # A single segment of the nested path. 
+      #
+      # @private
+      PathSegment = ::Data.define(
+        :parent_type, # ObjectType - the parent type at this level
+        :embedding_field, # Field - the field on parent_type that embeds the child type
+        :match_field, # String - field on the nested type to match (e.g., "id")
+        :source_field # String - field on the source type with the match value (from `via`)
+      )
+    end
+  end
+end
diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb
new file mode 100644
index 000000000..8f037028e
--- /dev/null
+++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb
@@ -0,0 +1,207 @@
+# Copyright 2024 - 2026 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+
+require "elastic_graph/schema_artifacts/runtime_metadata/params"
+require "elastic_graph/schema_definition/indexing/update_target_factory"
+
+module ElasticGraph
+  module SchemaDefinition
+    module Indexing
+      # Responsible for resolving a nested relationship chain and a set of `sourced_from` fields
+      # into an `UpdateTarget` for updating nested elements within a root indexed type.
+      #
+      # @private
+      class NestedUpdateTargetResolver
+        def initialize(
+          object_type:,
+          relationship:,
+          sourced_fields:,
+          resolved_chain:,
+          field_path_resolver:,
+          schema_def_state:
+        )
+          @object_type = object_type
+          @relationship = relationship
+          @sourced_fields = sourced_fields
+          @resolved_chain = resolved_chain
+          @field_path_resolver = field_path_resolver
+          @schema_def_state = schema_def_state
+        end
+
+        # Returns a tuple of [update_target, errors].
+        # If errors is non-empty, update_target will be nil.
+        def resolve
+          errors = [] # : ::Array[::String]
+
+          if relationship.many?
+            errors << "`#{object_type.name}.#{relationship.name}` is a `relates_to_many` relationship, " \
+              "but nested `sourced_from` is only supported on a `relates_to_one` relationship."
+            return [nil, errors]
+          end
+
+          nested_sourced_fields_params = resolve_nested_sourced_fields_params(errors)
+          return [nil, errors] if nested_sourced_fields_params.empty? && errors.any?
+
+          nested_sourced_path_identifiers_params = build_path_identifier_params
+          nested_sourced_paths = build_nested_sourced_paths
+          routing_value_source = resolve_routing(errors)
+          rollover_timestamp_value_source = resolve_rollover(errors)
+          validate_has_had_multiple_sources(errors)
+
+          if errors.any?
+            [nil, errors]
+          else
+            # Wrap in map keyed by relationship name — the script uses this to look up
+            # the path config for the specific relationship being processed.
+            nested_sourced_paths_map = {relationship.name => nested_sourced_paths}
+
+            update_target = UpdateTargetFactory.new_normal_indexing_update_target(
+              type: resolved_chain.root_indexed_type.name,
+              relationship: relationship.name,
+              id_source: resolved_chain.root_relationship.foreign_key,
+              top_level_fields_params: {},
+              nested_sourced_fields_params: nested_sourced_fields_params,
+              nested_sourced_path_identifiers_params: nested_sourced_path_identifiers_params,
+              routing_value_source: routing_value_source,
+              rollover_timestamp_value_source: rollover_timestamp_value_source,
+              nested_sourced_paths: nested_sourced_paths_map
+            )
+
+            [update_target, errors]
+          end
+        end
+
+        private
+
+        # @dynamic object_type, relationship, sourced_fields, resolved_chain, field_path_resolver, schema_def_state
+        attr_reader :object_type, :relationship, :sourced_fields, :resolved_chain, :field_path_resolver, :schema_def_state
+
+        # The object type this relationship points at (non-null wrapper removed), looked up once and memoized.
+        def related_type
+          @related_type ||= schema_def_state.object_types_by_name[relationship.related_type.unwrap_non_null.name]
+        end
+
+        # Builds the `nested_sourced_fields_params` hash: one `DynamicParam` per `sourced_from` field, keyed by
+        # the field's name in the index. A field whose source path cannot be resolved on the related type adds
+        # a message to `errors` and is left out of the result.
+        def resolve_nested_sourced_fields_params(errors)
+          sourced_fields.filter_map do |field|
+            field_source = field.source # : SchemaElements::FieldSource
+            referenced_field_path = field_path_resolver.resolve_public_path(related_type, field_source.field_path) do |parent_field|
+              !parent_field.type.list?
+            end
+
+            if referenced_field_path.nil?
+              errors << "`#{object_type.name}.#{field.name}` has an invalid `sourced_from` argument: " \
+                "`#{related_type.name}.#{field_source.field_path}` does not exist as an indexing field."
+              nil
+            else
+              param = SchemaArtifacts::RuntimeMetadata::DynamicParam.new(
+                source_path: referenced_field_path.path_in_index,
+                cardinality: :one
+              )
+              [field.name_in_index, param]
+            end
+          end.to_h
+        end
+
+        # Builds the params that identify which list element to update: one `DynamicParam` per list segment
+        # of the chain, keyed by (and read from) that segment's `source_field`.
+        def build_path_identifier_params
+          resolved_chain.path_segments.filter_map do |segment|
+            # Only list segments need identifier fields — object segments have no ambiguity.
+            next unless segment.embedding_field.type.list?
+
+            source_field = segment.source_field
+            [source_field, SchemaArtifacts::RuntimeMetadata::DynamicParam.new(
+              source_path: source_field,
+              cardinality: :one
+            )]
+          end.to_h
+        end
+
+        # Describes the chain as an array of hashes in `path_segments` order: list segments carry
+        # `list`/`match_field`/`source_field` keys, object segments carry only an `object` key.
+        def build_nested_sourced_paths
+          resolved_chain.path_segments.map do |segment|
+            if segment.embedding_field.type.list?
+              {
+                "list" => segment.embedding_field.name_in_index,
+                "match_field" => segment.match_field,
+                "source_field" => segment.source_field
+              }
+            else
+              {"object" => segment.embedding_field.name_in_index}
+            end
+          end
+        end
+
+        # Returns the index path (on the related type) of the field used to route updates to the root index,
+        # or `nil` when the root relationship yields no routing value source or the path cannot be resolved.
+        # If `routing_value_source_for_index` invokes the block, a message is added to `errors` and `nil` is returned.
+        def resolve_routing(errors)
+          root_rel = resolved_chain.root_relationship
+          root_index = resolved_chain.root_indexed_type.index_def
+
+          routing_value_source = root_rel.routing_value_source_for_index(root_index) do |local_need|
+            errors << "Cannot update `#{resolved_chain.root_indexed_type.name}` documents with nested sourced data from " \
+              "`#{relationship.name}` events, because `#{resolved_chain.root_indexed_type.name}` uses custom shard routing " \
+              "but we don't know what field to use to route the update requests. To fix it, add a call like this to the " \
+              "`#{resolved_chain.root_indexed_type.name}.#{root_rel.name}` relationship definition: " \
+              "`rel.equivalent_field \"[#{related_type.name} field]\", locally_named: \"#{local_need}\"`."
+            # `return` inside the block exits `resolve_routing` itself. Return a single `nil` (not a
+            # `[nil, errors]` tuple) so the caller's `routing_value_source` never receives an Array.
+            return nil
+          end
+
+          if routing_value_source
+            field_path = field_path_resolver.resolve_public_path(related_type, routing_value_source) do |parent_field|
+              !parent_field.type.list?
+            end
+            field_path&.path_in_index
+          end
+        end
+
+        # Same contract as `resolve_routing`, but for the timestamp field used to select the rollover index
+        # of the root indexed type.
+        def resolve_rollover(errors)
+          root_rel = resolved_chain.root_relationship
+          root_index = resolved_chain.root_indexed_type.index_def
+
+          rollover_value_source = root_rel.rollover_timestamp_value_source_for_index(root_index) do |local_need|
+            errors << "Cannot update `#{resolved_chain.root_indexed_type.name}` documents with nested sourced data from " \
+              "`#{relationship.name}` events, because `#{resolved_chain.root_indexed_type.name}` uses a rollover index " \
+              "but we don't know what field to use to select an index for the update requests. To fix it, add a call like this to the " \
+              "`#{resolved_chain.root_indexed_type.name}.#{root_rel.name}` relationship definition: " \
+              "`rel.equivalent_field \"[#{related_type.name} field]\", locally_named: \"#{local_need}\"`."
+            # The error is recorded above; exit the method with a single `nil` rather than a tuple.
+            return nil
+          end
+
+          if rollover_value_source
+            field_path = field_path_resolver.resolve_public_path(related_type, rollover_value_source) do |parent_field|
+              !parent_field.type.list?
+            end
+            field_path&.path_in_index
+          end
+        end
+
+        # Adds an error when the root type has an index that has not been flagged with `has_had_multiple_sources!`.
+        def validate_has_had_multiple_sources(errors)
+          root_type = resolved_chain.root_indexed_type
+          root_index_def = root_type.index_def
+          if root_index_def && !root_index_def.has_had_multiple_sources_flag
+            errors << "Type `#{root_type.name}` has nested `sourced_from` fields (via `#{object_type.name}.#{relationship.name}`) " \
+              "but its index `#{root_index_def.name}` has not been configured with `has_had_multiple_sources!`. 
" \ + "To resolve this, add `i.has_had_multiple_sources!` within the `t.index \"#{root_index_def.name}\"` block." + end + end + end + end + end +end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb index 95790c774..2c0dc5199 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb @@ -18,19 +18,25 @@ def self.new_normal_indexing_update_target( id_source:, top_level_fields_params:, routing_value_source:, - rollover_timestamp_value_source: + rollover_timestamp_value_source:, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {} ) SchemaArtifacts::RuntimeMetadata::UpdateTarget.new( type: type, relationship: relationship, script_id: INDEX_DATA_UPDATE_SCRIPT_ID, id_source: id_source, + routing_value_source: routing_value_source, + rollover_timestamp_value_source: rollover_timestamp_value_source, + top_level_fields_params: top_level_fields_params, + nested_sourced_fields_params: nested_sourced_fields_params, + nested_sourced_path_identifiers_params: nested_sourced_path_identifiers_params, + nested_sourced_paths: nested_sourced_paths, metadata_params: standard_metadata_params.merge({ "relationship" => SchemaArtifacts::RuntimeMetadata::StaticParam.new(value: relationship) - }), - top_level_fields_params: top_level_fields_params, - routing_value_source: routing_value_source, - rollover_timestamp_value_source: rollover_timestamp_value_source + }) ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb index 98dd547a7..085123d47 100644 --- 
a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb @@ -257,9 +257,9 @@ def override_runtime_metadata(**overrides) end # @private - def runtime_metadata(extra_update_targets) + def runtime_metadata(extra_update_targets, nested_sourced_paths: {}) SchemaArtifacts::RuntimeMetadata::ObjectType.new( - update_targets: derived_indexed_types.map(&:runtime_metadata_for_source_type) + [self_update_target].compact + extra_update_targets, + update_targets: derived_indexed_types.map(&:runtime_metadata_for_source_type) + [self_update_target(nested_sourced_paths: nested_sourced_paths)].compact + extra_update_targets, index_definition_names: [index_def&.name].compact, graphql_fields_by_name: runtime_metadata_graphql_fields_by_name, elasticgraph_category: nil, @@ -427,7 +427,7 @@ def initialize_has_indices @can_configure_index = false end - def self_update_target + def self_update_target(nested_sourced_paths: {}) return nil if abstract? || !root_document_type? # We exclude `id` from `top_level_fields_params` because `Indexer::Operator::Update` automatically includes @@ -450,7 +450,8 @@ def self_update_target # we'll need to change the runtime metadata here to have a map of these values, keyed by index # name. 
routing_value_source: index_runtime_metadata.route_with, - rollover_timestamp_value_source: index_runtime_metadata.rollover&.timestamp_field_path + rollover_timestamp_value_source: index_runtime_metadata.rollover&.timestamp_field_path, + nested_sourced_paths: nested_sourced_paths ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index 59a8c3891..00ff139b0 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -12,6 +12,8 @@ require "elastic_graph/schema_artifacts/artifacts_helper_methods" require "elastic_graph/schema_definition/indexing/event_envelope" require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" +require "elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver" +require "elastic_graph/schema_definition/indexing/nested_update_target_resolver" require "elastic_graph/schema_definition/indexing/relationship_resolver" require "elastic_graph/schema_definition/indexing/update_target_resolver" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" @@ -143,11 +145,20 @@ def build_dynamic_scripts end def build_runtime_metadata - extra_update_targets_by_object_type_name = identify_extra_update_targets_by_object_type_name + extra_update_targets_by_object_type_name, nested_sourced_paths_by_type_name = identify_extra_update_targets_by_object_type_name object_types_by_name = all_types .select { |t| t.respond_to?(:graphql_fields_by_name) } - .to_h { |type| [type.name, (_ = type).runtime_metadata(extra_update_targets_by_object_type_name.fetch(type.name) { [] })] } + .to_h do |type| + extra_targets = extra_update_targets_by_object_type_name.fetch(type.name) { [] } # : ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget] + metadata = if 
type.respond_to?(:own_index_def) + nested_config = nested_sourced_paths_by_type_name.fetch(type.name) { {} } # : ::Hash[::String, ::Array[::Hash[::String, untyped]]] + (_ = type).runtime_metadata(extra_targets, nested_sourced_paths: nested_config) + else + (_ = type).runtime_metadata(extra_targets) + end + [type.name, metadata] + end scalar_types_by_name = state.scalar_types_by_name.transform_values(&:runtime_metadata) @@ -182,13 +193,15 @@ def build_runtime_metadata # Builds a map, keyed by object type name, of extra `update_targets` that have been generated # from any fields that use `sourced_from` on other types. + # + # Returns a tuple of [update_targets_by_type_name, nested_sourced_paths_by_type_name]. def identify_extra_update_targets_by_object_type_name sourced_field_errors = [] # : ::Array[::String] relationship_errors = [] # : ::Array[::String] + extra_update_targets_by_type_name = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] + nested_sourced_paths_by_type = {} # : ::Hash[::String, ::Hash[::String, ::Array[::Hash[::String, untyped]]]] - state.object_types_by_name.except(*state.namespace_types_by_name.keys).values.each_with_object( - ::Hash.new { |h, k| h[k] = [] } # : ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] - ) do |object_type, accum| + state.object_types_by_name.except(*state.namespace_types_by_name.keys).values.each do |object_type| fields_with_sources_by_relationship_name = if object_type.own_index_def.nil? 
# only indexed types can have `sourced_from` fields, and resolving `fields_with_sources` on an unindexed union type @@ -224,7 +237,7 @@ def identify_extra_update_targets_by_object_type_name ) update_target, errors = update_target_resolver.resolve - accum[resolved_relationship.related_type.name] << update_target if update_target + extra_update_targets_by_type_name[resolved_relationship.related_type.name] << update_target if update_target sourced_field_errors.concat(errors) # Validate that has_had_multiple_sources! has been called when sourced_from is used @@ -237,20 +250,84 @@ def identify_extra_update_targets_by_object_type_name end end end - end.tap do - full_errors = [] # : ::Array[::String] - if sourced_field_errors.any? - full_errors << "Schema had #{sourced_field_errors.size} error(s) related to `sourced_from` fields:\n\n#{sourced_field_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" + # Process nested sourced_from fields on non-indexed types. + if object_type.own_index_def.nil? + identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, nested_sourced_paths_by_type, sourced_field_errors) end + end - if relationship_errors.any? - full_errors << "Schema had #{relationship_errors.size} error(s) related to relationship fields:\n\n#{relationship_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" - end + full_errors = [] # : ::Array[::String] + + if sourced_field_errors.any? + full_errors << "Schema had #{sourced_field_errors.size} error(s) related to `sourced_from` fields:\n\n#{sourced_field_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" + end + + if relationship_errors.any? + full_errors << "Schema had #{relationship_errors.size} error(s) related to relationship fields:\n\n#{relationship_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" + end + + unless full_errors.empty? 
+ raise Errors::SchemaError, full_errors.join("\n\n") + end + + [extra_update_targets_by_type_name, nested_sourced_paths_by_type] + end + + # Identifies update targets for sourced_from fields on non-indexed embedded types + # that use parent_relationship chains. + def identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, nested_sourced_paths_by_type, errors) + # Find relationships on this type that have parent_relationship configured + nested_relationships = object_type.relationships_by_name + .select { |_, rel| rel.parent_relationship_config } + + return if nested_relationships.empty? - unless full_errors.empty? - raise Errors::SchemaError, full_errors.join("\n\n") + # Find sourced_from fields on this type, grouped by relationship name + fields_with_sources_by_relationship_name = object_type + .indexing_fields_by_name_in_index.values + .reject { |f| f.source.nil? } + .group_by { |f| (_ = f.source).relationship_name } + + nested_relationships.each do |rel_name, relationship| + empty_fields = [] # : ::Array[SchemaElements::Field] + sourced_fields = fields_with_sources_by_relationship_name.fetch(rel_name) { empty_fields } + + next if sourced_fields.empty? + + # Resolve the chain from this type up to the root indexed type + chain_resolver = Indexing::NestedRelationshipChainResolver.new(schema_def_state: state) + resolved_chain, chain_errors = chain_resolver.resolve(relationship, object_type) + + if chain_errors.any? 
+ errors.concat(chain_errors) + next end + + # Resolve the update target + resolved_chain = _ = resolved_chain # : Indexing::ResolvedNestedChain + resolver = Indexing::NestedUpdateTargetResolver.new( + object_type: object_type, + relationship: relationship, + sourced_fields: sourced_fields, + resolved_chain: resolved_chain, + field_path_resolver: state.field_path_resolver, + schema_def_state: state + ) + + update_target, resolve_errors = resolver.resolve + errors.concat(resolve_errors) + + next unless update_target + + # Store on the source type + related_type_name = relationship.related_type.unwrap_non_null.name + extra_update_targets_by_type_name[related_type_name] << update_target + + # Record the path config for the root indexed type's self-update target. + root_type_name = resolved_chain.root_indexed_type.name + nested_sourced_paths_by_type[root_type_name] ||= {} + nested_sourced_paths_by_type[root_type_name].merge!(update_target.nested_sourced_paths) end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb index 29c7ec2f0..2bb5c9a06 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb @@ -37,24 +37,38 @@ module SchemaElements # end # end class Relationship < DelegateClass(Field) - # @dynamic related_type, hide_relationship_runtime_metadata, hide_relationship_runtime_metadata= + # @dynamic related_type, foreign_key, hide_relationship_runtime_metadata, hide_relationship_runtime_metadata=, parent_relationship_config, indexing_only # @return [ObjectType, InterfaceType, UnionType] the type this relationship relates to attr_reader :related_type + # @return [String] the foreign key field name (the `via` parameter) + # @private + attr_reader 
:foreign_key + # @private attr_accessor :hide_relationship_runtime_metadata + # @return [Hash, nil] configuration for parent relationship in a nested sourced_from chain + # @private + attr_reader :parent_relationship_config + + # @return [Boolean] true if this relationship is for indexing only (not exposed in GraphQL) # @private - def initialize(field, cardinality:, related_type:, foreign_key:, direction:) + attr_reader :indexing_only + + # @private + def initialize(field, cardinality:, related_type:, foreign_key:, direction:, indexing_only: false) super(field) self.hide_relationship_runtime_metadata = false @cardinality = cardinality @related_type = related_type @foreign_key = foreign_key @direction = direction + @indexing_only = indexing_only @equivalent_field_paths_by_local_path = {} @additional_filter = {} + @parent_relationship_config = nil end # Adds additional filter conditions to a relationship beyond the foreign key. @@ -136,6 +150,61 @@ def equivalent_field(path, locally_named: path) end end + # Indicates that this relationship chains through a parent relationship to reach the root indexed type. + # + # Use this API when defining relationships on embedded (non-indexed) types that need to use `sourced_from` + # on their fields. By chaining relationships through parent types, ElasticGraph can resolve the path from + # the nested type up to the root indexed type and properly update nested fields when source events arrive. + # + # @param parent_type_name [String] name of the parent type in the nesting hierarchy + # @param parent_relationship_name [String] name of the relationship on the parent type + # @return [void] + # + # @example Define a nested sourced_from relationship chain + # ElasticGraph.define_schema do |schema| + # schema.object_type "Team" do |t| + # t.field "id", "ID!" 
+ # t.field "seasons", "[Season!]" do |f| + # f.mapping type: "nested" + # end + # t.relates_to_many "gameScores", "GameScore", via: "teamId", dir: :in, indexing_only: true + # t.index "teams" do |i| + # i.has_had_multiple_sources! + # end + # end + # + # schema.object_type "Season" do |t| + # t.field "id", "ID" + # t.field "games", "[Game!]" do |f| + # f.mapping type: "nested" + # end + # t.relates_to_many "seasonGameScores", "GameScore", via: "seasonId", dir: :in, indexing_only: true do |r| + # r.parent_relationship "Team", "gameScores" + # end + # end + # + # schema.object_type "Game" do |t| + # t.field "id", "ID" + # t.field "score", "Score" do |f| + # f.sourced_from "gameScore", "score" + # end + # t.relates_to_one "gameScore", "GameScore", via: "gameId", dir: :in, indexing_only: true do |r| + # r.parent_relationship "Season", "seasonGameScores" + # end + # end + # end + def parent_relationship(parent_type_name, parent_relationship_name) + if @parent_relationship_config + raise Errors::SchemaError, "`parent_relationship` has been called multiple times on `#{parent_type.name}.#{name}`, " \ + "but each relationship can have only one `parent_relationship`." + end + + @parent_relationship_config = { + parent_type_name: parent_type_name, + parent_relationship_name: parent_relationship_name + } + end + # Gets the `routing_value_source` from this relationship for the given `index`, based on the configured # routing used by `index` and the configured equivalent fields. 
# diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb index 4fc5f9c2f..692281b2a 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb @@ -574,7 +574,8 @@ def relates_to(field_name, type, via:, dir:, foreign_key_type:, cardinality:, re cardinality: cardinality, related_type: schema_def_state.type_ref(related_type).to_final_form, foreign_key: via, - direction: dir + direction: dir, + indexing_only: indexing_only ) field.relationship = relationship diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless index b618f13fa..ede92079b 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless @@ -1,5 +1,81 @@ -// --- Helper Functions --- // -void setup(Map source, String relationship, Map counts) { +// ============================================================ +// Helper Functions +// ============================================================ + +// Builds a composite nested element key from path segments. +// List segments contribute their matched identifier value; object segments contribute their field name. 
+String buildNestedElementKey(List pathSegments, Map pathIdentifiers) {
+  List parts = new ArrayList();
+  for (Map segment : pathSegments) {
+    if (segment.containsKey("list")) {
+      parts.add(pathIdentifiers[segment.source_field]); // identifier value looked up under the segment's `source_field` name
+    } else {
+      parts.add(segment.get("object"));
+    }
+  }
+  return String.join(":", parts); // one part per path segment, so the key can be split back by position
+}
+
+// Looks up path segments for the given relationship and builds the nested element key.
+// Returns "" if no path segments are configured (i.e., this is not a nested sourced event).
+String buildNestedElementKeyForRelationship(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) {
+  List pathSegments = (List) nestedSourcedPaths.get(relationship);
+  if (pathSegments == null) {
+    return "";
+  }
+  return buildNestedElementKey(pathSegments, pathIdentifiers);
+}
+
+// Splits a composite nested element key (parts joined with ":") back into a list of parts.
+List splitNestedElementKey(String nestedElementKey) {
+  return Arrays.asList(nestedElementKey.splitOnToken(":"));
+}
+
+// Finds the first element in a list where element[matchField] equals matchValue. Returns null if not found.
+def findInList(List elements, String matchField, String matchValue) {
+  for (Map element : elements) {
+    if (matchValue.equals(element[matchField])) {
+      return element;
+    }
+  }
+  return null;
+}
+
+// Navigates from `source` through `pathSegments` to find the target nested element.
+// Returns the matched element, or null if the path doesn't exist or no match is found.
+def navigateToNestedElement(Map source, List pathSegments, List keyParts) {
+  Map current = source;
+
+  for (int i = 0; i < pathSegments.size(); i++) {
+    Map segment = (Map) pathSegments.get(i);
+    boolean isList = segment.containsKey("list");
+    String field = isList ? 
(String) segment.get("list") : (String) segment.get("object");
+
+    if (current.get(field) == null) { // key missing OR stored as an explicit null (e.g. a nullable list): nothing to descend into
+      return null;
+    }
+
+    if (isList) {
+      current = (Map) findInList((List) current.get(field), (String) segment.get("match_field"), (String) keyParts.get(i));
+    } else {
+      current = (Map) current.get(field);
+    }
+
+    if (current == null) {
+      return null;
+    }
+  }
+
+  return current;
+}
+
+
+// ============================================================
+// Main Functions
+// ============================================================
+
+// Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data).
+void setup(Map source, String relationship, String nestedElementKey, Map counts) {
   if (source.__sources == null) {
     source.__sources = [];
   }
@@ -12,38 +88,66 @@ void setup(Map source, String relationship, Map counts) {
     source.__versions[relationship] = [:];
   }
 
+  if (!nestedElementKey.isEmpty() && source.__versions[relationship][nestedElementKey] == null) {
+    source.__versions[relationship][nestedElementKey] = [:];
+  }
+
+  if (!nestedElementKey.isEmpty()) {
+    if (source.__nested_sourced_data == null) {
+      source.__nested_sourced_data = [:];
+    }
+    if (source.__nested_sourced_data[relationship] == null) {
+      source.__nested_sourced_data[relationship] = [:];
+    }
+  }
+
   if (counts != null && source.__counts == null) {
     source.__counts = [:];
   }
 }
 
-void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion) {
-  Map relationshipVersionsMap = source.__versions.get(relationship);
-  List previousSourceIdsForRelationship = relationshipVersionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList());
+// Validates that this event is allowed: no relationship mutation and no stale version.
+void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String nestedElementKey) {
+  // For nested events, validate per-element. 
For top-level events, validate per-relationship. + Map versionsMap = nestedElementKey.isEmpty() + ? source.__versions[relationship] + : source.__versions[relationship][nestedElementKey]; - if (previousSourceIdsForRelationship.size() > 0) { - throw new IllegalArgumentException( - "Cannot update document " + id + " " + - "with data from related " + relationship + " " + sourceId + " " + - "because the related " + relationship + " has apparently changed (was: " + previousSourceIdsForRelationship + "), " + - "but mutations of relationships used with `sourced_from` are not supported because " + - "allowing it could break ElasticGraph's out-of-order processing guarantees." - ); + // Check that no other source ID has previously written to this target. + List previousSourceIds = versionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); + if (previousSourceIds.size() > 0) { + if (nestedElementKey.isEmpty()) { + throw new IllegalArgumentException( + "Cannot update document " + id + " " + + "with data from related " + relationship + " " + sourceId + " " + + "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + + "but mutations of relationships used with `sourced_from` are not supported because " + + "allowing it could break ElasticGraph's out-of-order processing guarantees." + ); + } else { + throw new IllegalArgumentException( + "Cannot update nested element [" + nestedElementKey + "] on document " + id + " " + + "with data from " + relationship + " " + sourceId + " " + + "because this element was previously sourced from a different event (" + previousSourceIds + "). " + + "Each nested element can only be sourced from one source document." + ); + } } - Number maybeDocVersion = relationshipVersionsMap.get(sourceId); - - // Our JSON schema requires event versions to be non-negative, so we can safely use Long.MIN_VALUE as a stand-in when the value is null. 
+ // Check that the event version is newer than what we've already seen. + Number maybeDocVersion = versionsMap.get(sourceId); long docVersion = maybeDocVersion == null ? Long.MIN_VALUE : maybeDocVersion.longValue(); if (docVersion >= eventVersion) { + String target = nestedElementKey.isEmpty() ? id : id + "/" + nestedElementKey; throw new IllegalArgumentException("ElasticGraph update was a no-op: [" + - id + "]: version conflict, current version [" + + target + "]: version conflict, current version [" + docVersion + "] is higher or equal to the one provided [" + eventVersion + "]"); } } +// Applies top-level fields to the document via putAll, and merges __counts. void applyTopLevelFields(Map source, String id, Map topLevelFields, Map counts) { source.id = id; source.putAll(topLevelFields); @@ -53,8 +157,53 @@ void applyTopLevelFields(Map source, String id, Map topLevelFields, Map counts) } } -void recordSource(Map source, String relationship, String sourceId, long eventVersion) { - source.__versions[relationship][sourceId] = eventVersion; +// Stores nested sourced fields in the __nested_sourced_data buffer for later application. +void storeNestedSourcedData(Map source, String relationship, Map nestedSourcedFields, String nestedElementKey) { + if (nestedSourcedFields.isEmpty()) { + return; + } + + source.__nested_sourced_data[relationship][nestedElementKey] = nestedSourcedFields; +} + +// Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. +// Called after every event so that after a self-event's putAll overwrites nested arrays, +// the buffered data gets re-applied. 
+void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { + if (source.__nested_sourced_data == null || nestedSourcedPaths.isEmpty()) { + return; + } + + for (sourcedEntry in source.__nested_sourced_data.entrySet()) { + String sourcedRelationship = sourcedEntry.getKey(); + Map elementDataByKey = (Map) sourcedEntry.getValue(); + + List pathSegments = (List) nestedSourcedPaths.get(sourcedRelationship); + if (pathSegments == null) { + continue; + } + + for (elementEntry in elementDataByKey.entrySet()) { + List keyParts = splitNestedElementKey((String) elementEntry.getKey()); + if (keyParts.size() != pathSegments.size()) { + continue; + } + + Map target = (Map) navigateToNestedElement(source, pathSegments, keyParts); + if (target != null) { + target.putAll((Map) elementEntry.getValue()); + } + } + } +} + +// Records the event version in __versions and adds the relationship to __sources. +void recordSource(Map source, String relationship, String sourceId, long eventVersion, String nestedElementKey) { + if (nestedElementKey.isEmpty()) { + source.__versions[relationship][sourceId] = eventVersion; + } else { + source.__versions[relationship][nestedElementKey][sourceId] = eventVersion; + } // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list. // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time. 
@@ -71,15 +220,25 @@ void recordSource(Map source, String relationship, String sourceId, long eventVe } } -// --- Main script body --- // +// ============================================================ +// Main Execution +// ============================================================ + Map source = ctx._source; String id = params.id; String relationship = params.relationship; String sourceId = params.sourceId; -long eventVersion = (long) params.version; // Cast to long since JSON parses numbers as doubles +long eventVersion = (long) params.version; Map counts = params.__counts; +Map nestedSourcedFields = params.nestedSourcedFields; +Map nestedSourcedPathIdentifiers = params.nestedSourcedPathIdentifiers; +Map nestedSourcedPaths = params.nestedSourcedPaths; + +String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); -setup(source, relationship, counts); -validateSource(source, id, relationship, sourceId, eventVersion); +setup(source, relationship, nestedElementKey, counts); +validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); applyTopLevelFields(source, id, params.topLevelFields, counts); -recordSource(source, relationship, sourceId, eventVersion); +storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); +applyNestedSourcedData(source, nestedSourcedPaths); +recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs index e40b2fa50..10b39933b 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/factory.rbs @@ -112,7 +112,8 @@ module ElasticGraph cardinality: SchemaElements::Relationship::cardinality, related_type: 
SchemaElements::TypeReference, foreign_key: ::String, - direction: SchemaElements::foreignKeyDirection + direction: SchemaElements::foreignKeyDirection, + ?indexing_only: bool ) -> SchemaElements::Relationship @@relationship_new: ::Method diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs new file mode 100644 index 000000000..01a70b7b6 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs @@ -0,0 +1,34 @@ +module ElasticGraph + module SchemaDefinition + module Indexing + class NestedRelationshipChainResolver + @schema_def_state: State + + def initialize: (schema_def_state: State) -> void + def resolve: (SchemaElements::Relationship, untyped) -> [ResolvedNestedChain?, ::Array[::String]] + + private + + def find_embedding_field: (untyped, untyped, ::Array[::String]) -> SchemaElements::Field? 
+ def rel_description: (untyped, SchemaElements::Relationship) -> ::String + end + + class ResolvedNestedChain + attr_reader root_indexed_type: untyped + attr_reader path_segments: ::Array[PathSegment] + attr_reader root_relationship: SchemaElements::Relationship + + def initialize: (root_indexed_type: untyped, path_segments: ::Array[PathSegment], root_relationship: SchemaElements::Relationship) -> void + end + + class PathSegment + attr_reader parent_type: untyped + attr_reader embedding_field: SchemaElements::Field + attr_reader match_field: ::String + attr_reader source_field: ::String + + def initialize: (parent_type: untyped, embedding_field: SchemaElements::Field, match_field: ::String, source_field: ::String) -> void + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs new file mode 100644 index 000000000..c3223a3d3 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs @@ -0,0 +1,43 @@ +module ElasticGraph + module SchemaDefinition + module Indexing + class NestedUpdateTargetResolver + @object_type: untyped + @relationship: SchemaElements::Relationship + @sourced_fields: ::Array[SchemaElements::Field] + @resolved_chain: ResolvedNestedChain + @field_path_resolver: SchemaElements::FieldPath::Resolver + @schema_def_state: State + @related_type: untyped + + def initialize: ( + object_type: untyped, + relationship: SchemaElements::Relationship, + sourced_fields: ::Array[SchemaElements::Field], + resolved_chain: ResolvedNestedChain, + field_path_resolver: SchemaElements::FieldPath::Resolver, + schema_def_state: State + ) -> void + + def resolve: () -> [SchemaArtifacts::RuntimeMetadata::UpdateTarget?, ::Array[::String]] + + private + + attr_reader object_type: untyped + attr_reader 
relationship: SchemaElements::Relationship + attr_reader sourced_fields: ::Array[SchemaElements::Field] + attr_reader resolved_chain: ResolvedNestedChain + attr_reader field_path_resolver: SchemaElements::FieldPath::Resolver + attr_reader schema_def_state: State + + def related_type: () -> untyped + def resolve_nested_sourced_fields_params: (::Array[::String]) -> SchemaArtifacts::RuntimeMetadata::paramsHash + def build_path_identifier_params: () -> SchemaArtifacts::RuntimeMetadata::paramsHash + def build_nested_sourced_paths: () -> ::Array[::Hash[::String, untyped]] + def resolve_routing: (::Array[::String]) -> untyped + def resolve_rollover: (::Array[::String]) -> untyped + def validate_has_had_multiple_sources: (::Array[::String]) -> void + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs index 4375b8138..b0687c40d 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs @@ -8,7 +8,10 @@ module ElasticGraph id_source: ::String, top_level_fields_params: SchemaArtifacts::RuntimeMetadata::paramsHash, routing_value_source: ::String?, - rollover_timestamp_value_source: ::String? 
+ rollover_timestamp_value_source: ::String?, + ?nested_sourced_fields_params: SchemaArtifacts::RuntimeMetadata::paramsHash, + ?nested_sourced_path_identifiers_params: SchemaArtifacts::RuntimeMetadata::paramsHash, + ?nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]] ) -> SchemaArtifacts::RuntimeMetadata::UpdateTarget private diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs index ad0c91043..0b4bc8be6 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs @@ -13,7 +13,7 @@ module ElasticGraph def index: (::String, ::Hash[::Symbol, ::String | ::Integer]) ?{ (Indexing::Index) -> void } -> Indexing::Index def resolve_fields_with: (::Symbol?) -> void def override_runtime_metadata: (**untyped) -> void - def runtime_metadata: (::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]) -> SchemaArtifacts::RuntimeMetadata::ObjectType + def runtime_metadata: (::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget], ?nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]]) -> SchemaArtifacts::RuntimeMetadata::ObjectType def derived_indexed_types: () -> ::Array[Indexing::DerivedIndexedType] def derive_indexed_type_fields: ( ::String, diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs index c35f971f2..fc0f8598f 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs @@ -39,7 +39,8 @@ module ElasticGraph def generate_datastore_config: () -> ::Hash[::String, untyped] def build_dynamic_scripts: () -> 
::Array[Scripting::Script] def build_runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::Schema - def identify_extra_update_targets_by_object_type_name: () -> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] + def identify_extra_update_targets_by_object_type_name: () -> [::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]], ::Hash[::String, ::Hash[::String, ::Array[::Hash[::String, untyped]]]]] + def identify_nested_sourced_update_targets: (untyped, ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]], ::Hash[::String, ::Hash[::String, ::Array[::Hash[::String, untyped]]]], ::Array[::String]) -> void def generate_sdl: () -> ::String def build_public_json_schema: () -> ::Hash[::String, untyped] def json_schema_indexing_field_types_by_name: () -> ::Hash[::String, Indexing::_FieldType] diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs index ee1d03122..c2262acd3 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs @@ -8,25 +8,32 @@ module ElasticGraph class Relationship < RelationshipSupertype type cardinality = :one | :many attr_reader related_type: TypeReference + attr_reader foreign_key: ::String attr_accessor hide_relationship_runtime_metadata: bool + attr_reader parent_relationship_config: ::Hash[::Symbol, ::String]? + attr_reader indexing_only: bool @cardinality: cardinality @related_type: TypeReference @foreign_key: ::String @direction: foreignKeyDirection + @indexing_only: bool @equivalent_field_paths_by_local_path: ::Hash[::String, ::String] @additional_filter: ::Hash[::String, untyped] + @parent_relationship_config: ::Hash[::Symbol, ::String]? 
def initialize: ( Field, cardinality: cardinality, related_type: TypeReference, foreign_key: ::String, - direction: foreignKeyDirection + direction: foreignKeyDirection, + ?indexing_only: bool ) -> void def additional_filter: (::Hash[::String, untyped]) -> void def equivalent_field: (::String, ?locally_named: ::String) -> void + def parent_relationship: (::String, ::String) -> void def routing_value_source_for_index: [T] (Indexing::Index) { (::String) -> bot } -> ::String? def rollover_timestamp_value_source_for_index: [T] (Indexing::Index) { (::String) -> bot } -> ::String? def validate_equivalent_fields: (SchemaElements::FieldPath::Resolver) -> ::Array[::String] diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb index 0e0922eb6..7d9523044 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb @@ -347,7 +347,7 @@ module SchemaDefinition mapping = generate_mapping.call(graphql_only: true) # Verify that it does not have a property for `size` or `options.size` - expect(mapping.fetch("properties").keys).to contain_exactly("id", "options", "__sources", "__versions", "__typename") + expect(mapping.fetch("properties").keys).to contain_exactly("id", "options", "__nested_sourced_data", "__sources", "__versions", "__typename") expect(mapping.fetch("properties")).to include({ "id" => {"type" => "keyword"}, "options" => { diff --git a/elasticgraph-support/lib/elastic_graph/constants.rb b/elasticgraph-support/lib/elastic_graph/constants.rb index f0449d88c..73d765794 100644 --- a/elasticgraph-support/lib/elastic_graph/constants.rb +++ 
b/elasticgraph-support/lib/elastic_graph/constants.rb @@ -140,7 +140,7 @@ module ElasticGraph # # Note: this constant is automatically kept up-to-date by our `schema_artifacts:dump` rake task. # @private - INDEX_DATA_UPDATE_SCRIPT_ID = "update_index_data_b9e2b105d736d8d16ae269ab6ff81e4d" + INDEX_DATA_UPDATE_SCRIPT_ID = "update_index_data_cd5bc011d0885b2dfd793c9f119f068d" # When an update script has a no-op result we often want to communicate more information about # why it was a no-op back to ElatsicGraph from the script. The only way to do that is to throw diff --git a/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb b/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb index 705031d12..f05a1ecd1 100644 --- a/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb +++ b/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb @@ -72,6 +72,9 @@ def derived_indexing_update_target_with( routing_value_source: routing_value_source, rollover_timestamp_value_source: rollover_timestamp_value_source, top_level_fields_params: top_level_fields_params, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {}, metadata_params: metadata_params ) end @@ -93,6 +96,9 @@ def normal_indexing_update_target_with( routing_value_source: routing_value_source, rollover_timestamp_value_source: rollover_timestamp_value_source, top_level_fields_params: top_level_fields_params, + nested_sourced_fields_params: {}, + nested_sourced_path_identifiers_params: {}, + nested_sourced_paths: {}, metadata_params: metadata_params ) end From 5c503ba45b5a8c960af0ecb881055bf7c8d68e5c Mon Sep 17 00:00:00 2001 From: ellisandrews-toast Date: Thu, 28 May 2026 13:56:03 -0400 Subject: [PATCH 2/4] Store nested sourced path config in document instead of threading through self-update target --- config/schema/artifacts/datastore_config.yaml | 27 ++++++------- 
config/schema/artifacts/runtime_metadata.yaml | 38 +++++++++---------- .../datastore_config.yaml | 27 ++++++------- .../runtime_metadata.yaml | 38 +++++++++---------- .../schema_definition/mixins/has_indices.rb | 9 ++--- .../schema_definition/results.rb | 27 +++---------- .../scripts/update/index_data.painless | 25 ++++++------ .../schema_definition/mixins/has_indices.rbs | 2 +- .../schema_definition/results.rbs | 4 +- .../lib/elastic_graph/constants.rb | 2 +- 10 files changed, 92 insertions(+), 107 deletions(-) diff --git a/config/schema/artifacts/datastore_config.yaml b/config/schema/artifacts/datastore_config.yaml index 8475cda25..be858b3df 100644 --- a/config/schema/artifacts/datastore_config.yaml +++ b/config/schema/artifacts/datastore_config.yaml @@ -2195,7 +2195,7 @@ scripts: // No timestamp values matched the params, so return `false`. return false; - update_index_data_cd5bc011d0885b2dfd793c9f119f068d: + update_index_data_59be3b4ab537ea412a3ead0408fb4137: context: update script: lang: painless @@ -2277,7 +2277,7 @@ scripts: // ============================================================ // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). 
- void setup(Map source, String relationship, String nestedElementKey, Map counts) { + void setup(Map source, String relationship, String nestedElementKey, Map nestedSourcedPaths, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -2299,7 +2299,7 @@ scripts: source.__nested_sourced_data = [:]; } if (source.__nested_sourced_data[relationship] == null) { - source.__nested_sourced_data[relationship] = [:]; + source.__nested_sourced_data[relationship] = ["path_segments": nestedSourcedPaths.get(relationship), "data": [:]]; } } @@ -2365,27 +2365,28 @@ scripts: return; } - source.__nested_sourced_data[relationship][nestedElementKey] = nestedSourcedFields; + ((Map) source.__nested_sourced_data[relationship]).get("data").put(nestedElementKey, nestedSourcedFields); } // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. + // Reads path config from the document itself — no external params needed. // Called after every event so that after a self-event's putAll overwrites nested arrays, // the buffered data gets re-applied. 
- void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { - if (source.__nested_sourced_data == null || nestedSourcedPaths.isEmpty()) { + void applyNestedSourcedData(Map source) { + if (source.__nested_sourced_data == null) { return; } for (sourcedEntry in source.__nested_sourced_data.entrySet()) { - String sourcedRelationship = sourcedEntry.getKey(); - Map elementDataByKey = (Map) sourcedEntry.getValue(); + Map relationshipData = (Map) sourcedEntry.getValue(); + List pathSegments = (List) relationshipData.get("path_segments"); + Map dataByKey = (Map) relationshipData.get("data"); - List pathSegments = (List) nestedSourcedPaths.get(sourcedRelationship); - if (pathSegments == null) { + if (pathSegments == null || dataByKey == null) { continue; } - for (elementEntry in elementDataByKey.entrySet()) { + for (elementEntry in dataByKey.entrySet()) { List keyParts = splitNestedElementKey((String) elementEntry.getKey()); if (keyParts.size() != pathSegments.size()) { continue; @@ -2438,9 +2439,9 @@ scripts: String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); - setup(source, relationship, nestedElementKey, counts); + setup(source, relationship, nestedElementKey, nestedSourcedPaths, counts); validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); applyTopLevelFields(source, id, params.topLevelFields, counts); storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); - applyNestedSourcedData(source, nestedSourcedPaths); + applyNestedSourcedData(source); recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); diff --git a/config/schema/artifacts/runtime_metadata.yaml b/config/schema/artifacts/runtime_metadata.yaml index 49c610e3b..2b4cc29a2 100644 --- a/config/schema/artifacts/runtime_metadata.yaml +++ b/config/schema/artifacts/runtime_metadata.yaml @@ -3085,7 +3085,7 @@ object_types_by_name: cardinality: one 
relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: full_address: cardinality: one @@ -3275,7 +3275,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -3314,7 +3314,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -3414,7 +3414,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: created_at: cardinality: one @@ -3724,7 +3724,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -3897,7 +3897,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: created_at: cardinality: one @@ -4382,7 +4382,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: ceo: cardinality: one @@ -4540,7 +4540,7 @@ 
object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: created_at: cardinality: one @@ -5501,7 +5501,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -5700,7 +5700,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -5767,7 +5767,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: active: cardinality: one @@ -6304,7 +6304,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: name: cardinality: one @@ -6654,7 +6654,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: formed_on routing_value_source: league - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: country_code: cardinality: one @@ -7754,7 +7754,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: created_at routing_value_source: workspace_id2 - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: 
update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: amount_cents: cardinality: one @@ -7832,7 +7832,7 @@ object_types_by_name: version: cardinality: one relationship: widget - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: widget_cost: cardinality: one @@ -8062,7 +8062,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: introduced_on routing_value_source: primary_continent - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: details: cardinality: one @@ -9107,7 +9107,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: name: cardinality: one @@ -9129,7 +9129,7 @@ object_types_by_name: relationship: workspace rollover_timestamp_value_source: widget.created_at routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: workspace_name: cardinality: one @@ -9364,4 +9364,4 @@ static_script_ids_by_scoped_name: field/as_day_of_week: field_as_day_of_week_f2b5c7d9e8f75bf2457b52412bfb6537 field/as_time_of_day: field_as_time_of_day_ed82aba44fc66bff5635bec4305c1c66 filter/by_time_of_day: filter_by_time_of_day_ea12d0561b24961789ab68ed38435612 - update/index_data: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + update/index_data: update_index_data_59be3b4ab537ea412a3ead0408fb4137 diff --git a/config/schema/artifacts_with_apollo/datastore_config.yaml b/config/schema/artifacts_with_apollo/datastore_config.yaml index 8475cda25..be858b3df 100644 --- a/config/schema/artifacts_with_apollo/datastore_config.yaml +++ 
b/config/schema/artifacts_with_apollo/datastore_config.yaml @@ -2195,7 +2195,7 @@ scripts: // No timestamp values matched the params, so return `false`. return false; - update_index_data_cd5bc011d0885b2dfd793c9f119f068d: + update_index_data_59be3b4ab537ea412a3ead0408fb4137: context: update script: lang: painless @@ -2277,7 +2277,7 @@ scripts: // ============================================================ // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). - void setup(Map source, String relationship, String nestedElementKey, Map counts) { + void setup(Map source, String relationship, String nestedElementKey, Map nestedSourcedPaths, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -2299,7 +2299,7 @@ scripts: source.__nested_sourced_data = [:]; } if (source.__nested_sourced_data[relationship] == null) { - source.__nested_sourced_data[relationship] = [:]; + source.__nested_sourced_data[relationship] = ["path_segments": nestedSourcedPaths.get(relationship), "data": [:]]; } } @@ -2365,27 +2365,28 @@ scripts: return; } - source.__nested_sourced_data[relationship][nestedElementKey] = nestedSourcedFields; + ((Map) source.__nested_sourced_data[relationship]).get("data").put(nestedElementKey, nestedSourcedFields); } // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. + // Reads path config from the document itself — no external params needed. // Called after every event so that after a self-event's putAll overwrites nested arrays, // the buffered data gets re-applied. 
- void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { - if (source.__nested_sourced_data == null || nestedSourcedPaths.isEmpty()) { + void applyNestedSourcedData(Map source) { + if (source.__nested_sourced_data == null) { return; } for (sourcedEntry in source.__nested_sourced_data.entrySet()) { - String sourcedRelationship = sourcedEntry.getKey(); - Map elementDataByKey = (Map) sourcedEntry.getValue(); + Map relationshipData = (Map) sourcedEntry.getValue(); + List pathSegments = (List) relationshipData.get("path_segments"); + Map dataByKey = (Map) relationshipData.get("data"); - List pathSegments = (List) nestedSourcedPaths.get(sourcedRelationship); - if (pathSegments == null) { + if (pathSegments == null || dataByKey == null) { continue; } - for (elementEntry in elementDataByKey.entrySet()) { + for (elementEntry in dataByKey.entrySet()) { List keyParts = splitNestedElementKey((String) elementEntry.getKey()); if (keyParts.size() != pathSegments.size()) { continue; @@ -2438,9 +2439,9 @@ scripts: String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); - setup(source, relationship, nestedElementKey, counts); + setup(source, relationship, nestedElementKey, nestedSourcedPaths, counts); validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); applyTopLevelFields(source, id, params.topLevelFields, counts); storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); - applyNestedSourcedData(source, nestedSourcedPaths); + applyNestedSourcedData(source); recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); diff --git a/config/schema/artifacts_with_apollo/runtime_metadata.yaml b/config/schema/artifacts_with_apollo/runtime_metadata.yaml index c6ac4a47d..50f8fb739 100644 --- a/config/schema/artifacts_with_apollo/runtime_metadata.yaml +++ b/config/schema/artifacts_with_apollo/runtime_metadata.yaml @@ -3114,7 +3114,7 @@ 
object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: full_address: cardinality: one @@ -3304,7 +3304,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -3343,7 +3343,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -3464,7 +3464,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: created_at: cardinality: one @@ -3826,7 +3826,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -3999,7 +3999,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: created_at: cardinality: one @@ -4484,7 +4484,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: ceo: cardinality: one @@ 
-4642,7 +4642,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: created_at: cardinality: one @@ -5624,7 +5624,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -5823,7 +5823,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: __typename: cardinality: one @@ -5890,7 +5890,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: active: cardinality: one @@ -6433,7 +6433,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: name: cardinality: one @@ -6783,7 +6783,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: formed_on routing_value_source: league - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: country_code: cardinality: one @@ -7883,7 +7883,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: created_at routing_value_source: workspace_id2 - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: 
update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: amount_cents: cardinality: one @@ -7961,7 +7961,7 @@ object_types_by_name: version: cardinality: one relationship: widget - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: widget_cost: cardinality: one @@ -8191,7 +8191,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: introduced_on routing_value_source: primary_continent - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: details: cardinality: one @@ -9236,7 +9236,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: name: cardinality: one @@ -9258,7 +9258,7 @@ object_types_by_name: relationship: workspace rollover_timestamp_value_source: widget.created_at routing_value_source: id - script_id: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 top_level_fields_params: workspace_name: cardinality: one @@ -9536,4 +9536,4 @@ static_script_ids_by_scoped_name: field/as_day_of_week: field_as_day_of_week_f2b5c7d9e8f75bf2457b52412bfb6537 field/as_time_of_day: field_as_time_of_day_ed82aba44fc66bff5635bec4305c1c66 filter/by_time_of_day: filter_by_time_of_day_ea12d0561b24961789ab68ed38435612 - update/index_data: update_index_data_cd5bc011d0885b2dfd793c9f119f068d + update/index_data: update_index_data_59be3b4ab537ea412a3ead0408fb4137 diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb index 085123d47..98dd547a7 100644 --- 
a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb @@ -257,9 +257,9 @@ def override_runtime_metadata(**overrides) end # @private - def runtime_metadata(extra_update_targets, nested_sourced_paths: {}) + def runtime_metadata(extra_update_targets) SchemaArtifacts::RuntimeMetadata::ObjectType.new( - update_targets: derived_indexed_types.map(&:runtime_metadata_for_source_type) + [self_update_target(nested_sourced_paths: nested_sourced_paths)].compact + extra_update_targets, + update_targets: derived_indexed_types.map(&:runtime_metadata_for_source_type) + [self_update_target].compact + extra_update_targets, index_definition_names: [index_def&.name].compact, graphql_fields_by_name: runtime_metadata_graphql_fields_by_name, elasticgraph_category: nil, @@ -427,7 +427,7 @@ def initialize_has_indices @can_configure_index = false end - def self_update_target(nested_sourced_paths: {}) + def self_update_target return nil if abstract? || !root_document_type? # We exclude `id` from `top_level_fields_params` because `Indexer::Operator::Update` automatically includes @@ -450,8 +450,7 @@ def self_update_target(nested_sourced_paths: {}) # we'll need to change the runtime metadata here to have a map of these values, keyed by index # name. 
routing_value_source: index_runtime_metadata.route_with, - rollover_timestamp_value_source: index_runtime_metadata.rollover&.timestamp_field_path, - nested_sourced_paths: nested_sourced_paths + rollover_timestamp_value_source: index_runtime_metadata.rollover&.timestamp_field_path ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index 00ff139b0..a027ef89e 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -145,20 +145,11 @@ def build_dynamic_scripts end def build_runtime_metadata - extra_update_targets_by_object_type_name, nested_sourced_paths_by_type_name = identify_extra_update_targets_by_object_type_name + extra_update_targets_by_object_type_name = identify_extra_update_targets_by_object_type_name object_types_by_name = all_types .select { |t| t.respond_to?(:graphql_fields_by_name) } - .to_h do |type| - extra_targets = extra_update_targets_by_object_type_name.fetch(type.name) { [] } # : ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget] - metadata = if type.respond_to?(:own_index_def) - nested_config = nested_sourced_paths_by_type_name.fetch(type.name) { {} } # : ::Hash[::String, ::Array[::Hash[::String, untyped]]] - (_ = type).runtime_metadata(extra_targets, nested_sourced_paths: nested_config) - else - (_ = type).runtime_metadata(extra_targets) - end - [type.name, metadata] - end + .to_h { |type| [type.name, (_ = type).runtime_metadata(extra_update_targets_by_object_type_name.fetch(type.name) { [] })] } scalar_types_by_name = state.scalar_types_by_name.transform_values(&:runtime_metadata) @@ -193,13 +184,10 @@ def build_runtime_metadata # Builds a map, keyed by object type name, of extra `update_targets` that have been generated # from any fields that use `sourced_from` on other types. 
- # - # Returns a tuple of [update_targets_by_type_name, nested_sourced_paths_by_type_name]. def identify_extra_update_targets_by_object_type_name sourced_field_errors = [] # : ::Array[::String] relationship_errors = [] # : ::Array[::String] extra_update_targets_by_type_name = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] - nested_sourced_paths_by_type = {} # : ::Hash[::String, ::Hash[::String, ::Array[::Hash[::String, untyped]]]] state.object_types_by_name.except(*state.namespace_types_by_name.keys).values.each do |object_type| fields_with_sources_by_relationship_name = @@ -253,7 +241,7 @@ def identify_extra_update_targets_by_object_type_name # Process nested sourced_from fields on non-indexed types. if object_type.own_index_def.nil? - identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, nested_sourced_paths_by_type, sourced_field_errors) + identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, sourced_field_errors) end end @@ -271,12 +259,12 @@ def identify_extra_update_targets_by_object_type_name raise Errors::SchemaError, full_errors.join("\n\n") end - [extra_update_targets_by_type_name, nested_sourced_paths_by_type] + extra_update_targets_by_type_name end # Identifies update targets for sourced_from fields on non-indexed embedded types # that use parent_relationship chains. 
- def identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, nested_sourced_paths_by_type, errors) + def identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, errors) # Find relationships on this type that have parent_relationship configured nested_relationships = object_type.relationships_by_name .select { |_, rel| rel.parent_relationship_config } @@ -323,11 +311,6 @@ def identify_nested_sourced_update_targets(object_type, extra_update_targets_by_ # Store on the source type related_type_name = relationship.related_type.unwrap_non_null.name extra_update_targets_by_type_name[related_type_name] << update_target - - # Record the path config for the root indexed type's self-update target. - root_type_name = resolved_chain.root_indexed_type.name - nested_sourced_paths_by_type[root_type_name] ||= {} - nested_sourced_paths_by_type[root_type_name].merge!(update_target.nested_sourced_paths) end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless index ede92079b..4705d6681 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless @@ -75,7 +75,7 @@ def navigateToNestedElement(Map source, List pathSegments, List keyParts) { // ============================================================ // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). 
-void setup(Map source, String relationship, String nestedElementKey, Map counts) { +void setup(Map source, String relationship, String nestedElementKey, Map nestedSourcedPaths, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -97,7 +97,7 @@ void setup(Map source, String relationship, String nestedElementKey, Map counts) source.__nested_sourced_data = [:]; } if (source.__nested_sourced_data[relationship] == null) { - source.__nested_sourced_data[relationship] = [:]; + source.__nested_sourced_data[relationship] = ["path_segments": nestedSourcedPaths.get(relationship), "data": [:]]; } } @@ -163,27 +163,28 @@ void storeNestedSourcedData(Map source, String relationship, Map nestedSourcedFi return; } - source.__nested_sourced_data[relationship][nestedElementKey] = nestedSourcedFields; + ((Map) source.__nested_sourced_data[relationship]).get("data").put(nestedElementKey, nestedSourcedFields); } // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. +// Reads path config from the document itself — no external params needed. // Called after every event so that after a self-event's putAll overwrites nested arrays, // the buffered data gets re-applied. 
-void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { - if (source.__nested_sourced_data == null || nestedSourcedPaths.isEmpty()) { +void applyNestedSourcedData(Map source) { + if (source.__nested_sourced_data == null) { return; } for (sourcedEntry in source.__nested_sourced_data.entrySet()) { - String sourcedRelationship = sourcedEntry.getKey(); - Map elementDataByKey = (Map) sourcedEntry.getValue(); + Map relationshipData = (Map) sourcedEntry.getValue(); + List pathSegments = (List) relationshipData.get("path_segments"); + Map dataByKey = (Map) relationshipData.get("data"); - List pathSegments = (List) nestedSourcedPaths.get(sourcedRelationship); - if (pathSegments == null) { + if (pathSegments == null || dataByKey == null) { continue; } - for (elementEntry in elementDataByKey.entrySet()) { + for (elementEntry in dataByKey.entrySet()) { List keyParts = splitNestedElementKey((String) elementEntry.getKey()); if (keyParts.size() != pathSegments.size()) { continue; @@ -236,9 +237,9 @@ Map nestedSourcedPaths = params.nestedSourcedPaths; String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); -setup(source, relationship, nestedElementKey, counts); +setup(source, relationship, nestedElementKey, nestedSourcedPaths, counts); validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); applyTopLevelFields(source, id, params.topLevelFields, counts); storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); -applyNestedSourcedData(source, nestedSourcedPaths); +applyNestedSourcedData(source); recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs index 0b4bc8be6..ad0c91043 100644 --- 
a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_indices.rbs @@ -13,7 +13,7 @@ module ElasticGraph def index: (::String, ::Hash[::Symbol, ::String | ::Integer]) ?{ (Indexing::Index) -> void } -> Indexing::Index def resolve_fields_with: (::Symbol?) -> void def override_runtime_metadata: (**untyped) -> void - def runtime_metadata: (::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget], ?nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]]) -> SchemaArtifacts::RuntimeMetadata::ObjectType + def runtime_metadata: (::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]) -> SchemaArtifacts::RuntimeMetadata::ObjectType def derived_indexed_types: () -> ::Array[Indexing::DerivedIndexedType] def derive_indexed_type_fields: ( ::String, diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs index fc0f8598f..b7ace8566 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs @@ -39,8 +39,8 @@ module ElasticGraph def generate_datastore_config: () -> ::Hash[::String, untyped] def build_dynamic_scripts: () -> ::Array[Scripting::Script] def build_runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::Schema - def identify_extra_update_targets_by_object_type_name: () -> [::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]], ::Hash[::String, ::Hash[::String, ::Array[::Hash[::String, untyped]]]]] - def identify_nested_sourced_update_targets: (untyped, ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]], ::Hash[::String, ::Hash[::String, ::Array[::Hash[::String, untyped]]]], ::Array[::String]) -> void + def identify_extra_update_targets_by_object_type_name: () 
-> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] + def identify_nested_sourced_update_targets: (untyped, ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]], ::Array[::String]) -> void def generate_sdl: () -> ::String def build_public_json_schema: () -> ::Hash[::String, untyped] def json_schema_indexing_field_types_by_name: () -> ::Hash[::String, Indexing::_FieldType] diff --git a/elasticgraph-support/lib/elastic_graph/constants.rb b/elasticgraph-support/lib/elastic_graph/constants.rb index 73d765794..58ae2a6fc 100644 --- a/elasticgraph-support/lib/elastic_graph/constants.rb +++ b/elasticgraph-support/lib/elastic_graph/constants.rb @@ -140,7 +140,7 @@ module ElasticGraph # # Note: this constant is automatically kept up-to-date by our `schema_artifacts:dump` rake task. # @private - INDEX_DATA_UPDATE_SCRIPT_ID = "update_index_data_cd5bc011d0885b2dfd793c9f119f068d" + INDEX_DATA_UPDATE_SCRIPT_ID = "update_index_data_59be3b4ab537ea412a3ead0408fb4137" # When an update script has a no-op result we often want to communicate more information about # why it was a no-op back to ElatsicGraph from the script. 
The only way to do that is to throw From df5f1c44a0f5eb8971d0e2428b28b5e62624df39 Mon Sep 17 00:00:00 2001 From: ellisandrews-toast Date: Fri, 29 May 2026 18:51:12 -0400 Subject: [PATCH 3/4] Address PR feedback: refactor nested sourced_from implementation --- config/schema/artifacts/datastore_config.yaml | 153 ++++++++------- config/schema/artifacts/runtime_metadata.yaml | 38 ++-- .../datastore_config.yaml | 153 ++++++++------- .../runtime_metadata.yaml | 38 ++-- .../datastore_core/index_definition.rb | 3 +- .../datastore_core/index_definition/index.rb | 5 +- .../rollover_index_template.rb | 4 +- .../datastore_core/index_definition.rbs | 1 + .../elastic_graph/indexer/operation/update.rb | 9 +- .../indexer/datastore_indexing_router_spec.rb | 2 +- .../indexer/operation/update_spec.rb | 31 +-- .../runtime_metadata/index_definition.rb | 13 +- .../nested_sourced_data_params.rb | 54 ++++++ .../nested_sourced_path_segment.rb | 60 ++++++ .../runtime_metadata/update_target.rb | 44 ++--- .../runtime_metadata/index_definition.rbs | 11 +- .../nested_sourced_data_params.rbs | 23 +++ .../nested_sourced_path_segment.rbs | 43 +++++ .../runtime_metadata/update_target.rbs | 16 +- .../runtime_metadata/schema_spec.rb | 12 +- .../runtime_metadata/update_target_spec.rb | 6 +- .../indexing/derived_indexed_type.rb | 4 +- .../schema_definition/indexing/index.rb | 14 +- .../nested_relationship_chain_resolver.rb | 182 +++++++++--------- .../indexing/nested_update_target_resolver.rb | 39 ++-- .../sourced_update_targets_resolver.rb | 165 ++++++++++++++++ .../indexing/update_target_factory.rb | 8 +- .../schema_definition/results.rb | 141 +------------- .../schema_elements/relationship.rb | 56 +++--- .../scripts/update/index_data.painless | 151 ++++++++------- .../schema_definition/indexing/index.rbs | 2 + .../nested_relationship_chain_resolver.rbs | 3 + .../nested_update_target_resolver.rbs | 4 +- .../sourced_update_targets_resolver.rbs | 22 +++ .../indexing/update_target_factory.rbs | 4 +- 
.../schema_definition/results.rbs | 2 - .../schema_elements/relationship.rbs | 14 +- .../lib/elastic_graph/constants.rb | 2 +- .../spec_support/runtime_metadata_support.rb | 8 +- 39 files changed, 901 insertions(+), 639 deletions(-) create mode 100644 elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rb create mode 100644 elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb create mode 100644 elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rbs create mode 100644 elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs create mode 100644 elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb create mode 100644 elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rbs diff --git a/config/schema/artifacts/datastore_config.yaml b/config/schema/artifacts/datastore_config.yaml index be858b3df..ae3276abb 100644 --- a/config/schema/artifacts/datastore_config.yaml +++ b/config/schema/artifacts/datastore_config.yaml @@ -2195,7 +2195,7 @@ scripts: // No timestamp values matched the params, so return `false`. return false; - update_index_data_59be3b4ab537ea412a3ead0408fb4137: + update_index_data_079bafcf4d739acd8659a631377fa9c8: context: update script: lang: painless @@ -2204,33 +2204,61 @@ scripts: // Helper Functions // ============================================================ - // Builds a composite nested element key from path segments. - // List segments contribute their matched identifier value; object segments contribute their field name. - String buildNestedElementKey(List pathSegments, Map pathIdentifiers) { + // Encodes a list of strings into a length-prefixed string. 
+ // Each part is encoded as "length:value" and parts are concatenated directly. + // This encoding is unambiguous regardless of what characters the values contain. + String encodeKey(List parts) { + StringBuilder sb = new StringBuilder(); + for (String part : parts) { + sb.append(part.length()); + sb.append(':'); + sb.append(part); + } + return sb.toString(); + } + + // Decodes a length-prefixed string back into a list of strings. + List decodeKey(String key) { List parts = new ArrayList(); - for (Map segment : pathSegments) { - if (segment.containsKey("list")) { - parts.add(pathIdentifiers[segment.source_field]); - } else { - parts.add(segment.get("object")); - } + int i = 0; + while (i < key.length()) { + int colonPos = key.indexOf(":", i); + int length = Integer.parseInt(key.substring(i, colonPos)); + int valueStart = colonPos + 1; + parts.add(key.substring(valueStart, valueStart + length)); + i = valueStart + length; } - return String.join(":", parts); + return parts; } - // Looks up path segments for the given relationship and builds the nested element key. + // Builds a nested element key (as an encoded string) from path segments. + // List segments contribute their matched identifier value; object segments contribute their field name. // Returns "" if no path segments are configured (i.e., this is not a nested sourced event). 
- String buildNestedElementKeyForRelationship(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { - List pathSegments = (List) nestedSourcedPaths.get(relationship); - if (pathSegments == null) { - return ""; + String buildNestedElementKey(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { + List pathSegments = (List) nestedSourcedPaths.get(relationship); + if (pathSegments == null) { + return ""; + } + List parts = new ArrayList(); + for (Map segment : pathSegments) { + if ("list".equals(segment.get("type"))) { + parts.add(pathIdentifiers[segment.sourceField]); + } else { + parts.add(segment.get("field")); } - return buildNestedElementKey(pathSegments, pathIdentifiers); + } + return encodeKey(parts); } - // Splits a composite nested element key into a list of parts. - List splitNestedElementKey(String nestedElementKey) { - return Arrays.asList(nestedElementKey.splitOnToken(":")); + // Builds the versions key by combining the relationship name with the element key parts. + // For top-level events (empty element key), returns just the relationship name. + String buildVersionsKey(String relationship, String nestedElementKey) { + if (nestedElementKey.isEmpty()) { + return relationship; + } + List parts = decodeKey(nestedElementKey); + parts.add(0, relationship); + return encodeKey(parts); } // Finds an element in a list where element[matchField] equals matchValue. Returns null if not found. @@ -2250,15 +2278,14 @@ scripts: for (int i = 0; i < pathSegments.size(); i++) { Map segment = (Map) pathSegments.get(i); - boolean isList = segment.containsKey("list"); - String field = isList ? 
(String) segment.get("list") : (String) segment.get("object"); + String field = (String) segment.get("field"); if (!current.containsKey(field)) { return null; } - if (isList) { - current = (Map) findInList((List) current.get(field), (String) segment.get("match_field"), (String) keyParts.get(i)); + if ("list".equals(segment.get("type"))) { + current = (Map) findInList((List) current.get(field), (String) segment.get("matchField"), (String) keyParts.get(i)); } else { current = (Map) current.get(field); } @@ -2277,7 +2304,7 @@ scripts: // ============================================================ // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). - void setup(Map source, String relationship, String nestedElementKey, Map nestedSourcedPaths, Map counts) { + void setup(Map source, String versionsKey, String relationship, String nestedElementKey, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -2286,12 +2313,8 @@ scripts: source.__versions = [:]; } - if (source.__versions[relationship] == null) { - source.__versions[relationship] = [:]; - } - - if (!nestedElementKey.isEmpty() && source.__versions[relationship][nestedElementKey] == null) { - source.__versions[relationship][nestedElementKey] = [:]; + if (source.__versions[versionsKey] == null) { + source.__versions[versionsKey] = [:]; } if (!nestedElementKey.isEmpty()) { @@ -2299,7 +2322,7 @@ scripts: source.__nested_sourced_data = [:]; } if (source.__nested_sourced_data[relationship] == null) { - source.__nested_sourced_data[relationship] = ["path_segments": nestedSourcedPaths.get(relationship), "data": [:]]; + source.__nested_sourced_data[relationship] = [:]; } } @@ -2309,31 +2332,19 @@ scripts: } // Validates that this event is allowed: no relationship mutation and no stale version. 
- void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String nestedElementKey) { - // For nested events, validate per-element. For top-level events, validate per-relationship. - Map versionsMap = nestedElementKey.isEmpty() - ? source.__versions[relationship] - : source.__versions[relationship][nestedElementKey]; + void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String versionsKey) { + Map versionsMap = source.__versions[versionsKey]; // Check that no other source ID has previously written to this target. List previousSourceIds = versionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); if (previousSourceIds.size() > 0) { - if (nestedElementKey.isEmpty()) { - throw new IllegalArgumentException( - "Cannot update document " + id + " " + - "with data from related " + relationship + " " + sourceId + " " + - "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + - "but mutations of relationships used with `sourced_from` are not supported because " + - "allowing it could break ElasticGraph's out-of-order processing guarantees." - ); - } else { - throw new IllegalArgumentException( - "Cannot update nested element [" + nestedElementKey + "] on document " + id + " " + - "with data from " + relationship + " " + sourceId + " " + - "because this element was previously sourced from a different event (" + previousSourceIds + "). " + - "Each nested element can only be sourced from one source document." 
- ); - } + throw new IllegalArgumentException( + "Cannot update document " + id + " " + + "with data from related " + relationship + " " + sourceId + " " + + "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + + "but mutations of relationships used with `sourced_from` are not supported because " + + "allowing it could break ElasticGraph's out-of-order processing guarantees." + ); } // Check that the event version is newer than what we've already seen. @@ -2341,9 +2352,8 @@ scripts: long docVersion = maybeDocVersion == null ? Long.MIN_VALUE : maybeDocVersion.longValue(); if (docVersion >= eventVersion) { - String target = nestedElementKey.isEmpty() ? id : id + "/" + nestedElementKey; throw new IllegalArgumentException("ElasticGraph update was a no-op: [" + - target + "]: version conflict, current version [" + + id + "]: version conflict, current version [" + docVersion + "] is higher or equal to the one provided [" + eventVersion + "]"); } @@ -2365,29 +2375,29 @@ scripts: return; } - ((Map) source.__nested_sourced_data[relationship]).get("data").put(nestedElementKey, nestedSourcedFields); + ((Map) source.__nested_sourced_data[relationship]).put(nestedElementKey, nestedSourcedFields); } // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. - // Reads path config from the document itself — no external params needed. + // Reads path config from the nestedSourcedPaths param. // Called after every event so that after a self-event's putAll overwrites nested arrays, // the buffered data gets re-applied. 
- void applyNestedSourcedData(Map source) { + void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { if (source.__nested_sourced_data == null) { return; } for (sourcedEntry in source.__nested_sourced_data.entrySet()) { - Map relationshipData = (Map) sourcedEntry.getValue(); - List pathSegments = (List) relationshipData.get("path_segments"); - Map dataByKey = (Map) relationshipData.get("data"); + String relationship = (String) sourcedEntry.getKey(); + Map dataByKey = (Map) sourcedEntry.getValue(); + List pathSegments = (List) nestedSourcedPaths.get(relationship); if (pathSegments == null || dataByKey == null) { continue; } for (elementEntry in dataByKey.entrySet()) { - List keyParts = splitNestedElementKey((String) elementEntry.getKey()); + List keyParts = decodeKey((String) elementEntry.getKey()); if (keyParts.size() != pathSegments.size()) { continue; } @@ -2401,12 +2411,8 @@ scripts: } // Records the event version in __versions and adds the relationship to __sources. - void recordSource(Map source, String relationship, String sourceId, long eventVersion, String nestedElementKey) { - if (nestedElementKey.isEmpty()) { - source.__versions[relationship][sourceId] = eventVersion; - } else { - source.__versions[relationship][nestedElementKey][sourceId] = eventVersion; - } + void recordSource(Map source, String versionsKey, String relationship, String sourceId, long eventVersion) { + source.__versions[versionsKey][sourceId] = eventVersion; // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list. // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time. 
@@ -2437,11 +2443,12 @@ scripts: Map nestedSourcedPathIdentifiers = params.nestedSourcedPathIdentifiers; Map nestedSourcedPaths = params.nestedSourcedPaths; - String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); + String nestedElementKey = buildNestedElementKey(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); + String versionsKey = buildVersionsKey(relationship, nestedElementKey); - setup(source, relationship, nestedElementKey, nestedSourcedPaths, counts); - validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); + setup(source, versionsKey, relationship, nestedElementKey, counts); + validateSource(source, id, relationship, sourceId, eventVersion, versionsKey); applyTopLevelFields(source, id, params.topLevelFields, counts); storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); - applyNestedSourcedData(source); - recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); + applyNestedSourcedData(source, nestedSourcedPaths); + recordSource(source, versionsKey, relationship, sourceId, eventVersion); diff --git a/config/schema/artifacts/runtime_metadata.yaml b/config/schema/artifacts/runtime_metadata.yaml index 2b4cc29a2..42aa4630b 100644 --- a/config/schema/artifacts/runtime_metadata.yaml +++ b/config/schema/artifacts/runtime_metadata.yaml @@ -3085,7 +3085,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: full_address: cardinality: one @@ -3275,7 +3275,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: 
cardinality: one @@ -3314,7 +3314,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: cardinality: one @@ -3414,7 +3414,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: created_at: cardinality: one @@ -3724,7 +3724,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: cardinality: one @@ -3897,7 +3897,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: created_at: cardinality: one @@ -4382,7 +4382,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: ceo: cardinality: one @@ -4540,7 +4540,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: created_at: cardinality: one @@ -5501,7 +5501,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 
top_level_fields_params: __typename: cardinality: one @@ -5700,7 +5700,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: cardinality: one @@ -5767,7 +5767,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: active: cardinality: one @@ -6304,7 +6304,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: name: cardinality: one @@ -6654,7 +6654,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: formed_on routing_value_source: league - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: country_code: cardinality: one @@ -7754,7 +7754,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: created_at routing_value_source: workspace_id2 - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: amount_cents: cardinality: one @@ -7832,7 +7832,7 @@ object_types_by_name: version: cardinality: one relationship: widget - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: widget_cost: cardinality: one @@ -8062,7 +8062,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: introduced_on routing_value_source: primary_continent - script_id: 
update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: details: cardinality: one @@ -9107,7 +9107,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: name: cardinality: one @@ -9129,7 +9129,7 @@ object_types_by_name: relationship: workspace rollover_timestamp_value_source: widget.created_at routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: workspace_name: cardinality: one @@ -9364,4 +9364,4 @@ static_script_ids_by_scoped_name: field/as_day_of_week: field_as_day_of_week_f2b5c7d9e8f75bf2457b52412bfb6537 field/as_time_of_day: field_as_time_of_day_ed82aba44fc66bff5635bec4305c1c66 filter/by_time_of_day: filter_by_time_of_day_ea12d0561b24961789ab68ed38435612 - update/index_data: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + update/index_data: update_index_data_079bafcf4d739acd8659a631377fa9c8 diff --git a/config/schema/artifacts_with_apollo/datastore_config.yaml b/config/schema/artifacts_with_apollo/datastore_config.yaml index be858b3df..ae3276abb 100644 --- a/config/schema/artifacts_with_apollo/datastore_config.yaml +++ b/config/schema/artifacts_with_apollo/datastore_config.yaml @@ -2195,7 +2195,7 @@ scripts: // No timestamp values matched the params, so return `false`. return false; - update_index_data_59be3b4ab537ea412a3ead0408fb4137: + update_index_data_079bafcf4d739acd8659a631377fa9c8: context: update script: lang: painless @@ -2204,33 +2204,61 @@ scripts: // Helper Functions // ============================================================ - // Builds a composite nested element key from path segments. 
- // List segments contribute their matched identifier value; object segments contribute their field name. - String buildNestedElementKey(List pathSegments, Map pathIdentifiers) { + // Encodes a list of strings into a length-prefixed string. + // Each part is encoded as "length:value" and parts are concatenated directly. + // This encoding is unambiguous regardless of what characters the values contain. + String encodeKey(List parts) { + StringBuilder sb = new StringBuilder(); + for (String part : parts) { + sb.append(part.length()); + sb.append(':'); + sb.append(part); + } + return sb.toString(); + } + + // Decodes a length-prefixed string back into a list of strings. + List decodeKey(String key) { List parts = new ArrayList(); - for (Map segment : pathSegments) { - if (segment.containsKey("list")) { - parts.add(pathIdentifiers[segment.source_field]); - } else { - parts.add(segment.get("object")); - } + int i = 0; + while (i < key.length()) { + int colonPos = key.indexOf(":", i); + int length = Integer.parseInt(key.substring(i, colonPos)); + int valueStart = colonPos + 1; + parts.add(key.substring(valueStart, valueStart + length)); + i = valueStart + length; } - return String.join(":", parts); + return parts; } - // Looks up path segments for the given relationship and builds the nested element key. + // Builds a nested element key (as an encoded string) from path segments. + // List segments contribute their matched identifier value; object segments contribute their field name. // Returns "" if no path segments are configured (i.e., this is not a nested sourced event). 
- String buildNestedElementKeyForRelationship(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { - List pathSegments = (List) nestedSourcedPaths.get(relationship); - if (pathSegments == null) { - return ""; + String buildNestedElementKey(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { + List pathSegments = (List) nestedSourcedPaths.get(relationship); + if (pathSegments == null) { + return ""; + } + List parts = new ArrayList(); + for (Map segment : pathSegments) { + if ("list".equals(segment.get("type"))) { + parts.add(pathIdentifiers[segment.sourceField]); + } else { + parts.add(segment.get("field")); } - return buildNestedElementKey(pathSegments, pathIdentifiers); + } + return encodeKey(parts); } - // Splits a composite nested element key into a list of parts. - List splitNestedElementKey(String nestedElementKey) { - return Arrays.asList(nestedElementKey.splitOnToken(":")); + // Builds the versions key by combining the relationship name with the element key parts. + // For top-level events (empty element key), returns just the relationship name. + String buildVersionsKey(String relationship, String nestedElementKey) { + if (nestedElementKey.isEmpty()) { + return relationship; + } + List parts = decodeKey(nestedElementKey); + parts.add(0, relationship); + return encodeKey(parts); } // Finds an element in a list where element[matchField] equals matchValue. Returns null if not found. @@ -2250,15 +2278,14 @@ scripts: for (int i = 0; i < pathSegments.size(); i++) { Map segment = (Map) pathSegments.get(i); - boolean isList = segment.containsKey("list"); - String field = isList ? 
(String) segment.get("list") : (String) segment.get("object"); + String field = (String) segment.get("field"); if (!current.containsKey(field)) { return null; } - if (isList) { - current = (Map) findInList((List) current.get(field), (String) segment.get("match_field"), (String) keyParts.get(i)); + if ("list".equals(segment.get("type"))) { + current = (Map) findInList((List) current.get(field), (String) segment.get("matchField"), (String) keyParts.get(i)); } else { current = (Map) current.get(field); } @@ -2277,7 +2304,7 @@ scripts: // ============================================================ // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). - void setup(Map source, String relationship, String nestedElementKey, Map nestedSourcedPaths, Map counts) { + void setup(Map source, String versionsKey, String relationship, String nestedElementKey, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -2286,12 +2313,8 @@ scripts: source.__versions = [:]; } - if (source.__versions[relationship] == null) { - source.__versions[relationship] = [:]; - } - - if (!nestedElementKey.isEmpty() && source.__versions[relationship][nestedElementKey] == null) { - source.__versions[relationship][nestedElementKey] = [:]; + if (source.__versions[versionsKey] == null) { + source.__versions[versionsKey] = [:]; } if (!nestedElementKey.isEmpty()) { @@ -2299,7 +2322,7 @@ scripts: source.__nested_sourced_data = [:]; } if (source.__nested_sourced_data[relationship] == null) { - source.__nested_sourced_data[relationship] = ["path_segments": nestedSourcedPaths.get(relationship), "data": [:]]; + source.__nested_sourced_data[relationship] = [:]; } } @@ -2309,31 +2332,19 @@ scripts: } // Validates that this event is allowed: no relationship mutation and no stale version. 
- void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String nestedElementKey) { - // For nested events, validate per-element. For top-level events, validate per-relationship. - Map versionsMap = nestedElementKey.isEmpty() - ? source.__versions[relationship] - : source.__versions[relationship][nestedElementKey]; + void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String versionsKey) { + Map versionsMap = source.__versions[versionsKey]; // Check that no other source ID has previously written to this target. List previousSourceIds = versionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); if (previousSourceIds.size() > 0) { - if (nestedElementKey.isEmpty()) { - throw new IllegalArgumentException( - "Cannot update document " + id + " " + - "with data from related " + relationship + " " + sourceId + " " + - "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + - "but mutations of relationships used with `sourced_from` are not supported because " + - "allowing it could break ElasticGraph's out-of-order processing guarantees." - ); - } else { - throw new IllegalArgumentException( - "Cannot update nested element [" + nestedElementKey + "] on document " + id + " " + - "with data from " + relationship + " " + sourceId + " " + - "because this element was previously sourced from a different event (" + previousSourceIds + "). " + - "Each nested element can only be sourced from one source document." 
- ); - } + throw new IllegalArgumentException( + "Cannot update document " + id + " " + + "with data from related " + relationship + " " + sourceId + " " + + "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + + "but mutations of relationships used with `sourced_from` are not supported because " + + "allowing it could break ElasticGraph's out-of-order processing guarantees." + ); } // Check that the event version is newer than what we've already seen. @@ -2341,9 +2352,8 @@ scripts: long docVersion = maybeDocVersion == null ? Long.MIN_VALUE : maybeDocVersion.longValue(); if (docVersion >= eventVersion) { - String target = nestedElementKey.isEmpty() ? id : id + "/" + nestedElementKey; throw new IllegalArgumentException("ElasticGraph update was a no-op: [" + - target + "]: version conflict, current version [" + + id + "]: version conflict, current version [" + docVersion + "] is higher or equal to the one provided [" + eventVersion + "]"); } @@ -2365,29 +2375,29 @@ scripts: return; } - ((Map) source.__nested_sourced_data[relationship]).get("data").put(nestedElementKey, nestedSourcedFields); + ((Map) source.__nested_sourced_data[relationship]).put(nestedElementKey, nestedSourcedFields); } // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. - // Reads path config from the document itself — no external params needed. + // Reads path config from the nestedSourcedPaths param. // Called after every event so that after a self-event's putAll overwrites nested arrays, // the buffered data gets re-applied. 
- void applyNestedSourcedData(Map source) { + void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { if (source.__nested_sourced_data == null) { return; } for (sourcedEntry in source.__nested_sourced_data.entrySet()) { - Map relationshipData = (Map) sourcedEntry.getValue(); - List pathSegments = (List) relationshipData.get("path_segments"); - Map dataByKey = (Map) relationshipData.get("data"); + String relationship = (String) sourcedEntry.getKey(); + Map dataByKey = (Map) sourcedEntry.getValue(); + List pathSegments = (List) nestedSourcedPaths.get(relationship); if (pathSegments == null || dataByKey == null) { continue; } for (elementEntry in dataByKey.entrySet()) { - List keyParts = splitNestedElementKey((String) elementEntry.getKey()); + List keyParts = decodeKey((String) elementEntry.getKey()); if (keyParts.size() != pathSegments.size()) { continue; } @@ -2401,12 +2411,8 @@ scripts: } // Records the event version in __versions and adds the relationship to __sources. - void recordSource(Map source, String relationship, String sourceId, long eventVersion, String nestedElementKey) { - if (nestedElementKey.isEmpty()) { - source.__versions[relationship][sourceId] = eventVersion; - } else { - source.__versions[relationship][nestedElementKey][sourceId] = eventVersion; - } + void recordSource(Map source, String versionsKey, String relationship, String sourceId, long eventVersion) { + source.__versions[versionsKey][sourceId] = eventVersion; // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list. // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time. 
@@ -2437,11 +2443,12 @@ scripts: Map nestedSourcedPathIdentifiers = params.nestedSourcedPathIdentifiers; Map nestedSourcedPaths = params.nestedSourcedPaths; - String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); + String nestedElementKey = buildNestedElementKey(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); + String versionsKey = buildVersionsKey(relationship, nestedElementKey); - setup(source, relationship, nestedElementKey, nestedSourcedPaths, counts); - validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); + setup(source, versionsKey, relationship, nestedElementKey, counts); + validateSource(source, id, relationship, sourceId, eventVersion, versionsKey); applyTopLevelFields(source, id, params.topLevelFields, counts); storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); - applyNestedSourcedData(source); - recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); + applyNestedSourcedData(source, nestedSourcedPaths); + recordSource(source, versionsKey, relationship, sourceId, eventVersion); diff --git a/config/schema/artifacts_with_apollo/runtime_metadata.yaml b/config/schema/artifacts_with_apollo/runtime_metadata.yaml index 50f8fb739..953a0338f 100644 --- a/config/schema/artifacts_with_apollo/runtime_metadata.yaml +++ b/config/schema/artifacts_with_apollo/runtime_metadata.yaml @@ -3114,7 +3114,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: full_address: cardinality: one @@ -3304,7 +3304,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 
top_level_fields_params: __typename: cardinality: one @@ -3343,7 +3343,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: cardinality: one @@ -3464,7 +3464,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: created_at: cardinality: one @@ -3826,7 +3826,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: cardinality: one @@ -3999,7 +3999,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: created_at: cardinality: one @@ -4484,7 +4484,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: ceo: cardinality: one @@ -4642,7 +4642,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: created_at: cardinality: one @@ -5624,7 +5624,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: 
update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: cardinality: one @@ -5823,7 +5823,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: __typename: cardinality: one @@ -5890,7 +5890,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: active: cardinality: one @@ -6433,7 +6433,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: name: cardinality: one @@ -6783,7 +6783,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: formed_on routing_value_source: league - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: country_code: cardinality: one @@ -7883,7 +7883,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: created_at routing_value_source: workspace_id2 - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: amount_cents: cardinality: one @@ -7961,7 +7961,7 @@ object_types_by_name: version: cardinality: one relationship: widget - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: widget_cost: cardinality: one @@ -8191,7 +8191,7 @@ object_types_by_name: relationship: __self rollover_timestamp_value_source: introduced_on 
routing_value_source: primary_continent - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: details: cardinality: one @@ -9236,7 +9236,7 @@ object_types_by_name: cardinality: one relationship: __self routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: name: cardinality: one @@ -9258,7 +9258,7 @@ object_types_by_name: relationship: workspace rollover_timestamp_value_source: widget.created_at routing_value_source: id - script_id: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + script_id: update_index_data_079bafcf4d739acd8659a631377fa9c8 top_level_fields_params: workspace_name: cardinality: one @@ -9536,4 +9536,4 @@ static_script_ids_by_scoped_name: field/as_day_of_week: field_as_day_of_week_f2b5c7d9e8f75bf2457b52412bfb6537 field/as_time_of_day: field_as_time_of_day_ed82aba44fc66bff5635bec4305c1c66 filter/by_time_of_day: filter_by_time_of_day_ea12d0561b24961789ab68ed38435612 - update/index_data: update_index_data_59be3b4ab537ea412a3ead0408fb4137 + update/index_data: update_index_data_079bafcf4d739acd8659a631377fa9c8 diff --git a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb index c95dc2f7c..aa239284b 100644 --- a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb +++ b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb @@ -33,7 +33,8 @@ def self.with(name:, runtime_metadata:, config:, datastore_clients_by_name:, sch env_index_config: env_index_config, defined_clusters: config.clusters.keys.to_set, datastore_clients_by_name: datastore_clients_by_name, - has_had_multiple_sources: runtime_metadata.has_had_multiple_sources + has_had_multiple_sources: 
runtime_metadata.has_had_multiple_sources, + nested_sourced_paths: runtime_metadata.nested_sourced_paths } if (rollover = runtime_metadata.rollover) diff --git a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/index.rb b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/index.rb index 1b99e7a4e..a85c309fb 100644 --- a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/index.rb +++ b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/index.rb @@ -15,11 +15,12 @@ class DatastoreCore module IndexDefinition class Index < Support::MemoizableData.define( :name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path, - :env_index_config, :defined_clusters, :datastore_clients_by_name, :env_agnostic_settings, :has_had_multiple_sources + :env_index_config, :defined_clusters, :datastore_clients_by_name, :env_agnostic_settings, :has_had_multiple_sources, + :nested_sourced_paths ) # `Data.define` provides all these methods: # @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config, env_agnostic_settings - # @dynamic defined_clusters, datastore_clients_by_name, initialize, has_had_multiple_sources + # @dynamic defined_clusters, datastore_clients_by_name, initialize, has_had_multiple_sources, nested_sourced_paths # `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it # but can't for some reason so we have to declare them with `@dynamic`. 
diff --git a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb index 9a70836da..44ea8bda7 100644 --- a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb +++ b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition/rollover_index_template.rb @@ -23,11 +23,11 @@ module IndexDefinition class RolloverIndexTemplate < Support::MemoizableData.define( :name, :route_with, :default_sort_clauses, :current_sources, :fields_by_path, :env_index_config, :index_args, :defined_clusters, :datastore_clients_by_name, :timestamp_field_path, :frequency, - :env_agnostic_settings, :has_had_multiple_sources + :env_agnostic_settings, :has_had_multiple_sources, :nested_sourced_paths ) # `Data.define` provides all these methods: # @dynamic name, route_with, default_sort_clauses, current_sources, fields_by_path, env_index_config, env_agnostic_settings - # @dynamic index_args, defined_clusters, datastore_clients_by_name, timestamp_field_path, frequency, initialize, has_had_multiple_sources + # @dynamic index_args, defined_clusters, datastore_clients_by_name, timestamp_field_path, frequency, initialize, has_had_multiple_sources, nested_sourced_paths # `include IndexDefinition::Base` provides all these methods. Steep should be able to detect it # but can't for some reason so we have to declare them with `@dynamic`. 
diff --git a/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs b/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs index b12ac2343..837612e48 100644 --- a/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs +++ b/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs @@ -26,6 +26,7 @@ module ElasticGraph def current_sources: () -> ::Set[::String] def fields_by_path: () -> ::Hash[::String, SchemaArtifacts::RuntimeMetadata::IndexField] def has_had_multiple_sources: () -> bool + def nested_sourced_paths: () -> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::nestedSourcedPathSegment]] def env_index_config: () -> Configuration::IndexDefinition def env_agnostic_settings: () -> ::Hash[::String, untyped] def defined_clusters: () -> ::Set[::String] diff --git a/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb b/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb index 32e04c04a..3161a184d 100644 --- a/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb +++ b/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb @@ -145,11 +145,14 @@ def script_params prepared_record: prepared_record ) - # The normal indexing script uses `__counts`. Other indexing scripts (e.g. the ones generated - # for derived indexing) do not use `__counts` so there's no point in spending effort on computing - # it. Plus, the logic below raises an exception in that case, so it's important we avoid it. + # The normal indexing script uses `__counts` and `nestedSourcedPaths`. Other indexing scripts + # (e.g. the ones generated for derived indexing) do not use these so there's no point in + # spending effort on computing them. Plus, the logic below raises an exception in that case, + # so it's important we avoid it. return initial_params unless update_target.for_normal_indexing? 
+ initial_params["nestedSourcedPaths"] = destination_index_def.nested_sourced_paths.transform_values { |segments| segments.map(&:to_dumpable_hash) } + CountAccumulator.merge_list_counts_into( initial_params, mapping: destination_index_mapping, diff --git a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb index bd58ef4de..6dffcadc3 100644 --- a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb +++ b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/datastore_indexing_router_spec.rb @@ -265,7 +265,7 @@ def type_name_for_index(index_name) upsert: {}, script: a_hash_including( id: /WidgetCurrency_from_Widget_/, - params: {"nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "USD"} + params: {"nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "USD"} ) ) end diff --git a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb index 28471ec6f..0c54b2975 100644 --- a/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb +++ b/elasticgraph-indexer/spec/unit/elastic_graph/indexer/operation/update_spec.rb @@ -73,7 +73,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "17" }}, scripted_upsert: true, @@ -156,7 +156,7 @@ module Operation 
{update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => []}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"name" => []}, "id" => "17" }}, scripted_upsert: true, @@ -177,7 +177,7 @@ module Operation {update: {_id: "embedded_workspace_id", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "embedded_workspace_id" }}, scripted_upsert: true, @@ -202,7 +202,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"embedded_values.missing_field" => [], "name" => nil}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"embedded_values.missing_field" => [], "name" => nil}, "id" => "17" }}, scripted_upsert: true, @@ -229,13 +229,14 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => { - "embedded_values" => ["thing1"], - "name" => { - "name" => "embedded_name", - "workspace_id" => "embedded_workspace_id" - } - }, + 
"nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, + "topLevelFields" => { + "embedded_values" => ["thing1"], + "name" => { + "name" => "embedded_name", + "workspace_id" => "embedded_workspace_id" + } + }, "id" => "17" }}, scripted_upsert: true, @@ -261,7 +262,7 @@ module Operation { script: {id: operations.first.update_target.script_id, params: { # Float-typed integer values are coerced to true ints before indexing - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"size" => [an_instance_of(::Integer).and(eq_to(4))]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"size" => [an_instance_of(::Integer).and(eq_to(4))]}, "id" => "17" }}, scripted_upsert: true, @@ -282,7 +283,7 @@ module Operation {update: {_id: "17", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "17" }}, scripted_upsert: true, @@ -291,7 +292,7 @@ module Operation {update: {_id: "18", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "18" }}, scripted_upsert: true, @@ -300,7 +301,7 @@ module Operation {update: {_id: "19", _index: "widget_workspaces", retry_on_conflict: Update::CONFLICT_RETRIES}}, { script: {id: operations.first.update_target.script_id, params: { - 
"nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "nestedSourcedPaths" => {}, "topLevelFields" => {"name" => ["thing1"]}, + "nestedSourcedFields" => {}, "nestedSourcedPathIdentifiers" => {}, "topLevelFields" => {"name" => ["thing1"]}, "id" => "19" }}, scripted_upsert: true, diff --git a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rb b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rb index 9e4f26eb0..59a44c093 100644 --- a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rb +++ b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rb @@ -8,6 +8,7 @@ require "elastic_graph/schema_artifacts/runtime_metadata/hash_dumper" require "elastic_graph/schema_artifacts/runtime_metadata/index_field" +require "elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment" require "elastic_graph/schema_artifacts/runtime_metadata/sort_field" module ElasticGraph @@ -16,22 +17,24 @@ module RuntimeMetadata # Runtime metadata related to a datastore index definition. 
# # @private - class IndexDefinition < ::Data.define(:route_with, :rollover, :default_sort_fields, :current_sources, :fields_by_path, :has_had_multiple_sources) + class IndexDefinition < ::Data.define(:route_with, :rollover, :default_sort_fields, :current_sources, :fields_by_path, :has_had_multiple_sources, :nested_sourced_paths) ROUTE_WITH = "route_with" ROLLOVER = "rollover" DEFAULT_SORT_FIELDS = "default_sort_fields" CURRENT_SOURCES = "current_sources" FIELDS_BY_PATH = "fields_by_path" HAS_HAD_MULTIPLE_SOURCES = "has_had_multiple_sources" + NESTED_SOURCED_PATHS = "nested_sourced_paths" - def initialize(route_with:, rollover:, default_sort_fields:, current_sources:, fields_by_path:, has_had_multiple_sources:) + def initialize(route_with:, rollover:, default_sort_fields:, current_sources:, fields_by_path:, has_had_multiple_sources:, nested_sourced_paths: {}) super( route_with: route_with, rollover: rollover, default_sort_fields: default_sort_fields, current_sources: current_sources.to_set, fields_by_path: fields_by_path, - has_had_multiple_sources: has_had_multiple_sources + has_had_multiple_sources: has_had_multiple_sources, + nested_sourced_paths: nested_sourced_paths ) end @@ -42,7 +45,8 @@ def self.from_hash(hash) default_sort_fields: hash[DEFAULT_SORT_FIELDS]&.map { |h| SortField.from_hash(h) } || [], current_sources: hash[CURRENT_SOURCES] || [], fields_by_path: (hash[FIELDS_BY_PATH] || {}).transform_values { |h| IndexField.from_hash(h) }, - has_had_multiple_sources: hash[HAS_HAD_MULTIPLE_SOURCES] || false + has_had_multiple_sources: hash[HAS_HAD_MULTIPLE_SOURCES] || false, + nested_sourced_paths: (hash[NESTED_SOURCED_PATHS] || {}).transform_values { |segments| segments.map { |h| NestedSourcedPathSegment.from_hash(h) } } ) end @@ -53,6 +57,7 @@ def to_dumpable_hash DEFAULT_SORT_FIELDS => default_sort_fields.map(&:to_dumpable_hash), FIELDS_BY_PATH => HashDumper.dump_hash(fields_by_path, &:to_dumpable_hash), HAS_HAD_MULTIPLE_SOURCES => (true if 
has_had_multiple_sources), + NESTED_SOURCED_PATHS => nested_sourced_paths.transform_values { |segments| segments.map(&:to_dumpable_hash) }, ROLLOVER => rollover&.to_dumpable_hash, ROUTE_WITH => route_with } diff --git a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rb b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rb new file mode 100644 index 000000000..b989390b5 --- /dev/null +++ b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rb @@ -0,0 +1,54 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/schema_artifacts/runtime_metadata/params" + +module ElasticGraph + module SchemaArtifacts + module RuntimeMetadata + # Bundles the param configuration for nested sourced_from update targets. + # `field_params` defines which fields to extract from the event and write onto + # the target nested element. `path_identifier_params` defines which values to + # extract from the event to identify which nested element to target. + # + # @private + class NestedSourcedDataParams < ::Data.define(:field_params, :path_identifier_params) + FIELD_PARAMS = "field_params" + PATH_IDENTIFIER_PARAMS = "path_identifier_params" + + EMPTY = new(field_params: {}, path_identifier_params: {}) + + def self.from_hash(hash) + new( + field_params: Param.load_params_hash(hash[FIELD_PARAMS] || {}), + path_identifier_params: Param.load_params_hash(hash[PATH_IDENTIFIER_PARAMS] || {}) + ) + end + + def to_dumpable_hash + { + FIELD_PARAMS => Param.dump_params_hash(field_params), + PATH_IDENTIFIER_PARAMS => Param.dump_params_hash(path_identifier_params) + } + end + + def empty? + field_params.empty? 
&& path_identifier_params.empty? + end + + # Resolves params into script-ready values from the given prepared record. + def script_params_for(prepared_record) + { + "nestedSourcedFields" => field_params.transform_values { |param| param.value_for(prepared_record) }, + "nestedSourcedPathIdentifiers" => path_identifier_params.transform_values { |param| param.value_for(prepared_record) } + } + end + end + end + end +end diff --git a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb new file mode 100644 index 000000000..ff99da8a4 --- /dev/null +++ b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb @@ -0,0 +1,60 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module SchemaArtifacts + module RuntimeMetadata + # Represents a segment in a nested sourced path that navigates into a list field, + # matching an element by a key field. + # + # @private + class ListPathSegment < ::Data.define(:field, :match_field, :source_field) + TYPE = "list" + FIELD = "field" + MATCH_FIELD = "matchField" + SOURCE_FIELD = "sourceField" + + def to_dumpable_hash + {"type" => TYPE, FIELD => field, MATCH_FIELD => match_field, SOURCE_FIELD => source_field} + end + + def self.from_hash(hash) + new(field: hash[FIELD], match_field: hash[MATCH_FIELD], source_field: hash[SOURCE_FIELD]) + end + end + + # Represents a segment in a nested sourced path that navigates into an object field. 
+ # + # @private + class ObjectPathSegment < ::Data.define(:field) + TYPE = "object" + FIELD = "field" + + def to_dumpable_hash + {"type" => TYPE, FIELD => field} + end + + def self.from_hash(hash) + new(field: hash[FIELD]) + end + end + + # @private + module NestedSourcedPathSegment + def self.from_hash(hash) + case hash["type"] + when ListPathSegment::TYPE + ListPathSegment.from_hash(hash) + when ObjectPathSegment::TYPE + ObjectPathSegment.from_hash(hash) + end + end + end + end + end +end diff --git a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb index f8d18fcee..b0a8ff2d8 100644 --- a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb +++ b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/update_target.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "elastic_graph/constants" +require "elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params" require "elastic_graph/schema_artifacts/runtime_metadata/params" module ElasticGraph @@ -23,9 +24,7 @@ class UpdateTarget < ::Data.define( :routing_value_source, :rollover_timestamp_value_source, :top_level_fields_params, - :nested_sourced_fields_params, - :nested_sourced_path_identifiers_params, - :nested_sourced_paths, + :nested_sourced_data_params, :metadata_params ) TYPE = "type" @@ -35,9 +34,7 @@ class UpdateTarget < ::Data.define( ROUTING_VALUE_SOURCE = "routing_value_source" ROLLOVER_TIMESTAMP_VALUE_SOURCE = "rollover_timestamp_value_source" TOP_LEVEL_FIELDS_PARAMS = "top_level_fields_params" - NESTED_SOURCED_FIELDS_PARAMS = "nested_sourced_fields_params" - NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS = "nested_sourced_path_identifiers_params" - NESTED_SOURCED_PATHS = "nested_sourced_paths" + NESTED_SOURCED_DATA_PARAMS = "nested_sourced_data_params" METADATA_PARAMS 
= "metadata_params" def self.from_hash(hash) @@ -49,9 +46,7 @@ def self.from_hash(hash) routing_value_source: hash[ROUTING_VALUE_SOURCE], rollover_timestamp_value_source: hash[ROLLOVER_TIMESTAMP_VALUE_SOURCE], top_level_fields_params: Param.load_params_hash(hash[TOP_LEVEL_FIELDS_PARAMS] || {}), - nested_sourced_fields_params: Param.load_params_hash(hash[NESTED_SOURCED_FIELDS_PARAMS] || {}), - nested_sourced_path_identifiers_params: Param.load_params_hash(hash[NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS] || {}), - nested_sourced_paths: hash[NESTED_SOURCED_PATHS] || {}, + nested_sourced_data_params: NestedSourcedDataParams.from_hash(hash[NESTED_SOURCED_DATA_PARAMS] || {}), metadata_params: Param.load_params_hash(hash[METADATA_PARAMS] || {}) ) end @@ -61,9 +56,7 @@ def to_dumpable_hash # Keys here are ordered alphabetically; please keep them that way. ID_SOURCE => id_source, METADATA_PARAMS => Param.dump_params_hash(metadata_params), - NESTED_SOURCED_FIELDS_PARAMS => Param.dump_params_hash(nested_sourced_fields_params), - NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS => Param.dump_params_hash(nested_sourced_path_identifiers_params), - NESTED_SOURCED_PATHS => nested_sourced_paths, + NESTED_SOURCED_DATA_PARAMS => nested_sourced_data_params.to_dumpable_hash, RELATIONSHIP => relationship, ROLLOVER_TIMESTAMP_VALUE_SOURCE => rollover_timestamp_value_source, ROUTING_VALUE_SOURCE => routing_value_source, @@ -78,29 +71,18 @@ def for_normal_indexing? 
end def params_for(doc_id:, event:, prepared_record:) - top_level_fields = top_level_fields_params.to_h do |name, param| - [name, param.value_for(prepared_record)] + top_level_fields = top_level_fields_params.transform_values do |param| + param.value_for(prepared_record) end - meta = metadata_params.to_h do |name, param| - [name, param.value_for(event)] + meta = metadata_params.transform_values do |param| + param.value_for(event) end - nested_sourced_fields = nested_sourced_fields_params.to_h do |name, param| - [name, param.value_for(prepared_record)] - end - - nested_sourced_path_identifiers = nested_sourced_path_identifiers_params.to_h do |name, param| - [name, param.value_for(prepared_record)] - end - - meta.merge({ - "id" => doc_id, - "topLevelFields" => top_level_fields, - "nestedSourcedFields" => nested_sourced_fields, - "nestedSourcedPathIdentifiers" => nested_sourced_path_identifiers, - "nestedSourcedPaths" => nested_sourced_paths - }) + meta.merge( + {"id" => doc_id, "topLevelFields" => top_level_fields}, + nested_sourced_data_params.script_params_for(prepared_record) + ) end end end diff --git a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rbs b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rbs index fc6bacfcd..d1e52c042 100644 --- a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rbs +++ b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_definition.rbs @@ -8,6 +8,7 @@ module ElasticGraph attr_reader current_sources: ::Set[::String] attr_reader fields_by_path: ::Hash[::String, IndexField] attr_reader has_had_multiple_sources: bool + attr_reader nested_sourced_paths: ::Hash[::String, ::Array[nestedSourcedPathSegment]] def initialize: ( route_with: ::String, @@ -15,7 +16,8 @@ module ElasticGraph default_sort_fields: ::Array[SortField], current_sources: 
::Set[::String], fields_by_path: ::Hash[::String, IndexField], - has_had_multiple_sources: bool + has_had_multiple_sources: bool, + ?nested_sourced_paths: ::Hash[::String, ::Array[nestedSourcedPathSegment]] ) -> void def with: ( @@ -24,7 +26,8 @@ module ElasticGraph ?default_sort_fields: ::Array[SortField], ?current_sources: ::Enumerable[::String], ?fields_by_path: ::Hash[::String, IndexField], - ?has_had_multiple_sources: bool + ?has_had_multiple_sources: bool, + ?nested_sourced_paths: ::Hash[::String, ::Array[nestedSourcedPathSegment]] ) -> IndexDefinition end @@ -35,6 +38,7 @@ module ElasticGraph CURRENT_SOURCES: "current_sources" FIELDS_BY_PATH: "fields_by_path" HAS_HAD_MULTIPLE_SOURCES: "has_had_multiple_sources" + NESTED_SOURCED_PATHS: "nested_sourced_paths" def initialize: ( route_with: ::String, @@ -42,7 +46,8 @@ module ElasticGraph default_sort_fields: ::Array[SortField], current_sources: ::Enumerable[::String], fields_by_path: ::Hash[::String, IndexField], - has_had_multiple_sources: bool + has_had_multiple_sources: bool, + ?nested_sourced_paths: ::Hash[::String, ::Array[nestedSourcedPathSegment]] ) -> void def self.from_hash: (::Hash[::String, untyped]) -> IndexDefinition diff --git a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rbs b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rbs new file mode 100644 index 000000000..3a00c7632 --- /dev/null +++ b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_data_params.rbs @@ -0,0 +1,23 @@ +module ElasticGraph + module SchemaArtifacts + module RuntimeMetadata + class NestedSourcedDataParamsSuperType + attr_reader field_params: paramsHash + attr_reader path_identifier_params: paramsHash + + def initialize: (field_params: paramsHash, path_identifier_params: paramsHash) -> void + end + + class NestedSourcedDataParams < 
NestedSourcedDataParamsSuperType + FIELD_PARAMS: "field_params" + PATH_IDENTIFIER_PARAMS: "path_identifier_params" + EMPTY: NestedSourcedDataParams + + def self.from_hash: (::Hash[::String, untyped]) -> NestedSourcedDataParams + def to_dumpable_hash: () -> ::Hash[::String, untyped] + def empty?: () -> bool + def script_params_for: (::Hash[::String, untyped]) -> ::Hash[::String, ::Hash[::String, untyped]] + end + end + end +end diff --git a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs new file mode 100644 index 000000000..f912a96a8 --- /dev/null +++ b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs @@ -0,0 +1,43 @@ +module ElasticGraph + module SchemaArtifacts + module RuntimeMetadata + class ListPathSegmentSuperType + attr_reader field: ::String + attr_reader match_field: ::String + attr_reader source_field: ::String + + def initialize: (field: ::String, match_field: ::String, source_field: ::String) -> void + end + + class ListPathSegment < ListPathSegmentSuperType + TYPE: "list" + FIELD: "field" + MATCH_FIELD: "matchField" + SOURCE_FIELD: "sourceField" + + def self.from_hash: (::Hash[::String, untyped]) -> ListPathSegment + def to_dumpable_hash: () -> ::Hash[::String, ::String] + end + + class ObjectPathSegmentSuperType + attr_reader field: ::String + + def initialize: (field: ::String) -> void + end + + class ObjectPathSegment < ObjectPathSegmentSuperType + TYPE: "object" + FIELD: "field" + + def self.from_hash: (::Hash[::String, untyped]) -> ObjectPathSegment + def to_dumpable_hash: () -> ::Hash[::String, ::String] + end + + type nestedSourcedPathSegment = ListPathSegment | ObjectPathSegment + + module NestedSourcedPathSegment + def self.from_hash: (::Hash[::String, untyped]) -> nestedSourcedPathSegment? 
+ end + end + end +end diff --git a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs index 46d9ac97c..99647cd38 100644 --- a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs +++ b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/update_target.rbs @@ -9,9 +9,7 @@ module ElasticGraph attr_reader routing_value_source: ::String? attr_reader rollover_timestamp_value_source: ::String? attr_reader top_level_fields_params: paramsHash - attr_reader nested_sourced_fields_params: paramsHash - attr_reader nested_sourced_path_identifiers_params: paramsHash - attr_reader nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]] + attr_reader nested_sourced_data_params: NestedSourcedDataParams attr_reader metadata_params: paramsHash def initialize: ( @@ -22,9 +20,7 @@ module ElasticGraph routing_value_source: ::String?, rollover_timestamp_value_source: ::String?, top_level_fields_params: paramsHash, - nested_sourced_fields_params: paramsHash, - nested_sourced_path_identifiers_params: paramsHash, - nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]], + nested_sourced_data_params: NestedSourcedDataParams, metadata_params: paramsHash ) -> void @@ -36,9 +32,7 @@ module ElasticGraph ?routing_value_source: ::String?, ?rollover_timestamp_value_source: ::String?, ?top_level_fields_params: paramsHash, - ?nested_sourced_fields_params: paramsHash, - ?nested_sourced_path_identifiers_params: paramsHash, - ?nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]], + ?nested_sourced_data_params: NestedSourcedDataParams, ?metadata_params: paramsHash ) -> UpdateTarget @@ -55,9 +49,7 @@ module ElasticGraph ROUTING_VALUE_SOURCE: "routing_value_source" ROLLOVER_TIMESTAMP_VALUE_SOURCE: 
"rollover_timestamp_value_source" TOP_LEVEL_FIELDS_PARAMS: "top_level_fields_params" - NESTED_SOURCED_FIELDS_PARAMS: "nested_sourced_fields_params" - NESTED_SOURCED_PATH_IDENTIFIERS_PARAMS: "nested_sourced_path_identifiers_params" - NESTED_SOURCED_PATHS: "nested_sourced_paths" + NESTED_SOURCED_DATA_PARAMS: "nested_sourced_data_params" METADATA_PARAMS: "metadata_params" def self.from_hash: (::Hash[::String, untyped]) -> UpdateTarget diff --git a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb index 6cbdb2e2e..132a82dc7 100644 --- a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb +++ b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb @@ -43,9 +43,7 @@ module RuntimeMetadata routing_value_source: "cost.currency_name", rollover_timestamp_value_source: "currency_introduced_on", top_level_fields_params: {"workspace_id" => DynamicParam.new(source_path: "wid", cardinality: :one)}, - nested_sourced_fields_params: {}, - nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {}, + nested_sourced_data_params: NestedSourcedDataParams::EMPTY, metadata_params: {"relationshipName" => StaticParam.new(value: "currency")} ), UpdateTarget.new( @@ -56,9 +54,7 @@ module RuntimeMetadata routing_value_source: nil, rollover_timestamp_value_source: nil, top_level_fields_params: {}, - nested_sourced_fields_params: {}, - nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {}, + nested_sourced_data_params: NestedSourcedDataParams::EMPTY, metadata_params: {} ) ], @@ -317,9 +313,7 @@ module RuntimeMetadata routing_value_source: nil, rollover_timestamp_value_source: nil, top_level_fields_params: {"workspace_id" => dynamic_param_with(cardinality: :many)}, - nested_sourced_fields_params: {}, - 
nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {}, + nested_sourced_data_params: NestedSourcedDataParams::EMPTY, metadata_params: {} )]), "IndexDefinitionNamesOnly" => object_type_with(index_definition_names: ["foo", "bar"]), diff --git a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb index 95eb6a41b..2050a653b 100644 --- a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb +++ b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/update_target_spec.rb @@ -26,9 +26,7 @@ module RuntimeMetadata routing_value_source: nil, rollover_timestamp_value_source: nil, top_level_fields_params: {}, - nested_sourced_fields_params: {}, - nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {}, + nested_sourced_data_params: NestedSourcedDataParams::EMPTY, metadata_params: {} ) end @@ -87,7 +85,7 @@ module RuntimeMetadata } ) - without_omitted_fields = params.except("id", "topLevelFields", "nestedSourcedFields", "nestedSourcedPathIdentifiers", "nestedSourcedPaths") + without_omitted_fields = params.except("id", "topLevelFields", "nestedSourcedFields", "nestedSourcedPathIdentifiers") expect(without_omitted_fields).to eq( "foo" => 43, diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb index d524f7f02..a8cd351f9 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb @@ -273,9 +273,7 @@ def runtime_metadata_for_source_type top_level_fields_params: 
fields.map(&:source_field).to_h do |f| [f, SchemaArtifacts::RuntimeMetadata::DynamicParam.new(source_path: f, cardinality: :many)] end, - nested_sourced_fields_params: {}, - nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {} + nested_sourced_data_params: SchemaArtifacts::RuntimeMetadata::NestedSourcedDataParams::EMPTY ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index 07e4dee50..89cd9283d 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -38,7 +38,7 @@ module Indexing # @return [RolloverConfig, nil] rollover configuration for the index # @!attribute [r] has_had_multiple_sources_flag # @return [Boolean] whether this index has ever had multiple sources - class Index < Struct.new(:name, :default_sort_pairs, :settings, :schema_def_state, :indexed_type, :routing_field_path, :rollover_config, :has_had_multiple_sources_flag) + class Index < Struct.new(:name, :default_sort_pairs, :settings, :schema_def_state, :indexed_type, :routing_field_path, :rollover_config, :has_had_multiple_sources_flag, :nested_sourced_paths) include Mixins::HasReadableToSAndInspect.new { |i| i.name } # @param name [String] name of the index @@ -55,7 +55,7 @@ def initialize(name, settings, schema_def_state, indexed_type) settings = DEFAULT_SETTINGS.merge(Support::HashUtil.flatten_and_stringify_keys(settings, prefix: "index")) - super(name, [], settings, schema_def_state, indexed_type, nil, nil, false) + super(name, [], settings, schema_def_state, indexed_type, nil, nil, false, {}) schema_def_state.after_user_definition_complete do # `id` is the field Elasticsearch/OpenSearch use for routing by default: @@ -251,6 +251,13 @@ def to_index_template_config } end + # Registers the nested sourced path 
segments for a relationship on this index. + # Called by `NestedUpdateTargetResolver` during schema resolution. + # @api private + def register_nested_sourced_paths(relationship_name, path_segments) + nested_sourced_paths[relationship_name] = path_segments + end + # @return [SchemaArtifacts::RuntimeMetadata::IndexDefinition] runtime metadata for this index def runtime_metadata SchemaArtifacts::RuntimeMetadata::IndexDefinition.new( @@ -264,7 +271,8 @@ def runtime_metadata direction: direction ) end, - has_had_multiple_sources: has_had_multiple_sources_flag + has_had_multiple_sources: has_had_multiple_sources_flag, + nested_sourced_paths: nested_sourced_paths ) end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb index 314f33510..541aa0bb7 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rb @@ -22,103 +22,22 @@ def initialize(schema_def_state:) end # Resolves the chain starting from `starting_relationship` (which must have a - # `parent_relationship_config`) on `starting_type`. + # `parent_ref`) on `starting_type`. # # Returns a tuple of [resolved_chain, errors]. # If errors is non-empty, resolved_chain will be nil. def resolve(starting_relationship, starting_type) errors = [] # : ::Array[::String] chain = [] # : ::Array[PathSegment] - current_rel = starting_relationship - current_type = starting_type visited_types = ::Set.new([starting_type.name]) - # Walk from leaf to root, building path segments in reverse. Each iteration validates - # the current relationship's parent_relationship link and advances up one level. 
- while current_rel.parent_relationship_config - config = current_rel.parent_relationship_config - - # Validate that parent_relationship is used with indexing_only - unless current_rel.indexing_only - errors << "#{rel_description(current_type, current_rel)} uses `parent_relationship` but is not declared with " \ - "`indexing_only: true`. Relationships with `parent_relationship` must be indexing-only." - break - end - - # Detect circular chains - if visited_types.include?(config[:parent_type_name]) - errors << "#{rel_description(current_type, current_rel)} creates a circular `parent_relationship` chain " \ - "— `#{config[:parent_type_name]}` was already visited. The chain must terminate at a root indexed type." - break - end - - # Find the parent type - parent_type = @schema_def_state.object_types_by_name[config[:parent_type_name]] - unless parent_type - errors << "#{rel_description(current_type, current_rel)} references parent type " \ - "`#{config[:parent_type_name]}` via `parent_relationship`, but that type does not exist." - break - end - - # Find the parent relationship - parent_rel = parent_type.relationships_by_name[config[:parent_relationship_name]] - unless parent_rel - errors << "#{rel_description(current_type, current_rel)} references parent relationship " \ - "`#{parent_type.name}.#{config[:parent_relationship_name]}` via `parent_relationship`, " \ - "but that relationship does not exist. Is it misspelled?" - break - end - - # Validate both relationships target the same source type - current_source_type_name = current_rel.related_type.unwrap_non_null.name - parent_source_type_name = parent_rel.related_type.unwrap_non_null.name - unless current_source_type_name == parent_source_type_name - errors << "#{rel_description(current_type, current_rel)} relates to `#{current_source_type_name}`, " \ - "but its parent relationship `#{parent_type.name}.#{config[:parent_relationship_name]}` relates to " \ - "`#{parent_source_type_name}`. 
All relationships in a `parent_relationship` chain must relate to the same source type." - break - end - - # Find the embedding field (field on parent_type whose type is current_type) - embedding_field = find_embedding_field(parent_type, current_type, errors) - unless embedding_field - break if errors.any? - errors << "#{rel_description(current_type, current_rel)} declares `#{parent_type.name}` as its parent type " \ - "via `parent_relationship`, but `#{parent_type.name}` has no field of type `#{current_type.name}`." - break - end - - # For list segments, validate that the embedded type has an `id` field to match on. - if embedding_field.type.list? - unless current_type.indexing_fields_by_name_in_index["id"] - errors << "#{rel_description(current_type, current_rel)} requires an `id` field on `#{current_type.name}` " \ - "for nested element matching, but `#{current_type.name}` has no field named `id`." - break - end - end - - source_field_name = current_rel.foreign_key - - # We use "id" as the match field, consistent with how ElasticGraph relationships always join on `id` - # via foreign keys. In the future, it would be nice if this field name were configurable. Additionally, the - # composite key separator ":" in the Painless script assumes id values do not contain that character. - # It would be nice to explicitly guard against that somehow. - chain << PathSegment.new( - parent_type: parent_type, - embedding_field: embedding_field, - match_field: "id", - source_field: source_field_name - ) - - # Move up the chain - current_rel = parent_rel - current_type = parent_type - visited_types.add(parent_type.name) - end + current_rel, current_type = resolve_chain( + starting_relationship, starting_type, chain, errors, visited_types + ) return [nil, errors] if errors.any? - # The loop terminated because current_rel has no parent_relationship_config — + # The recursion terminated because current_rel has no parent_ref — # this is the root relationship. 
Validate that current_type is indexed. unless current_type.root_document_type? errors << "The `parent_relationship` chain from #{rel_description(starting_type, starting_relationship)} " \ @@ -138,6 +57,97 @@ def resolve(starting_relationship, starting_type) private + # Recursively walks from leaf to root, building path segments in reverse. + # Returns the final [relationship, type] tuple when the chain terminates + # (i.e., no more parent_ref), or short-circuits on errors. + def resolve_chain(current_rel, current_type, chain, errors, visited_types) + ref = current_rel.parent_ref + return [current_rel, current_type] unless ref + + parent_type, parent_rel = validate_link(current_rel, current_type, ref, errors, visited_types) + return [current_rel, current_type] if errors.any? + + build_path_segment(current_rel, current_type, parent_type, chain, errors) + return [current_rel, current_type] if errors.any? + + visited_types.add(parent_type.name) + resolve_chain(parent_rel, parent_type, chain, errors, visited_types) + end + + # Validates a single link in the chain: checks indexing_only, circular refs, + # parent type existence, parent relationship existence, and source type consistency. + # Returns [parent_type, parent_rel] on success, or appends to errors and returns nils. + def validate_link(current_rel, current_type, ref, errors, visited_types) + unless current_rel.indexing_only + errors << "#{rel_description(current_type, current_rel)} uses `parent_relationship` but is not declared with " \ + "`indexing_only: true`. Relationships with `parent_relationship` must be indexing-only." + return [nil, nil] + end + + parent_type_name = ref.type_ref.name + if visited_types.include?(parent_type_name) + errors << "#{rel_description(current_type, current_rel)} creates a circular `parent_relationship` chain " \ + "— `#{parent_type_name}` was already visited. The chain must terminate at a root indexed type." 
+ return [nil, nil] + end + + parent_type = ref.type_ref.as_object_type + unless parent_type + errors << "#{rel_description(current_type, current_rel)} references parent type " \ + "`#{parent_type_name}` via `parent_relationship`, but that type does not exist." + return [nil, nil] + end + + parent_rel = parent_type.relationships_by_name[ref.relationship_name] + unless parent_rel + errors << "#{rel_description(current_type, current_rel)} references parent relationship " \ + "`#{parent_type.name}.#{ref.relationship_name}` via `parent_relationship`, " \ + "but that relationship does not exist. Is it misspelled?" + return [nil, nil] + end + + current_source_type_name = current_rel.related_type.unwrap_non_null.name + parent_source_type_name = parent_rel.related_type.unwrap_non_null.name + unless current_source_type_name == parent_source_type_name + errors << "#{rel_description(current_type, current_rel)} relates to `#{current_source_type_name}`, " \ + "but its parent relationship `#{parent_type.name}.#{ref.relationship_name}` relates to " \ + "`#{parent_source_type_name}`. All relationships in a `parent_relationship` chain must relate to the same source type." + return [nil, nil] + end + + [parent_type, parent_rel] + end + + # Builds a PathSegment for the current level and appends it to chain. + # Validates the embedding field exists and (for list segments) that the child type has an id field. + def build_path_segment(current_rel, current_type, parent_type, chain, errors) + embedding_field = find_embedding_field(parent_type, current_type, errors) + return if errors.any? + + unless embedding_field + errors << "#{rel_description(current_type, current_rel)} declares `#{parent_type.name}` as its parent type " \ + "via `parent_relationship`, but `#{parent_type.name}` has no field of type `#{current_type.name}`." + return + end + + if embedding_field.type.list? 
+ unless current_type.indexing_fields_by_name_in_index["id"] + errors << "#{rel_description(current_type, current_rel)} requires an `id` field on `#{current_type.name}` " \ + "for nested element matching, but `#{current_type.name}` has no field named `id`." + return + end + end + + # We use "id" as the match field, consistent with how ElasticGraph relationships always join on `id` + # via foreign keys. In the future, it would be nice if this field name were configurable. + chain << PathSegment.new( + parent_type: parent_type, + embedding_field: embedding_field, + match_field: "id", + source_field: current_rel.foreign_key + ) + end + def find_embedding_field(parent_type, child_type, errors) matches = parent_type.graphql_fields_by_name.values.select do |field| field.type.fully_unwrapped.name == child_type.name diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb index 8f037028e..46c6acde0 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb @@ -6,6 +6,7 @@ # # frozen_string_literal: true +require "elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment" require "elastic_graph/schema_artifacts/runtime_metadata/params" require "elastic_graph/schema_definition/indexing/update_target_factory" @@ -44,10 +45,10 @@ def resolve return [nil, errors] end - nested_sourced_fields_params = resolve_nested_sourced_fields_params(errors) - return [nil, errors] if nested_sourced_fields_params.empty? && errors.any? + field_params = resolve_nested_sourced_data_params(errors) + return [nil, errors] if field_params.empty? && errors.any? 
- nested_sourced_path_identifiers_params = build_path_identifier_params + path_identifier_params = build_path_identifier_params nested_sourced_paths = build_nested_sourced_paths routing_value_source = resolve_routing(errors) rollover_timestamp_value_source = resolve_rollover(errors) @@ -56,20 +57,22 @@ def resolve if errors.any? [nil, errors] else - # Wrap in map keyed by relationship name — the script uses this to look up - # the path config for the specific relationship being processed. - nested_sourced_paths_map = {relationship.name => nested_sourced_paths} + # Register the path config on the destination index so it's available at runtime. + resolved_chain.root_indexed_type.index_def.register_nested_sourced_paths(relationship.name, nested_sourced_paths) + + nested_sourced_data_params = SchemaArtifacts::RuntimeMetadata::NestedSourcedDataParams.new( + field_params: field_params, + path_identifier_params: path_identifier_params + ) update_target = UpdateTargetFactory.new_normal_indexing_update_target( type: resolved_chain.root_indexed_type.name, relationship: relationship.name, id_source: resolved_chain.root_relationship.foreign_key, top_level_fields_params: {}, - nested_sourced_fields_params: nested_sourced_fields_params, - nested_sourced_path_identifiers_params: nested_sourced_path_identifiers_params, + nested_sourced_data_params: nested_sourced_data_params, routing_value_source: routing_value_source, - rollover_timestamp_value_source: rollover_timestamp_value_source, - nested_sourced_paths: nested_sourced_paths_map + rollover_timestamp_value_source: rollover_timestamp_value_source ) [update_target, errors] @@ -85,7 +88,7 @@ def related_type @related_type ||= schema_def_state.object_types_by_name[relationship.related_type.unwrap_non_null.name] end - def resolve_nested_sourced_fields_params(errors) + def resolve_nested_sourced_data_params(errors) sourced_fields.filter_map do |field| field_source = field.source # : SchemaElements::FieldSource referenced_field_path 
= field_path_resolver.resolve_public_path(related_type, field_source.field_path) do |parent_field| @@ -122,13 +125,15 @@ def build_path_identifier_params def build_nested_sourced_paths resolved_chain.path_segments.map do |segment| if segment.embedding_field.type.list? - { - "list" => segment.embedding_field.name_in_index, - "match_field" => segment.match_field, - "source_field" => segment.source_field - } + SchemaArtifacts::RuntimeMetadata::ListPathSegment.new( + field: segment.embedding_field.name_in_index, + match_field: segment.match_field, + source_field: segment.source_field + ) else - {"object" => segment.embedding_field.name_in_index} + SchemaArtifacts::RuntimeMetadata::ObjectPathSegment.new( + field: segment.embedding_field.name_in_index + ) end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb new file mode 100644 index 000000000..49d5c9981 --- /dev/null +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb @@ -0,0 +1,165 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/errors" +require "elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver" +require "elastic_graph/schema_definition/indexing/nested_update_target_resolver" +require "elastic_graph/schema_definition/indexing/relationship_resolver" +require "elastic_graph/schema_definition/indexing/update_target_resolver" + +module ElasticGraph + module SchemaDefinition + module Indexing + # Resolves all `sourced_from` relationships across the schema into update targets, + # keyed by the source type name that publishes the events. 
+ # + # @private + class SourcedUpdateTargetsResolver + def initialize(schema_def_state:) + @schema_def_state = schema_def_state + @sourced_field_errors = [] # : ::Array[::String] + @relationship_errors = [] # : ::Array[::String] + @sourced_update_targets_by_type_name = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] + end + + # Returns a map of object type name → list of sourced update targets for that type. + def resolve + @schema_def_state.object_types_by_name.except(*@schema_def_state.namespace_types_by_name.keys).values.each do |object_type| + resolve_for_type(object_type) + end + + raise_if_errors + @sourced_update_targets_by_type_name + end + + private + + def resolve_for_type(object_type) + fields_with_sources_by_relationship_name = + if object_type.own_index_def.nil? + # only indexed types can have `sourced_from` fields, and resolving `fields_with_sources` on an unindexed union type + # such as `_Entity` when we are using apollo can lead to exceptions when multiple entity types have the same field name + # that use different mapping types. + {} # : ::Hash[::String, ::Array[SchemaElements::Field]] + else + object_type + .fields_with_sources + .group_by { |f| (_ = f.source).relationship_name } + end + + defined_relationships = object_type.relationships_by_name.keys + + (defined_relationships | fields_with_sources_by_relationship_name.keys).each do |relationship_name| + sourced_fields = fields_with_sources_by_relationship_name.fetch(relationship_name) { [] } + relationship_resolver = RelationshipResolver.new( + schema_def_state: @schema_def_state, + object_type: object_type, + relationship_name: relationship_name, + sourced_fields: sourced_fields + ) + + resolved_relationship, relationship_error = relationship_resolver.resolve + @relationship_errors << relationship_error if relationship_error + + if object_type.own_index_def && resolved_relationship && sourced_fields.any? 
+ resolve_top_level_update_target(object_type, resolved_relationship, sourced_fields) + end + end + + # Process nested sourced_from fields on non-indexed types. + if object_type.own_index_def.nil? + resolve_nested_update_targets(object_type) + end + end + + def resolve_top_level_update_target(object_type, resolved_relationship, sourced_fields) + update_target_resolver = UpdateTargetResolver.new( + object_type: object_type, + resolved_relationship: resolved_relationship, + sourced_fields: sourced_fields, + field_path_resolver: @schema_def_state.field_path_resolver + ) + + update_target, errors = update_target_resolver.resolve + @sourced_update_targets_by_type_name[resolved_relationship.related_type.name] << update_target if update_target + @sourced_field_errors.concat(errors) + + # Validate that has_had_multiple_sources! has been called when sourced_from is used + if (index_def = object_type.own_index_def) && !index_def.has_had_multiple_sources_flag + @sourced_field_errors << "Type `#{object_type.name}` uses `sourced_from` fields but its index `#{index_def.name}` " \ + "has not been configured with `has_had_multiple_sources!`. To resolve this, add `i.has_had_multiple_sources!` within the " \ + "`t.index \"#{index_def.name}\"` block. This flag is required because indices with multiple sources can contain " \ + "incomplete documents, and ElasticGraph needs to know this to apply proper filtering. Once set, this flag should remain even " \ + "if you later remove all `sourced_from` fields, as the index may still contain historical incomplete documents." + end + end + + def resolve_nested_update_targets(object_type) + nested_relationships = object_type.relationships_by_name + .select { |_, rel| rel.parent_ref } + + return if nested_relationships.empty? + + fields_with_sources_by_relationship_name = object_type + .indexing_fields_by_name_in_index.values + .reject { |f| f.source.nil? 
} + .group_by { |f| (_ = f.source).relationship_name } + + nested_relationships.each do |rel_name, relationship| + empty_fields = [] # : ::Array[SchemaElements::Field] + sourced_fields = fields_with_sources_by_relationship_name.fetch(rel_name) { empty_fields } + + next if sourced_fields.empty? + + chain_resolver = NestedRelationshipChainResolver.new(schema_def_state: @schema_def_state) + resolved_chain, chain_errors = chain_resolver.resolve(relationship, object_type) + + if chain_errors.any? + @sourced_field_errors.concat(chain_errors) + next + end + + resolved_chain = _ = resolved_chain # : ResolvedNestedChain + resolver = NestedUpdateTargetResolver.new( + object_type: object_type, + relationship: relationship, + sourced_fields: sourced_fields, + resolved_chain: resolved_chain, + field_path_resolver: @schema_def_state.field_path_resolver, + schema_def_state: @schema_def_state + ) + + update_target, resolve_errors = resolver.resolve + @sourced_field_errors.concat(resolve_errors) + + next unless update_target + + related_type_name = relationship.related_type.unwrap_non_null.name + @sourced_update_targets_by_type_name[related_type_name] << update_target + end + end + + def raise_if_errors + full_errors = [] # : ::Array[::String] + + if @sourced_field_errors.any? + full_errors << "Schema had #{@sourced_field_errors.size} error(s) related to `sourced_from` fields:\n\n#{@sourced_field_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" + end + + if @relationship_errors.any? + full_errors << "Schema had #{@relationship_errors.size} error(s) related to relationship fields:\n\n#{@relationship_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" + end + + unless full_errors.empty? 
+ raise Errors::SchemaError, full_errors.join("\n\n") + end + end + end + end + end +end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb index 2c0dc5199..e17d66d16 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb @@ -19,9 +19,7 @@ def self.new_normal_indexing_update_target( top_level_fields_params:, routing_value_source:, rollover_timestamp_value_source:, - nested_sourced_fields_params: {}, - nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {} + nested_sourced_data_params: SchemaArtifacts::RuntimeMetadata::NestedSourcedDataParams::EMPTY ) SchemaArtifacts::RuntimeMetadata::UpdateTarget.new( type: type, @@ -31,9 +29,7 @@ def self.new_normal_indexing_update_target( routing_value_source: routing_value_source, rollover_timestamp_value_source: rollover_timestamp_value_source, top_level_fields_params: top_level_fields_params, - nested_sourced_fields_params: nested_sourced_fields_params, - nested_sourced_path_identifiers_params: nested_sourced_path_identifiers_params, - nested_sourced_paths: nested_sourced_paths, + nested_sourced_data_params: nested_sourced_data_params, metadata_params: standard_metadata_params.merge({ "relationship" => SchemaArtifacts::RuntimeMetadata::StaticParam.new(value: relationship) }) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb index a027ef89e..68e47212b 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/results.rb @@ -12,10 +12,7 @@ require 
"elastic_graph/schema_artifacts/artifacts_helper_methods" require "elastic_graph/schema_definition/indexing/event_envelope" require "elastic_graph/schema_definition/indexing/json_schema_with_metadata" -require "elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver" -require "elastic_graph/schema_definition/indexing/nested_update_target_resolver" -require "elastic_graph/schema_definition/indexing/relationship_resolver" -require "elastic_graph/schema_definition/indexing/update_target_resolver" +require "elastic_graph/schema_definition/indexing/sourced_update_targets_resolver" require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect" require "elastic_graph/schema_definition/schema_elements/field_path" require "elastic_graph/schema_definition/scripting/file_system_repository" @@ -145,11 +142,11 @@ def build_dynamic_scripts end def build_runtime_metadata - extra_update_targets_by_object_type_name = identify_extra_update_targets_by_object_type_name + sourced_update_targets_by_type_name = Indexing::SourcedUpdateTargetsResolver.new(schema_def_state: state).resolve object_types_by_name = all_types .select { |t| t.respond_to?(:graphql_fields_by_name) } - .to_h { |type| [type.name, (_ = type).runtime_metadata(extra_update_targets_by_object_type_name.fetch(type.name) { [] })] } + .to_h { |type| [type.name, (_ = type).runtime_metadata(sourced_update_targets_by_type_name.fetch(type.name) { [] })] } scalar_types_by_name = state.scalar_types_by_name.transform_values(&:runtime_metadata) @@ -182,138 +179,6 @@ def build_runtime_metadata ).tap { |rm| verify_runtime_metadata(rm) } end - # Builds a map, keyed by object type name, of extra `update_targets` that have been generated - # from any fields that use `sourced_from` on other types. 
- def identify_extra_update_targets_by_object_type_name - sourced_field_errors = [] # : ::Array[::String] - relationship_errors = [] # : ::Array[::String] - extra_update_targets_by_type_name = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] - - state.object_types_by_name.except(*state.namespace_types_by_name.keys).values.each do |object_type| - fields_with_sources_by_relationship_name = - if object_type.own_index_def.nil? - # only indexed types can have `sourced_from` fields, and resolving `fields_with_sources` on an unindexed union type - # such as `_Entity` when we are using apollo can lead to exceptions when multiple entity types have the same field name - # that use different mapping types. - {} # : ::Hash[::String, ::Array[SchemaElements::Field]] - else - object_type - .fields_with_sources - .group_by { |f| (_ = f.source).relationship_name } - end - - defined_relationships = object_type.relationships_by_name.keys - - (defined_relationships | fields_with_sources_by_relationship_name.keys).each do |relationship_name| - sourced_fields = fields_with_sources_by_relationship_name.fetch(relationship_name) { [] } - relationship_resolver = Indexing::RelationshipResolver.new( - schema_def_state: state, - object_type: object_type, - relationship_name: relationship_name, - sourced_fields: sourced_fields - ) - - resolved_relationship, relationship_error = relationship_resolver.resolve - relationship_errors << relationship_error if relationship_error - - if object_type.own_index_def && resolved_relationship && sourced_fields.any? 
- update_target_resolver = Indexing::UpdateTargetResolver.new( - object_type: object_type, - resolved_relationship: resolved_relationship, - sourced_fields: sourced_fields, - field_path_resolver: state.field_path_resolver - ) - - update_target, errors = update_target_resolver.resolve - extra_update_targets_by_type_name[resolved_relationship.related_type.name] << update_target if update_target - sourced_field_errors.concat(errors) - - # Validate that has_had_multiple_sources! has been called when sourced_from is used - if (index_def = object_type.own_index_def) && !index_def.has_had_multiple_sources_flag - sourced_field_errors << "Type `#{object_type.name}` uses `sourced_from` fields but its index `#{index_def.name}` " \ - "has not been configured with `has_had_multiple_sources!`. To resolve this, add `i.has_had_multiple_sources!` within the " \ - "`t.index \"#{index_def.name}\"` block. This flag is required because indices with multiple sources can contain " \ - "incomplete documents, and ElasticGraph needs to know this to apply proper filtering. Once set, this flag should remain even " \ - "if you later remove all `sourced_from` fields, as the index may still contain historical incomplete documents." - end - end - end - - # Process nested sourced_from fields on non-indexed types. - if object_type.own_index_def.nil? - identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, sourced_field_errors) - end - end - - full_errors = [] # : ::Array[::String] - - if sourced_field_errors.any? - full_errors << "Schema had #{sourced_field_errors.size} error(s) related to `sourced_from` fields:\n\n#{sourced_field_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" - end - - if relationship_errors.any? - full_errors << "Schema had #{relationship_errors.size} error(s) related to relationship fields:\n\n#{relationship_errors.map.with_index(1) { |e, i| "#{i}. #{e}" }.join("\n\n")}" - end - - unless full_errors.empty? 
- raise Errors::SchemaError, full_errors.join("\n\n") - end - - extra_update_targets_by_type_name - end - - # Identifies update targets for sourced_from fields on non-indexed embedded types - # that use parent_relationship chains. - def identify_nested_sourced_update_targets(object_type, extra_update_targets_by_type_name, errors) - # Find relationships on this type that have parent_relationship configured - nested_relationships = object_type.relationships_by_name - .select { |_, rel| rel.parent_relationship_config } - - return if nested_relationships.empty? - - # Find sourced_from fields on this type, grouped by relationship name - fields_with_sources_by_relationship_name = object_type - .indexing_fields_by_name_in_index.values - .reject { |f| f.source.nil? } - .group_by { |f| (_ = f.source).relationship_name } - - nested_relationships.each do |rel_name, relationship| - empty_fields = [] # : ::Array[SchemaElements::Field] - sourced_fields = fields_with_sources_by_relationship_name.fetch(rel_name) { empty_fields } - - next if sourced_fields.empty? - - # Resolve the chain from this type up to the root indexed type - chain_resolver = Indexing::NestedRelationshipChainResolver.new(schema_def_state: state) - resolved_chain, chain_errors = chain_resolver.resolve(relationship, object_type) - - if chain_errors.any? 
- errors.concat(chain_errors) - next - end - - # Resolve the update target - resolved_chain = _ = resolved_chain # : Indexing::ResolvedNestedChain - resolver = Indexing::NestedUpdateTargetResolver.new( - object_type: object_type, - relationship: relationship, - sourced_fields: sourced_fields, - resolved_chain: resolved_chain, - field_path_resolver: state.field_path_resolver, - schema_def_state: state - ) - - update_target, resolve_errors = resolver.resolve - errors.concat(resolve_errors) - - next unless update_target - - # Store on the source type - related_type_name = relationship.related_type.unwrap_non_null.name - extra_update_targets_by_type_name[related_type_name] << update_target - end - end - # Generates the SDL defined by your schema. Intended to be called only once # at the very end (after evaluating the "main" template). `Evaluator` calls this # automatically at the end. diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb index 2bb5c9a06..3acc8c590 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/relationship.rb @@ -37,7 +37,11 @@ module SchemaElements # end # end class Relationship < DelegateClass(Field) - # @dynamic related_type, foreign_key, hide_relationship_runtime_metadata, hide_relationship_runtime_metadata=, parent_relationship_config, indexing_only + # @dynamic related_type, foreign_key, hide_relationship_runtime_metadata, hide_relationship_runtime_metadata=, parent_ref, indexing_only + + # References a parent relationship in a nested sourced_from chain. 
+ # @private + ParentRef = ::Data.define(:type_ref, :relationship_name) # @return [ObjectType, InterfaceType, UnionType] the type this relationship relates to attr_reader :related_type @@ -49,9 +53,9 @@ class Relationship < DelegateClass(Field) # @private attr_accessor :hide_relationship_runtime_metadata - # @return [Hash, nil] configuration for parent relationship in a nested sourced_from chain + # @return [ParentRef, nil] reference to the parent relationship in a nested sourced_from chain # @private - attr_reader :parent_relationship_config + attr_reader :parent_ref # @return [Boolean] true if this relationship is for indexing only (not exposed in GraphQL) # @private @@ -68,7 +72,7 @@ def initialize(field, cardinality:, related_type:, foreign_key:, direction:, ind @indexing_only = indexing_only @equivalent_field_paths_by_local_path = {} @additional_filter = {} - @parent_relationship_config = nil + @parent_ref = nil end # Adds additional filter conditions to a relationship beyond the foreign key. @@ -164,45 +168,43 @@ def equivalent_field(path, locally_named: path) # ElasticGraph.define_schema do |schema| # schema.object_type "Team" do |t| # t.field "id", "ID!" - # t.field "seasons", "[Season!]" do |f| - # f.mapping type: "nested" - # end - # t.relates_to_many "gameScores", "GameScore", via: "teamId", dir: :in, indexing_only: true + # t.field "name", "String" + # t.field "players", "[Player!]" + # t.relates_to_many "statLines", "StatLine", via: "teamId", dir: :in, indexing_only: true # t.index "teams" do |i| # i.has_had_multiple_sources! # end # end # - # schema.object_type "Season" do |t| - # t.field "id", "ID" - # t.field "games", "[Game!]" do |f| - # f.mapping type: "nested" + # schema.object_type "Player" do |t| + # t.field "id", "ID!" 
+ # t.field "name", "String" + # t.field "goalsScored", "Int" do |f| + # f.sourced_from "statLine", "goals" # end - # t.relates_to_many "seasonGameScores", "GameScore", via: "seasonId", dir: :in, indexing_only: true do |r| - # r.parent_relationship "Team", "gameScores" + # t.relates_to_one "statLine", "StatLine", via: "playerId", dir: :in, indexing_only: true do |r| + # r.parent_relationship "Team", "statLines" # end # end # - # schema.object_type "Game" do |t| - # t.field "id", "ID" - # t.field "score", "Score" do |f| - # f.sourced_from "gameScore", "score" - # end - # t.relates_to_one "gameScore", "GameScore", via: "gameId", dir: :in, indexing_only: true do |r| - # r.parent_relationship "Season", "seasonGameScores" - # end + # schema.object_type "StatLine" do |t| + # t.field "id", "ID!" + # t.field "teamId", "ID" + # t.field "playerId", "ID" + # t.field "goals", "Int" + # t.index "stat_lines" # end # end def parent_relationship(parent_type_name, parent_relationship_name) - if @parent_relationship_config + if @parent_ref raise Errors::SchemaError, "`parent_relationship` has been called multiple times on `#{parent_type.name}.#{name}`, " \ "but each relationship can have only one `parent_relationship`." 
end - @parent_relationship_config = { - parent_type_name: parent_type_name, - parent_relationship_name: parent_relationship_name - } + @parent_ref = ParentRef.new( + type_ref: schema_def_state.type_ref(parent_type_name), + relationship_name: parent_relationship_name + ) end # Gets the `routing_value_source` from this relationship for the given `index`, based on the configured diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless index 4705d6681..9bc8dabe6 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless @@ -2,33 +2,61 @@ // Helper Functions // ============================================================ -// Builds a composite nested element key from path segments. +// Encodes a list of strings into a length-prefixed string. +// Each part is encoded as "length:value" and parts are concatenated directly. +// This encoding is unambiguous regardless of what characters the values contain. +String encodeKey(List parts) { + StringBuilder sb = new StringBuilder(); + for (String part : parts) { + sb.append(part.length()); + sb.append(':'); + sb.append(part); + } + return sb.toString(); +} + +// Decodes a length-prefixed string back into a list of strings. +List decodeKey(String key) { + List parts = new ArrayList(); + int i = 0; + while (i < key.length()) { + int colonPos = key.indexOf(":", i); + int length = Integer.parseInt(key.substring(i, colonPos)); + int valueStart = colonPos + 1; + parts.add(key.substring(valueStart, valueStart + length)); + i = valueStart + length; + } + return parts; +} + +// Builds a nested element key (as an encoded string) from path segments. 
// List segments contribute their matched identifier value; object segments contribute their field name. -String buildNestedElementKey(List pathSegments, Map pathIdentifiers) { +// Returns "" if no path segments are configured (i.e., this is not a nested sourced event). +String buildNestedElementKey(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { + List pathSegments = (List) nestedSourcedPaths.get(relationship); + if (pathSegments == null) { + return ""; + } List parts = new ArrayList(); for (Map segment : pathSegments) { - if (segment.containsKey("list")) { - parts.add(pathIdentifiers[segment.source_field]); + if ("list".equals(segment.get("type"))) { + parts.add(pathIdentifiers[segment.sourceField]); } else { - parts.add(segment.get("object")); + parts.add(segment.get("field")); } } - return String.join(":", parts); + return encodeKey(parts); } -// Looks up path segments for the given relationship and builds the nested element key. -// Returns "" if no path segments are configured (i.e., this is not a nested sourced event). -String buildNestedElementKeyForRelationship(String relationship, Map nestedSourcedPaths, Map pathIdentifiers) { - List pathSegments = (List) nestedSourcedPaths.get(relationship); - if (pathSegments == null) { - return ""; - } - return buildNestedElementKey(pathSegments, pathIdentifiers); -} - -// Splits a composite nested element key into a list of parts. -List splitNestedElementKey(String nestedElementKey) { - return Arrays.asList(nestedElementKey.splitOnToken(":")); +// Builds the versions key by combining the relationship name with the element key parts. +// For top-level events (empty element key), returns just the relationship name. 
+String buildVersionsKey(String relationship, String nestedElementKey) { + if (nestedElementKey.isEmpty()) { + return relationship; + } + List parts = decodeKey(nestedElementKey); + parts.add(0, relationship); + return encodeKey(parts); } // Finds an element in a list where element[matchField] equals matchValue. Returns null if not found. @@ -48,15 +76,14 @@ def navigateToNestedElement(Map source, List pathSegments, List keyParts) { for (int i = 0; i < pathSegments.size(); i++) { Map segment = (Map) pathSegments.get(i); - boolean isList = segment.containsKey("list"); - String field = isList ? (String) segment.get("list") : (String) segment.get("object"); + String field = (String) segment.get("field"); if (!current.containsKey(field)) { return null; } - if (isList) { - current = (Map) findInList((List) current.get(field), (String) segment.get("match_field"), (String) keyParts.get(i)); + if ("list".equals(segment.get("type"))) { + current = (Map) findInList((List) current.get(field), (String) segment.get("matchField"), (String) keyParts.get(i)); } else { current = (Map) current.get(field); } @@ -75,7 +102,7 @@ def navigateToNestedElement(Map source, List pathSegments, List keyParts) { // ============================================================ // Initializes internal bookkeeping structures (__sources, __versions, __counts, __nested_sourced_data). 
-void setup(Map source, String relationship, String nestedElementKey, Map nestedSourcedPaths, Map counts) { +void setup(Map source, String versionsKey, String relationship, String nestedElementKey, Map counts) { if (source.__sources == null) { source.__sources = []; } @@ -84,12 +111,8 @@ void setup(Map source, String relationship, String nestedElementKey, Map nestedS source.__versions = [:]; } - if (source.__versions[relationship] == null) { - source.__versions[relationship] = [:]; - } - - if (!nestedElementKey.isEmpty() && source.__versions[relationship][nestedElementKey] == null) { - source.__versions[relationship][nestedElementKey] = [:]; + if (source.__versions[versionsKey] == null) { + source.__versions[versionsKey] = [:]; } if (!nestedElementKey.isEmpty()) { @@ -97,7 +120,7 @@ void setup(Map source, String relationship, String nestedElementKey, Map nestedS source.__nested_sourced_data = [:]; } if (source.__nested_sourced_data[relationship] == null) { - source.__nested_sourced_data[relationship] = ["path_segments": nestedSourcedPaths.get(relationship), "data": [:]]; + source.__nested_sourced_data[relationship] = [:]; } } @@ -107,31 +130,19 @@ void setup(Map source, String relationship, String nestedElementKey, Map nestedS } // Validates that this event is allowed: no relationship mutation and no stale version. -void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String nestedElementKey) { - // For nested events, validate per-element. For top-level events, validate per-relationship. - Map versionsMap = nestedElementKey.isEmpty() - ? source.__versions[relationship] - : source.__versions[relationship][nestedElementKey]; +void validateSource(Map source, String id, String relationship, String sourceId, long eventVersion, String versionsKey) { + Map versionsMap = source.__versions[versionsKey]; // Check that no other source ID has previously written to this target. 
List previousSourceIds = versionsMap.keySet().stream().filter(key -> key != sourceId).collect(Collectors.toList()); if (previousSourceIds.size() > 0) { - if (nestedElementKey.isEmpty()) { - throw new IllegalArgumentException( - "Cannot update document " + id + " " + - "with data from related " + relationship + " " + sourceId + " " + - "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + - "but mutations of relationships used with `sourced_from` are not supported because " + - "allowing it could break ElasticGraph's out-of-order processing guarantees." - ); - } else { - throw new IllegalArgumentException( - "Cannot update nested element [" + nestedElementKey + "] on document " + id + " " + - "with data from " + relationship + " " + sourceId + " " + - "because this element was previously sourced from a different event (" + previousSourceIds + "). " + - "Each nested element can only be sourced from one source document." - ); - } + throw new IllegalArgumentException( + "Cannot update document " + id + " " + + "with data from related " + relationship + " " + sourceId + " " + + "because the related " + relationship + " has apparently changed (was: " + previousSourceIds + "), " + + "but mutations of relationships used with `sourced_from` are not supported because " + + "allowing it could break ElasticGraph's out-of-order processing guarantees." + ); } // Check that the event version is newer than what we've already seen. @@ -139,9 +150,8 @@ void validateSource(Map source, String id, String relationship, String sourceId, long docVersion = maybeDocVersion == null ? Long.MIN_VALUE : maybeDocVersion.longValue(); if (docVersion >= eventVersion) { - String target = nestedElementKey.isEmpty() ? 
id : id + "/" + nestedElementKey; throw new IllegalArgumentException("ElasticGraph update was a no-op: [" + - target + "]: version conflict, current version [" + + id + "]: version conflict, current version [" + docVersion + "] is higher or equal to the one provided [" + eventVersion + "]"); } @@ -163,29 +173,29 @@ void storeNestedSourcedData(Map source, String relationship, Map nestedSourcedFi return; } - ((Map) source.__nested_sourced_data[relationship]).get("data").put(nestedElementKey, nestedSourcedFields); + ((Map) source.__nested_sourced_data[relationship]).put(nestedElementKey, nestedSourcedFields); } // Applies nested sourced data from the __nested_sourced_data buffer to matched nested elements. -// Reads path config from the document itself — no external params needed. +// Reads path config from the nestedSourcedPaths param. // Called after every event so that after a self-event's putAll overwrites nested arrays, // the buffered data gets re-applied. -void applyNestedSourcedData(Map source) { +void applyNestedSourcedData(Map source, Map nestedSourcedPaths) { if (source.__nested_sourced_data == null) { return; } for (sourcedEntry in source.__nested_sourced_data.entrySet()) { - Map relationshipData = (Map) sourcedEntry.getValue(); - List pathSegments = (List) relationshipData.get("path_segments"); - Map dataByKey = (Map) relationshipData.get("data"); + String relationship = (String) sourcedEntry.getKey(); + Map dataByKey = (Map) sourcedEntry.getValue(); + List pathSegments = (List) nestedSourcedPaths.get(relationship); if (pathSegments == null || dataByKey == null) { continue; } for (elementEntry in dataByKey.entrySet()) { - List keyParts = splitNestedElementKey((String) elementEntry.getKey()); + List keyParts = decodeKey((String) elementEntry.getKey()); if (keyParts.size() != pathSegments.size()) { continue; } @@ -199,12 +209,8 @@ void applyNestedSourcedData(Map source) { } // Records the event version in __versions and adds the relationship to __sources. 
-void recordSource(Map source, String relationship, String sourceId, long eventVersion, String nestedElementKey) { - if (nestedElementKey.isEmpty()) { - source.__versions[relationship][sourceId] = eventVersion; - } else { - source.__versions[relationship][nestedElementKey][sourceId] = eventVersion; - } +void recordSource(Map source, String versionsKey, String relationship, String sourceId, long eventVersion) { + source.__versions[versionsKey][sourceId] = eventVersion; // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list. // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time. @@ -235,11 +241,12 @@ Map nestedSourcedFields = params.nestedSourcedFields; Map nestedSourcedPathIdentifiers = params.nestedSourcedPathIdentifiers; Map nestedSourcedPaths = params.nestedSourcedPaths; -String nestedElementKey = buildNestedElementKeyForRelationship(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); +String nestedElementKey = buildNestedElementKey(relationship, nestedSourcedPaths, nestedSourcedPathIdentifiers); +String versionsKey = buildVersionsKey(relationship, nestedElementKey); -setup(source, relationship, nestedElementKey, nestedSourcedPaths, counts); -validateSource(source, id, relationship, sourceId, eventVersion, nestedElementKey); +setup(source, versionsKey, relationship, nestedElementKey, counts); +validateSource(source, id, relationship, sourceId, eventVersion, versionsKey); applyTopLevelFields(source, id, params.topLevelFields, counts); storeNestedSourcedData(source, relationship, nestedSourcedFields, nestedElementKey); -applyNestedSourcedData(source); -recordSource(source, relationship, sourceId, eventVersion, nestedElementKey); +applyNestedSourcedData(source, nestedSourcedPaths); +recordSource(source, versionsKey, relationship, sourceId, eventVersion); diff --git 
a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs index 0dc296381..e898655a7 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs @@ -8,8 +8,10 @@ module ElasticGraph attr_reader rollover_config: RolloverConfig? attr_reader has_had_multiple_sources_flag: bool attr_reader indexed_type: indexableType + attr_reader nested_sourced_paths: ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::nestedSourcedPathSegment]] def uses_custom_routing?: () -> bool + def register_nested_sourced_paths: (::String, ::Array[SchemaArtifacts::RuntimeMetadata::nestedSourcedPathSegment]) -> void def to_index_config: () -> ::Hash[::String, untyped] def to_index_template_config: () -> ::Hash[::String, untyped] def runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::IndexDefinition diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs index 01a70b7b6..44c487038 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_relationship_chain_resolver.rbs @@ -9,6 +9,9 @@ module ElasticGraph private + def resolve_chain: (SchemaElements::Relationship, untyped, ::Array[PathSegment], ::Array[::String], ::Set[::String]) -> [SchemaElements::Relationship, untyped] + def validate_link: (SchemaElements::Relationship, untyped, SchemaElements::Relationship::ParentRef, ::Array[::String], ::Set[::String]) -> [untyped, untyped] + def build_path_segment: (SchemaElements::Relationship, untyped, 
untyped, ::Array[PathSegment], ::Array[::String]) -> void def find_embedding_field: (untyped, untyped, ::Array[::String]) -> SchemaElements::Field? def rel_description: (untyped, SchemaElements::Relationship) -> ::String end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs index c3223a3d3..6a05eeb3f 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs @@ -31,9 +31,9 @@ module ElasticGraph attr_reader schema_def_state: State def related_type: () -> untyped - def resolve_nested_sourced_fields_params: (::Array[::String]) -> SchemaArtifacts::RuntimeMetadata::paramsHash + def resolve_nested_sourced_data_params: (::Array[::String]) -> SchemaArtifacts::RuntimeMetadata::paramsHash def build_path_identifier_params: () -> SchemaArtifacts::RuntimeMetadata::paramsHash - def build_nested_sourced_paths: () -> ::Array[::Hash[::String, untyped]] + def build_nested_sourced_paths: () -> ::Array[SchemaArtifacts::RuntimeMetadata::nestedSourcedPathSegment] def resolve_routing: (::Array[::String]) -> untyped def resolve_rollover: (::Array[::String]) -> untyped def validate_has_had_multiple_sources: (::Array[::String]) -> void diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rbs new file mode 100644 index 000000000..9eb131fc2 --- /dev/null +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rbs @@ -0,0 +1,22 @@ +module ElasticGraph + module SchemaDefinition + module Indexing + 
class SourcedUpdateTargetsResolver + @schema_def_state: State + @sourced_field_errors: ::Array[::String] + @relationship_errors: ::Array[::String] + @sourced_update_targets_by_type_name: ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] + + def initialize: (schema_def_state: State) -> void + def resolve: () -> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] + + private + + def resolve_for_type: (untyped) -> void + def resolve_top_level_update_target: (untyped, untyped, ::Array[SchemaElements::Field]) -> void + def resolve_nested_update_targets: (untyped) -> void + def raise_if_errors: () -> void + end + end + end +end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs index b0687c40d..a950336a0 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/update_target_factory.rbs @@ -9,9 +9,7 @@ module ElasticGraph top_level_fields_params: SchemaArtifacts::RuntimeMetadata::paramsHash, routing_value_source: ::String?, rollover_timestamp_value_source: ::String?, - ?nested_sourced_fields_params: SchemaArtifacts::RuntimeMetadata::paramsHash, - ?nested_sourced_path_identifiers_params: SchemaArtifacts::RuntimeMetadata::paramsHash, - ?nested_sourced_paths: ::Hash[::String, ::Array[::Hash[::String, untyped]]] + ?nested_sourced_data_params: SchemaArtifacts::RuntimeMetadata::NestedSourcedDataParams ) -> SchemaArtifacts::RuntimeMetadata::UpdateTarget private diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs index b7ace8566..5de76d5e4 100644 --- 
a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/results.rbs @@ -39,8 +39,6 @@ module ElasticGraph def generate_datastore_config: () -> ::Hash[::String, untyped] def build_dynamic_scripts: () -> ::Array[Scripting::Script] def build_runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::Schema - def identify_extra_update_targets_by_object_type_name: () -> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]] - def identify_nested_sourced_update_targets: (untyped, ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]], ::Array[::String]) -> void def generate_sdl: () -> ::String def build_public_json_schema: () -> ::Hash[::String, untyped] def json_schema_indexing_field_types_by_name: () -> ::Hash[::String, Indexing::_FieldType] diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs index c2262acd3..6fa82a6fd 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/relationship.rbs @@ -10,7 +10,7 @@ module ElasticGraph attr_reader related_type: TypeReference attr_reader foreign_key: ::String attr_accessor hide_relationship_runtime_metadata: bool - attr_reader parent_relationship_config: ::Hash[::Symbol, ::String]? + attr_reader parent_ref: Relationship::ParentRef? attr_reader indexing_only: bool @cardinality: cardinality @@ -20,7 +20,17 @@ module ElasticGraph @indexing_only: bool @equivalent_field_paths_by_local_path: ::Hash[::String, ::String] @additional_filter: ::Hash[::String, untyped] - @parent_relationship_config: ::Hash[::Symbol, ::String]? + @parent_ref: Relationship::ParentRef? 
+ + class ParentRefSuperType + attr_reader type_ref: TypeReference + attr_reader relationship_name: ::String + + def initialize: (type_ref: TypeReference, relationship_name: ::String) -> void + end + + class ParentRef < ParentRefSuperType + end def initialize: ( Field, diff --git a/elasticgraph-support/lib/elastic_graph/constants.rb b/elasticgraph-support/lib/elastic_graph/constants.rb index 58ae2a6fc..b749cbf69 100644 --- a/elasticgraph-support/lib/elastic_graph/constants.rb +++ b/elasticgraph-support/lib/elastic_graph/constants.rb @@ -140,7 +140,7 @@ module ElasticGraph # # Note: this constant is automatically kept up-to-date by our `schema_artifacts:dump` rake task. # @private - INDEX_DATA_UPDATE_SCRIPT_ID = "update_index_data_59be3b4ab537ea412a3ead0408fb4137" + INDEX_DATA_UPDATE_SCRIPT_ID = "update_index_data_079bafcf4d739acd8659a631377fa9c8" # When an update script has a no-op result we often want to communicate more information about # why it was a no-op back to ElatsicGraph from the script. 
The only way to do that is to throw diff --git a/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb b/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb index f05a1ecd1..4d156166e 100644 --- a/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb +++ b/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb @@ -72,9 +72,7 @@ def derived_indexing_update_target_with( routing_value_source: routing_value_source, rollover_timestamp_value_source: rollover_timestamp_value_source, top_level_fields_params: top_level_fields_params, - nested_sourced_fields_params: {}, - nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {}, + nested_sourced_data_params: NestedSourcedDataParams::EMPTY, metadata_params: metadata_params ) end @@ -96,9 +94,7 @@ def normal_indexing_update_target_with( routing_value_source: routing_value_source, rollover_timestamp_value_source: rollover_timestamp_value_source, top_level_fields_params: top_level_fields_params, - nested_sourced_fields_params: {}, - nested_sourced_path_identifiers_params: {}, - nested_sourced_paths: {}, + nested_sourced_data_params: NestedSourcedDataParams::EMPTY, metadata_params: metadata_params ) end From a1a49f66a23d6ca0d601d4d2ba4ff89267805e3b Mon Sep 17 00:00:00 2001 From: ellisandrews-toast Date: Mon, 1 Jun 2026 14:08:10 -0400 Subject: [PATCH 4/4] Address PR feedback: separate concerns in nested sourced_from --- .../lib/elastic_graph/datastore_core/index_definition.rb | 2 +- .../sig/elastic_graph/datastore_core/index_definition.rbs | 2 +- .../lib/elastic_graph/indexer/operation/update.rb | 2 +- .../runtime_metadata/nested_sourced_path_segment.rb | 6 ++++++ .../runtime_metadata/nested_sourced_path_segment.rbs | 2 ++ .../lib/elastic_graph/schema_definition/indexing/index.rb | 6 +++++- .../indexing/nested_update_target_resolver.rb | 3 +-- .../indexing/sourced_update_targets_resolver.rb | 4 +++- .../indexing/nested_update_target_resolver.rbs 
| 2 +- 9 files changed, 21 insertions(+), 8 deletions(-) diff --git a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb index aa239284b..c1024792f 100644 --- a/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb +++ b/elasticgraph-datastore_core/lib/elastic_graph/datastore_core/index_definition.rb @@ -34,7 +34,7 @@ def self.with(name:, runtime_metadata:, config:, datastore_clients_by_name:, sch defined_clusters: config.clusters.keys.to_set, datastore_clients_by_name: datastore_clients_by_name, has_had_multiple_sources: runtime_metadata.has_had_multiple_sources, - nested_sourced_paths: runtime_metadata.nested_sourced_paths + nested_sourced_paths: runtime_metadata.nested_sourced_paths.transform_values { |segments| segments.map(&:to_painless_param) } } if (rollover = runtime_metadata.rollover) diff --git a/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs b/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs index 837612e48..8d087e939 100644 --- a/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs +++ b/elasticgraph-datastore_core/sig/elastic_graph/datastore_core/index_definition.rbs @@ -26,7 +26,7 @@ module ElasticGraph def current_sources: () -> ::Set[::String] def fields_by_path: () -> ::Hash[::String, SchemaArtifacts::RuntimeMetadata::IndexField] def has_had_multiple_sources: () -> bool - def nested_sourced_paths: () -> ::Hash[::String, ::Array[SchemaArtifacts::RuntimeMetadata::nestedSourcedPathSegment]] + def nested_sourced_paths: () -> ::Hash[::String, ::Array[::Hash[::String, ::String]]] def env_index_config: () -> Configuration::IndexDefinition def env_agnostic_settings: () -> ::Hash[::String, untyped] def defined_clusters: () -> ::Set[::String] diff --git a/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb 
b/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb index 3161a184d..5766e597f 100644 --- a/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb +++ b/elasticgraph-indexer/lib/elastic_graph/indexer/operation/update.rb @@ -151,7 +151,7 @@ def script_params # so it's important we avoid it. return initial_params unless update_target.for_normal_indexing? - initial_params["nestedSourcedPaths"] = destination_index_def.nested_sourced_paths.transform_values { |segments| segments.map(&:to_dumpable_hash) } + initial_params["nestedSourcedPaths"] = destination_index_def.nested_sourced_paths CountAccumulator.merge_list_counts_into( initial_params, diff --git a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb index ff99da8a4..09d7e76c0 100644 --- a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb +++ b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rb @@ -14,6 +14,7 @@ module RuntimeMetadata # # @private class ListPathSegment < ::Data.define(:field, :match_field, :source_field) + # @dynamic to_painless_param TYPE = "list" FIELD = "field" MATCH_FIELD = "matchField" @@ -23,6 +24,8 @@ def to_dumpable_hash {"type" => TYPE, FIELD => field, MATCH_FIELD => match_field, SOURCE_FIELD => source_field} end + alias_method :to_painless_param, :to_dumpable_hash + def self.from_hash(hash) new(field: hash[FIELD], match_field: hash[MATCH_FIELD], source_field: hash[SOURCE_FIELD]) end @@ -32,6 +35,7 @@ def self.from_hash(hash) # # @private class ObjectPathSegment < ::Data.define(:field) + # @dynamic to_painless_param TYPE = "object" FIELD = "field" @@ -39,6 +43,8 @@ def to_dumpable_hash {"type" => TYPE, FIELD => field} end + alias_method :to_painless_param, 
:to_dumpable_hash + def self.from_hash(hash) new(field: hash[FIELD]) end diff --git a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs index f912a96a8..dffd3d525 100644 --- a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs +++ b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/nested_sourced_path_segment.rbs @@ -17,6 +17,7 @@ module ElasticGraph def self.from_hash: (::Hash[::String, untyped]) -> ListPathSegment def to_dumpable_hash: () -> ::Hash[::String, ::String] + alias to_painless_param to_dumpable_hash end class ObjectPathSegmentSuperType @@ -31,6 +32,7 @@ module ElasticGraph def self.from_hash: (::Hash[::String, untyped]) -> ObjectPathSegment def to_dumpable_hash: () -> ::Hash[::String, ::String] + alias to_painless_param to_dumpable_hash end type nestedSourcedPathSegment = ListPathSegment | ObjectPathSegment diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index 89cd9283d..8bef98b55 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -38,6 +38,10 @@ module Indexing # @return [RolloverConfig, nil] rollover configuration for the index # @!attribute [r] has_had_multiple_sources_flag # @return [Boolean] whether this index has ever had multiple sources + # @!attribute [r] nested_sourced_paths + # @return [Hash<String, Array<SchemaArtifacts::RuntimeMetadata::ListPathSegment, SchemaArtifacts::RuntimeMetadata::ObjectPathSegment>>] + # map from relationship name to the path segments that the painless script uses to + # navigate to nested elements whose fields are sourced from another type via `sourced_from`. 
class Index < Struct.new(:name, :default_sort_pairs, :settings, :schema_def_state, :indexed_type, :routing_field_path, :rollover_config, :has_had_multiple_sources_flag, :nested_sourced_paths) include Mixins::HasReadableToSAndInspect.new { |i| i.name } @@ -252,7 +256,7 @@ def to_index_template_config end # Registers the nested sourced path segments for a relationship on this index. - # Called by `NestedUpdateTargetResolver` during schema resolution. + # Called by `SourcedUpdateTargetsResolver` during schema resolution. # @api private def register_nested_sourced_paths(relationship_name, path_segments) nested_sourced_paths[relationship_name] = path_segments diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb index 46c6acde0..5a004e007 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rb @@ -57,8 +57,7 @@ def resolve if errors.any? [nil, errors] else - # Register the path config on the destination index so it's available at runtime. 
- resolved_chain.root_indexed_type.index_def.register_nested_sourced_paths(relationship.name, nested_sourced_paths) + yield resolved_chain.root_indexed_type, relationship, nested_sourced_paths nested_sourced_data_params = SchemaArtifacts::RuntimeMetadata::NestedSourcedDataParams.new( field_params: field_params, diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb index 49d5c9981..222c259ec 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/sourced_update_targets_resolver.rb @@ -134,7 +134,9 @@ def resolve_nested_update_targets(object_type) schema_def_state: @schema_def_state ) - update_target, resolve_errors = resolver.resolve + update_target, resolve_errors = resolver.resolve do |indexed_type, rel, nested_sourced_paths| + indexed_type.index_def.register_nested_sourced_paths(rel.name, nested_sourced_paths) + end @sourced_field_errors.concat(resolve_errors) next unless update_target diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs index 6a05eeb3f..e5343ad4e 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/nested_update_target_resolver.rbs @@ -19,7 +19,7 @@ module ElasticGraph schema_def_state: State ) -> void - def resolve: () -> [SchemaArtifacts::RuntimeMetadata::UpdateTarget?, ::Array[::String]] + def resolve: () { (untyped, SchemaElements::Relationship, 
::Array[SchemaArtifacts::RuntimeMetadata::nestedSourcedPathSegment]) -> void } -> [SchemaArtifacts::RuntimeMetadata::UpdateTarget?, ::Array[::String]] private