diff --git a/config/site/support/doctest_helper.rb b/config/site/support/doctest_helper.rb index f322eb599..3be66a000 100644 --- a/config/site/support/doctest_helper.rb +++ b/config/site/support/doctest_helper.rb @@ -7,6 +7,7 @@ # frozen_string_literal: true require "elastic_graph/apollo/schema_definition/api_extension" +require "elastic_graph/json_ingestion/schema_definition/api_extension" require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names" require "elastic_graph/schema_definition/api" require "elastic_graph/schema_definition/schema_artifact_manager" @@ -51,6 +52,7 @@ module ElasticGraph descriptions_needing_schema_def_api_and_extension_modules = { "ElasticGraph.define_schema" => [], "ElasticGraph::Apollo::SchemaDefinition" => [ElasticGraph::Apollo::SchemaDefinition::APIExtension], + "ElasticGraph::JSONIngestion::SchemaDefinition" => [ElasticGraph::JSONIngestion::SchemaDefinition::APIExtension], "ElasticGraph::SchemaDefinition" => [], "ElasticGraph::Warehouse::SchemaDefinition" => [ElasticGraph::Warehouse::SchemaDefinition::APIExtension] } @@ -90,11 +92,16 @@ module ElasticGraph end end - doctest.before "ElasticGraph::SchemaDefinition::API#json_schema_version" do - ElasticGraph.define_schema do |schema| - # `schema.json_schema_version` raises an error when the version is set more than once. - # By default we set it above. Here we clear it to allow our example to set it. - schema.state.json_schema_version = nil + [ + "ElasticGraph::JSONIngestion::SchemaDefinition::APIExtension#json_schema_version", + "ElasticGraph::SchemaDefinition::API#json_schema_version" + ].each do |description| + doctest.before description do + ElasticGraph.define_schema do |schema| + # `schema.json_schema_version` raises an error when the version is set more than once. + # By default we set it above. Here we clear it to allow our example to set it. 
+ schema.state.json_schema_version = nil + end end end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb new file mode 100644 index 000000000..4070d8eab --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/api_extension.rb @@ -0,0 +1,155 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/factory_extension" +require "elastic_graph/json_ingestion/schema_definition/state_extension" + +module ElasticGraph + module JSONIngestion + # Namespace for all JSON Schema schema definition support. + # + # {SchemaDefinition::APIExtension} is the primary entry point and should be used as a schema definition extension module. + module SchemaDefinition + # Module designed to be extended onto an {ElasticGraph::SchemaDefinition::API} instance + # to add JSON Schema ingestion serializer capabilities. + module APIExtension + # Wires up the JSON ingestion extensions when this module is extended onto an API instance. 
+ # + # @param api [ElasticGraph::SchemaDefinition::API] the API instance to extend + # @return [void] + # @api private + def self.extended(api) + api.state.extend(StateExtension) + api.factory.extend(FactoryExtension) + + api.on_built_in_types do |type| + if type.name == api.state.type_ref("GeoLocation").to_final_form.name + # @type var geo_location_type: ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields & SchemaElements::TypeWithSubfieldsExtension + geo_location_type = _ = type + names = api.state.schema_elements + + # We use `nullable: false` because `GeoLocation` is indexed as a single `geo_point` field, + # and therefore can't support a `latitude` without a `longitude` or vice-versa. + latitude = geo_location_type.graphql_fields_by_name.fetch(names.latitude) # : ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension + longitude = geo_location_type.graphql_fields_by_name.fetch(names.longitude) # : ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension + latitude.json_schema minimum: -90, maximum: 90, nullable: false + longitude.json_schema minimum: -180, maximum: 180, nullable: false + end + end + end + + # Defines the version number of the current JSON schema. Importantly, every time a change is made that impacts the JSON schema + # artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique + # version number. The publisher will then include this version number in published events to identify the version of the schema it + # was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync. + # + # @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying to constantly + # have to increment this while rapidly changing the schema during prototyping.
You can disable the requirement to increment this + # on every JSON schema change with {#enforce_json_schema_version}. + # + # @param version [Integer] current version number of the JSON schema artifact + # @return [void] + # @see #enforce_json_schema_version + # + # @example Set the JSON schema version to 1 + # ElasticGraph.define_schema do |schema| + # schema.json_schema_version 1 + # end + def json_schema_version(version) + state = json_ingestion_state + + if !version.is_a?(Integer) || version < 1 + raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version}" + end + + if state.json_schema_version + raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{state.json_schema_version}" + end + + state.json_schema_version = version + state.json_schema_version_setter_location = caller_locations(1, 1).to_a.first + nil + end + + # Configures whether JSON schema artifact dumping enforces the requirement that the JSON schema version is incremented every time + # dumping the JSON schemas results in a changed artifact. Defaults to `true`. + # + # @note Generally speaking, you will want this to be `true` for any ElasticGraph application that is in + # production as the versioning of JSON schemas is what supports safe schema evolution as it allows + # ElasticGraph to identify which version of the JSON schema the publishing system was operating on + # when it published an event. + # + # It can be useful to set it to `false` before your application is in production, as you do not want + # to be forced to bump the version after every single schema change while you are building an initial + # prototype. 
+ # + # @param value [Boolean] whether to require `json_schema_version` to be incremented on changes that impact `json_schemas.yaml` + # @return [void] + # @see #json_schema_version + # + # @example Disable enforcement during initial prototyping + # ElasticGraph.define_schema do |schema| + # # TODO: remove this once we're past the prototyping stage + # schema.enforce_json_schema_version false + # end + def enforce_json_schema_version(value) + unless value == true || value == false + raise Errors::SchemaError, "`enforce_json_schema_version` must be a boolean. Specified value: #{value.inspect}" + end + + json_ingestion_state.enforce_json_schema_version = value + nil + end + + # Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the + # publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to + # configure this behavior. + # + # @param allow_omitted_fields [bool] Whether nullable fields can be omitted from indexing events. + # @param allow_extra_fields [bool] Whether extra fields (e.g. beyond fields defined in the schema) can be included in indexing events. + # @return [void] + # + # @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled + # field names in indexing events. For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher + # accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId` + # is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of + # these to `true` (or none). 
+ # + # @example Allow omitted fields and disallow extra fields + # ElasticGraph.define_schema do |schema| + # schema.json_schema_strictness allow_omitted_fields: true, allow_extra_fields: false + # end + def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true) + state = json_ingestion_state + + unless [true, false].include?(allow_omitted_fields) + raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" + end + + unless [true, false].include?(allow_extra_fields) + raise Errors::SchemaError, "`allow_extra_fields` must be true or false" + end + + state.allow_omitted_json_schema_fields = allow_omitted_fields + state.allow_extra_json_schema_fields = allow_extra_fields + nil + end + + private + + # Returns the API's `state` narrowed to include this gem's `StateExtension`. Centralizes + # the Steep cast that's needed because Steep can't see the `extend(StateExtension)` applied + # at runtime in `extended`. + def json_ingestion_state + state # : ElasticGraph::SchemaDefinition::State & StateExtension + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb new file mode 100644 index 000000000..93f2ae09e --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/factory_extension.rb @@ -0,0 +1,136 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/object" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar" +require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/union" +require "elastic_graph/json_ingestion/schema_definition/indexing/index_extension" +require "elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones" +require "elastic_graph/json_ingestion/schema_definition/results_extension" +require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension" +require "elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension" +require "elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension" +require "elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension" +require "elastic_graph/json_ingestion/schema_definition/schema_elements/type_with_subfields_extension" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module applied to `ElasticGraph::SchemaDefinition::Factory` to wire up + # JSON Schema support on Results and SchemaArtifactManager instances. + # + # @api private + module FactoryExtension + # Default JSON schema options applied to ElasticGraph's built-in scalar types as they + # are constructed. Keyed by the un-overridden type name, because built-in type + # registration always uses the canonical type name before `type_name_overrides` are + # applied to the resulting type reference. 
+ BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME = { + "Boolean" => {type: "boolean"}, + "Float" => {type: "number"}, + "ID" => {type: "string"}, + "Int" => {type: "integer", minimum: INT_MIN, maximum: INT_MAX}, + "String" => {type: "string"}, + "Cursor" => {type: "string"}, + "Date" => {type: "string", format: "date"}, + "DateTime" => {type: "string", format: "date-time"}, + "LocalTime" => {type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN}, + "TimeZone" => {type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a.freeze}, + "Untyped" => {type: ["array", "boolean", "integer", "number", "object", "string"].freeze}, + "JsonSafeLong" => {type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX}, + "LongString" => {type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX} + }.freeze + + # @private + def new_enum_type(name) + super(name) do |type| + extended_type = type.extend(SchemaElements::EnumTypeExtension) # : ::ElasticGraph::SchemaDefinition::SchemaElements::EnumType & SchemaElements::EnumTypeExtension + yield extended_type if block_given? + end + end + + # @private + def new_enum_indexing_field_type(enum_value_names) + Indexing::FieldType::Enum.new(super) + end + + # @private + def new_field(**kwargs) + super(**kwargs) do |field| + extended_field = field.extend(SchemaElements::FieldExtension) # : ::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension + yield extended_field if block_given? + end + end + + # @private + def new_index(name, settings, type) + super(name, settings, type) do |index| + extended_index = index.extend(Indexing::IndexExtension) # : ::ElasticGraph::SchemaDefinition::Indexing::Index & Indexing::IndexExtension + yield extended_index if block_given? + end + end + + # @private + def new_object_indexing_field_type(...) 
+ Indexing::FieldType::Object.new(super) + end + + # @private + def new_scalar_type(name) + super(name) do |type| + extended_type = type.extend(SchemaElements::ScalarTypeExtension) # : ::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & SchemaElements::ScalarTypeExtension + if state.initially_registered_built_in_types.empty? && (options = BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME[name.to_s]) + extended_type.json_schema(**options) + end + + yield extended_type if block_given? + extended_type.finalize_json_schema_configuration! + end + end + + # @private + def new_scalar_indexing_field_type(scalar_type:) + Indexing::FieldType::Scalar.new(super) + end + + # @private + def new_type_with_subfields(schema_kind, name, wrapping_type:, field_factory:) + super(schema_kind, name, wrapping_type: wrapping_type, field_factory: field_factory) do |type| + extended_type = type.extend(SchemaElements::TypeWithSubfieldsExtension) # : ::ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields & SchemaElements::TypeWithSubfieldsExtension + yield extended_type if block_given? + end + end + + # @private + def new_union_indexing_field_type(subtypes_by_name) + Indexing::FieldType::Union.new(super) + end + + # Creates a new Results instance with JSON Schema extensions. + # + # @return [ElasticGraph::SchemaDefinition::Results] the created results instance + def new_results + super.tap do |results| + results.extend(ResultsExtension) + end + end + + # Creates a new SchemaArtifactManager instance with JSON Schema extensions. + # + # @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager] the created artifact manager + def new_schema_artifact_manager(...) 
+ super.tap do |manager| + manager.extend(SchemaArtifactManagerExtension) + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rb index 51e08b052..ba8733732 100644 --- a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rb +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rb @@ -19,20 +19,20 @@ module FieldType # # @private class Object < DelegateClass(ElasticGraph::SchemaDefinition::Indexing::FieldType::Object) - # @dynamic __getobj__, json_schema_options + # @dynamic __getobj__, json_schema_options, json_schema_options= # @return [Hash] JSON schema options for this object type - attr_reader :json_schema_options + attr_accessor :json_schema_options # @param field_type [ElasticGraph::SchemaDefinition::Indexing::FieldType::Object] the object field type to wrap - # @param json_schema_options [Hash] JSON schema options for this object type - def initialize(field_type, json_schema_options:) + def initialize(field_type) super(field_type) - @json_schema_options = json_schema_options + @json_schema_options = {} end # @return [Hash] field metadata keyed by field name def json_schema_field_metadata_by_field_name - subfields.to_h { |field| [field.name, field.json_schema_metadata] } + json_schema_subfields = subfields # : ::Array[Indexing::Field] + json_schema_subfields.to_h { |field| [field.name, field.json_schema_metadata] } end # @param customizations [Hash] the customizations to format @@ -47,15 +47,13 @@ def to_json_schema if json_schema_options.empty? # Fields that are `sourced_from` an alternate type must not be included in this type's JSON schema, # since events of this type won't include them. 
- core_other_source_subfields, core_json_schema_candidate_subfields = subfields.partition(&:source) - # @type var other_source_subfields: ::Array[Indexing::Field] - other_source_subfields = _ = core_other_source_subfields + json_schema_candidate_fields = subfields # : ::Array[Indexing::Field] + other_source_subfields, json_schema_candidate_subfields = json_schema_candidate_fields.partition(&:source) validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields) - # @type var json_schema_candidate_subfields: ::Array[Indexing::Field] - json_schema_candidate_subfields = _ = core_json_schema_candidate_subfields json_schema_subfields = json_schema_candidate_subfields.reject(&:runtime_field_script) required_fields = json_schema_subfields - required_fields = required_fields.reject(&:nullable?) if schema_def_state.allow_omitted_json_schema_fields + state = schema_def_state # : ::ElasticGraph::SchemaDefinition::State & ::ElasticGraph::JSONIngestion::SchemaDefinition::StateExtension + required_fields = required_fields.reject(&:nullable?) if state.allow_omitted_json_schema_fields { "type" => "object", @@ -64,7 +62,7 @@ def to_json_schema # we want it validated (as we do by merging in `json_schema_typename_field`) but we only want # to require it in the context of a union type. The union's JSON schema requires the field. 
"required" => required_fields.map(&:name).freeze, - "additionalProperties" => (false unless schema_def_state.allow_extra_json_schema_fields), + "additionalProperties" => (false unless state.allow_extra_json_schema_fields), "description" => doc_comment }.compact.freeze else diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/index_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/index_extension.rb new file mode 100644 index 000000000..9d2988e22 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/index_extension.rb @@ -0,0 +1,61 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + # Extends indices with JSON-schema-specific event requirements. 
+ module IndexExtension + # @private + def self.extended(index) + index.schema_def_state.after_user_definition_complete do + routing_field_path = index.routing_field_path # : ::ElasticGraph::SchemaDefinition::SchemaElements::FieldPath + id_field = routing_field_path.last_part # : ::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension + id_field.json_schema nullable: false + end + end + + # @private + def rollover(frequency, timestamp_field_path_name) + super + + schema_def_state.after_user_definition_complete do + rollover_config = self.rollover_config # : ::ElasticGraph::SchemaDefinition::Indexing::RolloverConfig + rollover_config + .timestamp_field_path + .path_parts + .each do |field| + json_schema_field = field # : ::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension + json_schema_field.json_schema nullable: false + end + end + end + + # @private + def route_with(routing_field_path_name) + super + + schema_def_state.after_user_definition_complete do + routing_field_path = self.routing_field_path # : ::ElasticGraph::SchemaDefinition::SchemaElements::FieldPath + + routing_field_path + .path_parts # : ::Array[::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension] + .each { |field| field.json_schema nullable: false } + + routing_field = routing_field_path.last_part # : ::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension + routing_field.json_schema pattern: HAS_NON_WHITE_SPACE_REGEX + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_builder.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_builder.rb new file mode 100644 index 000000000..f85a6bc13 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_builder.rb @@ -0,0 +1,80 @@ +# 
Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/indexing/event_envelope" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Builds JSON schema data from schema definition results. + # + # @private + class JSONSchemaBuilder + def initialize(state:, all_types:, derived_indexing_type_names:) + @state = state + @all_types = all_types + @derived_indexing_type_names = derived_indexing_type_names + end + + def public_json_schema + json_schema_version = @state.json_schema_version + if json_schema_version.nil? + raise Errors::SchemaError, "`json_schema_version` must be specified in the schema. To resolve, add `schema.json_schema_version 1` in a schema definition block." + end + + { + "$schema" => JSON_META_SCHEMA, + JSON_SCHEMA_VERSION_KEY => json_schema_version, + "$defs" => { + "ElasticGraphEventEnvelope" => Indexing::EventEnvelope.json_schema(root_document_type_names, json_schema_version) + }.merge(definitions_by_name) + } + end + + def field_metadata_by_type_and_field_name + indexing_field_types_by_name.transform_values(&:json_schema_field_metadata_by_field_name) + end + + private + + def root_document_type_names + @state.object_types_by_name.values + .select { |type| type.root_document_type? && !type.abstract? } + .reject { |type| @derived_indexing_type_names.include?(type.name) } + .map(&:name) + end + + def definitions_by_name + indexing_field_types_by_name + .transform_values(&:to_json_schema) + .compact + end + + def indexing_field_types_by_name + @indexing_field_types_by_name ||= @state + .types_by_name + .except("Query") + .values + .reject do |type| + @derived_indexing_type_names.include?(type.name) || + # Skip graphql framework types. 
+ type.graphql_only? + end + .sort_by(&:name) + .to_h do |type| + # @type var indexing_field_type: Indexing::_JSONFieldType + indexing_field_type = _ = type.to_indexing_field_type + [type.name, indexing_field_type] + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_layers.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_layers.rb new file mode 100644 index 000000000..7b2fc37dc --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_layers.rb @@ -0,0 +1,43 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Computes JSON schema array/nullable layers from schema-definition type references. + # + # @private + module JSONSchemaLayers + # Returns all JSON schema array/nullable layers of a type, from outermost to innermost. + # For example, `[[Int]]` returns `[:nullable, :array, :nullable, :array, :nullable]`. + def self.for(type_reference) + layers, inner_type = peel_once(type_reference) + + if layers.empty? || inner_type == type_reference + layers + else + layers + self.for(inner_type) + end + end + + def self.peel_once(type_reference) + if type_reference.list? + inner_type = type_reference.unwrap_list + return [[:array], inner_type] if type_reference.non_null? + return [[:nullable, :array], inner_type] + end + + no_layers = [] # : ::Array[ElasticGraph::SchemaDefinition::jsonSchemaLayer] + return [no_layers, type_reference.unwrap_non_null] if type_reference.non_null? 
+ [[:nullable], type_reference] + end + private_class_method :peel_once + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_merge_reporter.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_merge_reporter.rb new file mode 100644 index 000000000..c71d70a55 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_merge_reporter.rb @@ -0,0 +1,159 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Formats and reports JSON schema metadata merge diagnostics. + # + # @private + class JSONSchemaMergeReporter + def initialize(output) + @output = output + end + + def report_errors(merged_results) + json_schema_versions_by_missing_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] + json_schema_versions_by_missing_type = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::Integer]] + json_schema_versions_by_missing_necessary_field = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]] + + merged_results.each do |result| + result.missing_fields.each do |field| + json_schema_versions_by_missing_field[field] << result.json_schema_version + end + + result.missing_types.each do |type| + json_schema_versions_by_missing_type[type] << result.json_schema_version + end + + result.missing_necessary_fields.each do |missing_necessary_field| + json_schema_versions_by_missing_necessary_field[missing_necessary_field] << result.json_schema_version + end + end + + missing_field_errors = json_schema_versions_by_missing_field.map do 
|field, json_schema_versions| + missing_field_error_for(field, json_schema_versions) + end + + missing_type_errors = json_schema_versions_by_missing_type.map do |type, json_schema_versions| + missing_type_error_for(type, json_schema_versions) + end + + missing_necessary_field_errors = json_schema_versions_by_missing_necessary_field.map do |field, json_schema_versions| + missing_necessary_field_error_for(field, json_schema_versions) + end + + definition_conflict_errors = merged_results + .flat_map { |result| result.definition_conflicts.to_a } + .group_by(&:name) + .map do |name, deprecated_elements| + <<~EOS + The schema definition of `#{name}` has conflicts. To resolve the conflict, remove the unneeded definitions from the following: + + #{format_deprecated_elements(deprecated_elements)} + EOS + end + + errors = missing_field_errors + missing_type_errors + missing_necessary_field_errors + definition_conflict_errors + return if errors.empty? + + abort errors.join("\n\n") + end + + def report_warnings(unused_elements) + return if unused_elements.empty? + + @output.puts <<~EOS + The schema definition has #{unused_elements.size} unneeded reference(s) to deprecated schema elements. These can all be safely deleted: + + #{format_deprecated_elements(unused_elements)} + + EOS + end + + private + + def format_deprecated_elements(deprecated_elements) + descriptions = deprecated_elements + .sort_by { |e| [e.defined_at.path, e.defined_at.lineno] } + .map(&:description) + .uniq + + descriptions.each.with_index(1).map { |desc, idx| "#{idx}. #{desc}" }.join("\n") + end + + def missing_field_error_for(qualified_field, json_schema_versions) + type, field = qualified_field.split(".") + + <<~EOS + The `#{qualified_field}` field (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. 
+ ElasticGraph cannot guess what it should do with this field's data when ingesting events at #{old_versions(json_schema_versions)}. + To continue, do one of the following: + + 1. If the `#{qualified_field}` field has been renamed, indicate this by calling `field.renamed_from "#{field}"` on the renamed field. + 2. If the `#{qualified_field}` field has been dropped, indicate this by calling `type.deleted_field "#{field}"` on the `#{type}` type. + 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def missing_type_error_for(type, json_schema_versions) + <<~EOS + The `#{type}` type (which existed in #{describe_json_schema_versions(json_schema_versions, "and")}) no longer exists in the current schema definition. + ElasticGraph cannot guess what it should do with this type's data when ingesting events at #{old_versions(json_schema_versions)}. + To continue, do one of the following: + + 1. If the `#{type}` type has been renamed, indicate this by calling `type.renamed_from "#{type}"` on the renamed type. + 2. If the `#{type}` type has been dropped, indicate this by calling `schema.deleted_type "#{type}"` on the schema. + 3. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def missing_necessary_field_error_for(field, json_schema_versions) + path = field.fully_qualified_path.split(".").last + # :nocov: -- we only cover one side of this ternary. + has_or_have = (json_schema_versions.size == 1) ? 
"has" : "have" + # :nocov: + + <<~EOS + #{describe_json_schema_versions(json_schema_versions, "and")} #{has_or_have} no field that maps to the #{field.field_type} field path of `#{field.fully_qualified_path}`. + Since the field path is required for #{field.field_type}, ElasticGraph cannot ingest events that lack it. To continue, do one of the following: + + 1. If the `#{field.fully_qualified_path}` field has been renamed, indicate this by calling `field.renamed_from "#{path}"` on the renamed field rather than using `deleted_field`. + 2. Alternately, if no publishers or in-flight events use #{describe_json_schema_versions(json_schema_versions, "or")}, delete #{files_noun_phrase(json_schema_versions)} from `#{JSON_SCHEMAS_BY_VERSION_DIRECTORY}`, and no further changes are required. + EOS + end + + def describe_json_schema_versions(json_schema_versions, conjunction) + json_schema_versions = json_schema_versions.sort + + case json_schema_versions.size + when 1 + "JSON schema version #{json_schema_versions.first}" + when 2 + "JSON schema versions #{json_schema_versions.first} #{conjunction} #{json_schema_versions.last}" + else + versions = json_schema_versions.take(json_schema_versions.size - 1) + "JSON schema versions #{versions.join(", ")}, #{conjunction} #{json_schema_versions.last}" + end + end + + def old_versions(json_schema_versions) + return "this old version" if json_schema_versions.size == 1 + "these old versions" + end + + def files_noun_phrase(json_schema_versions) + return "its file" if json_schema_versions.size == 1 + "their files" + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb new file mode 100644 index 000000000..03df3504b --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/results_extension.rb @@ -0,0 +1,98 @@ +# 
Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata" +require "elastic_graph/json_ingestion/schema_definition/json_schema_builder" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module for {ElasticGraph::SchemaDefinition::Results} that adds + # JSON Schema generation support. + # + # @private + module ResultsExtension + # @param version [Integer] desired JSON schema version + # @return [Hash] the JSON schema for the requested version, if available + # @raise [Errors::NotFoundError] if the requested JSON schema version is not available + def json_schemas_for(version) + unless available_json_schema_versions.include?(version) + raise Errors::NotFoundError, "The requested json schema version (#{version}) is not available. Available versions: #{available_json_schema_versions.to_a.join(", ")}." 
+ end + + @latest_versioned_json_schema ||= merge_field_metadata_into_json_schema(current_public_json_schema).json_schema + end + + # @return [Set] set of available JSON schema versions + def available_json_schema_versions + @available_json_schema_versions ||= Set[latest_json_schema_version] + end + + # @return [Integer] the current JSON schema version + def latest_json_schema_version + current_public_json_schema[JSON_SCHEMA_VERSION_KEY] + end + + # @private + def json_schema_version_setter_location + json_ingestion_state.json_schema_version_setter_location + end + + # @private + def json_schema_field_metadata_by_type_and_field_name + @json_schema_field_metadata_by_type_and_field_name ||= json_ingestion_json_schema_builder.field_metadata_by_type_and_field_name + end + + # @private + def current_public_json_schema + @current_public_json_schema ||= json_ingestion_json_schema_builder.public_json_schema + end + + # @private + def merge_field_metadata_into_json_schema(json_schema) + json_ingestion_json_schema_with_metadata_merger.merge_metadata_into(json_schema) + end + + # @private + def unused_deprecated_elements + json_ingestion_json_schema_with_metadata_merger.unused_deprecated_elements + end + + private + + # Returns the wrapped state narrowed to include this gem's `StateExtension`. Centralizes + # the Steep cast that's needed because Steep can't see the `extend(StateExtension)` applied + # at runtime in {APIExtension.extended}. + def json_ingestion_state + state # : ElasticGraph::SchemaDefinition::State & StateExtension + end + + def json_ingestion_json_schema_builder + @json_ingestion_json_schema_builder ||= begin + # Force `all_types` to materialize before iterating `state.types_by_name`. Reading `all_types` + # runs the `on_built_in_types` callbacks, including the GeoLocation JSON schema field + # customizations registered by `APIExtension.extended`. 
+ materialized_all_types = all_types + + JSONSchemaBuilder.new( + state: json_ingestion_state, + all_types: materialized_all_types, + derived_indexing_type_names: derived_indexing_type_names + ) + end + end + + def json_ingestion_json_schema_with_metadata_merger + @json_ingestion_json_schema_with_metadata_merger ||= Indexing::JSONSchemaWithMetadata::Merger.new(self) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb new file mode 100644 index 000000000..4bb5f2b23 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rb @@ -0,0 +1,134 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/json_schema_merge_reporter" +require "elastic_graph/json_ingestion/schema_definition/json_schema_pruner" +require "yaml" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module for {ElasticGraph::SchemaDefinition::SchemaArtifactManager} that adds + # JSON Schema artifact generation support. + # + # @private + module SchemaArtifactManagerExtension + # Overrides `dump_artifacts` to add JSON schema version bump checking before dumping. 
        #
        # Runs the JSON schema version-bump check and then delegates to the base implementation via `super`.
        #
        # The check helper only yields (passing a recommended version number) when it decides a bump is
        # needed. When it yields:
        #
        # - if `enforce_json_schema_version` is truthy on the state, the process is terminated via `abort`
        #   with a message naming the line and file where `json_schema_version` was set and the version
        #   to change it to;
        # - otherwise a warning is written to `@output` and dumping continues.
        #
        # @return whatever the overridden `dump_artifacts` returns
        def dump_artifacts
          schema_results = json_ingestion_schema_definition_results
          state = json_ingestion_state

          json_ingestion_check_if_needs_json_schema_version_bump do |recommended_json_schema_version|
            if state.enforce_json_schema_version
              # @type var setter_location: ::Thread::Backtrace::Location
              # We use `_ =` because while `json_schema_version_setter_location` can be nil,
              # it'll never be nil if we get here and we want the type to be non-nilable.
              setter_location = _ = schema_results.json_schema_version_setter_location
              # Express the setter's file relative to the current working directory so the message is short.
              setter_location_path = ::Pathname.new(setter_location.absolute_path.to_s).relative_path_from(::Dir.pwd)

              # `abort` prints the message and exits, so `super` below is never reached on this path.
              abort "A change has been attempted to `json_schemas.yaml`, but the `json_schema_version` has not been correspondingly incremented. Please " \
                "increase the schema's version, and then run the `bundle exec rake schema_artifacts:dump` command again.\n\n" \
                "To update the schema version to the expected version, change line #{setter_location.lineno} at `#{setter_location_path}` to:\n" \
                " `schema.json_schema_version #{recommended_json_schema_version}`\n\n" \
                "Alternately, call `schema.enforce_json_schema_version false` in your schema definition to allow the JSON schemas file " \
                "to change without requiring a version bump, but that is only recommended for non-production applications during initial schema prototyping."
            else
              # Enforcement is off: warn, but let the dump proceed.
              @output.puts <<~EOS
                WARNING: the `json_schemas.yaml` artifact is being updated without the `json_schema_version` being correspondingly incremented.
                This is not recommended for production applications, but is currently allowed because you have called `schema.enforce_json_schema_version false`.
              EOS
            end
          end

          super
        end

        private

        # Returns the wrapped {ElasticGraph::SchemaDefinition::Results} narrowed to include this
        # gem's `ResultsExtension`. Centralizes the Steep cast that's needed because Steep can't
        # see the `extend(ResultsExtension)` applied at runtime.
+ def json_ingestion_schema_definition_results + schema_definition_results # : ElasticGraph::SchemaDefinition::Results & ResultsExtension + end + + def json_ingestion_state + json_ingestion_schema_definition_results.state # : ElasticGraph::SchemaDefinition::State & StateExtension + end + + # Overrides the base `artifacts_from_schema_def` method to add JSON schema artifacts. + def artifacts_from_schema_def + json_schemas_artifact = json_ingestion_json_schemas_artifact + versioned_artifacts = json_ingestion_build_desired_versioned_json_schemas(json_schemas_artifact.desired_contents).values.map do |versioned_schema| + json_ingestion_new_versioned_json_schema_artifact(versioned_schema) + end + + super + [json_schemas_artifact] + versioned_artifacts + end + + def json_ingestion_json_schemas_artifact + @json_ingestion_json_schemas_artifact ||= new_yaml_artifact( + JSON_SCHEMAS_FILE, + JSONSchemaPruner.prune(json_ingestion_schema_definition_results.current_public_json_schema), + extra_comment_lines: [ + "This is the \"public\" JSON schema file and is intended to be provided to publishers so that", + "they can perform code generation and event validation." + ] + ) + end + + def json_ingestion_check_if_needs_json_schema_version_bump(&block) + if json_ingestion_json_schemas_artifact.out_of_date? 
+ existing_schema_version = json_ingestion_json_schemas_artifact.existing_dumped_contents&.dig(JSON_SCHEMA_VERSION_KEY) || -1 + desired_schema_version = json_ingestion_json_schemas_artifact.desired_contents[JSON_SCHEMA_VERSION_KEY] + + if existing_schema_version >= desired_schema_version + yield existing_schema_version + 1 + end + end + end + + def json_ingestion_build_desired_versioned_json_schemas(current_public_json_schema) + schema_results = json_ingestion_schema_definition_results + versioned_parsed_yamls = ::Dir.glob(::File.join(@schema_artifacts_directory, JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v*.yaml")).map do |file| + ::YAML.safe_load_file(file) + end + [current_public_json_schema] + + results_by_json_schema_version = versioned_parsed_yamls.to_h do |parsed_yaml| + merged_schema = schema_results.merge_field_metadata_into_json_schema(parsed_yaml) + [merged_schema.json_schema_version, merged_schema] + end + + json_ingestion_json_schema_merge_reporter.report_errors(results_by_json_schema_version.values) + json_ingestion_json_schema_merge_reporter.report_warnings(schema_results.unused_deprecated_elements) + + results_by_json_schema_version.transform_values(&:json_schema) + end + + def json_ingestion_json_schema_merge_reporter + @json_ingestion_json_schema_merge_reporter ||= JSONSchemaMergeReporter.new(@output) + end + + def json_ingestion_new_versioned_json_schema_artifact(desired_contents) + # File name depends on the schema_version field in the json schema. + schema_version = desired_contents[JSON_SCHEMA_VERSION_KEY] + + new_yaml_artifact( + ::File.join(JSON_SCHEMAS_BY_VERSION_DIRECTORY, "v#{schema_version}.yaml"), + desired_contents, + extra_comment_lines: [ + "This JSON schema file contains internal ElasticGraph metadata and should be considered private.", + "The unversioned JSON schema file is public and intended to be provided to publishers." 
+ ] + ) + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension.rb new file mode 100644 index 000000000..68eb12abd --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension.rb @@ -0,0 +1,24 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + # Extends enum types with JSON schema behavior. + module EnumTypeExtension + # @private + def configure_derived_scalar_type(scalar_type) + super + scalar_type.json_schema type: "string" + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension.rb new file mode 100644 index 000000000..9e4a558ef --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension.rb @@ -0,0 +1,110 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/indexing/field_reference" +require "elastic_graph/json_ingestion/schema_definition/json_schema_layers" +require "elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Namespace for schema-element extensions that contribute JSON schema generation and validation. + module SchemaElements + # Extends schema-definition fields with JSON schema validation behavior. + module FieldExtension + include HasJSONSchema + + # @return [Boolean] whether this field has been marked as non-nullable in the JSON schema + def non_nullable_in_json_schema? + !!@non_nullable_in_json_schema + end + + # Defines the [JSON schema](https://json-schema.org/understanding-json-schema/) validations for this field. + # Validations defined here will be included in the generated `json_schemas.yaml` artifact, which is used by + # the ElasticGraph indexer to validate events before indexing their data in the datastore. In addition, the + # publisher may use `json_schemas.yaml` for code generation and to apply validation before publishing an + # event to ElasticGraph. + # + # Can be called multiple times; each time, the options will be merged into the existing options. + # + # On a {ElasticGraph::SchemaDefinition::SchemaElements::Field}, this is optional, but can be used to make the + # JSON schema validation stricter than it would otherwise be. For example, you could use + # `json_schema maxLength: 30` on a `String` field to limit the length. + # + # You can use any of the JSON schema validation keywords here. In addition, `nullable: false` is supported + # to configure the generated JSON schema to disallow `null` values for the field. Note that if you define a + # field with a non-nullable GraphQL type (e.g. `Int!`), the JSON schema will automatically disallow nulls. 
+ # However, as explained in the {ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields#field} + # documentation, we generally recommend against defining non-nullable GraphQL fields. + # `json_schema nullable: false` will disallow `null` values from being indexed, while still keeping the + # field nullable in the GraphQL schema. If you think you might want to make a field non-nullable in the + # GraphQL schema some day, it's a good idea to use `json_schema nullable: false` now to ensure every indexed + # record has a non-null value for the field. + # + # @note We recommend using JSON schema validations in a limited fashion. Validations that are appropriate to + # apply when data is entering the system-of-record are often not appropriate on a secondary index like + # ElasticGraph. Events that violate a JSON schema validation will fail to index (typically they will be + # sent to the dead letter queue and page an oncall engineer). If an ElasticGraph instance is meant to + # contain all the data of some source system, you probably don't want it applying stricter validations + # than the source system itself has. We recommend limiting your JSON schema validations to situations + # where violations would prevent ElasticGraph from operating correctly. + # + # @param nullable [Boolean, nil] set to `false` to make this field non-nullable in the JSON schema + # @param options [Hash] additional JSON schema options + # @return [void] + # + # @example Define additional validations on a field + # ElasticGraph.define_schema do |schema| + # schema.object_type "Card" do |t| + # t.field "id", "ID!" + # + # t.field "expYear", "Int" do |f| + # # Use JSON schema to ensure the publisher is sending us 4 digit years, not 2 digit years. 
+ # f.json_schema minimum: 2000, maximum: 2099 + # end + # + # t.field "expMonth", "Int" do |f| + # f.json_schema minimum: 1, maximum: 12 + # end + # + # t.index "cards" + # end + # end + def json_schema(nullable: nil, **options) + if options.key?(:type) + raise Errors::SchemaError, "Cannot override JSON schema type of field `#{name}` with `#{options.fetch(:type)}`" + end + + case nullable + when true + raise Errors::SchemaError, "`nullable: true` is not allowed on a field--just declare the GraphQL field as being nullable (no `!` suffix) instead." + when false + @non_nullable_in_json_schema = true + end + + super(**options) + end + + # @private + def to_indexing_field_reference + reference = super + return nil unless reference + + type_for_json_schema = non_nullable_in_json_schema? ? type.wrap_non_null : type + + Indexing::FieldReference.new( + reference.with(type: type_for_json_schema), + json_schema_layers: JSONSchemaLayers.for(type_for_json_schema), + json_schema_customizations: json_schema_options + ) + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema.rb new file mode 100644 index 000000000..cfca2c98a --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema.rb @@ -0,0 +1,95 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/errors" +require "elastic_graph/support/hash_util" +require "elastic_graph/support/json_schema/meta_schema_validator" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + # Provides common JSON schema behavior for schema-definition fields and types. + module HasJSONSchema + # @return [Hash] JSON schema options + def json_schema_options + @json_schema_options ||= {} + end + + # Defines the [JSON schema](https://json-schema.org/understanding-json-schema/) validations for this field or type. Validations + # defined here will be included in the generated `json_schemas.yaml` artifact, which is used by the ElasticGraph indexer to + # validate events before indexing their data in the datastore. In addition, the publisher may use `json_schemas.yaml` for code + # generation and to apply validation before publishing an event to ElasticGraph. + # + # Can be called multiple times; each time, the options will be merged into the existing options. + # + # This is _required_ on a {ElasticGraph::SchemaDefinition::SchemaElements::ScalarType} (since we don’t know how a custom scalar + # type should be represented in JSON!). On a {ElasticGraph::SchemaDefinition::SchemaElements::Field}, this is optional, but can + # be used to make the JSON schema validation stricter than it + # would otherwise be. For example, you could use `json_schema maxLength: 30` on a `String` field to limit the length. + # + # You can use any of the JSON schema validation keywords here. In addition, `nullable: false` is supported to configure the + # generated JSON schema to disallow `null` values for the field. Note that if you define a field with a non-nullable GraphQL type + # (e.g. `Int!`), the JSON schema will automatically disallow nulls. 
However, as explained in the + # {ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields#field} documentation, we generally recommend against defining + # non-nullable GraphQL fields. `json_schema nullable: false` will disallow `null` values from being indexed, while still keeping + # the field nullable in the GraphQL schema. If you think you might want to make a field non-nullable in the GraphQL schema some + # day, it’s a good idea to use `json_schema nullable: false` now to ensure every indexed record has a non-null value for the field. + # + # @note We recommend using JSON schema validations in a limited fashion. Validations that are appropriate to apply when data is + # entering the system-of-record are often not appropriate on a secondary index like ElasticGraph. Events that violate a JSON + # schema validation will fail to index (typically they will be sent to the dead letter queue and page an oncall engineer). If an + # ElasticGraph instance is meant to contain all the data of some source system, you probably don’t want it applying stricter + # validations than the source system itself has. We recommend limiting your JSON schema validations to situations where + # violations would prevent ElasticGraph from operating correctly. + # + # @param options [Hash] JSON schema options + # @return [void] + # + # @example Define the JSON schema validations of a custom scalar type + # ElasticGraph.define_schema do |schema| + # schema.scalar_type "URL" do |t| + # t.mapping type: "keyword" + # + # # JSON schema has a built-in URI format validator: + # # https://json-schema.org/understanding-json-schema/reference/string.html#resource-identifiers + # t.json_schema type: "string", format: "uri" + # end + # end + # + # @example Define additional validations on a field + # ElasticGraph.define_schema do |schema| + # schema.object_type "Card" do |t| + # t.field "id", "ID!" 
+ # + # t.field "expYear", "Int" do |f| + # # Use JSON schema to ensure the publisher is sending us 4 digit years, not 2 digit years. + # f.json_schema minimum: 2000, maximum: 2099 + # end + # + # t.field "expMonth", "Int" do |f| + # f.json_schema minimum: 1, maximum: 12 + # end + # + # t.index "cards" + # end + # end + def json_schema(**options) + validatable_json_schema = Support::HashUtil.stringify_keys(options) + + if (error_msg = Support::JSONSchema.strict_meta_schema_validator.validate_with_error_message(validatable_json_schema)) + raise Errors::SchemaError, "Invalid JSON schema options set on #{self}:\n\n#{error_msg}" + end + + json_schema_options.update(options) + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension.rb new file mode 100644 index 000000000..9f0307d46 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension.rb @@ -0,0 +1,71 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + # Extends scalar types with JSON schema validation and serialization behavior. + module ScalarTypeExtension + include HasJSONSchema + + # Validates that json_schema has been configured on this scalar type, and applies + # post-yield runtime metadata derived from the final JSON schema configuration. 
+ # + # @raise [Errors::SchemaError] if json_schema has not been configured + # @return [void] + def finalize_json_schema_configuration! + validate_json_schema_configuration! + + if !grouping_missing_value_placeholder_overridden && (placeholder = inferred_grouping_missing_value_placeholder) + self.runtime_metadata = runtime_metadata.with(grouping_missing_value_placeholder: placeholder) + end + end + + # Validates that json_schema has been configured on this scalar type. + # + # @raise [Errors::SchemaError] if json_schema has not been configured + # @return [void] + def validate_json_schema_configuration! + return unless json_schema_options.empty? + + raise Errors::SchemaError, "Scalar types require `json_schema` to be configured, but `#{name}` lacks `json_schema`." + end + + private + + def inferred_grouping_missing_value_placeholder + case mapping_type + when "long" + # It is only safe to use NaN for a long when the long's range is safe to coerce to a float + # without loss of precision. This is because using NaN as the missing value will cause + # the datastore to coerce the other bucket keys to float. + # JSON schema min/max only constrains newly indexed values, not existing data that may fall + # outside the range before the constraints were added. In that edge case, users can set + # `grouping_missing_value_placeholder` to `nil`. + if (json_schema_options[:minimum] || LONG_STRING_MIN) >= JSON_SAFE_LONG_MIN && + (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX + inferred_numeric_placeholder_for_integer_type + end + when "unsigned_long" + # Similar to the checks above for long except we only need to check the max + # (since the min is zero even if not specified). 
+ if (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX + inferred_numeric_placeholder_for_integer_type + end + else + super + end + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/type_with_subfields_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/type_with_subfields_extension.rb new file mode 100644 index 000000000..67d21caa2 --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/schema_elements/type_with_subfields_extension.rb @@ -0,0 +1,29 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema" + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + # Extends object and interface type internals with JSON schema behavior. + module TypeWithSubfieldsExtension + include HasJSONSchema + + # @private + def to_indexing_field_type + field_type = super # : Indexing::FieldType::Object + field_type.json_schema_options = json_schema_options + field_type + end + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/state_extension.rb b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/state_extension.rb new file mode 100644 index 000000000..d072bb02d --- /dev/null +++ b/elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/state_extension.rb @@ -0,0 +1,33 @@ +# Copyright 2024 - 2026 Block, Inc. 
+# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module JSONIngestion + module SchemaDefinition + # Extension module applied to `ElasticGraph::SchemaDefinition::State` to support JSON ingestion state. + # + # @private + module StateExtension + # @dynamic json_schema_version, json_schema_version= + # @dynamic json_schema_version_setter_location, json_schema_version_setter_location= + # @dynamic enforce_json_schema_version, enforce_json_schema_version= + # @dynamic allow_omitted_json_schema_fields, allow_omitted_json_schema_fields= + # @dynamic allow_extra_json_schema_fields, allow_extra_json_schema_fields= + attr_accessor :json_schema_version, :json_schema_version_setter_location, :enforce_json_schema_version, :allow_omitted_json_schema_fields, :allow_extra_json_schema_fields + + def self.extended(state) + state.json_schema_version = nil + state.json_schema_version_setter_location = nil + state.enforce_json_schema_version = true + state.allow_omitted_json_schema_fields = false + state.allow_extra_json_schema_fields = true + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs new file mode 100644 index 000000000..4a2bc4756 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/api_extension.rbs @@ -0,0 +1,17 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module APIExtension: ::ElasticGraph::SchemaDefinition::API + def json_schema_version: (::Integer) -> void + def enforce_json_schema_version: (bool) -> void + def json_schema_strictness: (?allow_omitted_fields: bool, ?allow_extra_fields: bool) -> void + + def self.extended: 
(::ElasticGraph::SchemaDefinition::API & APIExtension) -> void + + private + + def json_ingestion_state: () -> (::ElasticGraph::SchemaDefinition::State & StateExtension) + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/factory_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/factory_extension.rbs new file mode 100644 index 000000000..95fd5e1d0 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/factory_extension.rbs @@ -0,0 +1,32 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module FactoryExtension: ::ElasticGraph::SchemaDefinition::Factory + BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME: ::Hash[::String, ::Hash[::Symbol, untyped]] + + def new_enum_type: (::String) ?{ (::ElasticGraph::SchemaDefinition::SchemaElements::EnumType & SchemaElements::EnumTypeExtension) -> void } -> (::ElasticGraph::SchemaDefinition::SchemaElements::EnumType & SchemaElements::EnumTypeExtension) + def new_enum_indexing_field_type: (::Array[::String]) -> Indexing::FieldType::Enum + def new_field: (**untyped) ?{ (::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension) -> void } -> (::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension) + def new_index: (::String, ::Hash[::String, untyped], ::ElasticGraph::SchemaDefinition::indexableType) ?{ (::ElasticGraph::SchemaDefinition::Indexing::Index & Indexing::IndexExtension) -> void } -> (::ElasticGraph::SchemaDefinition::Indexing::Index & Indexing::IndexExtension) + def new_object_indexing_field_type: ( + type_name: ::String, + subfields: ::Array[Indexing::Field], + mapping_options: ::ElasticGraph::SchemaDefinition::Mixins::HasTypeInfo::optionsHash, + doc_comment: ::String?, + json_schema_options: ::Hash[::Symbol, untyped] + ) -> Indexing::FieldType::Object + def new_scalar_type: (::String) ?{ 
(::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & SchemaElements::ScalarTypeExtension) -> void } -> (::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & SchemaElements::ScalarTypeExtension) + def new_scalar_indexing_field_type: (scalar_type: ::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType) -> Indexing::FieldType::Scalar + def new_type_with_subfields: ( + ::ElasticGraph::SchemaDefinition::SchemaElements::schemaKind, + ::String, + wrapping_type: ::ElasticGraph::SchemaDefinition::SchemaElements::anyObjectType, + field_factory: ::Method + ) ?{ (::ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields & SchemaElements::TypeWithSubfieldsExtension) -> void } -> (::ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields & SchemaElements::TypeWithSubfieldsExtension) + def new_union_indexing_field_type: (::Hash[::String, Indexing::FieldType::Object]) -> Indexing::FieldType::Union + def new_results: () -> ::ElasticGraph::SchemaDefinition::Results + def new_schema_artifact_manager: (**untyped) -> ::ElasticGraph::SchemaDefinition::SchemaArtifactManager + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rbs index 6d157abaf..370698dde 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object.rbs @@ -8,14 +8,11 @@ module ElasticGraph end class Object < ObjectSupertype - attr_reader json_schema_options: ::Hash[::Symbol, untyped] + attr_accessor json_schema_options: ::Hash[::Symbol, untyped] @to_json_schema: ::Hash[::String, untyped]? 
- def initialize: ( - ::ElasticGraph::SchemaDefinition::Indexing::FieldType::Object field_type, - json_schema_options: ::Hash[::Symbol, untyped] - ) -> void + def initialize: (::ElasticGraph::SchemaDefinition::Indexing::FieldType::Object field_type) -> void def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs index 8c6b52c0b..5fbae3383 100644 --- a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/field_type/union.rbs @@ -8,6 +8,8 @@ module ElasticGraph end class Union < UnionSupertype + def self.new: (::ElasticGraph::SchemaDefinition::Indexing::FieldType::Union) -> instance + def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata] def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped] def to_json_schema: () -> ::Hash[::String, untyped] diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/index_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/index_extension.rbs new file mode 100644 index 000000000..6107d461b --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/index_extension.rbs @@ -0,0 +1,13 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module Indexing + module IndexExtension: ::ElasticGraph::SchemaDefinition::Indexing::Index + def self.extended: 
(::ElasticGraph::SchemaDefinition::Indexing::Index) -> void + def rollover: (::Symbol, ::String) -> void + def route_with: (::String) -> void + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_builder.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_builder.rbs new file mode 100644 index 000000000..ae129f100 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_builder.rbs @@ -0,0 +1,27 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + class JSONSchemaBuilder + def initialize: ( + state: ::ElasticGraph::SchemaDefinition::State, + all_types: ::Array[::ElasticGraph::SchemaDefinition::SchemaElements::graphQLType], + derived_indexing_type_names: ::Set[::String] + ) -> void + + def public_json_schema: () -> ::Hash[::String, untyped] + def field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]] + + private + + @state: ::ElasticGraph::SchemaDefinition::State + @all_types: ::Array[::ElasticGraph::SchemaDefinition::SchemaElements::graphQLType] + @derived_indexing_type_names: ::Set[::String] + @indexing_field_types_by_name: ::Hash[::String, Indexing::_JSONFieldType]? 
+ + def root_document_type_names: () -> ::Array[::String] + def definitions_by_name: () -> ::Hash[::String, untyped] + def indexing_field_types_by_name: () -> ::Hash[::String, Indexing::_JSONFieldType] + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_layers.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_layers.rbs new file mode 100644 index 000000000..edde6d332 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_layers.rbs @@ -0,0 +1,13 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module JSONSchemaLayers + def self.for: (::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference) -> ::ElasticGraph::SchemaDefinition::jsonSchemaLayersArray + + private + + def self.peel_once: (::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference) -> [::ElasticGraph::SchemaDefinition::jsonSchemaLayersArray, ::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference] + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_merge_reporter.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_merge_reporter.rbs new file mode 100644 index 000000000..4b4f6f0f6 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_merge_reporter.rbs @@ -0,0 +1,24 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + class JSONSchemaMergeReporter + def initialize: (io) -> void + + def report_errors: (::Array[Indexing::JSONSchemaWithMetadata]) -> void + def report_warnings: (::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]) -> void + + private + + @output: io + + def format_deprecated_elements: 
(::Enumerable[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]) -> ::String + def missing_field_error_for: (::String, ::Array[::Integer]) -> ::String + def missing_type_error_for: (::String, ::Array[::Integer]) -> ::String + def missing_necessary_field_error_for: (Indexing::JSONSchemaWithMetadata::MissingNecessaryField, ::Array[::Integer]) -> ::String + def describe_json_schema_versions: (::Array[::Integer], ::String) -> ::String + def old_versions: (::Array[::Integer]) -> ::String + def files_noun_phrase: (::Array[::Integer]) -> ::String + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs new file mode 100644 index 000000000..c6c33eb4a --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/results_extension.rbs @@ -0,0 +1,29 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module ResultsExtension : ::ElasticGraph::SchemaDefinition::Results + def json_schemas_for: (::Integer) -> ::Hash[::String, untyped] + def available_json_schema_versions: () -> ::Set[::Integer] + def latest_json_schema_version: () -> ::Integer + def json_schema_version_setter_location: () -> ::Thread::Backtrace::Location? + def json_schema_field_metadata_by_type_and_field_name: () -> ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]] + def current_public_json_schema: () -> ::Hash[::String, untyped] + def merge_field_metadata_into_json_schema: (::Hash[::String, untyped]) -> Indexing::JSONSchemaWithMetadata + def unused_deprecated_elements: () -> ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement] + + private + + @latest_versioned_json_schema: ::Hash[::String, untyped]? + @available_json_schema_versions: ::Set[::Integer]? 
+ @json_schema_field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, Indexing::JSONSchemaFieldMetadata]]? + @current_public_json_schema: ::Hash[::String, untyped]? + @json_ingestion_json_schema_builder: JSONSchemaBuilder? + @json_ingestion_json_schema_with_metadata_merger: Indexing::JSONSchemaWithMetadata::Merger? + + def json_ingestion_state: () -> (::ElasticGraph::SchemaDefinition::State & StateExtension) + def json_ingestion_json_schema_builder: () -> JSONSchemaBuilder + def json_ingestion_json_schema_with_metadata_merger: () -> Indexing::JSONSchemaWithMetadata::Merger + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs new file mode 100644 index 000000000..2ecf20c2e --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension.rbs @@ -0,0 +1,21 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaArtifactManagerExtension : ::ElasticGraph::SchemaDefinition::SchemaArtifactManager + private + + @json_ingestion_json_schemas_artifact: ::ElasticGraph::SchemaDefinition::SchemaArtifact[::Hash[::String, untyped]]? + @json_ingestion_json_schema_merge_reporter: JSONSchemaMergeReporter? 
+ + def json_ingestion_schema_definition_results: () -> (::ElasticGraph::SchemaDefinition::Results & ResultsExtension) + def json_ingestion_state: () -> (::ElasticGraph::SchemaDefinition::State & StateExtension) + def artifacts_from_schema_def: () -> ::Array[::ElasticGraph::SchemaDefinition::SchemaArtifact[untyped]] + def json_ingestion_json_schemas_artifact: () -> ::ElasticGraph::SchemaDefinition::SchemaArtifact[::Hash[::String, untyped]] + def json_ingestion_check_if_needs_json_schema_version_bump: () { (::Integer) -> void } -> void + def json_ingestion_build_desired_versioned_json_schemas: (::Hash[::String, untyped]) -> ::Hash[::Integer, ::Hash[::String, untyped]] + def json_ingestion_json_schema_merge_reporter: () -> JSONSchemaMergeReporter + def json_ingestion_new_versioned_json_schema_artifact: (::Hash[::String, untyped]) -> ::ElasticGraph::SchemaDefinition::SchemaArtifact[::Hash[::String, untyped]] + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension.rbs new file mode 100644 index 000000000..12d81c14b --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension.rbs @@ -0,0 +1,13 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + module EnumTypeExtension: ::ElasticGraph::SchemaDefinition::SchemaElements::EnumType + private + + def configure_derived_scalar_type: (::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & ScalarTypeExtension) -> void + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension.rbs new 
file mode 100644 index 000000000..5803b2919 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension.rbs @@ -0,0 +1,17 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + module FieldExtension: ::ElasticGraph::SchemaDefinition::SchemaElements::Field + include HasJSONSchema + + @non_nullable_in_json_schema: bool + + def non_nullable_in_json_schema?: () -> bool + def json_schema: (?nullable: bool?, **untyped) -> void + def to_indexing_field_reference: () -> Indexing::FieldReference? + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema.rbs new file mode 100644 index 000000000..3d6cfac4f --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/has_json_schema.rbs @@ -0,0 +1,14 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + module HasJSONSchema + @json_schema_options: ::Hash[::Symbol, untyped] + + def json_schema_options: () -> ::Hash[::Symbol, untyped] + def json_schema: (**untyped) -> void + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension.rbs new file mode 100644 index 000000000..8a914177c --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension.rbs @@ -0,0 +1,18 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + module ScalarTypeExtension: 
::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType + include HasJSONSchema + + def finalize_json_schema_configuration!: () -> void + def validate_json_schema_configuration!: () -> void + + private + + def inferred_grouping_missing_value_placeholder: () -> ::String? + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/type_with_subfields_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/type_with_subfields_extension.rbs new file mode 100644 index 000000000..9eded4420 --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/schema_elements/type_with_subfields_extension.rbs @@ -0,0 +1,13 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module SchemaElements + module TypeWithSubfieldsExtension: ::ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields + include HasJSONSchema + + def to_indexing_field_type: () -> Indexing::FieldType::Object + end + end + end + end +end diff --git a/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/state_extension.rbs b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/state_extension.rbs new file mode 100644 index 000000000..06f930cbb --- /dev/null +++ b/elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/state_extension.rbs @@ -0,0 +1,15 @@ +module ElasticGraph + module JSONIngestion + module SchemaDefinition + module StateExtension: ::ElasticGraph::SchemaDefinition::State + attr_accessor json_schema_version: ::Integer? + attr_accessor json_schema_version_setter_location: ::Thread::Backtrace::Location? 
+ attr_accessor enforce_json_schema_version: bool + attr_accessor allow_omitted_json_schema_fields: bool + attr_accessor allow_extra_json_schema_fields: bool + + def self.extended: (::ElasticGraph::SchemaDefinition::State & StateExtension) -> void + end + end + end +end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb index 062eb2255..332e0906d 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/factory.rb @@ -233,7 +233,7 @@ def build_relay_pagination_types(type_name, include_total_edge_count: false, der def new_interface_type(name) @@interface_type_new.call(@state, name.to_s) do |interface_type| - yield interface_type + yield interface_type if block_given? end end @@interface_type_new = prevent_non_factory_instantiation_of(SchemaElements::InterfaceType) diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index a6400db64..ff3b58f69 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -270,17 +270,6 @@ def runtime_metadata private - # A regex that requires at least one non-whitespace character. - # Note: this does not use the `/S` character class because it's recommended to use a small subset - # of Regex syntax: - # - # > The regular expression syntax used is from JavaScript (ECMA 262, specifically). However, that - # > complete syntax is not widely supported, therefore it is recommended that you stick to the subset - # > of that syntax described below. 
- # - # (From https://json-schema.org/understanding-json-schema/reference/regular_expressions.html) - HAS_NON_WHITE_SPACE_REGEX = "[^ \t\n]+" - DEFAULT_SETTINGS = { "index.mapping.ignore_malformed" => false, "index.mapping.coerce" => false, diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb index 3297fb1ee..02b0a9ab4 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb @@ -145,7 +145,7 @@ def derived_graphql_types derived_scalar_types = schema_def_state.factory.new_scalar_type(name) do |t| t.mapping type: "keyword" - t.json_schema type: "string" + configure_derived_scalar_type(t) t.graphql_only graphql_only? end.derived_graphql_types @@ -156,6 +156,14 @@ def derived_graphql_types end end + # Hook for extensions to customize the scalar type derived from an enum type. + # @param scalar_type [ScalarType] the scalar type to configure + # @return [void] + # @api private + def configure_derived_scalar_type(scalar_type) + scalar_type.json_schema type: "string" + end + # @return [Indexing::FieldType::Enum] indexing representation of this enum type def to_indexing_field_type schema_def_state.factory.new_enum_indexing_field_type(values_by_name.keys) diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs index 0dc296381..30b4b3a43 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/indexing/index.rbs @@ -8,7 +8,10 @@ module ElasticGraph attr_reader rollover_config: RolloverConfig? 
attr_reader has_had_multiple_sources_flag: bool attr_reader indexed_type: indexableType + attr_reader schema_def_state: State + def rollover: (::Symbol, ::String) -> void + def route_with: (::String) -> void def uses_custom_routing?: () -> bool def to_index_config: () -> ::Hash[::String, untyped] def to_index_template_config: () -> ::Hash[::String, untyped] @@ -16,6 +19,7 @@ module ElasticGraph private + def public_field_path: (::String, explanation: ::String) -> SchemaElements::FieldPath def date_and_datetime_types: () -> ::Array[::String] end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/enum_type.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/enum_type.rbs index 518792bcb..43b6472ee 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/enum_type.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/enum_type.rbs @@ -26,6 +26,10 @@ module ElasticGraph def values: (*::String) -> void def runtime_metadata: () -> SchemaArtifacts::RuntimeMetadata::Enum::Type def as_input: () -> EnumType + + private + + def configure_derived_scalar_type: (ScalarType) -> void end end end diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/interface_type_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/interface_type_spec.rb index 41cd3972e..35f53803e 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/interface_type_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/interface_type_spec.rb @@ -15,6 +15,12 @@ module SchemaDefinition include_context "GraphQL schema spec support" with_both_casing_forms do + it "can create an interface type without a customization block" do + interface_type = API.new(schema_elements, 
true).factory.new_interface_type("Named") + + expect(interface_type.name).to eq("Named") + end + it "acts like `object_type` but defines a GraphQL `interface` instead of a GraphQL `type`" do result = define_schema do |schema| schema.interface_type "Named" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb index 4e101b378..1e0b20b8c 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_field_metadata_spec.rb @@ -85,7 +85,7 @@ module SchemaDefinition expect(json_schema_field_metadata).to all eq({}) end - it "generates no field metadata for user-defined union or interface types since the JSON schema" do + it "generates no field metadata for user-defined union or interface types since they have no direct JSON fields" do metadata_by_type_and_field_name = dump_metadata do |schema| schema.interface_type "Named" do |t| t.field "name", "String" diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb index 90979c46b..0ca98186c 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/json_schema_spec.rb @@ -8,6 +8,7 @@ require "elastic_graph/constants" require "elastic_graph/errors" +require "elastic_graph/json_ingestion/schema_definition/api_extension" require "elastic_graph/spec_support/schema_definition_helpers" require "support/json_schema_matcher" @@ -109,6 +110,24 @@ module SchemaDefinition .and_fails_to_match("a", true, nil) end + it "configures built-in scalar 
JSON schema before user schema blocks are evaluated" do + json_schema_options_in_schema_block = nil + + define_schema( + schema_element_name_form: "snake_case", + extension_modules: [JSONIngestion::SchemaDefinition::APIExtension] + ) do |s| + json_schema_options_in_schema_block = s.state.scalar_types_by_name.fetch("String").json_schema_options.dup + + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.index "widgets" + end + end.current_public_json_schema + + expect(json_schema_options_in_schema_block).to eq({type: "string"}) + end + example "for `TimeZone`" do expect(json_schema).to have_json_schema_like("TimeZone", { "type" => "string", @@ -3159,7 +3178,7 @@ def shard_routing_string_field { "allOf" => [ {"$ref" => "#/$defs/ID"}, - {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH, "pattern" => Indexing::Index::HAS_NON_WHITE_SPACE_REGEX} + {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH, "pattern" => HAS_NON_WHITE_SPACE_REGEX} ] } end diff --git a/elasticgraph-support/lib/elastic_graph/constants.rb b/elasticgraph-support/lib/elastic_graph/constants.rb index f0449d88c..30d6b2e26 100644 --- a/elasticgraph-support/lib/elastic_graph/constants.rb +++ b/elasticgraph-support/lib/elastic_graph/constants.rb @@ -95,6 +95,18 @@ module ElasticGraph # @private DEFAULT_MAX_TEXT_LENGTH = 100 * (2**20).to_int + # A regex that requires at least one non-whitespace character. + # Note: this does not use the `\S` character class because it's recommended to use a small subset + # of Regex syntax: + # + # > The regular expression syntax used is from JavaScript (ECMA 262, specifically). However, that + # > complete syntax is not widely supported, therefore it is recommended that you stick to the subset + # > of that syntax described below. + # + # (From https://json-schema.org/understanding-json-schema/reference/regular_expressions.html) + # @private + HAS_NON_WHITE_SPACE_REGEX = "[^ \t\n]+" + # The name of the JSON schema definition for the ElasticGraph event envelope. 
# @private EVENT_ENVELOPE_JSON_SCHEMA_NAME = "ElasticGraphEventEnvelope" diff --git a/elasticgraph-support/sig/elastic_graph/constants.rbs b/elasticgraph-support/sig/elastic_graph/constants.rbs index 5b906f155..7d8e55585 100644 --- a/elasticgraph-support/sig/elastic_graph/constants.rbs +++ b/elasticgraph-support/sig/elastic_graph/constants.rbs @@ -12,6 +12,7 @@ module ElasticGraph LONG_STRING_MAX: ::Integer DEFAULT_MAX_KEYWORD_LENGTH: ::Integer DEFAULT_MAX_TEXT_LENGTH: ::Integer + HAS_NON_WHITE_SPACE_REGEX: ::String EVENT_ENVELOPE_JSON_SCHEMA_NAME: ::String SINGLETON_CURSOR: ::String GRAPHQL_SCHEMA_FILE: ::String diff --git a/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb b/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb index 0d87095f3..a9769863a 100644 --- a/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb +++ b/spec_support/lib/elastic_graph/spec_support/enable_simplecov.rb @@ -103,6 +103,9 @@ def wait_for_other_processes # status if we're not running it's test suite. add_filter "/elasticgraph-local/" unless spec_files_to_run.any? { |f| f.include?("/elasticgraph-local/") } + # The JSON ingestion gem is being introduced by extracting implementation first and moving its tests later. + add_filter "/elasticgraph-json_ingestion/" + # This version file is loaded from our gemspecs, which can get loaded by bundler before we get here. # SimpleCov is only able to track coverage of files loaded after it starts, so we need to filter them out if # their constant is already defined. They don't contain any branching statements or anything so it's ok to