diff --git a/CODEBASE_OVERVIEW.md b/CODEBASE_OVERVIEW.md index a7f4b58ea..ef821246b 100644 --- a/CODEBASE_OVERVIEW.md +++ b/CODEBASE_OVERVIEW.md @@ -192,13 +192,14 @@ graph LR; click opensearch-ruby href "https://rubygems.org/gems/opensearch-ruby" "Open on RubyGems.org" _blank; ``` -### Extensions (6 gems) +### Extensions (7 gems) These libraries extend ElasticGraph to provide optional but commonly needed functionality. * [elasticgraph-apollo](elasticgraph-apollo/README.md): Transforms an ElasticGraph project into an Apollo subgraph. * [elasticgraph-health_check](elasticgraph-health_check/README.md): Provides a health check for high availability ElasticGraph deployments. * [elasticgraph-json_ingestion](elasticgraph-json_ingestion/README.md): JSON Schema ingestion support for ElasticGraph. +* [elasticgraph-protobuf](elasticgraph-protobuf/README.md): Generates Protocol Buffers schema artifacts from ElasticGraph schemas. * [elasticgraph-query_interceptor](elasticgraph-query_interceptor/README.md): Intercepts ElasticGraph datastore queries. * [elasticgraph-query_registry](elasticgraph-query_registry/README.md): Provides a source-controlled query registry for ElasticGraph applications. * [elasticgraph-warehouse](elasticgraph-warehouse/README.md): Extends ElasticGraph to support ingestion into a data warehouse. 
@@ -218,6 +219,7 @@ graph LR; elasticgraph-health_check["eg-health_check"]; elasticgraph-datastore_core["eg-datastore_core"]; elasticgraph-json_ingestion["eg-json_ingestion"]; + elasticgraph-protobuf["eg-protobuf"]; elasticgraph-query_interceptor["eg-query_interceptor"]; elasticgraph-schema_artifacts["eg-schema_artifacts"]; elasticgraph-query_registry["eg-query_registry"]; @@ -231,6 +233,7 @@ graph LR; elasticgraph-health_check --> elasticgraph-graphql; elasticgraph-health_check --> elasticgraph-support; elasticgraph-json_ingestion --> elasticgraph-support; + elasticgraph-protobuf --> elasticgraph-support; elasticgraph-query_interceptor --> elasticgraph-graphql; elasticgraph-query_interceptor --> elasticgraph-schema_artifacts; elasticgraph-query_registry --> elasticgraph-graphql; @@ -246,6 +249,7 @@ graph LR; class elasticgraph-health_check targetGemStyle; class elasticgraph-datastore_core otherEgGemStyle; class elasticgraph-json_ingestion targetGemStyle; + class elasticgraph-protobuf targetGemStyle; class elasticgraph-query_interceptor targetGemStyle; class elasticgraph-schema_artifacts otherEgGemStyle; class elasticgraph-query_registry targetGemStyle; diff --git a/Gemfile.lock b/Gemfile.lock index 1de3ddf40..7e40712e7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -140,6 +140,12 @@ PATH faraday-retry (~> 2.4) opensearch-ruby (~> 3.4) +PATH + remote: elasticgraph-protobuf + specs: + elasticgraph-protobuf (1.2.1.pre) + elasticgraph-support (= 1.2.1.pre) + PATH remote: elasticgraph-query_interceptor specs: @@ -696,6 +702,7 @@ DEPENDENCIES elasticgraph-lambda_support (= 1.2.1.pre)! elasticgraph-local (= 1.2.1.pre)! elasticgraph-opensearch (= 1.2.1.pre)! + elasticgraph-protobuf (= 1.2.1.pre)! elasticgraph-query_interceptor (= 1.2.1.pre)! elasticgraph-query_registry (= 1.2.1.pre)! elasticgraph-rack (= 1.2.1.pre)! 
@@ -789,6 +796,7 @@ CHECKSUMS elasticgraph-lambda_support (1.2.1.pre) elasticgraph-local (1.2.1.pre) elasticgraph-opensearch (1.2.1.pre) + elasticgraph-protobuf (1.2.1.pre) elasticgraph-query_interceptor (1.2.1.pre) elasticgraph-query_registry (1.2.1.pre) elasticgraph-rack (1.2.1.pre) diff --git a/config/docker_demo/Dockerfile b/config/docker_demo/Dockerfile index 2110740fd..eda165650 100644 --- a/config/docker_demo/Dockerfile +++ b/config/docker_demo/Dockerfile @@ -19,6 +19,7 @@ COPY elasticgraph-indexer elasticgraph-indexer/ COPY elasticgraph-json_ingestion elasticgraph-json_ingestion/ COPY elasticgraph-local elasticgraph-local/ COPY elasticgraph-opensearch elasticgraph-opensearch/ +COPY elasticgraph-protobuf elasticgraph-protobuf/ COPY elasticgraph-query_registry elasticgraph-query_registry/ COPY elasticgraph-rack elasticgraph-rack/ COPY elasticgraph-schema_artifacts elasticgraph-schema_artifacts/ diff --git a/config/site/support/doctest_helper.rb b/config/site/support/doctest_helper.rb index ee2ec66e7..3bc297e9a 100644 --- a/config/site/support/doctest_helper.rb +++ b/config/site/support/doctest_helper.rb @@ -8,6 +8,7 @@ require "elastic_graph/apollo/schema_definition/api_extension" require "elastic_graph/json_ingestion/schema_definition/api_extension" +require "elastic_graph/protobuf/schema_definition/api_extension" require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names" require "elastic_graph/schema_definition/api" require "elastic_graph/schema_definition/schema_artifact_manager" @@ -53,6 +54,7 @@ module ElasticGraph "ElasticGraph.define_schema" => [], "ElasticGraph::Apollo::SchemaDefinition" => [Apollo::SchemaDefinition::APIExtension], "ElasticGraph::JSONIngestion::SchemaDefinition" => [JSONIngestion::SchemaDefinition::APIExtension], + "ElasticGraph::Protobuf::SchemaDefinition" => [Protobuf::SchemaDefinition::APIExtension], "ElasticGraph::SchemaDefinition" => [], "ElasticGraph::Warehouse::SchemaDefinition" => 
[Warehouse::SchemaDefinition::APIExtension] } diff --git a/elasticgraph-protobuf/.rspec b/elasticgraph-protobuf/.rspec new file mode 120000 index 000000000..67e6e21b3 --- /dev/null +++ b/elasticgraph-protobuf/.rspec @@ -0,0 +1 @@ +../spec_support/subdir_dot_rspec \ No newline at end of file diff --git a/elasticgraph-protobuf/.yardopts b/elasticgraph-protobuf/.yardopts new file mode 120000 index 000000000..e11a2057f --- /dev/null +++ b/elasticgraph-protobuf/.yardopts @@ -0,0 +1 @@ +../config/site/yardopts \ No newline at end of file diff --git a/elasticgraph-protobuf/Gemfile b/elasticgraph-protobuf/Gemfile new file mode 120000 index 000000000..26cb2ad91 --- /dev/null +++ b/elasticgraph-protobuf/Gemfile @@ -0,0 +1 @@ +../Gemfile \ No newline at end of file diff --git a/elasticgraph-protobuf/LICENSE.txt b/elasticgraph-protobuf/LICENSE.txt new file mode 100644 index 000000000..aa18b5db8 --- /dev/null +++ b/elasticgraph-protobuf/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 - 2026 Block, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/elasticgraph-protobuf/README.md b/elasticgraph-protobuf/README.md new file mode 100644 index 000000000..6465f796e --- /dev/null +++ b/elasticgraph-protobuf/README.md @@ -0,0 +1,240 @@ +# ElasticGraph::Protobuf + +An ElasticGraph extension that generates Protocol Buffers schema artifacts from ElasticGraph schemas. +It emits `proto3` by default and can emit `proto2`, and supports arbitrary file-level headers (such +as `option` declarations). + +## Dependency Diagram + +```mermaid +graph LR; + classDef targetGemStyle fill:#FADBD8,stroke:#EC7063,color:#000,stroke-width:2px; + classDef otherEgGemStyle fill:#A9DFBF,stroke:#2ECC71,color:#000; + classDef externalGemStyle fill:#E0EFFF,stroke:#70A1D7,color:#2980B9; + elasticgraph-protobuf["elasticgraph-protobuf"]; + class elasticgraph-protobuf targetGemStyle; + elasticgraph-support["elasticgraph-support"]; + elasticgraph-protobuf --> elasticgraph-support; + class elasticgraph-support otherEgGemStyle; +``` + +## Usage + +First, add `elasticgraph-protobuf` to your `Gemfile`, alongside the other ElasticGraph gems: + +```diff +diff --git a/Gemfile b/Gemfile +index 4a5ef1e..5c16c2b 100644 +--- a/Gemfile ++++ b/Gemfile +@@ -8,6 +8,7 @@ gem "elasticgraph-query_registry", *elasticgraph_details + + # Can be elasticgraph-elasticsearch or elasticgraph-opensearch based on the datastore you want to use. 
+ gem "elasticgraph-opensearch", *elasticgraph_details ++gem "elasticgraph-protobuf", *elasticgraph_details + + gem "httpx", "~> 1.3" + +``` + +Next, update your `Rakefile` so that `ElasticGraph::Protobuf::SchemaDefinition::APIExtension` is +included in the schema-definition extension modules: + +```diff +diff --git a/Rakefile b/Rakefile +index 2943335..26633c3 100644 +--- a/Rakefile ++++ b/Rakefile +@@ -3,5 +3,6 @@ + require "elastic_graph/json_ingestion/schema_definition/api_extension" + require "elastic_graph/local/rake_tasks" ++require "elastic_graph/protobuf/schema_definition/api_extension" + require "elastic_graph/query_registry/rake_tasks" + require "rspec/core/rake_task" + require "standard/rake" +@@ -16,6 +17,7 @@ ElasticGraph::Local::RakeTasks.new( + # Determines casing of field names. Can be either `:camelCase` or `:snake_case`. + tasks.schema_element_name_form = :camelCase + tasks.schema_definition_extension_modules << ElasticGraph::JSONIngestion::SchemaDefinition::APIExtension ++ tasks.schema_definition_extension_modules << ElasticGraph::Protobuf::SchemaDefinition::APIExtension + + # Customizes the names of fields generated by ElasticGraph. + tasks.schema_element_name_overrides = { +``` + +Then opt into proto generation from your schema definition: + +```ruby +# in config/schema/protobuf.rb + +ElasticGraph.define_schema do |schema| + schema.proto_schema_artifacts package_name: "myapp.events.v1" +end +``` + +After running `bundle exec rake schema_artifacts:dump`, ElasticGraph will generate: + +- `schema.proto` +- `proto_field_numbers.yaml` + +## Schema Definition Options + +### Protobuf Syntax (`proto2` / `proto3`) + +`proto_schema_artifacts` emits `proto3` by default. Pass `syntax: :proto2` to emit a proto2 file +instead (every field is then labeled `optional` or `repeated`). 
This is useful when the generated +messages need to reference proto2 types — for example, `protoc` forbids a `proto3` message from +referencing a `proto2` enum: + +```ruby +# in config/schema/protobuf.rb + +ElasticGraph.define_schema do |schema| + schema.proto_schema_artifacts package_name: "myapp.events.v1", syntax: :proto2 +end +``` + +### Custom Headers + +Pass `headers:` an array of strings to inject file-level lines (such as `option` declarations) +verbatim, as a contiguous section immediately after the `package` declaration. This lets you set +language-specific options without the gem baking in any particular convention: + +```ruby +# in config/schema/protobuf.rb + +ElasticGraph.define_schema do |schema| + schema.proto_schema_artifacts( + package_name: "myapp.events.v1", + headers: [ + %(option java_package = "com.myapp.events";), + "option java_multiple_files = true;" + ] + ) +end +``` + +produces: + +```text +syntax = "proto3"; + +package myapp.events.v1; + +option java_package = "com.myapp.events"; +option java_multiple_files = true; + +// ...messages... +``` + +### Custom Scalar Types + +Built-in ElasticGraph scalar types are automatically mapped to proto scalar types. 
+For custom scalar types, use `proto_field` to define the proto scalar type: + +```ruby +# in config/schema/money.rb + +ElasticGraph.define_schema do |schema| + schema.scalar_type "Money" do |t| + t.mapping type: "long" + t.json_schema type: "integer" + t.proto_field type: "int64" + end +end +``` + +### Sourcing Enum Values From Existing Protobuf Mappings + +If your project already maintains GraphQL-to-proto enum mappings (for example in tests), +you can reuse them for proto schema generation: + +```ruby +# in config/schema/proto_enum_mappings.rb + +ElasticGraph.define_schema do |schema| + schema.proto_enum_mappings( + SalesEg::ProtoEnumMappings::PROTO_ENUMS_BY_GRAPHQL_ENUM + ) if defined?(SalesEg::ProtoEnumMappings) +end +``` + +When a mapping exists for an enum, `elasticgraph-protobuf` uses the mapped proto enum(s) +as the source of enum values (respecting `exclusions`, `expected_extras`, and `name_transform`). + +### Referencing Existing Protobuf Types + +For enums that exactly match a canonical proto enum, you can import and reference +the existing proto type instead of generating a duplicate local enum: + +```ruby +# in config/schema/protobuf.rb + +ElasticGraph.define_schema do |schema| + if defined?(Squareup::Connect::V2::Resources::Card::Type) + schema.proto_enum_mappings( + "CardType" => { + Squareup::Connect::V2::Resources::Card::Type => {} + } + ) + + schema.proto_external_types( + "CardType" => { + proto: "squareup.connect.v2.resources.Card.Type", + import: "squareup/connect/v2/resources/card.proto" + } + ) + end +end +``` + +External type references currently support enums only. The matching +`proto_enum_mappings` entry must have exactly one source and no transform options; +otherwise the enum stays generated locally so value curation remains explicit. + +### Stable Field Numbers + +`schema_artifacts:dump` automatically reads and writes `proto_field_numbers.yaml` +in the schema artifacts directory. 
Existing numbers stay fixed even if field order +changes, and new fields get the next available numbers. + +`schema.proto` always uses the public GraphQL field names. When a field uses a +different `name_in_index`, the sidecar YAML stores that override privately: + +```yaml +messages: + Widget: + fields: + id: 1 + display_name: + field_number: 2 + name_in_index: displayName +``` + +If a field is renamed with `field.renamed_from`, `elasticgraph-protobuf` reuses the +existing field number under the new public field name. + +## Type Mappings + +The generated `schema.proto` uses these built-in scalar mappings: + +| ElasticGraph Type | Protobuf Type | +|-------------------|------------| +| `Boolean` | `bool` | +| `Cursor` | `string` | +| `Date` | `string` | +| `DateTime` | `string` | +| `Float` | `double` | +| `ID` | `string` | +| `Int` | `int32` | +| `JsonSafeLong` | `int64` | +| `LocalTime` | `string` | +| `LongString` | `int64` | +| `String` | `string` | +| `TimeZone` | `string` | +| `Untyped` | `string` | + +Additionally: +- List types become `repeated` fields. +- Nested list types generate wrapper messages so the output remains valid protobuf. +- Enum types generate `enum` definitions whose values are prefixed with the enum type name in `UPPER_SNAKE_CASE`, including a zero-valued `*_UNSPECIFIED` entry. diff --git a/elasticgraph-protobuf/elasticgraph-protobuf.gemspec b/elasticgraph-protobuf/elasticgraph-protobuf.gemspec new file mode 100644 index 000000000..d2b7df9c9 --- /dev/null +++ b/elasticgraph-protobuf/elasticgraph-protobuf.gemspec @@ -0,0 +1,41 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT.
# frozen_string_literal: true

# Loads `ElasticGraph::VERSION` from the sibling `elasticgraph-support` directory; that one
# constant is used below for this gem's version and for every ElasticGraph dependency pin.
require_relative "../elasticgraph-support/lib/elastic_graph/version"

# Gem specification for the `elasticgraph-protobuf` extension gem.
Gem::Specification.new do |spec|
  spec.name = "elasticgraph-protobuf"
  spec.version = ElasticGraph::VERSION
  spec.authors = ["Josh Wilson", "Myron Marston", "Block Engineering"]
  spec.email = ["joshuaw@squareup.com"]
  spec.homepage = "https://block.github.io/elasticgraph/"
  spec.license = "MIT"
  spec.summary = "Generates Protocol Buffers schema artifacts from ElasticGraph schemas."

  # The changelog, documentation and source-code URLs embed the gem version (and, for the
  # source URL, the gem name) so they point at the matching release.
  spec.metadata = {
    "bug_tracker_uri" => "https://github.com/block/elasticgraph/issues",
    "changelog_uri" => "https://github.com/block/elasticgraph/releases/tag/v#{ElasticGraph::VERSION}",
    "documentation_uri" => "https://block.github.io/elasticgraph/api-docs/v#{ElasticGraph::VERSION}/",
    "homepage_uri" => "https://block.github.io/elasticgraph/",
    "source_code_uri" => "https://github.com/block/elasticgraph/tree/v#{ElasticGraph::VERSION}/#{spec.name}",
    "gem_category" => "extension"
  }

  # Package the git-tracked files listed from this gem's directory, minus:
  #   - any entry equal to `__FILE__`,
  #   - entries under `test/`, `spec/`, `features/` or `sig/`,
  #   - entries whose path starts with `.git`, `.travis`, `.circleci` or `appveyor`,
  #   - `.rspec`, `Gemfile` and `.yardopts` (subtracted from the list after the `reject`).
  # NOTE(review): `git ls-files` prints paths relative to the directory it runs in, so
  # `f == __FILE__` presumably only matches when this gemspec is loaded through a bare relative
  # path — confirm whether excluding the gemspec from the package is actually achieved.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features|sig)/|\.(?:git|travis|circleci)|appveyor)})
    end - [".rspec", "Gemfile", ".yardopts"]
  end

  spec.required_ruby_version = [">= 3.4", "< 4.1"]

  # The only runtime dependency, pinned to exactly this gem's version.
  spec.add_dependency "elasticgraph-support", ElasticGraph::VERSION

  # Development-only dependencies, pinned to the same version.
  spec.add_development_dependency "elasticgraph-json_ingestion", ElasticGraph::VERSION
  spec.add_development_dependency "elasticgraph-schema_definition", ElasticGraph::VERSION
end
module ElasticGraph
  # Namespace for Protocol Buffers schema artifact generation extensions.
  #
  # The two constants below are the file names of the artifacts this extension produces.
  module Protobuf
    # The name of the generated Protocol Buffers schema file.
    PROTO_SCHEMA_FILE = "schema.proto"

    # The name of the generated proto field-number mapping file (a YAML sidecar to the schema file).
    PROTO_FIELD_NUMBERS_FILE = "proto_field_numbers.yaml"
  end
end
module ElasticGraph
  module Protobuf
    # Namespace for all protobuf schema definition support.
    #
    # {SchemaDefinition::APIExtension} is the primary entry point and should be used as a schema definition extension module.
    module SchemaDefinition
      # Module designed to be extended onto an {ElasticGraph::SchemaDefinition::API} instance
      # to enable protobuf schema artifact generation.
      module APIExtension
        # Maps built-in ElasticGraph scalar types to proto field types.
        PROTO_TYPES_BY_BUILT_IN_SCALAR_TYPE = {
          "Boolean" => "bool",
          "Cursor" => "string",
          "Date" => "string",
          "DateTime" => "string",
          "Float" => "double",
          "ID" => "string",
          "Int" => "int32",
          "JsonSafeLong" => "int64",
          "LocalTime" => "string",
          "LongString" => "int64",
          "String" => "string",
          "TimeZone" => "string",
          "Untyped" => "string"
        }.freeze

        # Wires up the protobuf extensions when this module is extended onto an API instance:
        # the API's state and factory are extended, and every built-in scalar type that carries
        # the scalar extension gets its `proto_field` type from {PROTO_TYPES_BY_BUILT_IN_SCALAR_TYPE}.
        #
        # @param api [ElasticGraph::SchemaDefinition::API] the API instance to extend
        # @return [void]
        # @raise [KeyError] from the callback, if a built-in scalar type has no entry in the mapping
        # @api private
        def self.extended(api)
          api.state.extend(StateExtension)
          api.factory.extend(FactoryExtension)

          api.on_built_in_types do |type|
            if type.is_a?(SchemaElements::ScalarTypeExtension)
              type.proto_field type: PROTO_TYPES_BY_BUILT_IN_SCALAR_TYPE.fetch(type.name)
            end
          end
        end

        # Configures protobuf artifact generation behavior.
        #
        # @param package_name [String] proto package name to emit; must be a non-empty String
        # @param syntax [Symbol, String] `:proto3` (default) or `:proto2`; compared via `to_s`
        # @param headers [Array<String>] file-level header lines (e.g. `option` declarations) rendered
        #   verbatim after the `package` declaration. A copy of the array is stored, so mutating the
        #   argument afterwards does not affect the generated schema.
        # @return [void]
        # @raise [Errors::SchemaError] if any argument fails validation
        #
        # @example Set the proto package name
        #   ElasticGraph.define_schema do |schema|
        #     schema.proto_schema_artifacts package_name: "myapp.events.v1"
        #   end
        #
        # @example Emit proto2 with custom file-level options
        #   ElasticGraph.define_schema do |schema|
        #     schema.proto_schema_artifacts(
        #       package_name: "myapp.events.v1",
        #       syntax: :proto2,
        #       headers: [
        #         %(option java_package = "com.myapp.events";),
        #         "option java_multiple_files = true;"
        #       ]
        #     )
        #   end
        def proto_schema_artifacts(package_name: "elasticgraph", syntax: :proto3, headers: [])
          if !package_name.is_a?(String) || package_name.empty?
            raise Errors::SchemaError, "`package_name` must be a non-empty String"
          end
          unless Schema::SUPPORTED_SYNTAXES.include?(syntax.to_s)
            raise Errors::SchemaError, "`syntax` must be one of #{Schema::SUPPORTED_SYNTAXES.inspect}, got: #{syntax.inspect}"
          end
          unless headers.is_a?(Array) && headers.all?(String)
            raise Errors::SchemaError, "`headers` must be an Array of Strings"
          end

          protobuf_state.proto_schema_package_name = package_name
          protobuf_state.proto_schema_syntax = syntax
          # Store a copy rather than the caller's own Array: the schema is rendered later, and a
          # caller that keeps appending to its array must not silently change the output.
          protobuf_state.proto_schema_headers = headers.dup
          nil
        end

        # Registers mappings from GraphQL enum names to protobuf enum classes and transform options.
        # This is intended to support reusing enum mappings already maintained by applications
        # (for example in schema/proto consistency tests).
        #
        # NOTE(review): this assigns the given hash to the state; unless the state's writer merges
        # (not visible here), a second call replaces the mappings from an earlier call — confirm.
        #
        # @param proto_enums_by_graphql_enum [Hash] mappings keyed by GraphQL enum name
        # @return [void]
        def proto_enum_mappings(proto_enums_by_graphql_enum)
          protobuf_state.proto_enums_by_graphql_enum = proto_enums_by_graphql_enum
          nil
        end

        # Registers GraphQL types that should be referenced from existing proto files instead of
        # generated locally in `schema.proto`.
        #
        # @param proto_external_types [Hash] map of GraphQL type name to `proto` and `import` values
        # @return [void]
        #
        # @example Reference an external enum type
        #   ElasticGraph.define_schema do |schema|
        #     schema.proto_external_types(
        #       "CardType" => {
        #         proto: "squareup.connect.v2.resources.Card.Type",
        #         import: "squareup/connect/v2/resources/card.proto"
        #       }
        #     )
        #   end
        def proto_external_types(proto_external_types)
          protobuf_state.proto_external_types = proto_external_types
          nil
        end

        # Configures proto field-number mappings directly from a hash.
        # Useful for tests and advanced use cases where mappings are sourced outside artifacts.
        # When artifacts are dumped, mappings from the existing `proto_field_numbers.yaml` artifact
        # are loaded automatically; this method does not need to be called in that case.
        #
        # @param proto_field_number_mappings [Hash]
        # @return [void]
        def configure_proto_field_number_mappings(proto_field_number_mappings)
          protobuf_state.proto_field_number_mappings = proto_field_number_mappings
          nil
        end

        private

        # Returns the API's `state` narrowed to include this gem's `StateExtension`. Centralizes
        # the Steep cast that's needed because Steep can't see the `extend(StateExtension)` applied
        # at runtime in `extended`.
        def protobuf_state
          state # : ElasticGraph::SchemaDefinition::State & StateExtension
        end
      end
    end
  end
end
module ElasticGraph
  module Protobuf
    module SchemaDefinition
      # Mixed into the schema-definition factory so that every schema element, results object and
      # artifact manager it builds also carries this gem's protobuf extension.
      #
      # Each type-building override extends the new type *before* handing it to the caller's block,
      # so protobuf APIs are usable inside that block.
      module FactoryExtension
        # Builds an enum type that carries {SchemaElements::EnumTypeExtension}.
        #
        # @param name [String] enum type name
        # @yield [ElasticGraph::SchemaDefinition::SchemaElements::EnumType]
        # @return [ElasticGraph::SchemaDefinition::SchemaElements::EnumType]
        def new_enum_type(name)
          super(name) do |enum_type|
            enum_type.extend(SchemaElements::EnumTypeExtension)
            yield(enum_type) if block_given?
          end
        end

        # Builds an interface type that carries {SchemaElements::ObjectInterfaceAndUnionExtension}.
        #
        # @param name [String] interface type name
        # @yield [ElasticGraph::SchemaDefinition::SchemaElements::InterfaceType]
        # @return [ElasticGraph::SchemaDefinition::SchemaElements::InterfaceType]
        def new_interface_type(name)
          super(name) do |interface_type|
            interface_type.extend(SchemaElements::ObjectInterfaceAndUnionExtension)
            yield(interface_type) if block_given?
          end
        end

        # Builds an object type that carries {SchemaElements::ObjectInterfaceAndUnionExtension}.
        #
        # @param name [String] object type name
        # @yield [ElasticGraph::SchemaDefinition::SchemaElements::ObjectType]
        # @return [ElasticGraph::SchemaDefinition::SchemaElements::ObjectType]
        def new_object_type(name)
          super(name) do |object_type|
            object_type.extend(SchemaElements::ObjectInterfaceAndUnionExtension)
            yield(object_type) if block_given?
          end
        end

        # Builds a scalar type that carries {SchemaElements::ScalarTypeExtension}.
        #
        # @param name [String] scalar type name
        # @yield [ElasticGraph::SchemaDefinition::SchemaElements::ScalarType]
        # @return [ElasticGraph::SchemaDefinition::SchemaElements::ScalarType]
        def new_scalar_type(name)
          super(name) do |scalar_type|
            scalar_type.extend(SchemaElements::ScalarTypeExtension)
            yield(scalar_type) if block_given?
          end
        end

        # Builds a union type that carries {SchemaElements::ObjectInterfaceAndUnionExtension}.
        #
        # @param name [String] union type name
        # @yield [ElasticGraph::SchemaDefinition::SchemaElements::UnionType]
        # @return [ElasticGraph::SchemaDefinition::SchemaElements::UnionType]
        def new_union_type(name)
          super(name) do |union_type|
            union_type.extend(SchemaElements::ObjectInterfaceAndUnionExtension)
            yield(union_type) if block_given?
          end
        end

        # Builds the results object and adds the proto generation APIs to it.
        #
        # @return [ElasticGraph::SchemaDefinition::Results]
        def new_results
          results = super
          results.extend(ResultsExtension)
          results
        end

        # Builds the schema artifact manager (forwarding all arguments unchanged) and adds proto
        # artifact support to it.
        #
        # @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager]
        def new_schema_artifact_manager(...)
          manager = super
          manager.extend(SchemaArtifactManagerExtension)
          manager
        end
      end
    end
  end
end
module ElasticGraph
  module Protobuf
    module SchemaDefinition
      # Renders names for use in generated Protocol Buffers source, appending an underscore to any
      # name that collides with an entry in {PROTO_KEYWORDS}.
      class Identifier
        # Words treated as reserved by this generator; a name equal to one of them (the comparison
        # is case-sensitive) is never emitted verbatim.
        #
        # @return [Set]
        PROTO_KEYWORDS = ::Set.new(%w[
          bool bytes double enum false fixed32 fixed64 float import int32 int64 map
          message oneof option package public repeated reserved rpc service sfixed32 sfixed64
          sint32 sint64 stream string syntax to true uint32 uint64 weak
        ]).freeze

        class << self
          # Renders a dotted package name, escaping each dot-separated segment independently.
          #
          # @param name [#to_s]
          # @return [String]
          def package_name(name)
            segments = name.to_s.split(".")
            segments.map! { |segment| escape_keyword(segment) }
            segments.join(".")
          end

          # Renders a message name.
          #
          # @param name [#to_s]
          # @return [String]
          def message_name(name)
            escape_keyword(name.to_s)
          end

          # Renders an enum name.
          #
          # @param name [#to_s]
          # @return [String]
          def enum_name(name)
            escape_keyword(name.to_s)
          end

          # Renders a field name.
          #
          # @param name [#to_s]
          # @return [String]
          def field_name(name)
            escape_keyword(name.to_s)
          end

          # Renders an enum value name.
          #
          # @param name [#to_s]
          # @return [String]
          def enum_value_name(name)
            escape_keyword(name.to_s)
          end

          # Renders a reference to a type defined in another proto file. No escaping is applied:
          # the (possibly dotted, fully-qualified) name is returned as its string form, unchanged.
          #
          # @param name [#to_s]
          # @return [String]
          def external_type_name(name)
            name.to_s
          end

          # Appends `_` to `identifier` when it is one of {PROTO_KEYWORDS}; any other identifier is
          # returned as-is.
          #
          # @param identifier [String]
          # @return [String]
          def escape_keyword(identifier)
            PROTO_KEYWORDS.include?(identifier) ? "#{identifier}_" : identifier
          end
        end
      end
    end
  end
end
module ElasticGraph
  module Protobuf
    module SchemaDefinition
      # Mixed into {ElasticGraph::SchemaDefinition::Results} to expose the generated protobuf
      # schema and its field-number mappings. One generator is built lazily and shared by both.
      module ResultsExtension
        # The rendered protobuf schema. It is rendered on first use and memoized afterwards.
        #
        # @return [String] complete schema file contents, in the configured protobuf syntax
        def proto_schema
          @proto_schema ||= begin
            generator = protobuf_schema_generator
            generator.to_proto
          end
        end

        # The field-number mappings in the form written to the artifact file.
        #
        # @return [Hash]
        def proto_field_number_mappings
          # Render first: the mappings are read from the generator only after generation has run.
          proto_schema

          generator = protobuf_schema_generator
          generator.field_number_mappings_for_artifact
        end

        private

        # Returns the wrapped state narrowed to include this gem's `StateExtension`. Centralizes
        # the Steep cast that's needed because Steep can't see the `extend(StateExtension)` applied
        # at runtime in {APIExtension.extended}.
        def protobuf_state
          state # : ElasticGraph::SchemaDefinition::State & StateExtension
        end

        # Lazily builds (and memoizes) the generator from the settings recorded on the state.
        def protobuf_schema_generator
          @protobuf_schema_generator ||= begin
            proto_state = protobuf_state

            # Force `all_types` to materialize before generating. That applies the `on_built_in_types`
            # callbacks (including the built-in scalar `proto_field` configuration registered by
            # `APIExtension.extended`) and registers lazily-built types (such as derived indexed types)
            # in the state the generator reads from.
            all_types

            generator_options = {
              state: proto_state,
              package_name: proto_state.proto_schema_package_name,
              proto_enums_by_graphql_enum: proto_state.proto_enums_by_graphql_enum,
              proto_external_types: proto_state.proto_external_types,
              proto_field_number_mappings: proto_state.proto_field_number_mappings,
              syntax: proto_state.proto_schema_syntax,
              headers: proto_state.proto_schema_headers
            }
            Schema.new(**generator_options)
          end
        end
      end
    end
  end
end
        # Internal representation of a protobuf field definition: one field line of a generated
        # message (its name, proto type, field number, whether it is repeated, and an optional comment).
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] type
        #   @return [String]
        # @!attribute [r] field_number
        #   @return [Integer]
        # @!attribute [r] repeated
        #   @return [Boolean]
        # @!attribute [r] comment
        #   @return [String, nil]
        FieldDefinition = ::Data.define(:name, :type, :field_number, :repeated, :comment)
        # Internal representation of a protobuf message definition.
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] fields
        #   @return [Array] presumably {FieldDefinition} values — confirm against the code that builds them
        MessageDefinition = ::Data.define(:name, :fields)
        # Internal representation of a protobuf enum value definition.
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] number
        #   @return [Integer]
        EnumValueDefinition = ::Data.define(:name, :number)
        # Internal representation of a protobuf enum definition.
        #
        # @!attribute [r] name
        #   @return [String]
        # @!attribute [r] zero_value_name
        #   @return [String]
        # @!attribute [r] values
        #   @return [Array] presumably {EnumValueDefinition} values — confirm against the code that builds them
        EnumDefinition = ::Data.define(:name, :zero_value_name, :values)
        # Internal representation of a stored field-number mapping.
        #
        # NOTE(review): the sidecar YAML shown in the README stores a bare integer for fields that
        # have no `name_in_index` override, so `name_in_index` is presumably nil for those — confirm.
        #
        # @!attribute [r] field_number
        #   @return [Integer]
        # @!attribute [r] name_in_index
        #   @return [String]
        FieldNumberMapping = ::Data.define(:field_number, :name_in_index)
        # Internal representation of an externally-defined protobuf type: the fully-qualified
        # name to reference and the proto file to import for it.
        #
        # @!attribute [r] fqn
        #   @return [String]
        # @!attribute [r] import
        #   @return [String]
        ExternalTypeDefinition = ::Data.define(:fqn, :import)

        # Protobuf syntaxes this generator can emit. Stored as Strings, so callers compare a
        # configured syntax via `to_s`.
        SUPPORTED_SYNTAXES = %w[proto2 proto3].freeze

        # @param state [ElasticGraph::SchemaDefinition::State]
        # @param package_name [String]
        # @param proto_enums_by_graphql_enum [Hash]
        # @param proto_external_types [Hash]
        # @param proto_field_number_mappings [Hash]
        # @param syntax [Symbol, String] `:proto3` (default) or `:proto2`; validated by {APIExtension#proto_schema_artifacts}
        # @param headers [Array] file-level header lines (e.g.
`option` declarations) rendered verbatim + def initialize( + state:, + package_name:, + proto_enums_by_graphql_enum:, + proto_external_types: {}, + proto_field_number_mappings: {}, + syntax: :proto3, + headers: [] + ) + @syntax = syntax.to_s + @headers = headers + @state = state + @package_name = Identifier.package_name(package_name) + @proto_enums_by_graphql_enum = normalize_proto_enum_mappings(proto_enums_by_graphql_enum) + @proto_external_types_by_type_name = normalize_proto_external_types(proto_external_types) + @proto_field_number_mappings_by_message = normalize_proto_field_number_mappings(proto_field_number_mappings) + @imports = ::Set.new + @registered_external_type_names = ::Set.new + @message_definitions_by_name = {} + @enum_definitions_by_name = {} + @generated_message_definitions_by_name = {} + @wrapper_root_name_by_context = {} + @type_name_by_message_name = {} + @type_name_by_enum_name = {} + end + + # Renders the schema as a valid `proto2` or `proto3` file. + # + # @return [String] + def to_proto + root_types = indexed_types + return "" if root_types.empty? + + root_types.each { |type| register_type(type) } + + sections = [ + %(syntax = "#{@syntax}";), + "package #{@package_name};", + *render_headers, + *render_imports, + render_definitions + ].reject(&:empty?) + + sections.join("\n\n") + "\n" + end + + # Exposes normalized field-number mappings for writing to artifact YAML. 
+ # + # @return [Hash>>] + def field_number_mappings_for_artifact + { + "messages" => @proto_field_number_mappings_by_message + .sort_by { |message_name, _| message_name } + .to_h do |message_name, field_numbers| + [message_name, { + "fields" => field_numbers.sort_by { |field_name, mapping| [mapping.field_number, field_name] }.to_h do |field_name, mapping| + artifact_mapping = + if mapping.name_in_index == field_name + mapping.field_number + else + { + "field_number" => mapping.field_number, + "name_in_index" => mapping.name_in_index + } + end + + [field_name, artifact_mapping] + end + }] + end + } + end + + private + + def indexed_types + @state.indexed_types_by_index_name.values.sort_by(&:name) + end + + # Registers the type's proto definition (if it needs one) and returns its proto field type name. + def register_type(type) + if type.respond_to?(:name) && (external_type = @proto_external_types_by_type_name[type.name.to_s]) + register_external_type(type, external_type) + return external_type.fqn + end + + case type + when SchemaElements::EnumTypeExtension + register_enum(type) + when SchemaElements::ObjectInterfaceAndUnionExtension + register_message(type) + when SchemaElements::ScalarTypeExtension + # Scalars don't get their own proto definition. Resolving their proto field type below + # surfaces a clear error during registration when a custom scalar is not configured. + else + raise Errors::SchemaError, "Type `#{type.name}` cannot be converted to proto." + end + + type.to_proto_field_type + end + + def register_external_type(type, external_type) + type_name = type.name.to_s + + case type + when SchemaElements::EnumTypeExtension + unless @registered_external_type_names.include?(type_name) + validate_external_enum_type(type) + @registered_external_type_names << type_name + end + + @imports << external_type.import + else + raise Errors::SchemaError, "External proto type `#{type.name}` cannot be referenced yet. 
" \ + "Only enum types are supported by `proto_external_types` in this release." + end + end + + def validate_external_enum_type(enum_type) + enum_type_name = enum_type.name.to_s + mapping_entries = @proto_enums_by_graphql_enum[enum_type_name] + if mapping_entries.nil? || mapping_entries.empty? + raise Errors::SchemaError, "External proto enum `#{enum_type_name}` must also configure " \ + "`proto_enum_mappings` with exactly one untransformed source so its values can be verified." + end + + unless mapping_entries.size == 1 + raise Errors::SchemaError, "External proto enum `#{enum_type_name}` must use exactly one " \ + "`proto_enum_mappings` source; multi-source enum mappings cannot be safely referenced externally." + end + + proto_type, options = mapping_entries.first + options_are_empty = options.nil? || (options.is_a?(Hash) && options.empty?) + unless options_are_empty + raise Errors::SchemaError, "External proto enum `#{enum_type_name}` must use an empty " \ + "`proto_enum_mappings` options hash; transformed, excluded, or extra values must stay generated locally." + end + + proto_value_names = enum_value_names_from_proto_mapping( + enum_type_name: enum_type_name, + proto_type: proto_type, + options: {} + ).uniq.sort + eg_value_names = enum_type.values_by_name.keys.map(&:to_s).uniq.sort + return if proto_value_names == eg_value_names + + raise Errors::SchemaError, "External proto enum `#{enum_type_name}` values do not match the ElasticGraph enum values. " \ + "External values: #{proto_value_names.join(", ")}. ElasticGraph values: #{eg_value_names.join(", ")}." + end + + def register_message(type) + message_name = Identifier.message_name(type.name) + check_message_name_collision(message_name, type.name) + return if @message_definitions_by_name.key?(message_name) + + # Register a placeholder first so recursive type references do not recurse forever. 
+ @message_definitions_by_name[message_name] = MessageDefinition.new(name: message_name, fields: []) + + fields = type + .indexing_fields_by_name_in_index + .values + .filter_map(&:to_indexing_field) + .map do |field| + field_name = Identifier.field_name(field.name) + repeated, field_type = proto_field_type_for( + field.type, + context_message_name: message_name, + context_field_name: field.name + ) + field_number = field_number_for( + message_name: message_name, + type_name: type.name, + public_field_name: field.name, + name_in_index: field.name_in_index + ) + + comment = + if field_name == field.name + nil + else + "source name: #{field.name}" + end + + FieldDefinition.new( + name: field_name, + type: field_type, + field_number: field_number, + repeated: repeated, + comment: comment + ) + end + + duplicate_names = fields.group_by(&:name).select { |_, defs| defs.size > 1 } + if duplicate_names.any? + duplicates = duplicate_names.keys.sort.join(", ") + raise Errors::SchemaError, "Type `#{type.name}` maps to duplicate proto field names: #{duplicates}." + end + + @message_definitions_by_name[message_name] = MessageDefinition.new(name: message_name, fields: fields) + end + + def register_enum(enum_type) + enum_name = Identifier.enum_name(enum_type.name) + check_enum_name_collision(enum_name, enum_type.name) + return if @enum_definitions_by_name.key?(enum_name) + + values = enum_value_names_for(enum_type).each_with_index.map do |enum_value_name, i| + EnumValueDefinition.new( + name: proto_enum_value_name(enum_type.name, enum_value_name), + number: i + 1 + ) + end + + duplicate_names = values.group_by(&:name).select { |_, defs| defs.size > 1 } + if duplicate_names.any? + duplicates = duplicate_names.keys.sort.join(", ") + raise Errors::SchemaError, "Enum `#{enum_type.name}` maps to duplicate proto enum value names: #{duplicates}." + end + + zero_value_name = proto_zero_enum_value_name(enum_type.name) + while values.any? 
{ |value| value.name == zero_value_name } + zero_value_name = "#{zero_value_name}_" + end + + @enum_definitions_by_name[enum_name] = EnumDefinition.new( + name: enum_name, + zero_value_name: zero_value_name, + values: values + ) + end + + def enum_value_names_for(enum_type) + mapping_entries = @proto_enums_by_graphql_enum[enum_type.name] + return enum_type.values_by_name.keys if mapping_entries.nil? || mapping_entries.empty? + + values_by_source = mapping_entries.map do |proto_type, options| + enum_value_names_from_proto_mapping(enum_type_name: enum_type.name, proto_type: proto_type, options: options || {}) + end + + canonical_values = values_by_source.first + canonical_set = canonical_values.uniq.sort + + values_by_source.drop(1).each do |source_values| + next if source_values.uniq.sort == canonical_set + + raise Errors::SchemaError, "Protobuf enum mappings for `#{enum_type.name}` produce inconsistent value sets. " \ + "Ensure each mapped proto enum (with exclusions/expected_extras/name_transform) resolves to the same values." + end + + canonical_values + end + + def enum_value_names_from_proto_mapping(enum_type_name:, proto_type:, options:) + unless proto_type.singleton_class.public_method_defined?(:enums) + raise Errors::SchemaError, "Protobuf enum mapping for `#{enum_type_name}` must map to a proto enum class with `.enums`, " \ + "but got: #{proto_type.inspect}." 
+ end + + name_transform = fetch_mapping_option(options, :name_transform, :itself.to_proc) + exclusions = fetch_mapping_option(options, :exclusions, []).map(&:to_s) + expected_extras = fetch_mapping_option(options, :expected_extras, []).map(&:to_s) + + mapped_values = proto_type.enums.map(&:name).map(&:to_s).map do |name| + transformed = name_transform.call(name) + transformed.to_s + end + + (mapped_values - exclusions + expected_extras).uniq + rescue Errors::SchemaError + raise + rescue => e + raise Errors::SchemaError, "Failed loading proto enum mapping for `#{enum_type_name}` from `#{proto_type}`: #{e.message}" + end + + def field_number_for(message_name:, type_name:, public_field_name:, name_in_index:) + mappings_for_message = @proto_field_number_mappings_by_message[message_name] ||= {} + + mapping = + if mappings_for_message.key?(public_field_name) + mappings_for_message.fetch(public_field_name) + else + migrate_renamed_field_mapping( + mappings_for_message, + type_name: type_name, + public_field_name: public_field_name + ) || begin + next_field_number = next_available_field_number_for(mappings_for_message) + FieldNumberMapping.new(field_number: next_field_number, name_in_index: name_in_index) + end + end + + if mapping.name_in_index != name_in_index + mapping = FieldNumberMapping.new(field_number: mapping.field_number, name_in_index: name_in_index) + end + + mappings_for_message[public_field_name] = mapping + field_number = mapping.field_number + + duplicate_field_name = mappings_for_message.find do |mapped_field_name, mapped_field_number| + mapped_field_name != public_field_name && mapped_field_number.field_number == field_number + end&.first + + if duplicate_field_name + raise Errors::SchemaError, "Protobuf field-number mapping collision in message `#{message_name}`: " \ + "`#{duplicate_field_name}` and `#{public_field_name}` are both mapped to field number #{field_number}." 
+ end + + field_number + end + + def next_available_field_number_for(mappings_for_message) + used_numbers = ::Set.new(mappings_for_message.values.map(&:field_number)) + candidate = 1 + candidate += 1 while used_numbers.include?(candidate) + candidate + end + + def migrate_renamed_field_mapping(mappings_for_message, type_name:, public_field_name:) + renames_for_type = renamed_public_field_names_by_type_name.fetch(type_name) { return nil } + old_field_names = renames_for_type.fetch(public_field_name) { return nil } + + old_field_names.each do |old_field_name| + return mappings_for_message.delete(old_field_name) if mappings_for_message.key?(old_field_name) + end + + nil + end + + def proto_field_type_for(type_ref, context_message_name:, context_field_name:) + list_depth, base_type_ref = list_depth_and_base_type(type_ref) + + resolved = base_type_ref.resolved + if resolved.nil? + raise Errors::SchemaError, "Type `#{base_type_ref.unwrapped_name}` cannot be resolved for proto generation." + end + + base_type_name = register_type(resolved) + + if list_depth <= 1 + [list_depth == 1, base_type_name] + else + wrapper_type = register_nested_list_wrappers( + context_message_name: context_message_name, + context_field_name: context_field_name, + list_depth: list_depth, + base_type_name: base_type_name + ) + + [true, wrapper_type] + end + end + + def list_depth_and_base_type(type_ref) + list_depth = 0 + current = type_ref.unwrap_non_null + + while current.list? 
+ list_depth += 1 + current = current.unwrap_list.unwrap_non_null + end + + [list_depth, current] + end + + def register_nested_list_wrappers(context_message_name:, context_field_name:, list_depth:, base_type_name:) + context_key = [context_message_name, context_field_name, list_depth, base_type_name] + existing_root = @wrapper_root_name_by_context[context_key] + return existing_root if existing_root + + next_type_name = base_type_name + + (list_depth - 1).downto(1) do |level| + base_wrapper_name = "#{context_message_name}#{to_title_case(context_field_name)}ListLevel#{level}" + wrapper_name = unique_generated_message_name(base_wrapper_name) + + field = FieldDefinition.new( + name: "values", + type: next_type_name, + field_number: 1, + repeated: true, + comment: nil + ) + + @generated_message_definitions_by_name[wrapper_name] = MessageDefinition.new( + name: wrapper_name, + fields: [field] + ) + + next_type_name = wrapper_name + end + + # The last wrapper created (level 1) is the root wrapper that the field references directly. + @wrapper_root_name_by_context[context_key] = next_type_name + end + + def unique_generated_message_name(base_name) + index = 0 + + loop do + candidate_name = + if index.zero? + Identifier.message_name(base_name) + else + Identifier.message_name("#{base_name}#{index + 1}") + end + + return candidate_name unless name_taken?(candidate_name) + index += 1 + end + end + + def name_taken?(name) + @message_definitions_by_name.key?(name) || + @generated_message_definitions_by_name.key?(name) || + @enum_definitions_by_name.key?(name) + end + + def render_imports + @imports.sort.map { |import| "import \"#{import}\";" } + end + + # Renders the custom header lines as a single contiguous section (so they are not + # blank-line separated). Returns `[]` when no headers were configured. + def render_headers + return [] if @headers.empty? 
+ [@headers.join("\n")] + end + + def render_definitions + rendered_enums = all_enum_definitions.sort_by(&:name).map { |definition| render_enum(definition) } + rendered_messages = all_message_definitions.sort_by(&:name).map { |definition| render_message(definition) } + (rendered_enums + rendered_messages).join("\n\n") + end + + def proto_enum_value_name(enum_type_name, enum_value_name) + Identifier.enum_value_name("#{enum_value_prefix(enum_type_name)}_#{to_upper_snake_case(enum_value_name)}") + end + + def proto_zero_enum_value_name(enum_type_name) + "#{enum_value_prefix(enum_type_name)}_UNSPECIFIED" + end + + def enum_value_prefix(enum_type_name) + to_upper_snake_case(enum_type_name) + end + + def render_enum(enum_definition) + lines = [ + "enum #{enum_definition.name} {", + " #{enum_definition.zero_value_name} = 0;" + ] + + enum_definition.values.each do |value| + lines << " #{value.name} = #{value.number};" + end + + lines << "}" + lines.join("\n") + end + + def render_message(message_definition) + lines = ["message #{message_definition.name} {"] + + if message_definition.fields.empty? + lines << " // No indexed fields were defined for this type." + else + message_definition.fields.each do |field| + # proto2 requires an explicit label on every field; proto3 only uses `repeated`. + label = + if @syntax == "proto2" + field.repeated ? "repeated " : "optional " + else + field.repeated ? "repeated " : "" + end + line = " #{label}#{field.type} #{field.name} = #{field.field_number};" + line += " // #{field.comment}" if field.comment + lines << line + end + end + + lines << "}" + lines.join("\n") + end + + def all_enum_definitions + @enum_definitions_by_name.values + end + + def all_message_definitions + @message_definitions_by_name.values + @generated_message_definitions_by_name.values + end + + def to_title_case(name) + name + .gsub(/([[:lower:]\d])([[:upper:]])/, "\\1_\\2") + .split("_") + .reject(&:empty?) 
+ .map(&:capitalize) + .join + end + + def to_upper_snake_case(name) + name + .to_s + .gsub(/([[:upper:]]+)([[:upper:]][[:lower:]])/, "\\1_\\2") + .gsub(/([[:lower:]\d])([[:upper:]])/, "\\1_\\2") + .upcase + end + + def check_message_name_collision(message_name, type_name) + existing_type_name = @type_name_by_message_name.fetch(message_name, type_name) + @type_name_by_message_name[message_name] = existing_type_name + return if existing_type_name == type_name + + raise Errors::SchemaError, "Type names `#{existing_type_name}` and `#{type_name}` both map to the same proto message name `#{message_name}`." + end + + def check_enum_name_collision(enum_name, type_name) + existing_type_name = @type_name_by_enum_name.fetch(enum_name, type_name) + @type_name_by_enum_name[enum_name] = existing_type_name + return if existing_type_name == type_name + + raise Errors::SchemaError, "Type names `#{existing_type_name}` and `#{type_name}` both map to the same proto enum name `#{enum_name}`." + end + + def normalize_proto_enum_mappings(raw_mappings) + normalized = {} # : ::Hash[::String, untyped] + return normalized if raw_mappings.nil? + + raw_mappings.each do |graphql_enum_name, mappings| + normalized[graphql_enum_name.to_s] = mappings + end + + normalized + end + + def normalize_proto_external_types(raw_mappings) + normalized = {} # : ::Hash[::String, ExternalTypeDefinition] + return normalized if raw_mappings.nil? + + unless raw_mappings.is_a?(Hash) + raise Errors::SchemaError, "External proto type mappings must be a Hash, got: #{raw_mappings.class}." + end + + raw_mappings.each do |type_name, mapping| + unless mapping.is_a?(Hash) + raise Errors::SchemaError, "External proto type mapping for `#{type_name}` must be a Hash." 
+ end + + proto_type_name = fetch_external_type_mapping_value(type_name, mapping, :proto) + import = fetch_external_type_mapping_value(type_name, mapping, :import) + + normalized[type_name.to_s] = ExternalTypeDefinition.new( + fqn: Identifier.external_type_name(proto_type_name), + import: import + ) + end + + normalized + end + + def fetch_external_type_mapping_value(type_name, mapping, key) + value = + if mapping.key?(key) + mapping.fetch(key) + elsif mapping.key?(key.to_s) + mapping.fetch(key.to_s) + end + + if value.is_a?(String) && !value.empty? + value + else + raise Errors::SchemaError, "External proto type mapping for `#{type_name}` must include a non-empty `#{key}` String." + end + end + + def normalize_proto_field_number_mappings(raw_mappings) + return {} if raw_mappings.nil? + unless raw_mappings.is_a?(Hash) + raise Errors::SchemaError, "Protobuf field-number mappings must be a Hash, got: #{raw_mappings.class}." + end + + messages_hash = + if raw_mappings.key?("messages") + raw_mappings.fetch("messages") + elsif raw_mappings.key?(:messages) + raw_mappings.fetch(:messages) + else + raw_mappings + end + + unless messages_hash.is_a?(Hash) + raise Errors::SchemaError, "Protobuf field-number mappings must have a `messages` Hash." + end + + normalized = {} # : ::Hash[::String, fieldNumberMappingsByFieldName] + + messages_hash.each do |message_name, field_numbers| + unless field_numbers.is_a?(Hash) + raise Errors::SchemaError, "Field-number mapping for message `#{message_name}` must be a Hash." + end + + normalized_fields = + if field_numbers.key?("fields") + field_numbers.fetch("fields") + elsif field_numbers.key?(:fields) + field_numbers.fetch(:fields) + else + field_numbers + end + + unless normalized_fields.is_a?(Hash) + raise Errors::SchemaError, "Field-number mapping for message `#{message_name}` must contain a `fields` Hash." 
+ end + + normalized_message_name = message_name.to_s + normalized_field_numbers = {} # : fieldNumberMappingsByFieldName + + normalized_fields.each do |field_name, field_number_or_mapping| + normalized_field_name = field_name.to_s + normalized_field_number, normalized_name_in_index = normalize_field_number_mapping_entry( + normalized_message_name, + normalized_field_name, + field_number_or_mapping + ) + + if normalized_field_number <= 0 + raise Errors::SchemaError, "Field-number mapping for `#{normalized_message_name}.#{normalized_field_name}` " \ + "must be a positive integer, got: #{field_number_or_mapping.inspect}." + end + + normalized_field_numbers[normalized_field_name] = FieldNumberMapping.new( + field_number: normalized_field_number, + name_in_index: normalized_name_in_index + ) + rescue ArgumentError, TypeError + raise Errors::SchemaError, "Field-number mapping for `#{normalized_message_name}.#{normalized_field_name}` " \ + "must be an integer, got: #{field_number_or_mapping.inspect}." + end + + normalized[normalized_message_name] = normalized_field_numbers + end + + normalized + end + + def normalize_field_number_mapping_entry(message_name, field_name, field_number_or_mapping) + if field_number_or_mapping.is_a?(Hash) + raw_field_number = + if field_number_or_mapping.key?("field_number") + field_number_or_mapping.fetch("field_number") + elsif field_number_or_mapping.key?(:field_number) + field_number_or_mapping.fetch(:field_number) + else + raise Errors::SchemaError, "Field-number mapping for `#{message_name}.#{field_name}` must include `field_number`." 
+ end + + raw_name_in_index = + if field_number_or_mapping.key?("name_in_index") + field_number_or_mapping.fetch("name_in_index") + elsif field_number_or_mapping.key?(:name_in_index) + field_number_or_mapping.fetch(:name_in_index) + else + field_name + end + + unless raw_name_in_index.is_a?(String) || raw_name_in_index.is_a?(Symbol) + raise Errors::SchemaError, "Field-number mapping for `#{message_name}.#{field_name}` " \ + "must use a String or Symbol `name_in_index`, got: #{raw_name_in_index.inspect}." + end + + [Integer(raw_field_number), raw_name_in_index.to_s] + else + [Integer(field_number_or_mapping), field_name] + end + end + + def renamed_public_field_names_by_type_name + @renamed_public_field_names_by_type_name ||= begin + mappings = {} # : ::Hash[::String, ::Hash[::String, ::Array[::String]]] + + # `renamed_from` field metadata can only be declared via an extension that adds + # `renamed_fields_by_type_name_and_old_field_name` to the schema definition state (such as + # `elasticgraph-json_ingestion`, which is optional). Without such an extension, no rename + # metadata can exist, so there are no field-number mappings to migrate. 
+ if @state.respond_to?(:renamed_fields_by_type_name_and_old_field_name) + renaming_state = @state # : untyped + + renaming_state.renamed_fields_by_type_name_and_old_field_name.each do |type_name, old_to_new| + current_to_old = ::Hash.new { |h, k| h[k] = [] } # : ::Hash[::String, ::Array[::String]] + + old_to_new.each do |old_field_name, renamed_field| + current_to_old[renamed_field.name] << old_field_name + end + + mappings[type_name] = current_to_old + end + end + + mappings + end + end + + def fetch_mapping_option(options, key, default) + if options.key?(key) + options[key] + elsif options.key?(key.to_s) + options[key.to_s] + else + default + end + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rb new file mode 100644 index 000000000..45d99cb59 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rb @@ -0,0 +1,65 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf" +require "yaml" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Extension module for {ElasticGraph::SchemaDefinition::SchemaArtifactManager} that adds + # proto artifact generation support. + # + # @private + module SchemaArtifactManagerExtension + private + + # Overrides the base `artifacts_from_schema_def` method to add proto artifacts. + def artifacts_from_schema_def + protobuf_load_existing_field_number_mappings + + base_artifacts = super + proto_schema = protobuf_schema_definition_results.proto_schema + return base_artifacts if proto_schema.empty? 
+ + base_artifacts + [ + new_yaml_artifact( + PROTO_FIELD_NUMBERS_FILE, + protobuf_schema_definition_results.proto_field_number_mappings, + extra_comment_lines: [ + "This file reserves protobuf field numbers to keep them stable over time.", + "Do not renumber existing entries." + ] + ), + new_raw_artifact(PROTO_SCHEMA_FILE, proto_schema.chomp, comment_prefix: "//") + ] + end + + # Returns the wrapped {ElasticGraph::SchemaDefinition::Results} narrowed to include this + # gem's `ResultsExtension`. Centralizes the Steep cast that's needed because Steep can't + # see the `extend(ResultsExtension)` applied at runtime. + def protobuf_schema_definition_results + schema_definition_results # : ElasticGraph::SchemaDefinition::Results & ResultsExtension + end + + def protobuf_state + protobuf_schema_definition_results.state # : ElasticGraph::SchemaDefinition::State & StateExtension + end + + # Seeds the schema generator with the field-number mappings from the previously dumped + # artifact (if any) so that field numbers remain stable across dumps. + def protobuf_load_existing_field_number_mappings + full_path = ::File.join(@schema_artifacts_directory, PROTO_FIELD_NUMBERS_FILE) + return unless ::File.exist?(full_path) + + protobuf_state.proto_field_number_mappings = ::YAML.safe_load_file(full_path, aliases: false) + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/enum_type_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/enum_type_extension.rb new file mode 100644 index 000000000..5d2f38f98 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/enum_type_extension.rb @@ -0,0 +1,28 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/identifier" + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Protobuf schema definition extensions for ElasticGraph schema elements. + module SchemaElements + # Extends EnumType with proto field type conversion. + module EnumTypeExtension + # Returns the proto field type representation for this enum type. + # + # @return [String] + def to_proto_field_type + Identifier.enum_name(name) + end + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/object_interface_and_union_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/object_interface_and_union_extension.rb new file mode 100644 index 000000000..94a7ded57 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/object_interface_and_union_extension.rb @@ -0,0 +1,27 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/identifier" + +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaElements + # Extends object/interface/union types with proto field type conversion. + module ObjectInterfaceAndUnionExtension + # Returns the proto field type representation for this type. 
+ # + # @return [String] + def to_proto_field_type + Identifier.message_name(name) + end + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension.rb new file mode 100644 index 000000000..686d09671 --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension.rb @@ -0,0 +1,42 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/errors" + +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaElements + # Extends ScalarType with proto field type conversion. + module ScalarTypeExtension + # Configured proto field type (e.g. string, int64, bool). + # @dynamic proto_field_type + attr_reader :proto_field_type + + # Configures the proto field type for this scalar type. + # + # @param type [String] protobuf scalar type name + # @return [void] + def proto_field(type:) + @proto_field_type = type + end + + # Returns this scalar's proto field type. + # + # @return [String] + # @raise [Errors::SchemaError] when missing + def to_proto_field_type + proto_field_type || + raise(Errors::SchemaError, "Protobuf field type not configured for scalar type `#{name}`. 
" \ + 'To proceed, call `proto_field type: "TYPE"` on the scalar type definition.') + end + end + end + end + end +end diff --git a/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/state_extension.rb b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/state_extension.rb new file mode 100644 index 000000000..3eb207b3b --- /dev/null +++ b/elasticgraph-protobuf/lib/elastic_graph/protobuf/schema_definition/state_extension.rb @@ -0,0 +1,36 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +module ElasticGraph + module Protobuf + module SchemaDefinition + # Extension module applied to `ElasticGraph::SchemaDefinition::State` to hold protobuf configuration. + # + # @private + module StateExtension + # @dynamic proto_schema_package_name, proto_schema_package_name= + # @dynamic proto_enums_by_graphql_enum, proto_enums_by_graphql_enum= + # @dynamic proto_external_types, proto_external_types= + # @dynamic proto_field_number_mappings, proto_field_number_mappings= + # @dynamic proto_schema_syntax, proto_schema_syntax= + # @dynamic proto_schema_headers, proto_schema_headers= + attr_accessor :proto_schema_package_name, :proto_enums_by_graphql_enum, :proto_external_types, + :proto_field_number_mappings, :proto_schema_syntax, :proto_schema_headers + + def self.extended(state) + state.proto_schema_package_name = "elasticgraph" + state.proto_enums_by_graphql_enum = {} + state.proto_external_types = {} + state.proto_field_number_mappings = {} + state.proto_schema_syntax = :proto3 + state.proto_schema_headers = [] + end + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf.rbs new file mode 100644 index 000000000..321ca4187 --- /dev/null +++ 
b/elasticgraph-protobuf/sig/elastic_graph/protobuf.rbs @@ -0,0 +1,6 @@ +module ElasticGraph + module Protobuf + PROTO_SCHEMA_FILE: ::String + PROTO_FIELD_NUMBERS_FILE: ::String + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/api_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/api_extension.rbs new file mode 100644 index 000000000..71b1d12aa --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/api_extension.rbs @@ -0,0 +1,19 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module APIExtension: ::ElasticGraph::SchemaDefinition::API + PROTO_TYPES_BY_BUILT_IN_SCALAR_TYPE: ::Hash[::String, ::String] + + def self.extended: (::ElasticGraph::SchemaDefinition::API & APIExtension) -> void + def proto_schema_artifacts: (?package_name: ::String, ?syntax: ::Symbol, ?headers: ::Array[::String]) -> void + def proto_enum_mappings: (untyped) -> void + def proto_external_types: (untyped) -> void + def configure_proto_field_number_mappings: (untyped) -> void + + private + + def protobuf_state: () -> (::ElasticGraph::SchemaDefinition::State & StateExtension) + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/factory_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/factory_extension.rbs new file mode 100644 index 000000000..178113baa --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/factory_extension.rbs @@ -0,0 +1,8 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module FactoryExtension: ::ElasticGraph::SchemaDefinition::Factory + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/identifier.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/identifier.rbs new file mode 100644 index 000000000..c8eeedbcb --- /dev/null +++ 
b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/identifier.rbs @@ -0,0 +1,17 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + class Identifier + PROTO_KEYWORDS: ::Set[::String] + + def self.package_name: (::String) -> ::String + def self.message_name: (::String) -> ::String + def self.enum_name: (::String) -> ::String + def self.field_name: (::String) -> ::String + def self.enum_value_name: (::String) -> ::String + def self.external_type_name: (::String) -> ::String + def self.escape_keyword: (::String) -> ::String + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/results_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/results_extension.rbs new file mode 100644 index 000000000..bdaa80f10 --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/results_extension.rbs @@ -0,0 +1,18 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module ResultsExtension : ::ElasticGraph::SchemaDefinition::Results + def proto_schema: () -> ::String + def proto_field_number_mappings: () -> untyped + + private + + def protobuf_state: () -> (::ElasticGraph::SchemaDefinition::State & StateExtension) + def protobuf_schema_generator: () -> Schema + + @proto_schema: ::String? + @protobuf_schema_generator: Schema? 
+ end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema.rbs new file mode 100644 index 000000000..764509e7c --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema.rbs @@ -0,0 +1,161 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + class Schema + class FieldDefinition + attr_reader name: ::String + attr_reader type: ::String + attr_reader field_number: ::Integer + attr_reader repeated: bool + attr_reader comment: ::String? + + def self.new: ( + name: ::String, + type: ::String, + field_number: ::Integer, + repeated: bool, + comment: ::String? + ) -> instance + end + + class MessageDefinition + attr_reader name: ::String + attr_reader fields: ::Array[FieldDefinition] + + def self.new: (name: ::String, fields: ::Array[FieldDefinition]) -> instance + end + + class EnumValueDefinition + attr_reader name: ::String + attr_reader number: ::Integer + + def self.new: (name: ::String, number: ::Integer) -> instance + end + + class EnumDefinition + attr_reader name: ::String + attr_reader zero_value_name: ::String + attr_reader values: ::Array[EnumValueDefinition] + + def self.new: ( + name: ::String, + zero_value_name: ::String, + values: ::Array[EnumValueDefinition] + ) -> instance + end + + class FieldNumberMapping + attr_reader field_number: ::Integer + attr_reader name_in_index: ::String + + def self.new: (field_number: ::Integer, name_in_index: ::String) -> instance + end + + class ExternalTypeDefinition + attr_reader fqn: ::String + attr_reader import: ::String + + def self.new: (fqn: ::String, import: ::String) -> instance + end + + type fieldNumberMappingsByFieldName = ::Hash[::String, FieldNumberMapping] + + SUPPORTED_SYNTAXES: ::Array[::String] + + @syntax: ::String + @headers: ::Array[::String] + @state: ::ElasticGraph::SchemaDefinition::State + @package_name: ::String + 
@proto_enums_by_graphql_enum: ::Hash[::String, untyped] + @proto_external_types_by_type_name: ::Hash[::String, ExternalTypeDefinition] + @proto_field_number_mappings_by_message: ::Hash[::String, fieldNumberMappingsByFieldName] + @imports: ::Set[::String] + @registered_external_type_names: ::Set[::String] + @message_definitions_by_name: ::Hash[::String, MessageDefinition] + @enum_definitions_by_name: ::Hash[::String, EnumDefinition] + @generated_message_definitions_by_name: ::Hash[::String, MessageDefinition] + @wrapper_root_name_by_context: ::Hash[::Array[::String | ::Integer], ::String] + @type_name_by_message_name: ::Hash[::String, ::String] + @type_name_by_enum_name: ::Hash[::String, ::String] + @renamed_public_field_names_by_type_name: ::Hash[::String, ::Hash[::String, ::Array[::String]]]? + + def initialize: ( + state: ::ElasticGraph::SchemaDefinition::State, + package_name: ::String, + proto_enums_by_graphql_enum: untyped, + ?proto_external_types: untyped, + ?proto_field_number_mappings: untyped, + ?syntax: (::Symbol | ::String), + ?headers: ::Array[::String] + ) -> void + + def to_proto: () -> ::String + def field_number_mappings_for_artifact: () -> ::Hash[::String, untyped] + + private + + def indexed_types: () -> ::Array[untyped] + def register_type: (untyped type) -> ::String + def register_external_type: (untyped type, ExternalTypeDefinition) -> void + def validate_external_enum_type: (untyped enum_type) -> void + def register_message: (untyped type) -> void + def register_enum: (untyped enum_type) -> void + def enum_value_names_for: (untyped enum_type) -> ::Array[::String] + def enum_value_names_from_proto_mapping: ( + enum_type_name: ::String, + proto_type: untyped, + options: ::Hash[untyped, untyped] + ) -> ::Array[::String] + def field_number_for: ( + message_name: ::String, + type_name: ::String, + public_field_name: ::String, + name_in_index: ::String + ) -> ::Integer + def next_available_field_number_for: (fieldNumberMappingsByFieldName) -> 
::Integer + def migrate_renamed_field_mapping: ( + fieldNumberMappingsByFieldName, + type_name: ::String, + public_field_name: ::String + ) -> FieldNumberMapping? + def proto_field_type_for: ( + ::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference, + context_message_name: ::String, + context_field_name: ::String + ) -> [bool, ::String] + def list_depth_and_base_type: ( + ::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference + ) -> [::Integer, ::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference] + def register_nested_list_wrappers: ( + context_message_name: ::String, + context_field_name: ::String, + list_depth: ::Integer, + base_type_name: ::String + ) -> ::String + def unique_generated_message_name: (::String) -> ::String + def name_taken?: (::String) -> bool + def render_imports: () -> ::Array[::String] + def render_headers: () -> ::Array[::String] + def render_definitions: () -> ::String + def proto_enum_value_name: (::String, ::String) -> ::String + def proto_zero_enum_value_name: (::String) -> ::String + def enum_value_prefix: (::String) -> ::String + def render_enum: (EnumDefinition) -> ::String + def render_message: (MessageDefinition) -> ::String + def all_enum_definitions: () -> ::Array[EnumDefinition] + def all_message_definitions: () -> ::Array[MessageDefinition] + def to_title_case: (::String) -> ::String + def to_upper_snake_case: (::String | ::Symbol) -> ::String + def check_message_name_collision: (::String, ::String) -> void + def check_enum_name_collision: (::String, ::String) -> void + def normalize_proto_enum_mappings: (untyped) -> ::Hash[::String, untyped] + def normalize_proto_external_types: (untyped) -> ::Hash[::String, ExternalTypeDefinition] + def fetch_external_type_mapping_value: (::String | ::Symbol, ::Hash[untyped, untyped], ::Symbol) -> ::String + def normalize_proto_field_number_mappings: (untyped) -> ::Hash[::String, fieldNumberMappingsByFieldName] + def normalize_field_number_mapping_entry: 
(::String, ::String, untyped) -> [::Integer, ::String] + def renamed_public_field_names_by_type_name: () -> ::Hash[::String, ::Hash[::String, ::Array[::String]]] + def fetch_mapping_option: (::Hash[untyped, untyped], ::Symbol, untyped) -> untyped + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rbs new file mode 100644 index 000000000..7e3646235 --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension.rbs @@ -0,0 +1,14 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaArtifactManagerExtension : ::ElasticGraph::SchemaDefinition::SchemaArtifactManager + private + + def artifacts_from_schema_def: () -> ::Array[::ElasticGraph::SchemaDefinition::SchemaArtifact[untyped]] + def protobuf_schema_definition_results: () -> (::ElasticGraph::SchemaDefinition::Results & ResultsExtension) + def protobuf_state: () -> (::ElasticGraph::SchemaDefinition::State & StateExtension) + def protobuf_load_existing_field_number_mappings: () -> void + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/enum_type_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/enum_type_extension.rbs new file mode 100644 index 000000000..f571e9cab --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/enum_type_extension.rbs @@ -0,0 +1,11 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaElements + module EnumTypeExtension: ::ElasticGraph::SchemaDefinition::SchemaElements::EnumType + def to_proto_field_type: () -> ::String + end + end + end + end +end diff --git 
a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/object_interface_and_union_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/object_interface_and_union_extension.rbs new file mode 100644 index 000000000..bdc65ab32 --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/object_interface_and_union_extension.rbs @@ -0,0 +1,11 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaElements + module ObjectInterfaceAndUnionExtension : ::ElasticGraph::SchemaDefinition::SchemaElements::ObjectType + def to_proto_field_type: () -> ::String + end + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension.rbs new file mode 100644 index 000000000..e035c83fd --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension.rbs @@ -0,0 +1,14 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaElements + module ScalarTypeExtension: ::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType + attr_reader proto_field_type: ::String? 
+ + def proto_field: (type: ::String) -> void + def to_proto_field_type: () -> ::String + end + end + end + end +end diff --git a/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/state_extension.rbs b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/state_extension.rbs new file mode 100644 index 000000000..29642503f --- /dev/null +++ b/elasticgraph-protobuf/sig/elastic_graph/protobuf/schema_definition/state_extension.rbs @@ -0,0 +1,16 @@ +module ElasticGraph + module Protobuf + module SchemaDefinition + module StateExtension: ::ElasticGraph::SchemaDefinition::State + attr_accessor proto_schema_package_name: ::String + attr_accessor proto_enums_by_graphql_enum: untyped + attr_accessor proto_external_types: untyped + attr_accessor proto_field_number_mappings: untyped + attr_accessor proto_schema_syntax: (::Symbol | ::String) + attr_accessor proto_schema_headers: ::Array[::String] + + def self.extended: (::ElasticGraph::SchemaDefinition::State & StateExtension) -> void + end + end + end +end diff --git a/elasticgraph-protobuf/spec/integration/elastic_graph/protobuf/schema_definition/rake_tasks_spec.rb b/elasticgraph-protobuf/spec/integration/elastic_graph/protobuf/schema_definition/rake_tasks_spec.rb new file mode 100644 index 000000000..7b23d5a01 --- /dev/null +++ b/elasticgraph-protobuf/spec/integration/elastic_graph/protobuf/schema_definition/rake_tasks_spec.rb @@ -0,0 +1,136 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/constants" +require "elastic_graph/json_ingestion/schema_definition/api_extension" +require "elastic_graph/protobuf" +require "elastic_graph/protobuf/schema_definition/api_extension" +require "elastic_graph/schema_definition/rake_tasks" +require "fileutils" +require "yaml" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe "Protobuf RakeTasks", :rake_task, :in_temp_dir do + describe "schema_artifacts:dump" do + it "dumps proto artifact when indexed types are defined" do + write_proto_schema(table_defs: <<~EOS) + s.object_type "Product" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "products" + end + EOS + + expect { + output = run_rake_with_proto("schema_artifacts:dump") + expect(output.lines).to include(a_string_including("Dumped", PROTO_SCHEMA_FILE)) + }.to change { read_artifact(PROTO_SCHEMA_FILE) } + .from(nil) + .to( + start_with( + "// Generated by `bundle exec rake schema_artifacts:dump`.\n" \ + "// DO NOT EDIT BY HAND. 
Any edits will be lost the next time the rake task is run.\n" + ).and(a_string_including('syntax = "proto3";', "message Product", "string name = 2;")) + ) + end + + it "idempotently dumps proto artifacts" do + write_proto_schema(table_defs: <<~EOS) + s.object_type "Product" do |t| + t.field "id", "ID" + t.index "products" + end + EOS + + run_rake_with_proto("schema_artifacts:dump") + + expect { + output = run_rake_with_proto("schema_artifacts:dump") + expect(output.lines).to include(a_string_including("already up to date", PROTO_SCHEMA_FILE)) + }.to maintain { read_artifact(PROTO_SCHEMA_FILE) } + end + + it "can persist and reuse proto field-number mappings from an artifact file" do + write_proto_schema(table_defs: <<~EOS) + s.object_type "Product" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "products" + end + EOS + + run_rake_with_proto("schema_artifacts:dump") + + expect(read_artifact(PROTO_FIELD_NUMBERS_FILE)).not_to be_nil + expect(parsed_proto_field_numbers).to eq({ + "messages" => { + "Product" => { + "fields" => { + "id" => 1, + "name" => 2 + } + } + } + }) + + write_proto_schema(table_defs: <<~EOS) + s.object_type "Product" do |t| + t.field "name", "String" + t.field "id", "ID" + t.index "products" + end + EOS + + run_rake_with_proto("schema_artifacts:dump") + + expect(read_artifact(PROTO_SCHEMA_FILE)).to include("string name = 2;") + expect(read_artifact(PROTO_SCHEMA_FILE)).to include("string id = 1;") + end + end + + private + + def write_proto_schema(table_defs:, proto_config: nil) + ::File.write("schema.rb", <<~EOS) + ElasticGraph.define_schema do |s| + s.json_schema_version 1 + s.enforce_json_schema_version false + #{proto_config} + + #{table_defs} + end + EOS + end + + def run_rake_with_proto(*args) + run_rake(*args) do |output| + ElasticGraph::SchemaDefinition::RakeTasks.new( + schema_element_name_form: :snake_case, + index_document_sizes: false, + path_to_schema: "schema.rb", + schema_artifacts_directory: 
"config/schema/artifacts", + extension_modules: [JSONIngestion::SchemaDefinition::APIExtension, SchemaDefinition::APIExtension], + output: output + ) + end + end + + def read_artifact(name) + path = File.join("config", "schema", "artifacts", name) + File.read(path) if File.exist?(path) + end + + def parsed_proto_field_numbers + ::YAML.safe_load(read_artifact(PROTO_FIELD_NUMBERS_FILE)) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/spec_helper.rb b/elasticgraph-protobuf/spec/spec_helper.rb new file mode 100644 index 000000000..8b4efe6ba --- /dev/null +++ b/elasticgraph-protobuf/spec/spec_helper.rb @@ -0,0 +1,16 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +# This file contains RSpec configuration for `elasticgraph-protobuf`. +# It is loaded by the shared spec helper at `spec_support/spec_helper.rb`. + +RSpec.configure do |config| + config.when_first_matching_example_defined(:proto_schema) do + require "support/proto_schema_support" + end +end diff --git a/elasticgraph-protobuf/spec/support/proto_schema_support.rb b/elasticgraph-protobuf/spec/support/proto_schema_support.rb new file mode 100644 index 000000000..88e085003 --- /dev/null +++ b/elasticgraph-protobuf/spec/support/proto_schema_support.rb @@ -0,0 +1,36 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/schema_definition/test_support" +require "elastic_graph/json_ingestion/schema_definition/api_extension" +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaSupport + include ElasticGraph::SchemaDefinition::TestSupport + + def define_proto_schema(**options, &block) + define_schema( + schema_element_name_form: :snake_case, + extension_modules: [JSONIngestion::SchemaDefinition::APIExtension, SchemaDefinition::APIExtension], + **options, + &block + ) + end + + def proto_schema_from(results) + results.proto_schema + end + end + + RSpec.configure do |config| + config.include SchemaSupport, :proto_schema + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/api_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/api_extension_spec.rb new file mode 100644 index 000000000..bc196032c --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/api_extension_spec.rb @@ -0,0 +1,79 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe APIExtension, :proto_schema do + it "emits the configured package name and maps built-in scalars to proto field types" do + results = define_proto_schema do |s| + s.proto_schema_artifacts package_name: "sales.v1" + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "count", "Int" + t.field "cost", "Float" + t.field "active", "Boolean" + t.field "created_at", "DateTime" + t.field "size_bytes", "JsonSafeLong" + t.index "widgets" + end + end + + expect(proto_schema_from(results)).to include( + "package sales.v1;", + "string id = 1;", + "int32 count = 2;", + "double cost = 3;", + "bool active = 4;", + "string created_at = 5;", + "int64 size_bytes = 6;" + ) + end + + it "requires `package_name` to be a non-empty String" do + expect { + define_proto_schema do |s| + s.proto_schema_artifacts package_name: "" + end + }.to raise_error(Errors::SchemaError, a_string_including("`package_name` must be a non-empty String")) + + expect { + define_proto_schema do |s| + s.proto_schema_artifacts package_name: :symbol_package + end + }.to raise_error(Errors::SchemaError, a_string_including("`package_name` must be a non-empty String")) + end + + it "requires `syntax` to be a supported protobuf syntax" do + expect { + define_proto_schema do |s| + s.proto_schema_artifacts syntax: :proto1 + end + }.to raise_error(Errors::SchemaError, a_string_including("`syntax` must be one of")) + end + + it "requires `headers` to be an Array of Strings" do + expect { + define_proto_schema do |s| + s.proto_schema_artifacts headers: %(option java_package = "com.example";) + end + }.to raise_error(Errors::SchemaError, a_string_including("`headers` must be an Array of Strings")) + + expect { + define_proto_schema do |s| + s.proto_schema_artifacts headers: [:not_a_string] + end + }.to 
raise_error(Errors::SchemaError, a_string_including("`headers` must be an Array of Strings")) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/factory_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/factory_extension_spec.rb new file mode 100644 index 000000000..9cba37cb9 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/factory_extension_spec.rb @@ -0,0 +1,111 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/factory_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe FactoryExtension do + let(:factory_class) do + base_class = ::Class.new do + def new_enum_type(_name) + type = ::Object.new + yield type + type + end + + def new_interface_type(_name) + type = ::Object.new + yield type + type + end + + def new_object_type(_name) + type = ::Object.new + yield type + type + end + + def new_scalar_type(_name) + type = ::Object.new + yield type + type + end + + def new_union_type(_name) + type = ::Object.new + yield type + type + end + + def new_results + ::Object.new + end + + def new_schema_artifact_manager(*args, **kwargs) + @last_schema_artifact_manager_args = args + @last_schema_artifact_manager_kwargs = kwargs + ::Object.new + end + + attr_reader :last_schema_artifact_manager_args, :last_schema_artifact_manager_kwargs + end + + ::Class.new(base_class) do + prepend FactoryExtension + end + end + + it "extends enum types with enum conversion behavior" do + type = factory_class.new.new_enum_type("Status") + expect(type).to be_a(SchemaElements::EnumTypeExtension) + end + + it "extends interface and union types with object conversion behavior" do + 
factory = factory_class.new + interface_from_block = nil + union_from_block = nil + + factory.new_interface_type("Node") { |type| interface_from_block = type } + expect(factory.new_interface_type("Node")).to be_a(SchemaElements::ObjectInterfaceAndUnionExtension) + expect(interface_from_block).to be_a(SchemaElements::ObjectInterfaceAndUnionExtension) + + factory.new_union_type("SearchResult") { |type| union_from_block = type } + expect(factory.new_union_type("SearchResult")).to be_a(SchemaElements::ObjectInterfaceAndUnionExtension) + expect(union_from_block).to be_a(SchemaElements::ObjectInterfaceAndUnionExtension) + end + + it "extends object and scalar types and yields to provided blocks" do + object_type = nil + scalar_type = nil + + factory = factory_class.new + factory.new_object_type("Account") { |type| object_type = type } + factory.new_scalar_type("Custom") { |type| scalar_type = type } + + expect(object_type).to be_a(SchemaElements::ObjectInterfaceAndUnionExtension) + expect(scalar_type).to be_a(SchemaElements::ScalarTypeExtension) + expect(factory.new_object_type("Account")).to be_a(SchemaElements::ObjectInterfaceAndUnionExtension) + expect(factory.new_scalar_type("Custom")).to be_a(SchemaElements::ScalarTypeExtension) + end + + it "extends results and schema artifact managers" do + factory = factory_class.new + + expect(factory.new_results).to be_a(ResultsExtension) + + manager = factory.new_schema_artifact_manager(:positional, key: "value") + expect(manager).to be_a(SchemaArtifactManagerExtension) + expect(factory.last_schema_artifact_manager_args).to eq([:positional]) + expect(factory.last_schema_artifact_manager_kwargs).to eq({key: "value"}) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/identifier_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/identifier_spec.rb new file mode 100644 index 000000000..e1381fad2 --- /dev/null +++ 
b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/identifier_spec.rb @@ -0,0 +1,38 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/identifier" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe Identifier do + it "escapes reserved keywords" do + expect(Identifier.escape_keyword("package")).to eq("package_") + expect(Identifier.escape_keyword("custom")).to eq("custom") + end + + it "escapes package name segments independently" do + expect(Identifier.package_name("proto.package.v1")).to eq("proto.package_.v1") + end + + it "escapes message, enum, field, and enum value names" do + expect(Identifier.message_name("service")).to eq("service_") + expect(Identifier.enum_name("message")).to eq("message_") + expect(Identifier.field_name("string")).to eq("string_") + expect(Identifier.enum_value_name("stream")).to eq("stream_") + end + + it "preserves external dotted type names" do + expect(Identifier.external_type_name("squareup.connect.v2.resources.Card.Type")) + .to eq("squareup.connect.v2.resources.Card.Type") + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/results_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/results_extension_spec.rb new file mode 100644 index 000000000..15addafc1 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/results_extension_spec.rb @@ -0,0 +1,38 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/results_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe ResultsExtension, :proto_schema do + it "memoizes the schema generator and exposes its field-number mappings" do + allow(Schema).to receive(:new).and_call_original + + results = define_proto_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" + end + end + + expect(results.proto_schema).to include("message Widget") + expect(results.proto_schema).to include("message Widget") + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Widget" => {"fields" => {"id" => 1}} + } + }) + + expect(Schema).to have_received(:new).once + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension_spec.rb new file mode 100644 index 000000000..de23bdbb7 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension_spec.rb @@ -0,0 +1,71 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/schema_artifact_manager_extension" +require "stringio" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe SchemaArtifactManagerExtension, :proto_schema, :in_temp_dir do + it "dumps proto artifacts alongside the base artifacts" do + artifact_base_names = artifacts_for(define_indexed_type_schema) + + expect(artifact_base_names).to include(PROTO_SCHEMA_FILE, PROTO_FIELD_NUMBERS_FILE) + end + + it "omits proto artifacts when the schema defines no indexed types" do + results = define_proto_schema do |s| + s.object_type "Point" do |t| + t.field "x", "Float" + t.field "y", "Float" + end + end + + artifact_base_names = artifacts_for(results) + + expect(artifact_base_names).not_to include(PROTO_SCHEMA_FILE, PROTO_FIELD_NUMBERS_FILE) + end + + it "seeds proto generation with the field-number mappings from a previously dumped artifact" do + ::FileUtils.mkdir_p("artifacts") + ::File.write(::File.join("artifacts", PROTO_FIELD_NUMBERS_FILE), <<~YAML) + messages: + Widget: + fields: + id: 7 + YAML + + results = define_indexed_type_schema + artifacts_for(results) + + expect(results.proto_schema).to include("string id = 7;") + end + + def define_indexed_type_schema + define_proto_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.index "widgets" + end + end + end + + def artifacts_for(results) + manager = results.state.api.factory.new_schema_artifact_manager( + schema_definition_results: results, + schema_artifacts_directory: "artifacts", + output: ::StringIO.new + ) + + manager.send(:artifacts_from_schema_def).map { |artifact| ::File.basename(artifact.file_name) } + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_edge_cases_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_edge_cases_spec.rb new file mode 100644 index 
000000000..97e9ac7d5 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_edge_cases_spec.rb @@ -0,0 +1,852 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe Schema, :proto_schema do + it "returns an empty string when no indexed types are present" do + expect(build_schema_with_root_indexed_types.to_proto).to eq("") + end + + it "raises when a root indexed type cannot be converted to proto" do + bad_type = ::Object.new + bad_type.define_singleton_method(:name) { "BadType" } + + schema = build_schema_with_root_indexed_types(bad_type) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("Type `BadType` cannot be converted to proto")) + end + + it "raises when a field type reference cannot be resolved" do + message = build_fake_message_type( + "BrokenMessage", + "broken_field" => build_fake_type_ref(resolved: nil, unwrapped_name: "MissingType") + ) + schema = build_schema_with_root_indexed_types(message) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("Type `MissingType` cannot be resolved")) + end + + it "raises when enum values map to duplicate proto value names" do + results = define_proto_schema do |s| + s.enum_type "Status" do |t| + t.values "option", "OPTION" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("duplicate proto enum value names")) + end + + it "uses a suffixed zero enum value when needed to avoid collisions" do + results = 
define_proto_schema do |s| + s.enum_type "Status" do |t| + t.values "UNSPECIFIED", "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("STATUS_UNSPECIFIED_ = 0;") + expect(generated).to include("STATUS_UNSPECIFIED = 1;") + end + + it "raises when a configured proto enum mapping source does not expose .enums" do + results = define_proto_schema do |s| + s.proto_enum_mappings("Status" => {::Object.new => {}}) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must map to a proto enum class with `.enums`")) + end + + it "wraps unexpected exceptions from enum mapping sources" do + proto_status = ::Class.new do + def self.enums + [::Data.define(:name).new(name: :ACTIVE)] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status => { + "name_transform" => ->(_name) { raise "boom" } + } + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("Failed loading proto enum mapping for `Status`")) + end + + it "supports string-key mapping options in proto_enum_mappings" do + proto_status = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :UNKNOWN_DO_NOT_USE), + ::Data.define(:name).new(name: :ACTIVE) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status => { + "exclusions" => [:UNKNOWN_DO_NOT_USE], + "expected_extras" => [:LEGACY] + } + } + ) + + 
s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("STATUS_ACTIVE = 1;") + expect(generated).to include("STATUS_LEGACY = 2;") + end + + it "requires external enum types to have exactly one enum mapping source" do + results = define_proto_schema do |s| + s.proto_external_types( + "Status" => { + proto: "squareup.connect.v2.Status", + import: "squareup/connect/v2/status.proto" + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must also configure `proto_enum_mappings`")) + end + + it "rejects external enum types with transformed enum mappings" do + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + ::Object.new => { + expected_extras: [:LEGACY] + } + } + ) + s.proto_external_types( + "Status" => { + proto: "squareup.connect.v2.Status", + import: "squareup/connect/v2/status.proto" + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must use an empty `proto_enum_mappings` options hash")) + end + + it "rejects external enum types with multiple enum mapping sources" do + results = define_proto_schema do |s| + s.proto_enum_mappings("Status" => {::Object.new => {}, ::Object.new => {}}) + s.proto_external_types( + "Status" => { + proto: "squareup.connect.v2.Status", + import: "squareup/connect/v2/status.proto" + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type 
"Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must use exactly one `proto_enum_mappings` source")) + end + + it "deduplicates imports for repeated external enum references" do + proto_status = ::Class.new do + def self.enums + [::Data.define(:name).new(name: :ACTIVE)] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings("Status" => {proto_status => {}}) + s.proto_external_types( + "Status" => { + "proto" => "squareup.connect.v2.Status", + "import" => "squareup/connect/v2/status.proto" + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.field "previous_status", "Status" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated.scan('import "squareup/connect/v2/status.proto";').size).to eq(1) + expect(generated).to include("squareup.connect.v2.Status status = 2;") + expect(generated).to include("squareup.connect.v2.Status previous_status = 3;") + end + + it "validates external enum values against the ElasticGraph enum values" do + proto_status = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :PENDING) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings("Status" => {proto_status => {}}) + s.proto_external_types( + "Status" => { + proto: "squareup.connect.v2.Status", + import: "squareup/connect/v2/status.proto" + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("values do not match")) + end + + it "rejects external proto 
type references for non-enum types" do + results = define_proto_schema do |s| + s.proto_external_types( + "Address" => { + proto: "squareup.connect.v2.Address", + import: "squareup/connect/v2/address.proto" + } + ) + + s.object_type "Address" do |t| + t.field "street", "String" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "address", "Address" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("Only enum types are supported")) + end + + it "validates external proto type mapping input type" do + results = define_proto_schema do |s| + s.proto_external_types("bad") + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("External proto type mappings must be a Hash")) + end + + it "validates per-type external proto type mapping structure" do + results = define_proto_schema do |s| + s.proto_external_types("Status" => "bad") + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("External proto type mapping for `Status` must be a Hash")) + end + + it "requires external proto type mappings to define proto and import strings" do + results = define_proto_schema do |s| + s.proto_external_types("Status" => {proto: "squareup.connect.v2.Status"}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must include a non-empty `import` String")) + end + + it "raises on field-number mapping collisions for a message" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 1, + "name" => 1 + } + } + } 
+ ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("field-number mapping collision")) + end + + it "raises when two fields collapse to the same proto field name after keyword escaping" do + results = define_proto_schema do |s| + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "string", "String" + t.field "string_", "String" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("duplicate proto field names")) + end + + it "generates unique nested-wrapper names when the base name is already taken" do + results = define_proto_schema do |s| + s.object_type "MatrixValuesListLevel1" do |t| + t.field "id", "ID" + end + + s.object_type "Matrix" do |t| + t.field "id", "ID" + t.field "already_taken", "MatrixValuesListLevel1" + t.field "values", "[[Float!]!]!" 
+ t.index "matrices" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("message MatrixValuesListLevel12 {") + expect(generated).to include("repeated MatrixValuesListLevel12 values = 3;") + end + + it "renders a placeholder comment for indexed types with no fields" do + schema = build_schema_with_root_indexed_types(build_fake_message_type("EmptyType")) + expect(schema.to_proto).to include("// No indexed fields were defined for this type.") + end + + it "supports recursive message references without infinite recursion" do + node_type = build_fake_message_type( + "Node", + "id" => build_fake_type_ref(resolved: build_fake_scalar_type("string"), unwrapped_name: "ID"), + "parent" => build_fake_type_ref(resolved: nil, unwrapped_name: "Node") + ) + + node_type + .indexing_fields_by_name_in_index + .fetch("parent") + .to_indexing_field + .type + .define_singleton_method(:resolved) { node_type } + + schema = build_schema_with_root_indexed_types(node_type) + + expect(schema.to_proto).to include("Node parent = 2;") + end + + it "supports re-registering already-known enums from field references" do + status_enum = build_fake_enum_type("Status", values: ["ACTIVE"]) + account_type = build_fake_message_type( + "Account", + "status" => build_fake_type_ref(resolved: status_enum, unwrapped_name: "Status") + ) + + schema = build_schema_with_root_indexed_types(status_enum, account_type) + expect(schema.to_proto).to include("STATUS_ACTIVE = 1;") + end + + it "accepts multiple enum mapping sources when they resolve to the same values" do + proto_status_a = ::Class.new do + def self.enums + [::Data.define(:name).new(name: :ACTIVE)] + end + end + + proto_status_b = ::Class.new do + def self.enums + [::Data.define(:name).new(name: :ACTIVE)] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status_a => {}, + proto_status_b => {} + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE" + end + + 
s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("STATUS_ACTIVE = 1;") + end + + it "reuses nested wrapper types for repeated generation requests with the same context" do + schema = build_schema_with_root_indexed_types + + first = schema.send( + :register_nested_list_wrappers, + context_message_name: "Matrix", + context_field_name: "values", + list_depth: 2, + base_type_name: "double" + ) + second = schema.send( + :register_nested_list_wrappers, + context_message_name: "Matrix", + context_field_name: "values", + list_depth: 2, + base_type_name: "double" + ) + + expect(second).to eq(first) + end + + it "creates intermediate wrappers for deeply nested lists" do + results = define_proto_schema do |s| + s.object_type "Matrix" do |t| + t.field "id", "ID" + t.field "values", "[[[Float!]!]!]!" + t.index "matrices" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("message MatrixValuesListLevel2 {") + expect(generated).to include("message MatrixValuesListLevel1 {") + end + + it "normalizes nil proto enum and field-number mappings to empty hashes" do + schema = Schema.new( + state: build_fake_state, + package_name: "elasticgraph", + proto_enums_by_graphql_enum: nil, + proto_external_types: nil, + proto_field_number_mappings: nil + ) + + expect(schema.to_proto).to eq("") + expect(schema.field_number_mappings_for_artifact).to eq({"messages" => {}}) + end + + it "raises when type names collide after proto message escaping" do + first = build_fake_message_type("package") + second = build_fake_message_type("package_") + schema = build_schema_with_root_indexed_types(first, second) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("both map to the same proto message name")) + end + + it "raises when type names collide after proto enum escaping" do + first = build_fake_enum_type("option", 
values: ["ACTIVE"]) + second = build_fake_enum_type("option_", values: ["ACTIVE"]) + schema = build_schema_with_root_indexed_types(first, second) + + expect { + schema.to_proto + }.to raise_error(Errors::SchemaError, a_string_including("both map to the same proto enum name")) + end + + it "validates field-number mapping input type" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings("bad") + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be a Hash")) + end + + it "validates that `messages` is a hash in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => "bad"}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must have a `messages` Hash")) + end + + it "accepts symbol `:messages` key in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + messages: { + "Account" => { + "id" => 7 + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 7;") + end + + it "accepts symbol `:fields` and nested symbol keys in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + messages: { + "Account" => { + fields: { + "id" => { + field_number: 7, + name_in_index: :account_id + } + } + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID", name_in_index: "account_id" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 7;") + end + + it "validates per-message field-number mapping structure" do + results = 
define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => {"Account" => "bad"}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be a Hash")) + end + + it "validates that nested `fields` is a hash in field-number mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => {"Account" => {"fields" => "bad"}}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must contain a `fields` Hash")) + end + + it "validates that mapped field numbers are positive integers" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => {"Account" => {"id" => 0}}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be a positive integer")) + end + + it "validates that mapped field numbers are integers" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings({"messages" => {"Account" => {"id" => "abc"}}}) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must be an integer")) + end + + it "validates that structured mappings include `field_number`" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"id" => {"name_in_index" => "account_id"}}}}} + ) + + s.object_type "Account" do |t| + t.field "id", "ID", name_in_index: "account_id" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) 
+ }.to raise_error(Errors::SchemaError, a_string_including("must include `field_number`")) + end + + it "validates that structured mappings use a String or Symbol `name_in_index`" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"id" => {"field_number" => 7, "name_in_index" => 123}}}}} + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including("must use a String or Symbol `name_in_index`")) + end + + it "defaults structured mappings without `name_in_index` to the field name" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"display_name" => {"field_number" => 7}}}}} + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "display_name", "String" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string display_name = 7;") + end + + it "allocates the next available field number when a renamed field has no old mapping entry" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + {"messages" => {"Account" => {"fields" => {"other_name" => 7}}}} + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "display_name", "String" do |f| + f.renamed_from "full_name" + end + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 1;") + expect(proto_schema_from(results)).to include("string display_name = 2;") + end + + private + + def build_schema_with_root_indexed_types(*indexed_types) + Schema.new( + state: build_fake_state(indexed_types), + package_name: "elasticgraph", + proto_enums_by_graphql_enum: {}, + proto_field_number_mappings: {} + ) + end + + def build_fake_state(indexed_types = []) + indexed_types_by_index_name = 
indexed_types.each_with_index.to_h do |type, index| + ["index_#{index}", type] + end + + ::Object.new.tap do |state| + state.define_singleton_method(:indexed_types_by_index_name) { indexed_types_by_index_name } + state.define_singleton_method(:renamed_fields_by_type_name_and_old_field_name) { {} } + end + end + + def build_fake_scalar_type(proto_type_name) + scalar = ::Object.new.extend(SchemaElements::ScalarTypeExtension) + scalar.proto_field type: proto_type_name + scalar + end + + def build_fake_type_ref(resolved:, unwrapped_name:) + ref = ::Object.new + ref.define_singleton_method(:unwrap_non_null) { ref } + ref.define_singleton_method(:list?) { false } + ref.define_singleton_method(:resolved) { resolved } + ref.define_singleton_method(:unwrapped_name) { unwrapped_name } + ref + end + + def build_fake_message_type(name, fields_by_name = {}) + indexing_field_class = ::Data.define(:name, :name_in_index, :type) + graphql_field_class = ::Data.define(:to_indexing_field) + + type = ::Object.new.extend(SchemaElements::ObjectInterfaceAndUnionExtension) + type.define_singleton_method(:name) { name } + type.define_singleton_method(:indexing_fields_by_name_in_index) do + fields_by_name.to_h do |field_name, type_ref| + indexing_field = indexing_field_class.new(name: field_name, name_in_index: field_name, type: type_ref) + [field_name, graphql_field_class.new(to_indexing_field: indexing_field)] + end + end + type + end + + def build_fake_enum_type(name, values:) + type = ::Object.new.extend(SchemaElements::EnumTypeExtension) + type.define_singleton_method(:name) { name } + type.define_singleton_method(:values_by_name) { values.to_h { |v| [v, true] } } + type + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension_spec.rb new file mode 100644 index 
000000000..25ff3434b --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension_spec.rb @@ -0,0 +1,46 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. +# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/schema_elements/scalar_type_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + module SchemaElements + RSpec.describe ScalarTypeExtension do + let(:scalar_type_class) do + ::Class.new do + include ScalarTypeExtension + + attr_reader :name + + def initialize(name:) + @name = name + end + end + end + + it "returns an explicitly configured proto field type" do + scalar = scalar_type_class.new(name: "CustomScalar") + scalar.proto_field(type: "fixed64") + + expect(scalar.to_proto_field_type).to eq("fixed64") + end + + it "raises when no proto field type is configured" do + scalar = scalar_type_class.new(name: "CustomScalar") + + expect { + scalar.to_proto_field_type + }.to raise_error(Errors::SchemaError, a_string_including("Protobuf field type not configured for scalar type `CustomScalar`")) + end + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_spec.rb new file mode 100644 index 000000000..80745d86a --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf/schema_definition/schema_spec.rb @@ -0,0 +1,587 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf/schema_definition/api_extension" + +module ElasticGraph + module Protobuf + module SchemaDefinition + RSpec.describe Schema, :proto_schema do + it "generates a proto schema from indexed types" do + results = define_proto_schema do |s| + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE" + end + + s.object_type "Address" do |t| + t.field "street", "String" + t.field "city", "String" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.field "address", "Address" + t.field "tags", "[String!]!" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto3"; + + package elasticgraph; + + enum Status { + STATUS_UNSPECIFIED = 0; + STATUS_ACTIVE = 1; + STATUS_INACTIVE = 2; + } + + message Account { + string id = 1; + Status status = 2; + Address address = 3; + repeated string tags = 4; + } + + message Address { + string street = 1; + string city = 2; + } + PROTO + end + + it "emits proto2 syntax with an explicit label on every field when `syntax: :proto2`" do + results = define_proto_schema do |s| + s.proto_schema_artifacts syntax: :proto2 + + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.field "tags", "[String!]!" 
+ t.index "accounts" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto2"; + + package elasticgraph; + + enum Status { + STATUS_UNSPECIFIED = 0; + STATUS_ACTIVE = 1; + STATUS_INACTIVE = 2; + } + + message Account { + optional string id = 1; + optional Status status = 2; + repeated string tags = 3; + } + PROTO + end + + it "renders custom `headers` verbatim as a contiguous section after the package declaration" do + results = define_proto_schema do |s| + s.proto_schema_artifacts( + package_name: "myapp.events.v1", + headers: [ + %(option java_package = "com.myapp.events";), + "option java_multiple_files = true;" + ] + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto3"; + + package myapp.events.v1; + + option java_package = "com.myapp.events"; + option java_multiple_files = true; + + message Account { + string id = 1; + } + PROTO + end + + it "generates a single message for an indexed abstract type, covering its subtypes' fields" do + results = define_proto_schema do |s| + s.object_type "Car" do |t| + t.implements "Vehicle" + t.field "id", "ID" + t.field "doors", "Int" + end + + s.object_type "Bike" do |t| + t.implements "Vehicle" + t.field "id", "ID" + t.field "gears", "Int" + end + + s.interface_type "Vehicle" do |t| + t.field "id", "ID" + t.index "vehicles" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("message Vehicle {") + expect(generated).to include("string id =", "int32 doors =", "int32 gears =") + expect(generated).not_to include("message Car {", "message Bike {") + end + + it "generates wrapper messages for nested lists" do + results = define_proto_schema do |s| + s.object_type "Matrix" do |t| + t.field "id", "ID" + t.field "values", "[[Float!]!]!" 
+ t.index "matrices" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto3"; + + package elasticgraph; + + message Matrix { + string id = 1; + repeated MatrixValuesListLevel1 values = 2; + } + + message MatrixValuesListLevel1 { + repeated double values = 1; + } + PROTO + end + + it "uses custom proto scalar mappings" do + results = define_proto_schema do |s| + s.scalar_type "CustomTimestamp" do |t| + t.mapping type: "date" + t.json_schema type: "string", format: "date-time" + t.proto_field type: "int64" + end + + s.object_type "Event" do |t| + t.field "id", "ID" + t.field "occurred_at", "CustomTimestamp" + t.index "events" + end + end + + expect(proto_schema_from(results)).to include("int64 occurred_at = 2;") + end + + it "uses explicit proto_field mappings independent of json_schema type" do + results = define_proto_schema do |s| + s.scalar_type "UnixTimestamp" do |t| + t.mapping type: "long" + t.json_schema type: "integer" + t.proto_field type: "fixed64" + end + + s.object_type "Event" do |t| + t.field "id", "ID" + t.field "occurred_at", "UnixTimestamp" + t.index "events" + end + end + + expect(proto_schema_from(results)).to include("fixed64 occurred_at = 2;") + end + + it "can assign field numbers from configured mappings" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 10, + "name" => 2 + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("string id = 10;") + expect(generated).to include("string name = 2;") + end + + it "assigns new field numbers after mapped values when mappings are partial" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 1 + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", 
"ID" + t.field "name", "String" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string name = 2;") + end + + it "exposes generated field-number mappings as an artifact hash" do + results = define_proto_schema do |s| + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Account" => { + "fields" => { + "id" => 1, + "name" => 2 + } + } + } + }) + end + + it "preserves reserved numbers for removed fields and allocates new numbers above them" do + results = define_proto_schema do |s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "id" => 1, + "legacyField" => 2 + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("string id = 1;") + expect(generated).to include("string name = 3;") + + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Account" => { + "fields" => { + "id" => 1, + "legacyField" => 2, + "name" => 3 + } + } + } + }) + end + + it "uses public field names in schema.proto and stores name_in_index overrides in the mapping artifact" do + results = define_proto_schema do |s| + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "display_name", "String", name_in_index: "displayName" + t.index "widgets" + end + end + + expect(proto_schema_from(results)).to include("string display_name = 2;") + expect(proto_schema_from(results)).not_to include("displayName") + + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Widget" => { + "fields" => { + "id" => 1, + "display_name" => { + "field_number" => 2, + "name_in_index" => "displayName" + } + } + } + } + }) + end + + it "preserves a field number across a public field rename" do + results = define_proto_schema do 
|s| + s.configure_proto_field_number_mappings( + { + "messages" => { + "Account" => { + "fields" => { + "full_name" => 7 + } + } + } + } + ) + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "display_name", "String" do |f| + f.renamed_from "full_name" + end + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to include("string id = 1;") + expect(proto_schema_from(results)).to include("string display_name = 7;") + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Account" => { + "fields" => { + "id" => 1, + "display_name" => 7 + } + } + } + }) + end + + it "generates the proto schema and assigns field numbers when used without `elasticgraph-json_ingestion`" do + # Overrides the extension modules to activate ONLY the protobuf extension. Without + # `elasticgraph-json_ingestion`, `renamed_from` cannot be declared, so no rename + # metadata exists on the schema definition state for proto generation to consult. + results = define_proto_schema(extension_modules: [SchemaDefinition::APIExtension]) do |s| + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "display_name", "String" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto3"; + + package elasticgraph; + + message Account { + string id = 1; + string display_name = 2; + } + PROTO + + expect(results.proto_field_number_mappings).to eq({ + "messages" => { + "Account" => { + "fields" => { + "id" => 1, + "display_name" => 2 + } + } + } + }) + end + + it "raises an error when a custom scalar does not configure proto_field" do + results = define_proto_schema do |s| + s.scalar_type "UnconfiguredScalar" do |t| + t.mapping type: "keyword" + t.json_schema type: "object" + end + + s.object_type "Widget" do |t| + t.field "id", "ID" + t.field "value", "UnconfiguredScalar" + t.index "widgets" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including( + 
"Protobuf field type not configured for scalar type `UnconfiguredScalar`.", + "call `proto_field type:" + )) + end + + it "prefixes enum values and escapes proto keywords in generated identifiers" do + results = define_proto_schema do |s| + s.enum_type "Command" do |t| + t.values "option", "stream" + end + + s.object_type "Request" do |t| + t.field "id", "ID" + t.field "package", "String" + t.field "command", "Command" + t.index "requests" + end + end + + expect(proto_schema_from(results)).to include("COMMAND_OPTION = 1;") + expect(proto_schema_from(results)).to include("COMMAND_STREAM = 2;") + expect(proto_schema_from(results)).to include("string package_ = 2; // source name: package") + end + + it "can source enum values from configured proto enum mappings" do + proto_status = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :UNKNOWN_DO_NOT_USE), + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :INACTIVE) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status => { + exclusions: [:UNKNOWN_DO_NOT_USE], + expected_extras: [:LEGACY] + } + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE", "OBSOLETE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + generated = proto_schema_from(results) + expect(generated).to include("STATUS_ACTIVE = 1;") + expect(generated).to include("STATUS_INACTIVE = 2;") + expect(generated).to include("STATUS_LEGACY = 3;") + expect(generated).not_to include("OBSOLETE") + end + + it "can import and reference an external proto enum type" do + proto_status = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :INACTIVE) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings("Status" => {proto_status => {}}) + s.proto_external_types( + "Status" => { + proto: 
"squareup.connect.v2.Status", + import: "squareup/connect/v2/status.proto" + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect(proto_schema_from(results)).to eq(<<~PROTO) + syntax = "proto3"; + + package elasticgraph; + + import "squareup/connect/v2/status.proto"; + + message Account { + string id = 1; + squareup.connect.v2.Status status = 2; + } + PROTO + end + + it "raises when mapped proto enum sources produce inconsistent values" do + proto_status_a = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :INACTIVE) + ] + end + end + + proto_status_b = ::Class.new do + def self.enums + [ + ::Data.define(:name).new(name: :ACTIVE), + ::Data.define(:name).new(name: :PENDING) + ] + end + end + + results = define_proto_schema do |s| + s.proto_enum_mappings( + "Status" => { + proto_status_a => {}, + proto_status_b => {} + } + ) + + s.enum_type "Status" do |t| + t.values "ACTIVE", "INACTIVE" + end + + s.object_type "Account" do |t| + t.field "id", "ID" + t.field "status", "Status" + t.index "accounts" + end + end + + expect { + proto_schema_from(results) + }.to raise_error(Errors::SchemaError, a_string_including( + "Protobuf enum mappings for `Status` produce inconsistent value sets" + )) + end + end + end + end +end diff --git a/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf_spec.rb b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf_spec.rb new file mode 100644 index 000000000..b93e372d5 --- /dev/null +++ b/elasticgraph-protobuf/spec/unit/elastic_graph/protobuf_spec.rb @@ -0,0 +1,21 @@ +# Copyright 2024 - 2026 Block, Inc. +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. 
+# +# frozen_string_literal: true + +require "elastic_graph/protobuf" + +module ElasticGraph + RSpec.describe Protobuf do + it "defines the PROTO_SCHEMA_FILE constant" do + expect(Protobuf::PROTO_SCHEMA_FILE).to eq("schema.proto") + end + + it "defines the PROTO_FIELD_NUMBERS_FILE constant" do + expect(Protobuf::PROTO_FIELD_NUMBERS_FILE).to eq("proto_field_numbers.yaml") + end + end +end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb index 0ae2e7ff7..10c29023b 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_artifact_manager.rb @@ -183,17 +183,19 @@ def new_yaml_artifact(file_name, desired_contents, extra_comment_lines: []) desired_contents, ->(hash) { ::YAML.dump(hash) }, ->(string) { ::YAML.safe_load(string) }, - extra_comment_lines + extra_comment_lines, + "#" ) end - def new_raw_artifact(file_name, desired_contents) + def new_raw_artifact(file_name, desired_contents, comment_prefix: "#") SchemaArtifact.new( ::File.join(@schema_artifacts_directory, file_name), desired_contents, _ = :itself.to_proc, _ = :itself.to_proc, - [] + [], + comment_prefix ) end @@ -215,7 +217,7 @@ def pruned_runtime_metadata(graphql_schema_string) end # @private - class SchemaArtifact < Support::MemoizableData.define(:file_name, :desired_contents, :dumper, :loader, :extra_comment_lines) + class SchemaArtifact < Support::MemoizableData.define(:file_name, :desired_contents, :dumper, :loader, :extra_comment_lines, :comment_prefix) def dump(output) if out_of_date? dirname = File.dirname(file_name) @@ -271,7 +273,7 @@ def comment_preamble ] lines = extra_comment_lines + [""] + lines unless extra_comment_lines.empty? 
- lines.map { |line| "# #{line}".strip }.join("\n") + lines.map { |line| "#{comment_prefix} #{line}".rstrip }.join("\n") end end end diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs index 713693ac3..88c63edb6 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_artifact_manager.rbs @@ -34,7 +34,11 @@ module ElasticGraph ?extra_comment_lines: ::Array[::String] ) -> SchemaArtifact[::Hash[::String, untyped]] - def new_raw_artifact: (::String, ::String) -> SchemaArtifact[::String] + def new_raw_artifact: ( + ::String, + ::String, + ?comment_prefix: ::String + ) -> SchemaArtifact[::String] def pruned_runtime_metadata: (::String) -> SchemaArtifacts::RuntimeMetadata::Schema end @@ -44,13 +48,15 @@ module ElasticGraph attr_reader dumper: ^(T) -> ::String attr_reader loader: ^(::String) -> T attr_reader extra_comment_lines: ::Array[::String] + attr_reader comment_prefix: ::String def initialize: ( ::String, T, ^(T) -> ::String, ^(::String) -> T, - ::Array[::String]) -> void + ::Array[::String], + ::String) -> void end class SchemaArtifact[T] < SchemaArtifactSupertype[T] diff --git a/elasticgraph-support/README.md b/elasticgraph-support/README.md index 8464cbc88..ebf1c6a2e 100644 --- a/elasticgraph-support/README.md +++ b/elasticgraph-support/README.md @@ -51,6 +51,9 @@ graph LR; elasticgraph-opensearch["elasticgraph-opensearch"]; elasticgraph-opensearch --> elasticgraph-support; class elasticgraph-opensearch otherEgGemStyle; + elasticgraph-protobuf["elasticgraph-protobuf"]; + elasticgraph-protobuf --> elasticgraph-support; + class elasticgraph-protobuf otherEgGemStyle; elasticgraph-query_registry["elasticgraph-query_registry"]; elasticgraph-query_registry --> elasticgraph-support; class 
elasticgraph-query_registry otherEgGemStyle; diff --git a/rbs_collection.yaml b/rbs_collection.yaml index e19fbb28d..130f6c6c3 100644 --- a/rbs_collection.yaml +++ b/rbs_collection.yaml @@ -82,6 +82,8 @@ gems: ignore: true - name: elasticgraph-opensearch ignore: true + - name: elasticgraph-protobuf + ignore: true - name: elasticgraph-query_interceptor ignore: true - name: elasticgraph-query_registry diff --git a/script/run_gem_specs b/script/run_gem_specs index 19f0e8fb4..57b197a34 100755 --- a/script/run_gem_specs +++ b/script/run_gem_specs @@ -29,7 +29,7 @@ pushd $gem # each gem subdirectory, we pass the ENV var here. If the file does not exist, we'll get an error. cp ../Gemfile.lock Gemfile.lock BUNDLE_GEMFILE=Gemfile bundle check || (rm -rf Gemfile.lock && bundle install) - if [[ "$gem" == "elasticgraph-graphql" || "$gem" == "elasticgraph-indexer" || "$gem" == "elasticgraph-json_ingestion" || "$gem" == "elasticgraph-schema_definition" ]]; then + if [[ "$gem" == "elasticgraph-graphql" || "$gem" == "elasticgraph-indexer" || "$gem" == "elasticgraph-json_ingestion" || "$gem" == "elasticgraph-protobuf" || "$gem" == "elasticgraph-schema_definition" ]]; then # These gems have larger test suites that take longer, and therefore benefit from being run in parallel with flatware. BUNDLE_GEMFILE=Gemfile ../script/flatware_rspec --backtrace --format progress else