open-metadata · lautel · May 8, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/conf/openmetadata.yaml b/conf/openmetadata.yaml
@@ -495,7 +495,7 @@ elasticsearch:
     enabled: ${NATURAL_LANGUAGE_SEARCH_ENABLED:-false}
     semanticSearchEnabled: ${SEMANTIC_SEARCH_ENABLED:-false}
     embeddingProvider: ${EMBEDDING_PROVIDER:-bedrock} # Options: "openai", "bedrock", "djl"
-    maxConcurrentEmbeddingRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10}
+    maxConcurrentRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10} # Max concurrent embedding/NLQ provider requests; env var keeps its pre-rename name
     providerClass: ${NATURAL_LANGUAGE_SEARCH_PROVIDER_CLASS:-org.openmetadata.service.search.nlq.NoOpNLQService}
     bedrock:
       awsConfig:

diff --git a/...etadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java b/...etadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java
@@ -14,6 +14,7 @@
 package org.openmetadata.service;
 
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.JsonNode;
 import io.dropwizard.core.Configuration;
 import io.dropwizard.core.server.DefaultServerFactory;
 import jakarta.validation.Valid;
@@ -79,6 +80,9 @@ public class OpenMetadataApplicationConfig extends Configuration {
   @JsonProperty("elasticsearch")
   private ElasticSearchConfiguration elasticSearchConfiguration;
 
+  @JsonProperty("nlqHybridSearch")
+  private JsonNode nlqHybridSearch; // Untyped JSON tree bound to the "nlqHybridSearch" key
+
   @JsonProperty("eventHandlerConfiguration")
   private EventHandlerConfiguration eventHandlerConfiguration;
 

diff --git a/...-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java b/...-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java
@@ -55,7 +55,7 @@ public List<float[]> embedBatch(List<String> texts) {
   protected static int resolveMaxConcurrent(ElasticSearchConfiguration config) {
     NaturalLanguageSearchConfiguration nlsCfg = config.getNaturalLanguageSearch();
     if (nlsCfg != null) {
-      Integer value = nlsCfg.getMaxConcurrentEmbeddingRequests();
+      Integer value = nlsCfg.getMaxConcurrentRequests();
       if (value != null && value > 0) {
         return value;
       }

diff --git a/...ce/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java b/...ce/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java
@@ -79,7 +79,13 @@ public OpenAIEmbeddingClient(ElasticSearchConfiguration config) {
       String endpoint,
       boolean isAzure) {
     this(
-        httpClient, apiKey, modelId, dimension, endpoint, isAzure, DEFAULT_MAX_CONCURRENT_REQUESTS);
+        httpClient,
+        apiKey,
+        modelId,
+        dimension,
+        endpoint,
+        isAzure,
+        new NaturalLanguageSearchConfiguration().getMaxConcurrentRequests());
   }
 
   OpenAIEmbeddingClient(

diff --git a/...rc/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java b/...rc/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java
@@ -456,7 +456,7 @@ public <T> CompletableFuture<HttpResponse<T>> sendAsync(
   @Test
   void testResolveMaxConcurrentFromConfig() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(5);
+    nlsCfg.setMaxConcurrentRequests(5);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 
@@ -485,7 +485,7 @@ void testResolveMaxConcurrentDefaultWhenNullValue() {
   @Test
   void testResolveMaxConcurrentDefaultWhenZero() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(0);
+    nlsCfg.setMaxConcurrentRequests(0);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 
@@ -497,7 +497,7 @@ void testResolveMaxConcurrentDefaultWhenZero() {
   @Test
   void testResolveMaxConcurrentDefaultWhenNegative() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(-3);
+    nlsCfg.setMaxConcurrentRequests(-3);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 

diff --git a/...etadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json b/...etadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json
@@ -153,8 +153,8 @@
           "type": "string",
           "default": "bedrock"
         },
-        "maxConcurrentEmbeddingRequests": {
-          "description": "Maximum number of concurrent embedding API requests. Controls the semaphore used to throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection limits.",
+        "maxConcurrentRequests": {
+          "description": "Maximum number of concurrent embedding and NLQ provider requests. Controls the semaphore used to throttle calls to the providers and prevent overwhelming HTTP/2 connection limits.",
           "type": "integer",
           "default": 10,
           "minimum": 1
@@ -186,6 +186,25 @@
               "description": "Dimension of the embedding vector",
               "type": "integer",
               "default": 512
+            },
+            "timeoutSeconds": {
+              "description": "Bedrock InvokeModel API call timeout in seconds.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 15
+            },
+            "maxTokens": {
+              "description": "Maximum tokens the Bedrock model is allowed to generate.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 256
+            },
+            "temperature": {
+              "description": "Sampling temperature for Bedrock requests.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 2.0,
+              "default": 0.0
             }
           },
           "additionalProperties": false
@@ -237,6 +256,89 @@
               "description": "Azure OpenAI API version. Only used with Azure OpenAI.",
               "type": "string",
               "default": "2024-02-01"
+            },
+            "timeoutSeconds": {
+              "description": "OpenAI HTTP request and connect timeout in seconds.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 30
+            },
+            "maxTokens": {
+              "description": "Maximum tokens the OpenAI model is allowed to generate.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 256
+            },
+            "temperature": {
+              "description": "Sampling temperature for OpenAI requests.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 2.0,
+              "default": 0.0
+            }
+          },
+          "additionalProperties": false
+        },
+        "filterExtractor": {
+          "description": "NLQ filter extractor cache and prompt tuning.",
+          "type": "object",
+          "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.FilterExtractor",
+          "properties": {
+            "cacheMaxSize": {
+              "description": "Max number of entries in the NLQ filter extraction result cache.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
+            },
+            "cacheExpiryMinutes": {
+              "description": "Cache TTL in minutes for NLQ filter extraction results.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 5
+            },
+            "maxSampleValues": {
+              "description": "Max sample values shown per filter category in the system prompt.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 10
+            }
+          },
+          "additionalProperties": false
+        },
+        "hybridSearch": {
+          "description": "Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.",
+          "type": "object",
+          "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.HybridSearch",
+          "properties": {
+            "searchPipeline": {
+              "description": "Name of the OpenSearch search pipeline used to normalize hybrid (BM25 + KNN) scores.",
+              "type": "string",
+              "default": "hybrid-rrf"
+            },
+            "semanticScoreThreshold": {
+              "description": "Minimum score threshold for the semantic (KNN) sub-query results.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 1.0,
+              "default": 0.55
+            },
+            "maxQueryTerms": {
+              "description": "Maximum number of query terms forwarded to the shard-fair keyword sub-query.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 10
+            },
+            "fragmentSize": {
+              "description": "Highlight fragment size (characters) for hybrid search hits.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
+            },
+            "paginationDepth": {
+              "description": "Pagination depth used by the hybrid query for RRF normalization.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
             }
           },
           "additionalProperties": false