open-metadata · lautel · May 8, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/conf/openmetadata.yaml b/conf/openmetadata.yaml
@@ -495,7 +495,7 @@ elasticsearch:
     enabled: ${NATURAL_LANGUAGE_SEARCH_ENABLED:-false}
     semanticSearchEnabled: ${SEMANTIC_SEARCH_ENABLED:-false}
     embeddingProvider: ${EMBEDDING_PROVIDER:-bedrock} # Options: "openai", "bedrock", "djl"
-    maxConcurrentEmbeddingRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10}
+    maxConcurrentRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10}
     providerClass: ${NATURAL_LANGUAGE_SEARCH_PROVIDER_CLASS:-org.openmetadata.service.search.nlq.NoOpNLQService}
     bedrock:
       awsConfig:

diff --git a/...etadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java b/...etadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java
@@ -14,6 +14,7 @@
 package org.openmetadata.service;
 
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.JsonNode;
 import io.dropwizard.core.Configuration;
 import io.dropwizard.core.server.DefaultServerFactory;
 import jakarta.validation.Valid;
@@ -79,6 +80,9 @@ public class OpenMetadataApplicationConfig extends Configuration {
   @JsonProperty("elasticsearch")
   private ElasticSearchConfiguration elasticSearchConfiguration;
 
+  @JsonProperty("nlqHybridSearch")
+  private JsonNode nlqHybridSearch;
+
   @JsonProperty("eventHandlerConfiguration")
   private EventHandlerConfiguration eventHandlerConfiguration;
 

diff --git a/...-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java b/...-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java
@@ -55,7 +55,7 @@ public List<float[]> embedBatch(List<String> texts) {
   protected static int resolveMaxConcurrent(ElasticSearchConfiguration config) {
     NaturalLanguageSearchConfiguration nlsCfg = config.getNaturalLanguageSearch();
     if (nlsCfg != null) {
-      Integer value = nlsCfg.getMaxConcurrentEmbeddingRequests();
+      Integer value = nlsCfg.getMaxConcurrentRequests();
       if (value != null && value > 0) {
         return value;
       }

diff --git a/...ce/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java b/...ce/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java
@@ -79,7 +79,13 @@ public OpenAIEmbeddingClient(ElasticSearchConfiguration config) {
       String endpoint,
       boolean isAzure) {
     this(
-        httpClient, apiKey, modelId, dimension, endpoint, isAzure, DEFAULT_MAX_CONCURRENT_REQUESTS);
+        httpClient,
+        apiKey,
+        modelId,
+        dimension,
+        endpoint,
+        isAzure,
+        new NaturalLanguageSearchConfiguration().getMaxConcurrentRequests());
   }
 
   OpenAIEmbeddingClient(

diff --git a/...rc/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java b/...rc/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java
@@ -456,7 +456,7 @@ public <T> CompletableFuture<HttpResponse<T>> sendAsync(
   @Test
   void testResolveMaxConcurrentFromConfig() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(5);
+    nlsCfg.setMaxConcurrentRequests(5);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 
@@ -485,7 +485,7 @@ void testResolveMaxConcurrentDefaultWhenNullValue() {
   @Test
   void testResolveMaxConcurrentDefaultWhenZero() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(0);
+    nlsCfg.setMaxConcurrentRequests(0);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 
@@ -497,7 +497,7 @@ void testResolveMaxConcurrentDefaultWhenZero() {
   @Test
   void testResolveMaxConcurrentDefaultWhenNegative() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(-3);
+    nlsCfg.setMaxConcurrentRequests(-3);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 

diff --git a/...etadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json b/...etadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json
@@ -153,8 +153,8 @@
           "type": "string",
           "default": "bedrock"
         },
-        "maxConcurrentEmbeddingRequests": {
-          "description": "Maximum number of concurrent embedding API requests. Controls the semaphore used to throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection limits.",
+        "maxConcurrentRequests": {
+          "description": "Maximum number of concurrent embedding and NLQ provider requests. Controls the semaphore used to throttle calls to the providers and prevent overwhelming HTTP/2 connection limits.",
           "type": "integer",
           "default": 10,
           "minimum": 1
@@ -186,6 +186,25 @@
               "description": "Dimension of the embedding vector",
               "type": "integer",
               "default": 512
+            },
+            "timeoutSeconds": {
+              "description": "Bedrock InvokeModel API call timeout in seconds.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 15
+            },
+            "maxTokens": {
+              "description": "Maximum tokens the Bedrock model is allowed to generate.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 256
+            },
+            "temperature": {
+              "description": "Sampling temperature for Bedrock requests.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 2.0,
+              "default": 0.0
             }
           },
           "additionalProperties": false
@@ -237,6 +256,89 @@
               "description": "Azure OpenAI API version. Only used with Azure OpenAI.",
               "type": "string",
               "default": "2024-02-01"
+            },
+            "timeoutSeconds": {
+              "description": "OpenAI HTTP request and connect timeout in seconds.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 30
+            },
+            "maxTokens": {
+              "description": "Maximum tokens the OpenAI model is allowed to generate.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 256
+            },
+            "temperature": {
+              "description": "Sampling temperature for OpenAI requests.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 2.0,
+              "default": 0.0
+            }
+          },
+          "additionalProperties": false
+        },
+        "filterExtractor": {
+          "description": "NLQ filter extractor cache and prompt tuning.",
+          "type": "object",
+          "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.FilterExtractor",
+          "properties": {
+            "cacheMaxSize": {
+              "description": "Max number of entries in the NLQ filter extraction result cache.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
+            },
+            "cacheExpiryMinutes": {
+              "description": "Cache TTL in minutes for NLQ filter extraction results.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 5
+            },
+            "maxSampleValues": {
+              "description": "Max sample values shown per filter category in the system prompt.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 10
+            }
+          },
+          "additionalProperties": false
+        },
+        "hybridSearch": {
+          "description": "Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.",
+          "type": "object",
+          "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.HybridSearch",
+          "properties": {
+            "searchPipeline": {
+              "description": "Name of the OpenSearch search pipeline used to normalize hybrid (BM25 + KNN) scores.",
+              "type": "string",
+              "default": "hybrid-rrf"
+            },
+            "semanticScoreThreshold": {
+              "description": "Minimum score threshold for the semantic (KNN) sub-query results.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 1.0,
+              "default": 0.55
+            },
+            "maxQueryTerms": {
+              "description": "Maximum number of query terms forwarded to the shard-fair keyword sub-query.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 10
+            },
+            "fragmentSize": {
+              "description": "Highlight fragment size (characters) for hybrid search hits.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
+            },
+            "paginationDepth": {
+              "description": "Pagination depth used by the hybrid query for RRF normalization.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
             }
           },
           "additionalProperties": false

@@ -134,16 +134,23 @@ export interface NaturalLanguageSearch {
      * Enable or disable natural language search
      */
     enabled?: boolean;
+    /**
+     * NLQ filter extractor cache and prompt tuning.
+     */
+    filterExtractor?: FilterExtractor;
+    /**
+     * Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.
+     */
+    hybridSearch?: HybridSearch;
     /**
      * Weight for BM25 keyword search results in hybrid RRF pipeline (0.0-1.0)
      */
     keywordWeight?: number;
     /**
-     * Maximum number of concurrent embedding API requests. Controls the semaphore used to
-     * throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection
-     * limits.
+     * Maximum number of concurrent embedding and NLQ provider requests. Controls the semaphore
+     * used to throttle calls to the providers and prevent overwhelming HTTP/2 connection limits.
      */
-    maxConcurrentEmbeddingRequests?: number;
+    maxConcurrentRequests?: number;
     /**
      * OpenAI configuration for embedding generation. Supports both OpenAI and Azure OpenAI
      * endpoints.
@@ -179,10 +186,22 @@ export interface Bedrock {
      * Bedrock embedding model identifier to use for vector search
      */
     embeddingModelId?: string;
+    /**
+     * Maximum tokens the Bedrock model is allowed to generate.
+     */
+    maxTokens?: number;
     /**
      * Bedrock model identifier to use for query transformation
      */
     modelId?: string;
+    /**
+     * Sampling temperature for Bedrock requests.
+     */
+    temperature?: number;
+    /**
+     * Bedrock InvokeModel API call timeout in seconds.
+     */
+    timeoutSeconds?: number;
 }
 
 /**
@@ -238,6 +257,50 @@ export interface Djl {
     embeddingModel?: string;
 }
 
+/**
+ * NLQ filter extractor cache and prompt tuning.
+ */
+export interface FilterExtractor {
+    /**
+     * Cache TTL in minutes for NLQ filter extraction results.
+     */
+    cacheExpiryMinutes?: number;
+    /**
+     * Max number of entries in the NLQ filter extraction result cache.
+     */
+    cacheMaxSize?: number;
+    /**
+     * Max sample values shown per filter category in the system prompt.
+     */
+    maxSampleValues?: number;
+}
+
+/**
+ * Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.
+ */
+export interface HybridSearch {
+    /**
+     * Highlight fragment size (characters) for hybrid search hits.
+     */
+    fragmentSize?: number;
+    /**
+     * Maximum number of query terms forwarded to the shard-fair keyword sub-query.
+     */
+    maxQueryTerms?: number;
+    /**
+     * Pagination depth used by the hybrid query for RRF normalization.
+     */
+    paginationDepth?: number;
+    /**
+     * Name of the OpenSearch search pipeline used to normalize hybrid (BM25 + KNN) scores.
+     */
+    searchPipeline?: string;
+    /**
+     * Minimum score threshold for the semantic (KNN) sub-query results.
+     */
+    semanticScoreThreshold?: number;
+}
+
 /**
  * OpenAI configuration for embedding generation. Supports both OpenAI and Azure OpenAI
  * endpoints.
@@ -268,10 +331,22 @@ export interface Openai {
      * https://your-resource.openai.azure.com). Leave empty for standard OpenAI API.
      */
     endpoint?: string;
+    /**
+     * Maximum tokens the OpenAI model is allowed to generate.
+     */
+    maxTokens?: number;
     /**
      * OpenAI model identifier to use for query transformation (chat completions).
      */
     modelId?: string;
+    /**
+     * Sampling temperature for OpenAI requests.
+     */
+    temperature?: number;
+    /**
+     * OpenAI HTTP request and connect timeout in seconds.
+     */
+    timeoutSeconds?: number;
 }
 
 /**