From c4f8769f09ba211e582c200757015c1277c904c6 Mon Sep 17 00:00:00 2001
From: lautel <laura92cp2@gmail.com>
Date: Fri, 8 May 2026 13:28:08 +0200
Subject: [PATCH 1/4] add nlqHybridSearch property to OpenMetadataApplicationConfig

---
 .../openmetadata/service/OpenMetadataApplicationConfig.java   | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java
index 85bd566667e5..3ac532f1091a 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplicationConfig.java
@@ -14,6 +14,7 @@
 package org.openmetadata.service;
 
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.JsonNode;
 import io.dropwizard.core.Configuration;
 import io.dropwizard.core.server.DefaultServerFactory;
 import jakarta.validation.Valid;
@@ -79,6 +80,9 @@ public class OpenMetadataApplicationConfig extends Configuration {
   @JsonProperty("elasticsearch")
   private ElasticSearchConfiguration elasticSearchConfiguration;
 
+  @JsonProperty("nlqHybridSearch")
+  private JsonNode nlqHybridSearch;
+
   @JsonProperty("eventHandlerConfiguration")
   private EventHandlerConfiguration eventHandlerConfiguration;
 

From fbccb2e8dc3db516331c5f1e082af3466e9b6ca1 Mon Sep 17 00:00:00 2001
From: lautel <laura92cp2@gmail.com>
Date: Fri, 8 May 2026 15:43:41 +0200
Subject: [PATCH 2/4] move config under naturalLanguageSearch

---
 conf/openmetadata.yaml                        |   2 +-
 .../search/vector/client/EmbeddingClient.java |   2 +-
 .../client/OpenAIEmbeddingClientTest.java     |   6 +-
 .../elasticSearchConfiguration.json           | 106 +++++++++++++++++-
 4 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/conf/openmetadata.yaml b/conf/openmetadata.yaml
index 4150fc776f86..148790900221 100644
--- a/conf/openmetadata.yaml
+++ b/conf/openmetadata.yaml
@@ -495,7 +495,7 @@ elasticsearch:
     enabled: ${NATURAL_LANGUAGE_SEARCH_ENABLED:-false}
     semanticSearchEnabled: ${SEMANTIC_SEARCH_ENABLED:-false}
     embeddingProvider: ${EMBEDDING_PROVIDER:-bedrock} # Options: "openai", "bedrock", "djl"
-    maxConcurrentEmbeddingRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10}
+    maxConcurrentRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10}
     providerClass: ${NATURAL_LANGUAGE_SEARCH_PROVIDER_CLASS:-org.openmetadata.service.search.nlq.NoOpNLQService}
     bedrock:
       awsConfig:
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java
index eed31860bad8..e5d109d69c37 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/EmbeddingClient.java
@@ -55,7 +55,7 @@ public List<float[]> embedBatch(List<String> texts) {
   protected static int resolveMaxConcurrent(ElasticSearchConfiguration config) {
     NaturalLanguageSearchConfiguration nlsCfg = config.getNaturalLanguageSearch();
     if (nlsCfg != null) {
-      Integer value = nlsCfg.getMaxConcurrentEmbeddingRequests();
+      Integer value = nlsCfg.getMaxConcurrentRequests();
       if (value != null && value > 0) {
         return value;
       }
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java
index d0ed3d92b340..fd3ff08f875b 100644
--- a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java
+++ b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClientTest.java
@@ -456,7 +456,7 @@ public <T> CompletableFuture<HttpResponse<T>> sendAsync(
   @Test
   void testResolveMaxConcurrentFromConfig() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(5);
+    nlsCfg.setMaxConcurrentRequests(5);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 
@@ -485,7 +485,7 @@ void testResolveMaxConcurrentDefaultWhenNullValue() {
   @Test
   void testResolveMaxConcurrentDefaultWhenZero() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(0);
+    nlsCfg.setMaxConcurrentRequests(0);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 
@@ -497,7 +497,7 @@ void testResolveMaxConcurrentDefaultWhenZero() {
   @Test
   void testResolveMaxConcurrentDefaultWhenNegative() {
     NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
-    nlsCfg.setMaxConcurrentEmbeddingRequests(-3);
+    nlsCfg.setMaxConcurrentRequests(-3);
     ElasticSearchConfiguration config = new ElasticSearchConfiguration();
     config.setNaturalLanguageSearch(nlsCfg);
 
diff --git a/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json b/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json
index 4b7591e6244d..e9ff5ba2ee85 100644
--- a/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json
+++ b/openmetadata-spec/src/main/resources/json/schema/configuration/elasticSearchConfiguration.json
@@ -153,8 +153,8 @@
           "type": "string",
           "default": "bedrock"
         },
-        "maxConcurrentEmbeddingRequests": {
-          "description": "Maximum number of concurrent embedding API requests. Controls the semaphore used to throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection limits.",
+        "maxConcurrentRequests": {
+          "description": "Maximum number of concurrent embedding and NLQ provider requests. Controls the semaphore used to throttle calls to the providers and prevent overwhelming HTTP/2 connection limits.",
           "type": "integer",
           "default": 10,
           "minimum": 1
@@ -186,6 +186,25 @@
               "description": "Dimension of the embedding vector",
               "type": "integer",
               "default": 512
+            },
+            "timeoutSeconds": {
+              "description": "Bedrock InvokeModel API call timeout in seconds.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 15
+            },
+            "maxTokens": {
+              "description": "Maximum tokens the Bedrock model is allowed to generate.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 256
+            },
+            "temperature": {
+              "description": "Sampling temperature for Bedrock requests.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 2.0,
+              "default": 0.0
             }
           },
           "additionalProperties": false
@@ -237,6 +256,89 @@
               "description": "Azure OpenAI API version. Only used with Azure OpenAI.",
               "type": "string",
               "default": "2024-02-01"
+            },
+            "timeoutSeconds": {
+              "description": "OpenAI HTTP request and connect timeout in seconds.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 30
+            },
+            "maxTokens": {
+              "description": "Maximum tokens the OpenAI model is allowed to generate.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 256
+            },
+            "temperature": {
+              "description": "Sampling temperature for OpenAI requests.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 2.0,
+              "default": 0.0
+            }
+          },
+          "additionalProperties": false
+        },
+        "filterExtractor": {
+          "description": "NLQ filter extractor cache and prompt tuning.",
+          "type": "object",
+          "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.FilterExtractor",
+          "properties": {
+            "cacheMaxSize": {
+              "description": "Max number of entries in the NLQ filter extraction result cache.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
+            },
+            "cacheExpiryMinutes": {
+              "description": "Cache TTL in minutes for NLQ filter extraction results.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 5
+            },
+            "maxSampleValues": {
+              "description": "Max sample values shown per filter category in the system prompt.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 10
+            }
+          },
+          "additionalProperties": false
+        },
+        "hybridSearch": {
+          "description": "Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.",
+          "type": "object",
+          "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.HybridSearch",
+          "properties": {
+            "searchPipeline": {
+              "description": "Name of the OpenSearch search pipeline used to normalize hybrid (BM25 + KNN) scores.",
+              "type": "string",
+              "default": "hybrid-rrf"
+            },
+            "semanticScoreThreshold": {
+              "description": "Minimum score threshold for the semantic (KNN) sub-query results.",
+              "type": "number",
+              "minimum": 0.0,
+              "maximum": 1.0,
+              "default": 0.55
+            },
+            "maxQueryTerms": {
+              "description": "Maximum number of query terms forwarded to the shard-fair keyword sub-query.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 10
+            },
+            "fragmentSize": {
+              "description": "Highlight fragment size (characters) for hybrid search hits.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
+            },
+            "paginationDepth": {
+              "description": "Pagination depth used by the hybrid query for RRF normalization.",
+              "type": "integer",
+              "minimum": 1,
+              "default": 1000
             }
           },
           "additionalProperties": false

From 6aa16404449c4bd00f8943aa8d0fbfba64a022b4 Mon Sep 17 00:00:00 2001
From: lautel <laura92cp2@gmail.com>
Date: Fri, 8 May 2026 15:48:11 +0200
Subject: [PATCH 3/4] take OpenAI client default concurrency from NaturalLanguageSearchConfiguration

---
 .../search/vector/client/OpenAIEmbeddingClient.java       | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java
index f1aba4ed32f9..4cbfb0a035f8 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/client/OpenAIEmbeddingClient.java
@@ -79,7 +79,13 @@ public OpenAIEmbeddingClient(ElasticSearchConfiguration config) {
       String endpoint,
       boolean isAzure) {
     this(
-        httpClient, apiKey, modelId, dimension, endpoint, isAzure, DEFAULT_MAX_CONCURRENT_REQUESTS);
+        httpClient,
+        apiKey,
+        modelId,
+        dimension,
+        endpoint,
+        isAzure,
+        new NaturalLanguageSearchConfiguration().getMaxConcurrentRequests());
   }
 
   OpenAIEmbeddingClient(

From be12eb2648f913de8f059da0fdff0a839d0e0483 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Fri, 8 May 2026 13:53:04 +0000
Subject: [PATCH 4/4] Update generated TypeScript types

---
 .../elasticSearchConfiguration.ts             | 83 ++++++++++++++++++-
 .../ui/src/generated/settings/settings.ts     | 83 ++++++++++++++++++-
 2 files changed, 158 insertions(+), 8 deletions(-)

diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/elasticSearchConfiguration.ts b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/elasticSearchConfiguration.ts
index 4f798d792c90..87c6da38367b 100644
--- a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/elasticSearchConfiguration.ts
+++ b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/elasticSearchConfiguration.ts
@@ -134,16 +134,23 @@ export interface NaturalLanguageSearch {
      * Enable or disable natural language search
      */
     enabled?: boolean;
+    /**
+     * NLQ filter extractor cache and prompt tuning.
+     */
+    filterExtractor?: FilterExtractor;
+    /**
+     * Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.
+     */
+    hybridSearch?: HybridSearch;
     /**
      * Weight for BM25 keyword search results in hybrid RRF pipeline (0.0-1.0)
      */
     keywordWeight?: number;
     /**
-     * Maximum number of concurrent embedding API requests. Controls the semaphore used to
-     * throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection
-     * limits.
+     * Maximum number of concurrent embedding and NLQ provider requests. Controls the semaphore
+     * used to throttle calls to the providers and prevent overwhelming HTTP/2 connection limits.
      */
-    maxConcurrentEmbeddingRequests?: number;
+    maxConcurrentRequests?: number;
     /**
      * OpenAI configuration for embedding generation. Supports both OpenAI and Azure OpenAI
      * endpoints.
@@ -179,10 +186,22 @@ export interface Bedrock {
      * Bedrock embedding model identifier to use for vector search
      */
     embeddingModelId?: string;
+    /**
+     * Maximum tokens the Bedrock model is allowed to generate.
+     */
+    maxTokens?: number;
     /**
      * Bedrock model identifier to use for query transformation
      */
     modelId?: string;
+    /**
+     * Sampling temperature for Bedrock requests.
+     */
+    temperature?: number;
+    /**
+     * Bedrock InvokeModel API call timeout in seconds.
+     */
+    timeoutSeconds?: number;
 }
 
 /**
@@ -238,6 +257,50 @@ export interface Djl {
     embeddingModel?: string;
 }
 
+/**
+ * NLQ filter extractor cache and prompt tuning.
+ */
+export interface FilterExtractor {
+    /**
+     * Cache TTL in minutes for NLQ filter extraction results.
+     */
+    cacheExpiryMinutes?: number;
+    /**
+     * Max number of entries in the NLQ filter extraction result cache.
+     */
+    cacheMaxSize?: number;
+    /**
+     * Max sample values shown per filter category in the system prompt.
+     */
+    maxSampleValues?: number;
+}
+
+/**
+ * Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.
+ */
+export interface HybridSearch {
+    /**
+     * Highlight fragment size (characters) for hybrid search hits.
+     */
+    fragmentSize?: number;
+    /**
+     * Maximum number of query terms forwarded to the shard-fair keyword sub-query.
+     */
+    maxQueryTerms?: number;
+    /**
+     * Pagination depth used by the hybrid query for RRF normalization.
+     */
+    paginationDepth?: number;
+    /**
+     * Name of the OpenSearch search pipeline used to normalize hybrid (BM25 + KNN) scores.
+     */
+    searchPipeline?: string;
+    /**
+     * Minimum score threshold for the semantic (KNN) sub-query results.
+     */
+    semanticScoreThreshold?: number;
+}
+
 /**
  * OpenAI configuration for embedding generation. Supports both OpenAI and Azure OpenAI
  * endpoints.
@@ -268,10 +331,22 @@ export interface Openai {
      * https://your-resource.openai.azure.com). Leave empty for standard OpenAI API.
      */
     endpoint?: string;
+    /**
+     * Maximum tokens the OpenAI model is allowed to generate.
+     */
+    maxTokens?: number;
     /**
      * OpenAI model identifier to use for query transformation (chat completions).
      */
     modelId?: string;
+    /**
+     * Sampling temperature for OpenAI requests.
+     */
+    temperature?: number;
+    /**
+     * OpenAI HTTP request and connect timeout in seconds.
+     */
+    timeoutSeconds?: number;
 }
 
 /**
diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts b/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts
index 891650b613c6..47181803f867 100644
--- a/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts
+++ b/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts
@@ -2180,16 +2180,23 @@ export interface NaturalLanguageSearch {
      * Enable or disable natural language search
      */
     enabled?: boolean;
+    /**
+     * NLQ filter extractor cache and prompt tuning.
+     */
+    filterExtractor?: FilterExtractor;
+    /**
+     * Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.
+     */
+    hybridSearch?: HybridSearch;
     /**
      * Weight for BM25 keyword search results in hybrid RRF pipeline (0.0-1.0)
      */
     keywordWeight?: number;
     /**
-     * Maximum number of concurrent embedding API requests. Controls the semaphore used to
-     * throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection
-     * limits.
+     * Maximum number of concurrent embedding and NLQ provider requests. Controls the semaphore
+     * used to throttle calls to the providers and prevent overwhelming HTTP/2 connection limits.
      */
-    maxConcurrentEmbeddingRequests?: number;
+    maxConcurrentRequests?: number;
     /**
      * OpenAI configuration for embedding generation. Supports both OpenAI and Azure OpenAI
      * endpoints.
@@ -2225,10 +2232,22 @@ export interface Bedrock {
      * Bedrock embedding model identifier to use for vector search
      */
     embeddingModelId?: string;
+    /**
+     * Maximum tokens the Bedrock model is allowed to generate.
+     */
+    maxTokens?: number;
     /**
      * Bedrock model identifier to use for query transformation
      */
     modelId?: string;
+    /**
+     * Sampling temperature for Bedrock requests.
+     */
+    temperature?: number;
+    /**
+     * Bedrock InvokeModel API call timeout in seconds.
+     */
+    timeoutSeconds?: number;
 }
 
 /**
@@ -2284,6 +2303,50 @@ export interface Djl {
     embeddingModel?: string;
 }
 
+/**
+ * NLQ filter extractor cache and prompt tuning.
+ */
+export interface FilterExtractor {
+    /**
+     * Cache TTL in minutes for NLQ filter extraction results.
+     */
+    cacheExpiryMinutes?: number;
+    /**
+     * Max number of entries in the NLQ filter extraction result cache.
+     */
+    cacheMaxSize?: number;
+    /**
+     * Max sample values shown per filter category in the system prompt.
+     */
+    maxSampleValues?: number;
+}
+
+/**
+ * Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.
+ */
+export interface HybridSearch {
+    /**
+     * Highlight fragment size (characters) for hybrid search hits.
+     */
+    fragmentSize?: number;
+    /**
+     * Maximum number of query terms forwarded to the shard-fair keyword sub-query.
+     */
+    maxQueryTerms?: number;
+    /**
+     * Pagination depth used by the hybrid query for RRF normalization.
+     */
+    paginationDepth?: number;
+    /**
+     * Name of the OpenSearch search pipeline used to normalize hybrid (BM25 + KNN) scores.
+     */
+    searchPipeline?: string;
+    /**
+     * Minimum score threshold for the semantic (KNN) sub-query results.
+     */
+    semanticScoreThreshold?: number;
+}
+
 /**
  * OpenAI configuration for embedding generation. Supports both OpenAI and Azure OpenAI
  * endpoints.
@@ -2314,10 +2377,22 @@ export interface Openai {
      * https://your-resource.openai.azure.com). Leave empty for standard OpenAI API.
      */
     endpoint?: string;
+    /**
+     * Maximum tokens the OpenAI model is allowed to generate.
+     */
+    maxTokens?: number;
     /**
      * OpenAI model identifier to use for query transformation (chat completions).
      */
     modelId?: string;
+    /**
+     * Sampling temperature for OpenAI requests.
+     */
+    temperature?: number;
+    /**
+     * OpenAI HTTP request and connect timeout in seconds.
+     */
+    timeoutSeconds?: number;
 }
 
 /**