Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conf/openmetadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ elasticsearch:
enabled: ${NATURAL_LANGUAGE_SEARCH_ENABLED:-false}
semanticSearchEnabled: ${SEMANTIC_SEARCH_ENABLED:-false}
embeddingProvider: ${EMBEDDING_PROVIDER:-bedrock} # Options: "openai", "bedrock", "djl"
maxConcurrentEmbeddingRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10}
maxConcurrentRequests: ${MAX_CONCURRENT_EMBEDDING_REQUESTS:-10}
providerClass: ${NATURAL_LANGUAGE_SEARCH_PROVIDER_CLASS:-org.openmetadata.service.search.nlq.NoOpNLQService}
bedrock:
awsConfig:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package org.openmetadata.service;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.JsonNode;
import io.dropwizard.core.Configuration;
import io.dropwizard.core.server.DefaultServerFactory;
import jakarta.validation.Valid;
Expand Down Expand Up @@ -79,6 +80,9 @@ public class OpenMetadataApplicationConfig extends Configuration {
@JsonProperty("elasticsearch")
private ElasticSearchConfiguration elasticSearchConfiguration;

@JsonProperty("nlqHybridSearch")
private JsonNode nlqHybridSearch;
Comment thread
lautel marked this conversation as resolved.

@JsonProperty("eventHandlerConfiguration")
private EventHandlerConfiguration eventHandlerConfiguration;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public List<float[]> embedBatch(List<String> texts) {
protected static int resolveMaxConcurrent(ElasticSearchConfiguration config) {
NaturalLanguageSearchConfiguration nlsCfg = config.getNaturalLanguageSearch();
if (nlsCfg != null) {
Integer value = nlsCfg.getMaxConcurrentEmbeddingRequests();
Integer value = nlsCfg.getMaxConcurrentRequests();
if (value != null && value > 0) {
return value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,13 @@ public OpenAIEmbeddingClient(ElasticSearchConfiguration config) {
String endpoint,
boolean isAzure) {
this(
httpClient, apiKey, modelId, dimension, endpoint, isAzure, DEFAULT_MAX_CONCURRENT_REQUESTS);
httpClient,
apiKey,
modelId,
dimension,
endpoint,
isAzure,
new NaturalLanguageSearchConfiguration().getMaxConcurrentRequests());
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Quality: Instantiating config object just to read a default value

The change replaces DEFAULT_MAX_CONCURRENT_REQUESTS (a clear constant with value 10) with new NaturalLanguageSearchConfiguration().getMaxConcurrentRequests(). This instantiates a full configuration POJO solely to retrieve a JSON-schema default value. This is less readable and less efficient than referencing the existing constant in EmbeddingClient. The parent class EmbeddingClient already has resolveMaxConcurrent() which falls back to DEFAULT_MAX_CONCURRENT_REQUESTS when the config value is null/non-positive, so the indirection through a fresh config object is unnecessary.

Suggested fix:

Replace with the existing constant:
    this(
        httpClient,
        apiKey,
        modelId,
        dimension,
        endpoint,
        isAzure,
        DEFAULT_MAX_CONCURRENT_REQUESTS);

Was this helpful? React with 👍 / 👎 | Reply gitar fix to apply this suggestion

}

OpenAIEmbeddingClient(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ public <T> CompletableFuture<HttpResponse<T>> sendAsync(
@Test
void testResolveMaxConcurrentFromConfig() {
NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
nlsCfg.setMaxConcurrentEmbeddingRequests(5);
nlsCfg.setMaxConcurrentRequests(5);
ElasticSearchConfiguration config = new ElasticSearchConfiguration();
config.setNaturalLanguageSearch(nlsCfg);

Expand Down Expand Up @@ -485,7 +485,7 @@ void testResolveMaxConcurrentDefaultWhenNullValue() {
@Test
void testResolveMaxConcurrentDefaultWhenZero() {
NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
nlsCfg.setMaxConcurrentEmbeddingRequests(0);
nlsCfg.setMaxConcurrentRequests(0);
ElasticSearchConfiguration config = new ElasticSearchConfiguration();
config.setNaturalLanguageSearch(nlsCfg);

Expand All @@ -497,7 +497,7 @@ void testResolveMaxConcurrentDefaultWhenZero() {
@Test
void testResolveMaxConcurrentDefaultWhenNegative() {
NaturalLanguageSearchConfiguration nlsCfg = new NaturalLanguageSearchConfiguration();
nlsCfg.setMaxConcurrentEmbeddingRequests(-3);
nlsCfg.setMaxConcurrentRequests(-3);
ElasticSearchConfiguration config = new ElasticSearchConfiguration();
config.setNaturalLanguageSearch(nlsCfg);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@
"type": "string",
"default": "bedrock"
},
"maxConcurrentEmbeddingRequests": {
"description": "Maximum number of concurrent embedding API requests. Controls the semaphore used to throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection limits.",
"maxConcurrentRequests": {
"description": "Maximum number of concurrent embedding and NLQ provider requests. Controls the semaphore used to throttle calls to the providers and prevent overwhelming HTTP/2 connection limits.",
"type": "integer",
"default": 10,
"minimum": 1
Expand Down Expand Up @@ -186,6 +186,25 @@
"description": "Dimension of the embedding vector",
"type": "integer",
"default": 512
},
"timeoutSeconds": {
"description": "Bedrock InvokeModel API call timeout in seconds.",
"type": "integer",
"minimum": 1,
"default": 15
},
"maxTokens": {
"description": "Maximum tokens the Bedrock model is allowed to generate.",
"type": "integer",
"minimum": 1,
"default": 256
},
"temperature": {
"description": "Sampling temperature for Bedrock requests.",
"type": "number",
"minimum": 0.0,
"maximum": 2.0,
"default": 0.0
}
},
"additionalProperties": false
Expand Down Expand Up @@ -237,6 +256,89 @@
"description": "Azure OpenAI API version. Only used with Azure OpenAI.",
"type": "string",
"default": "2024-02-01"
},
"timeoutSeconds": {
"description": "OpenAI HTTP request and connect timeout in seconds.",
"type": "integer",
"minimum": 1,
"default": 30
},
"maxTokens": {
"description": "Maximum tokens the OpenAI model is allowed to generate.",
"type": "integer",
"minimum": 1,
"default": 256
},
"temperature": {
"description": "Sampling temperature for OpenAI requests.",
"type": "number",
"minimum": 0.0,
"maximum": 2.0,
"default": 0.0
}
},
"additionalProperties": false
},
"filterExtractor": {
"description": "NLQ filter extractor cache and prompt tuning.",
"type": "object",
"javaType": "org.openmetadata.schema.service.configuration.elasticsearch.FilterExtractor",
"properties": {
"cacheMaxSize": {
"description": "Max number of entries in the NLQ filter extraction result cache.",
"type": "integer",
"minimum": 1,
"default": 1000
},
"cacheExpiryMinutes": {
"description": "Cache TTL in minutes for NLQ filter extraction results.",
"type": "integer",
"minimum": 1,
"default": 5
},
"maxSampleValues": {
"description": "Max sample values shown per filter category in the system prompt.",
"type": "integer",
"minimum": 1,
"default": 10
}
},
"additionalProperties": false
},
"hybridSearch": {
"description": "Hybrid search runtime tuning combining BM25 keyword and KNN semantic queries.",
"type": "object",
"javaType": "org.openmetadata.schema.service.configuration.elasticsearch.HybridSearch",
"properties": {
"searchPipeline": {
"description": "Name of the OpenSearch search pipeline used to normalize hybrid (BM25 + KNN) scores.",
"type": "string",
"default": "hybrid-rrf"
},
"semanticScoreThreshold": {
"description": "Minimum score threshold for the semantic (KNN) sub-query results.",
"type": "number",
"minimum": 0.0,
"maximum": 1.0,
"default": 0.55
},
"maxQueryTerms": {
"description": "Maximum number of query terms forwarded to the shard-fair keyword sub-query.",
"type": "integer",
"minimum": 1,
"default": 10
},
"fragmentSize": {
"description": "Highlight fragment size (characters) for hybrid search hits.",
"type": "integer",
"minimum": 1,
"default": 1000
},
"paginationDepth": {
"description": "Pagination depth used by the hybrid query for RRF normalization.",
"type": "integer",
"minimum": 1,
"default": 1000
}
},
"additionalProperties": false
Expand Down
Loading