diff --git a/bin/distributed-test/scripts/trigger-reindex.sh b/bin/distributed-test/scripts/trigger-reindex.sh
index 609b70665b65..fa39548ed27c 100755
--- a/bin/distributed-test/scripts/trigger-reindex.sh
+++ b/bin/distributed-test/scripts/trigger-reindex.sh
@@ -8,7 +8,6 @@ PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
 
 # Default values
 SERVER_URL="http://localhost:8585"
-RECREATE_INDEX=false
 ENTITY_TYPES=""
 BATCH_SIZE=100
 PARTITION_SIZE=10000
@@ -20,10 +19,6 @@ while [[ $# -gt 0 ]]; do
       SERVER_URL="$2"
       shift 2
       ;;
-    --recreate)
-      RECREATE_INDEX=true
-      shift
-      ;;
    --entities)
      ENTITY_TYPES="$2"
      shift 2
      ;;
@@ -41,7 +36,6 @@ while [[ $# -gt 0 ]]; do
      echo ""
      echo "Options:"
      echo "  --server URL          Target server URL (default: http://localhost:8585)"
-      echo "  --recreate            Drop and recreate indices before reindexing"
      echo "  --entities TYPES      Comma-separated entity types to reindex (default: all)"
      echo "  --batch-size NUM      Batch size for indexing (default: 100)"
      echo "  --partition-size NUM  Partition size for distributed indexing (default: 10000, range: 1000-50000)"
@@ -51,7 +45,6 @@ while [[ $# -gt 0 ]]; do
      echo "Examples:"
      echo "  $0                                  # Reindex all on server 1"
      echo "  $0 --server http://localhost:8587   # Trigger on server 2"
-      echo "  $0 --recreate                       # Drop and recreate indices"
      echo "  $0 --entities table,dashboard      # Reindex only tables and dashboards"
      echo "  $0 --partition-size 2000            # Use smaller partitions for better distribution"
      exit 0
@@ -67,7 +60,7 @@ echo "======================================"
 echo "Triggering Search Reindexing"
 echo "======================================"
 echo "Server: $SERVER_URL"
-echo "Recreate indices: $RECREATE_INDEX"
+echo "Indexing mode: staged indexes with alias promotion"
 echo "Batch size: $BATCH_SIZE"
 echo "Partition size: $PARTITION_SIZE"
 if [ -n "$ENTITY_TYPES" ]; then
@@ -96,13 +89,6 @@ fi
 echo "Authenticated successfully."
echo "" -# Build the reindex request body -if [ "$RECREATE_INDEX" == "true" ]; then - RECREATE_FLAG="true" -else - RECREATE_FLAG="false" -fi - # Build entities array if [ -n "$ENTITY_TYPES" ]; then # Convert comma-separated to JSON array @@ -113,11 +99,9 @@ fi REQUEST_BODY=$(cat < SUCCESS_STATUSES = Set.of("success", "completed"); + private static final Set TERMINAL_STATUSES = + Set.of("success", "completed", "failed", "activeerror", "stopped"); + + @BeforeAll + static void setup() { + SdkClients.adminClient(); + } + + @Test + void tableOnlyRerunPromotesNewStagedIndex(TestNamespace ns) { + assumeFalse( + TestSuiteBootstrap.isK8sEnabled(), "App trigger not compatible with K8s pipeline backend"); + + createTableForReindex(ns); + + HttpClient httpClient = SdkClients.adminClient().getHttpClient(); + waitForCurrentRunCompletion(httpClient); + + String initialTarget = readSingleTableAliasTargetIfPresent(); + Long previousRunStartTime = readLatestRunStartTime(httpClient); + triggerTableReindex(httpClient); + AppRunRecord firstRun = waitForLatestRunSuccess(httpClient, previousRunStartTime); + String firstTarget = waitForPromotedTableAlias(initialTarget); + + triggerTableReindex(httpClient); + waitForLatestRunSuccess(httpClient, firstRun.getStartTime()); + String secondTarget = waitForPromotedTableAlias(firstTarget); + + assertNotEquals(firstTarget, secondTarget, "Second reindex should promote a new staged index"); + assertPreviousTargetIsNotServing(firstTarget); + } + + private static void createTableForReindex(TestNamespace ns) { + DatabaseService service = DatabaseServiceTestFactory.createPostgres(ns); + DatabaseSchema schema = DatabaseSchemaTestFactory.createSimple(ns, service); + Table table = + TableTestFactory.createWithName(ns, schema.getFullyQualifiedName(), "promotion_table"); + + assertNotNull(table.getId(), "Test table should be created before reindex"); + } + + private static void triggerTableReindex(HttpClient httpClient) { + Map config = new HashMap<>(); + config.put("entities", List.of(TABLE_ENTITY)); + config.put("batchSize", 100); + + Awaitility.await("Trigger table-only " + APP_NAME) + .atMost(Duration.ofMinutes(2)) + .pollInterval(Duration.ofSeconds(3)) + .ignoreExceptionsMatching( + e -> e.getMessage() != null && e.getMessage().contains("already running")) + .until( + () -> { + httpClient.execute( + HttpMethod.POST, "/v1/apps/trigger/" + APP_NAME, config, Void.class); + return true; + }); + } + + private static AppRunRecord waitForLatestRunSuccess( + HttpClient httpClient, Long previousRunStartTime) { + AppRunRecord[] holder = new AppRunRecord[1]; + + Awaitility.await("Table reindex run completion") + .atMost(Duration.ofMinutes(5)) + .pollDelay(Duration.ofSeconds(2)) + .pollInterval(Duration.ofSeconds(5)) + .ignoreExceptions() + .untilAsserted( + () -> { + AppRunRecord run = readLatestRun(httpClient); + assertNotNull(run); + assertNotNull(run.getStatus()); + if (previousRunStartTime != null + && run.getStartTime() != null + && run.getStartTime() <= previousRunStartTime) { + throw new AssertionError( + "Latest run is still the pre-trigger one (startTime=" + + run.getStartTime() + + ", previous=" + + previousRunStartTime + + ")"); + } + String status = normalizedStatus(run); + assertTrue( + TERMINAL_STATUSES.contains(status), "Run not in terminal state: " + status); + holder[0] = run; + }); + + AppRunRecord run = holder[0]; + assertTrue( + SUCCESS_STATUSES.contains(normalizedStatus(run)), + () -> "Expected successful table reindex run but got: " + run); + return run; + } + + 
+  private static String waitForPromotedTableAlias(String previousTarget) {
+    String[] target = new String[1];
+
+    Awaitility.await("Table alias promotion")
+        .atMost(Duration.ofMinutes(2))
+        .pollDelay(Duration.ofSeconds(1))
+        .pollInterval(Duration.ofSeconds(2))
+        .ignoreExceptions()
+        .untilAsserted(
+            () -> {
+              String currentTarget = readSingleTableAliasTarget();
+              assertTrue(
+                  currentTarget.startsWith(TABLE_REBUILD_PREFIX),
+                  "Table alias should point at a staged rebuild index, got " + currentTarget);
+              if (previousTarget != null) {
+                assertNotEquals(
+                    previousTarget,
+                    currentTarget,
+                    "Table alias should move to a new staged index after rerun");
+              }
+              Set<String> shortAliasTargets = searchClient().getIndicesByAlias(TABLE_SHORT_ALIAS);
+              assertTrue(
+                  shortAliasTargets.contains(currentTarget),
+                  "Short table alias should include the promoted staged table index");
+              target[0] = currentTarget;
+            });
+
+    return target[0];
+  }
+
+  private static String readSingleTableAliasTargetIfPresent() {
+    Set<String> targets = searchClient().getIndicesByAlias(TABLE_CANONICAL_ALIAS);
+    if (targets.isEmpty()) {
+      return null;
+    }
+    assertEquals(1, targets.size(), "Table canonical alias should have a single target");
+    return targets.iterator().next();
+  }
+
+  private static String readSingleTableAliasTarget() {
+    String target = readSingleTableAliasTargetIfPresent();
+    assertNotNull(target, "Table canonical alias should point at a promoted index");
+    return target;
+  }
+
+  private static void assertPreviousTargetIsNotServing(String previousTarget) {
+    SearchClient client = searchClient();
+    if (!client.indexExists(previousTarget)) {
+      return;
+    }
+
+    Set<String> aliases = client.getAliases(previousTarget);
+    assertFalse(
+        aliases.contains(TABLE_CANONICAL_ALIAS),
+        "Previous staged index should no longer have the canonical table alias");
+    assertFalse(
+        aliases.contains(TABLE_SHORT_ALIAS),
+        "Previous staged index should no longer have the short table alias");
+  }
+
+  private static Long readLatestRunStartTime(HttpClient httpClient) {
+    try {
+      AppRunRecord latest = readLatestRun(httpClient);
+      return latest == null ? null : latest.getStartTime();
+    } catch (Exception ignored) {
+      return null;
+    }
+  }
+
+  private static AppRunRecord readLatestRun(HttpClient httpClient) {
+    return httpClient.execute(
+        HttpMethod.GET, "/v1/apps/name/" + APP_NAME + "/runs/latest", null, AppRunRecord.class);
+  }
+
+  private static void waitForCurrentRunCompletion(HttpClient httpClient) {
+    try {
+      Awaitility.await("Wait for in-flight " + APP_NAME)
+          .atMost(Duration.ofMinutes(5))
+          .pollInterval(Duration.ofSeconds(3))
+          .ignoreExceptions()
+          .until(
+              () -> {
+                AppRunRecord latest = readLatestRun(httpClient);
+                if (latest == null || latest.getStatus() == null) {
+                  return true;
+                }
+                String status = normalizedStatus(latest);
+                return !"running".equals(status) && !"started".equals(status);
+              });
+    } catch (org.awaitility.core.ConditionTimeoutException ignored) {
+      // The trigger retry loop handles "already running" if the current run continues.
+    }
+  }
+
+  private static String normalizedStatus(AppRunRecord run) {
+    return run.getStatus().value().toLowerCase();
+  }
+
+  private static SearchClient searchClient() {
+    return Entity.getSearchRepository().getSearchClient();
+  }
+}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java
index c7a783078d20..8710ab3f55d1 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java
@@ -89,7 +89,6 @@ import org.openmetadata.service.apps.scheduler.AppScheduler;
 import org.openmetadata.service.audit.AuditLogEventPublisher;
 import org.openmetadata.service.audit.AuditLogRepository;
-import org.openmetadata.service.cache.CacheConfig;
 import org.openmetadata.service.config.CacheConfiguration;
 import org.openmetadata.service.config.OMWebBundle;
 import org.openmetadata.service.config.OMWebConfiguration;
@@ -397,7 +396,7 @@ public void run(OpenMetadataApplicationConfig catalogConfig, Environment environ
             jdbi.onDemand(CollectionDAO.class), Entity.getSearchRepository()));
 
     // Register Distributed Job Participant for distributed search indexing
-    registerDistributedJobParticipant(environment, jdbi, catalogConfig.getCacheConfig());
+    registerDistributedJobParticipant(environment, jdbi);
     registerDistributedRdfJobParticipant(environment, jdbi);
 
     // Register Event publishers
@@ -1132,24 +1131,18 @@ private void initializeWebsockets(
     }
   }
 
-  protected void registerDistributedJobParticipant(
-      Environment environment, Jdbi jdbi, CacheConfig cacheConfig) {
+  protected void registerDistributedJobParticipant(Environment environment, Jdbi jdbi) {
     try {
       CollectionDAO collectionDAO = jdbi.onDemand(CollectionDAO.class);
       SearchRepository searchRepository = Entity.getSearchRepository();
       String serverId = ServerIdentityResolver.getInstance().getServerId();
       DistributedJobParticipant participant =
-          new DistributedJobParticipant(collectionDAO, searchRepository, serverId, cacheConfig);
+          new DistributedJobParticipant(collectionDAO, searchRepository, serverId);
       environment.lifecycle().manage(participant);
-      String notifierType =
-          (cacheConfig != null && cacheConfig.provider == CacheConfig.Provider.redis)
-              ? "Redis Pub/Sub"
-              : "database polling";
       LOG.info(
-          "Registered DistributedJobParticipant for distributed search indexing using {}",
-          notifierType);
+          "Registered DistributedJobParticipant for distributed search indexing using database polling");
     } catch (Exception e) {
       LOG.warn("Failed to register DistributedJobParticipant", e);
     }
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/AdaptiveBackoff.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/AdaptiveBackoff.java
deleted file mode 100644
index bac6039873c7..000000000000
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/AdaptiveBackoff.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package org.openmetadata.service.apps.bundles.searchIndex;
-
-/**
- * Replaces fixed-delay sleep in backpressure loops with exponential backoff. Starts at an initial
- * delay and doubles on each call up to a configurable maximum. Call {@link #reset()} when
- * backpressure clears so the next occurrence starts fresh.
- */
-public class AdaptiveBackoff {
-
-  private final long initialMs;
-  private final long maxMs;
-  private long currentMs;
-
-  public AdaptiveBackoff(long initialMs, long maxMs) {
-    if (initialMs <= 0) {
-      throw new IllegalArgumentException("initialMs must be > 0");
-    }
-    if (maxMs < initialMs) {
-      throw new IllegalArgumentException("maxMs must be >= initialMs");
-    }
-    this.initialMs = initialMs;
-    this.maxMs = maxMs;
-    this.currentMs = initialMs;
-  }
-
-  public long nextDelay() {
-    long delay = currentMs;
-    currentMs = Math.min(currentMs * 2, maxMs);
-    return delay;
-  }
-
-  public void reset() {
-    currentMs = initialMs;
-  }
-}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategy.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategy.java
index 6fd00d9d1641..83c3d4b8ee04 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategy.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategy.java
@@ -1,13 +1,7 @@
 package org.openmetadata.service.apps.bundles.searchIndex;
 
-import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
-import static org.openmetadata.service.Entity.QUERY_COST_RECORD;
-import static org.openmetadata.service.Entity.TEST_CASE_RESOLUTION_STATUS;
-import static org.openmetadata.service.Entity.TEST_CASE_RESULT;
-
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -20,7 +14,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 import lombok.extern.slf4j.Slf4j;
-import org.openmetadata.schema.analytics.ReportData;
 import org.openmetadata.schema.system.EventPublisherJob;
 import org.openmetadata.schema.system.Stats;
 import org.openmetadata.schema.system.StepStats;
@@ -39,19 +32,7 @@ import org.openmetadata.service.util.FullyQualifiedName;
 
 @Slf4j
-public class DistributedIndexingStrategy implements IndexingStrategy {
-
-  private static final Set<String> TIME_SERIES_ENTITIES =
-      Set.of(
-          ReportData.ReportDataType.ENTITY_REPORT_DATA.value(),
-          ReportData.ReportDataType.RAW_COST_ANALYSIS_REPORT_DATA.value(),
-          ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(),
-          ReportData.ReportDataType.WEB_ANALYTIC_ENTITY_VIEW_REPORT_DATA.value(),
-          ReportData.ReportDataType.AGGREGATED_COST_ANALYSIS_REPORT_DATA.value(),
-          TEST_CASE_RESOLUTION_STATUS,
-          TEST_CASE_RESULT,
-          QUERY_COST_RECORD);
-
+public class DistributedIndexingStrategy {
   private static final long MONITOR_POLL_INTERVAL_MS = 2000;
 
   private final CollectionDAO collectionDAO;
@@ -60,6 +41,7 @@ public class DistributedIndexingStrategy implements IndexingStrategy {
   private final UUID appId;
   private final Long appStartTime;
   private final String createdBy;
+  private final DistributedReindexStatsMapper statsMapper;
 
   private final CompositeProgressListener listeners = new CompositeProgressListener();
   private final AtomicBoolean stopped = new AtomicBoolean(false);
@@ -82,14 +64,13 @@ public DistributedIndexingStrategy(
     this.appId = appId;
     this.appStartTime = appStartTime;
     this.createdBy = createdBy;
+    this.statsMapper = new DistributedReindexStatsMapper(collectionDAO);
   }
 
-  @Override
   public void addListener(ReindexingProgressListener listener) {
     listeners.addListener(listener);
   }
 
-  @Override
   public ExecutionResult execute(ReindexingConfiguration config, ReindexingJobContext context) {
     long startTime = System.currentTimeMillis();
     try {
@@ -113,9 +94,10 @@ private ExecutionResult doExecute(
       ReindexingConfiguration config, ReindexingJobContext context, long startTime) {
     this.config = config;
 
-    LOG.info("Starting distributed reindexing for entities: {}", config.entities());
+    Set<String> entityTypes = SearchIndexEntityTypes.normalizeEntityTypes(config.entities());
+    LOG.info("Starting distributed reindexing for entities: {}", entityTypes);
 
-    Stats stats = initializeTotalRecords(config.entities());
+    Stats stats = initializeTotalRecords(entityTypes);
     currentStats.set(stats);
 
     int partitionSize = jobData.getPartitionSize() != null ? jobData.getPartitionSize() : 10000;
@@ -125,7 +107,7 @@ private ExecutionResult doExecute(
     distributedExecutor.addListener(listeners);
 
     SearchIndexJob distributedJob =
-        distributedExecutor.createJob(config.entities(), jobData, createdBy, config);
+        distributedExecutor.createJob(entityTypes, jobData, createdBy, config);
 
     LOG.info(
         "Created distributed job {} with {} total records",
@@ -136,21 +118,19 @@ private ExecutionResult doExecute(
         searchRepository.createBulkSink(
             config.batchSize(), config.maxConcurrentRequests(), config.payloadSize());
 
-    RecreateIndexHandler recreateIndexHandler = searchRepository.createReindexHandler();
-    if (recreateIndexHandler instanceof DefaultRecreateHandler defaultHandler) {
+    RecreateIndexHandler stagedIndexHandler = searchRepository.createReindexHandler();
+    if (stagedIndexHandler instanceof DefaultRecreateHandler defaultHandler) {
       defaultHandler.withJobData(jobData);
     }
 
-    ReindexContext recreateContext = null;
-
-    if (config.recreateIndex()) {
-      recreateContext = recreateIndexHandler.reCreateIndexes(config.entities());
-      if (recreateContext != null && !recreateContext.isEmpty()) {
-        distributedExecutor.updateStagedIndexMapping(recreateContext.getStagedIndexMapping());
-      }
+    ReindexContext stagedIndexContext = stagedIndexHandler.reCreateIndexes(entityTypes);
+    if (stagedIndexContext == null || stagedIndexContext.isEmpty()) {
+      throw new IllegalStateException(
+          "Staged index preparation did not produce any target indexes");
     }
+    distributedExecutor.updateStagedIndexMapping(stagedIndexContext.getStagedIndexMapping());
 
     distributedExecutor.setAppContext(appId, appStartTime);
-    distributedExecutor.execute(searchIndexSink, recreateContext, config.recreateIndex(), config);
+    distributedExecutor.execute(searchIndexSink, stagedIndexContext, config);
 
     monitorDistributedJob(distributedJob.getId());
 
@@ -181,8 +161,8 @@ private ExecutionResult doExecute(
     boolean success =
         finalizeAllEntityReindex(
-            recreateIndexHandler,
-            recreateContext,
+            stagedIndexHandler,
+            stagedIndexContext,
             !stopped.get() && !hasIncompleteProcessing(stats));
 
     ExecutionResult.Status resultStatus = determineStatus(stats);
@@ -290,166 +270,7 @@ private void monitorDistributedJob(UUID jobId) {
 
   private void updateStatsFromDistributedJob(
       Stats stats, SearchIndexJob distributedJob, StepStats actualSinkStats) {
-    if (stats == null) {
-      return;
-    }
-
-    CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats serverStatsAggr = null;
-    try {
-      serverStatsAggr =
-          Entity.getCollectionDAO()
-              .searchIndexServerStatsDAO()
-              .getAggregatedStats(distributedJob.getId().toString());
-    } catch (Exception e) {
-      LOG.debug("Could not fetch aggregated server stats for job {}", distributedJob.getId(), e);
-    }
-
-    long successRecords;
-    long failedRecords;
-    String statsSource;
-
-    if (serverStatsAggr != null && serverStatsAggr.sinkSuccess() > 0) {
-      successRecords = serverStatsAggr.sinkSuccess();
-      failedRecords =
-          serverStatsAggr.readerFailed()
-              + serverStatsAggr.sinkFailed()
-              + serverStatsAggr.processFailed();
-      statsSource = "serverStatsTable";
-    } else if (actualSinkStats != null) {
-      successRecords = actualSinkStats.getSuccessRecords();
-      failedRecords = actualSinkStats.getFailedRecords();
-      statsSource = "localSink";
-    } else {
-      successRecords = distributedJob.getSuccessRecords();
-      failedRecords = distributedJob.getFailedRecords();
-      statsSource = "partition-based";
-    }
-
-    LOG.debug(
-        "Stats source: {}, success={}, failed={}", statsSource, successRecords, failedRecords);
-
-    StepStats jobStats = stats.getJobStats();
-    if (jobStats != null) {
-      jobStats.setSuccessRecords(saturatedToInt(successRecords));
-      jobStats.setFailedRecords(saturatedToInt(failedRecords));
-    }
-
-    StepStats readerStats = stats.getReaderStats();
-    if (readerStats != null) {
-      readerStats.setTotalRecords(saturatedToInt(distributedJob.getTotalRecords()));
-      long readerFailed = serverStatsAggr != null ? serverStatsAggr.readerFailed() : 0;
-      long readerWarnings = serverStatsAggr != null ? serverStatsAggr.readerWarnings() : 0;
-      long readerSuccess =
-          serverStatsAggr != null
-              ? serverStatsAggr.readerSuccess()
-              : distributedJob.getTotalRecords() - readerFailed - readerWarnings;
-      readerStats.setSuccessRecords(saturatedToInt(readerSuccess));
-      readerStats.setFailedRecords(saturatedToInt(readerFailed));
-      readerStats.setWarningRecords(saturatedToInt(readerWarnings));
-      // Carry stage timing forward into the final ExecutionResult stats. Without this the
-      // periodic aggregator's totalTimeMs (visible while running) gets clobbered to 0 here,
-      // and OmAppJobListener picks up the zero on the SUCCESS transition.
-      if (serverStatsAggr != null) {
-        readerStats.setTotalTimeMs(serverStatsAggr.readerTimeMs());
-      }
-    }
-
-    StepStats processStats = stats.getProcessStats();
-    if (processStats != null && serverStatsAggr != null) {
-      long processSuccess = serverStatsAggr.processSuccess();
-      long processFailed = serverStatsAggr.processFailed();
-      processStats.setTotalRecords(saturatedToInt(processSuccess + processFailed));
-      processStats.setSuccessRecords(saturatedToInt(processSuccess));
-      processStats.setFailedRecords(saturatedToInt(processFailed));
-      processStats.setTotalTimeMs(serverStatsAggr.processTimeMs());
-    }
-
-    StepStats sinkStats = stats.getSinkStats();
-    if (sinkStats != null) {
-      if (serverStatsAggr != null) {
-        long sinkSuccess = serverStatsAggr.sinkSuccess();
-        long sinkFailed = serverStatsAggr.sinkFailed();
-        long actualSinkTotal = sinkSuccess + sinkFailed;
-        sinkStats.setTotalRecords(saturatedToInt(actualSinkTotal));
-        sinkStats.setSuccessRecords(saturatedToInt(sinkSuccess));
-        sinkStats.setFailedRecords(saturatedToInt(sinkFailed));
-        sinkStats.setTotalTimeMs(serverStatsAggr.sinkTimeMs());
-      } else {
-        long sinkTotal = distributedJob.getTotalRecords();
-        sinkStats.setTotalRecords(saturatedToInt(sinkTotal));
-        sinkStats.setSuccessRecords(saturatedToInt(successRecords));
-        sinkStats.setFailedRecords(saturatedToInt(failedRecords));
-      }
-    }
-
-    StepStats vectorStats = stats.getVectorStats();
-    if (vectorStats != null && serverStatsAggr != null) {
-      long vectorSuccess = serverStatsAggr.vectorSuccess();
-      long vectorFailed = serverStatsAggr.vectorFailed();
-      vectorStats.setTotalRecords(saturatedToInt(vectorSuccess + vectorFailed));
-      vectorStats.setSuccessRecords(saturatedToInt(vectorSuccess));
-      vectorStats.setFailedRecords(saturatedToInt(vectorFailed));
-      vectorStats.setTotalTimeMs(serverStatsAggr.vectorTimeMs());
-    }
-
-    if (distributedJob.getEntityStats() != null && stats.getEntityStats() != null) {
-      for (Map.Entry<String, SearchIndexJob.EntityTypeStats> entry :
-          distributedJob.getEntityStats().entrySet()) {
-        StepStats entityStats =
-            stats.getEntityStats().getAdditionalProperties().get(entry.getKey());
-        if (entityStats != null) {
-          entityStats.setSuccessRecords(saturatedToInt(entry.getValue().getSuccessRecords()));
-          entityStats.setFailedRecords(saturatedToInt(entry.getValue().getFailedRecords()));
-          // Surface all four stage timings on the entity-level StepStats so the UI per-entity
-          // table can show Reader / Process / Sink / Vector avg latencies side-by-side.
-          entityStats.setReaderTimeMs(entry.getValue().getReaderTimeMs());
-          entityStats.setProcessTimeMs(entry.getValue().getProcessTimeMs());
-          entityStats.setSinkTimeMs(entry.getValue().getSinkTimeMs());
-          entityStats.setVectorTimeMs(entry.getValue().getVectorTimeMs());
-        }
-      }
-    }
-
-    updateColumnStatsFromSink(stats);
-
-    StatsReconciler.reconcile(stats);
-  }
-
-  private void updateColumnStatsFromSink(Stats jobDataStats) {
-    if (searchIndexSink == null || jobDataStats == null || jobDataStats.getEntityStats() == null) {
-      return;
-    }
-    StepStats columnStats = searchIndexSink.getColumnStats();
-    if (columnStats != null) {
-      StepStats existingColumnStats =
-          jobDataStats.getEntityStats().getAdditionalProperties().get(Entity.TABLE_COLUMN);
-      if (existingColumnStats != null) {
-        existingColumnStats.setTotalRecords(columnStats.getTotalRecords());
-        existingColumnStats.setSuccessRecords(columnStats.getSuccessRecords());
-        existingColumnStats.setFailedRecords(columnStats.getFailedRecords());
-      }
-    }
-  }
-
-  private void promoteColumnIndex(
-      RecreateIndexHandler recreateIndexHandler,
-      ReindexContext recreateContext,
-      boolean tableSuccess) {
-    Optional<String> columnStagedIndex = recreateContext.getStagedIndex(Entity.TABLE_COLUMN);
-    if (columnStagedIndex.isEmpty()) {
-      return;
-    }
-    try {
-      finalizeEntityReindex(
-          recreateIndexHandler, recreateContext, Entity.TABLE_COLUMN, tableSuccess);
-      LOG.info("Promoted column index (tableSuccess={})", tableSuccess);
-    } catch (Exception ex) {
-      LOG.error("Failed to promote column index", ex);
-    }
-  }
-
-  private static int saturatedToInt(long value) {
-    return (int) Math.min(value, Integer.MAX_VALUE);
+    statsMapper.updateStats(stats, distributedJob, actualSinkStats, getColumnStats());
   }
 
   private ExecutionResult.Status determineStatus(Stats stats) {
@@ -474,121 +295,83 @@ private boolean hasIncompleteProcessing(Stats stats) {
   }
 
   private boolean finalizeAllEntityReindex(
-      RecreateIndexHandler recreateIndexHandler,
-      ReindexContext recreateContext,
+      RecreateIndexHandler indexPromotionHandler,
+      ReindexContext stagedIndexContext,
       boolean finalSuccess) {
-    if (recreateIndexHandler == null || recreateContext == null) {
+    if (indexPromotionHandler == null || stagedIndexContext == null) {
       return finalSuccess;
     }
 
-    Set<String> promotedEntities = Collections.emptySet();
+    return new DistributedReindexFinalizer(indexPromotionHandler, stagedIndexContext)
+        .finalizeRemainingEntities(getPromotedEntities(), getFinalEntityStats(), finalSuccess);
+  }
+
+  private StepStats getColumnStats() {
+    return searchIndexSink != null ? searchIndexSink.getColumnStats() : null;
+  }
+
+  private Set<String> getPromotedEntities() {
     if (distributedExecutor != null && distributedExecutor.getEntityTracker() != null) {
-      promotedEntities = distributedExecutor.getEntityTracker().getPromotedEntities();
+      return distributedExecutor.getEntityTracker().getPromotedEntities();
     }
+    return Collections.emptySet();
+  }
 
-    // Get per-entity stats for determining per-entity success
-    Map<String, SearchIndexJob.EntityTypeStats> entityStatsMap = Collections.emptyMap();
-    if (distributedExecutor != null) {
-      SearchIndexJob finalJob = distributedExecutor.getJobWithFreshStats();
-      if (finalJob != null && finalJob.getEntityStats() != null) {
-        entityStatsMap = finalJob.getEntityStats();
-      }
+  private Map<String, SearchIndexJob.EntityTypeStats> getFinalEntityStats() {
+    Map<String, SearchIndexJob.EntityTypeStats> finalEntityStats = new HashMap<>();
+    if (distributedExecutor == null) {
+      mergeInitializedEntityStats(finalEntityStats);
+      return finalEntityStats;
     }
-
-    LOG.debug(
-        "Finalization: finalSuccess={}, promotedEntities={}, allEntities={}",
-        finalSuccess,
-        promotedEntities,
-        recreateContext.getEntities());
-
-    Set<String> entitiesToFinalize = new HashSet<>(recreateContext.getEntities());
-    entitiesToFinalize.removeAll(promotedEntities);
-
-    if (promotedEntities.contains(Entity.TABLE)
-        && !promotedEntities.contains(Entity.TABLE_COLUMN)) {
-      boolean tableSuccess = computeEntitySuccess(Entity.TABLE, entityStatsMap);
-      promoteColumnIndex(recreateIndexHandler, recreateContext, tableSuccess);
-      entitiesToFinalize.remove(Entity.TABLE_COLUMN);
+    SearchIndexJob finalJob = distributedExecutor.getJobWithFreshStats();
+    if (finalJob != null && finalJob.getEntityStats() != null) {
+      finalEntityStats.putAll(finalJob.getEntityStats());
     }
+    mergeInitializedEntityStats(finalEntityStats);
+    return finalEntityStats;
+  }
 
-    LOG.debug("Entities to finalize={}, already promoted={}", entitiesToFinalize, promotedEntities);
-
-    try {
-      if (!entitiesToFinalize.isEmpty()) {
-        LOG.info(
-            "Finalizing {} remaining entities (already promoted: {})",
-            entitiesToFinalize.size(),
-            promotedEntities.size());
-
-        for (String entityType : entitiesToFinalize) {
-          try {
-            boolean entitySuccess = computeEntitySuccess(entityType, entityStatsMap);
-            LOG.debug(
-                "Finalizing entity '{}' with perEntitySuccess={} (globalSuccess={})",
-                entityType,
-                entitySuccess,
-                finalSuccess);
-            finalizeEntityReindex(recreateIndexHandler, recreateContext, entityType, entitySuccess);
-            if (Entity.TABLE.equals(entityType)) {
-              promoteColumnIndex(recreateIndexHandler, recreateContext, entitySuccess);
-            }
-          } catch (Exception ex) {
-            LOG.error("Failed to finalize reindex for entity: {}", entityType, ex);
-          }
-        }
-      }
-    } catch (Exception e) {
-      LOG.error("Error during entity finalization", e);
+  private void mergeInitializedEntityStats(
+      Map<String, SearchIndexJob.EntityTypeStats> finalEntityStats) {
+    Stats stats = currentStats.get();
+    if (stats == null
+        || stats.getEntityStats() == null
+        || stats.getEntityStats().getAdditionalProperties() == null) {
+      return;
    }
-    return finalSuccess;
+    stats
+        .getEntityStats()
+        .getAdditionalProperties()
+        .forEach(
+            (entityType, stepStats) ->
+                finalEntityStats.computeIfAbsent(
+                    entityType, key -> toEntityTypeStats(key, stepStats)));
   }
 
-  private boolean computeEntitySuccess(
-      String entityType, Map<String, SearchIndexJob.EntityTypeStats> entityStatsMap) {
-    if (entityStatsMap == null || entityStatsMap.isEmpty()) {
-      return false;
-    }
-    SearchIndexJob.EntityTypeStats stats = entityStatsMap.get(entityType);
-    if (stats == null) {
-      // Entity not in stats means 0 records — nothing to index = success
-      return true;
-    }
-    return stats.getFailedRecords() == 0
-        && stats.getSuccessRecords() + stats.getFailedRecords() >= stats.getTotalRecords();
+  private SearchIndexJob.EntityTypeStats toEntityTypeStats(String entityType, StepStats stepStats) {
+    long success = stepStats != null ? statValue(stepStats.getSuccessRecords()) : 0L;
+    long failed = stepStats != null ? statValue(stepStats.getFailedRecords()) : 0L;
+    return SearchIndexJob.EntityTypeStats.builder()
+        .entityType(entityType)
+        .totalRecords(stepStats != null ? statValue(stepStats.getTotalRecords()) : 0L)
+        .processedRecords(success + failed)
+        .successRecords(success)
+        .failedRecords(failed)
+        .totalPartitions(0)
+        .completedPartitions(0)
+        .failedPartitions(0)
+        .build();
   }
 
-  private void finalizeEntityReindex(
-      RecreateIndexHandler recreateIndexHandler,
-      ReindexContext recreateContext,
-      String entityType,
-      boolean success) {
-    try {
-      var entityReindexContext =
-          org.openmetadata.service.search.EntityReindexContext.builder()
-              .entityType(entityType)
-              .originalIndex(recreateContext.getOriginalIndex(entityType).orElse(null))
-              .canonicalIndex(recreateContext.getCanonicalIndex(entityType).orElse(null))
-              .activeIndex(recreateContext.getOriginalIndex(entityType).orElse(null))
-              .stagedIndex(recreateContext.getStagedIndex(entityType).orElse(null))
-              .canonicalAliases(recreateContext.getCanonicalAlias(entityType).orElse(null))
-              .existingAliases(recreateContext.getExistingAliases(entityType))
-              .parentAliases(
-                  new HashSet<>(listOrEmpty(recreateContext.getParentAliases(entityType))))
-              .build();
-
-      recreateIndexHandler.finalizeReindex(entityReindexContext, success);
-    } catch (Exception ex) {
-      LOG.error("Failed to finalize index recreation flow for {}", entityType, ex);
-    }
+  private long statValue(Number value) {
+    return value != null ? value.longValue() : 0L;
   }
 
-  @Override
   public Optional<Stats> getStats() {
     return Optional.ofNullable(currentStats.get());
   }
 
-  @Override
   public void stop() {
     if (stopped.compareAndSet(false, true)) {
       LOG.info("Stopping distributed indexing strategy");
@@ -606,7 +389,6 @@ public void stop() {
     }
   }
 
-  @Override
   public boolean isStopped() {
     return stopped.get();
   }
@@ -667,9 +449,9 @@ Stats initializeTotalRecords(Set<String> entities) {
   private int getEntityTotal(String entityType) {
     try {
-      String correctedType = "queryCostResult".equals(entityType) ? QUERY_COST_RECORD : entityType;
+      String correctedType = SearchIndexEntityTypes.normalizeEntityType(entityType);
 
-      if (!TIME_SERIES_ENTITIES.contains(correctedType)) {
+      if (!SearchIndexEntityTypes.isTimeSeriesEntity(correctedType)) {
         return Entity.getEntityRepository(correctedType)
             .getDao()
            .listCount(new ListFilter(Include.ALL));
@@ -677,7 +459,7 @@ private int getEntityTotal(String entityType) {
       ListFilter listFilter = new ListFilter(null);
       EntityTimeSeriesRepository repository;
 
-      if (isDataInsightIndex(correctedType)) {
+      if (SearchIndexEntityTypes.isDataInsightEntity(correctedType)) {
         listFilter.addQueryParam("entityFQNHash", FullyQualifiedName.buildHash(correctedType));
         repository = Entity.getEntityTimeSeriesRepository(Entity.ENTITY_REPORT_DATA);
       } else {
@@ -699,10 +481,6 @@ private int getEntityTotal(String entityType) {
     }
   }
 
-  private boolean isDataInsightIndex(String entityType) {
-    return entityType.endsWith("ReportData");
-  }
-
   DistributedSearchIndexExecutor getDistributedExecutor() {
     return distributedExecutor;
   }
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexFinalizer.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexFinalizer.java
new file mode 100644
index 000000000000..d4aaadd06527
--- /dev/null
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexFinalizer.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2024 Collate
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.openmetadata.service.apps.bundles.searchIndex;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import lombok.extern.slf4j.Slf4j;
+import org.openmetadata.service.Entity;
+import org.openmetadata.service.apps.bundles.searchIndex.distributed.SearchIndexJob;
+import org.openmetadata.service.search.RecreateIndexHandler;
+import org.openmetadata.service.search.ReindexContext;
+
+@Slf4j
+class DistributedReindexFinalizer {
+  private final RecreateIndexHandler indexPromotionHandler;
+  private final ReindexContext stagedIndexContext;
+
+  DistributedReindexFinalizer(
+      RecreateIndexHandler indexPromotionHandler, ReindexContext stagedIndexContext) {
+    this.indexPromotionHandler = indexPromotionHandler;
+    this.stagedIndexContext = stagedIndexContext;
+  }
+
+  boolean finalizeRemainingEntities(
+      Set<String> promotedEntities,
+      Map<String, SearchIndexJob.EntityTypeStats> entityStats,
+      boolean finalSuccess) {
+    LOG.debug(
+        "Finalization: finalSuccess={}, promotedEntities={}, allEntities={}",
+        finalSuccess,
+        promotedEntities,
+        stagedIndexContext.getEntities());
+
+    Set<String> entitiesToFinalize = new HashSet<>(stagedIndexContext.getEntities());
+    entitiesToFinalize.removeAll(promotedEntities);
+    Set<String> finalizedEntities = new HashSet<>(promotedEntities);
+
+    routeColumnFinalizationThroughTable(entitiesToFinalize);
+    promoteColumnIndexIfTableWasPromoted(
+        promotedEntities, entityStats, entitiesToFinalize, finalizedEntities);
+    finalizeEntities(entitiesToFinalize, entityStats, finalSuccess, finalizedEntities);
+
+    return finalSuccess;
+  }
+
+  private void routeColumnFinalizationThroughTable(Set<String> entitiesToFinalize) {
+    if (entitiesToFinalize.contains(Entity.TABLE)) {
+      entitiesToFinalize.remove(Entity.TABLE_COLUMN);
+    }
+  }
+
+  private void promoteColumnIndexIfTableWasPromoted(
+      Set<String> promotedEntities,
+      Map<String, SearchIndexJob.EntityTypeStats> entityStats,
+      Set<String> entitiesToFinalize,
+      Set<String> finalizedEntities) {
+    if (promotedEntities.contains(Entity.TABLE)
+        && !promotedEntities.contains(Entity.TABLE_COLUMN)) {
+      boolean tableSuccess = computeEntitySuccess(Entity.TABLE, entityStats);
+      promoteColumnIndex(tableSuccess, finalizedEntities);
+      entitiesToFinalize.remove(Entity.TABLE_COLUMN);
+    }
+  }
+
+  private void finalizeEntities(
+      Set<String> entitiesToFinalize,
+      Map<String, SearchIndexJob.EntityTypeStats> entityStats,
+      boolean finalSuccess,
+      Set<String> finalizedEntities) {
+    LOG.debug("Entities to finalize={}", entitiesToFinalize);
+    if (entitiesToFinalize.isEmpty()) {
+      return;
+    }
+
+    LOG.info("Finalizing {} remaining entities", entitiesToFinalize.size());
+    for (String entityType : entitiesToFinalize) {
+      if (!finalizedEntities.add(entityType)) {
+        LOG.debug("Skipping already finalized entity '{}'", entityType);
+        continue;
+      }
+      try {
+        boolean entitySuccess = computeEntitySuccess(entityType, entityStats);
+        LOG.debug(
+            "Finalizing entity '{}' with perEntitySuccess={} (globalSuccess={})",
+            entityType,
+            entitySuccess,
+            finalSuccess);
+        finalizeEntityReindex(entityType, entitySuccess);
+        if (Entity.TABLE.equals(entityType)) {
+          promoteColumnIndex(entitySuccess, finalizedEntities);
+        }
+      } catch (Exception ex) {
+        LOG.error("Failed to finalize reindex for entity: {}", entityType, ex);
+      }
+    }
+  }
+
+  private void promoteColumnIndex(boolean tableSuccess, Set<String> finalizedEntities) {
+    if (stagedIndexContext.getStagedIndex(Entity.TABLE_COLUMN).isEmpty()) {
+      return;
+    }
+    if (!finalizedEntities.add(Entity.TABLE_COLUMN)) {
+      LOG.debug("Skipping already finalized column index");
+      return;
+    }
+    try {
+      finalizeEntityReindex(Entity.TABLE_COLUMN, tableSuccess);
+      LOG.info("Promoted column index (tableSuccess={})", tableSuccess);
+    } catch (Exception ex) {
+      LOG.error("Failed to promote column index", ex);
+    }
+  }
+
+  private boolean computeEntitySuccess(
+      String entityType, Map<String, SearchIndexJob.EntityTypeStats> entityStats) {
+    if (entityStats == null || entityStats.isEmpty()) {
+      return false;
+    }
+    SearchIndexJob.EntityTypeStats stats = entityStats.get(entityType);
+    if (stats == null) {
+      return false;
+    }
+    return stats.getFailedRecords() == 0
+        && stats.getSuccessRecords() + stats.getFailedRecords() >= stats.getTotalRecords();
+  }
+
+  private void finalizeEntityReindex(String entityType, boolean success) {
+    indexPromotionHandler.finalizeReindex(
+        EntityReindexContextMapper.fromStagedContext(stagedIndexContext, entityType), success);
+  }
+}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexStatsMapper.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexStatsMapper.java
new file mode 100644
index 000000000000..6d29711d08af
--- /dev/null
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexStatsMapper.java
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2024 Collate
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.openmetadata.service.apps.bundles.searchIndex;
+
+import java.util.Map;
+import lombok.extern.slf4j.Slf4j;
+import org.openmetadata.schema.system.Stats;
+import org.openmetadata.schema.system.StepStats;
+import org.openmetadata.service.Entity;
+import org.openmetadata.service.apps.bundles.searchIndex.distributed.SearchIndexJob;
+import org.openmetadata.service.jdbi3.CollectionDAO;
+
+@Slf4j
+class DistributedReindexStatsMapper {
+  private final CollectionDAO collectionDAO;
+
+  DistributedReindexStatsMapper(CollectionDAO collectionDAO) {
+    this.collectionDAO = collectionDAO;
+  }
+
+  void updateStats(
+      Stats stats,
+      SearchIndexJob distributedJob,
+      StepStats actualSinkStats,
+      StepStats columnStats) {
+    if (stats == null) {
+      return;
+    }
+
+    CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats aggregatedStats =
+        getAggregatedServerStats(distributedJob);
+    StatsSource source = resolveStatsSource(distributedJob, aggregatedStats, actualSinkStats);
+
+    LOG.debug(
+        "Stats source: {}, success={}, failed={}",
+        source.name(),
+        source.successRecords(),
+        source.failedRecords());
+
+    updateJobStats(stats, source);
+    updateReaderStats(stats, distributedJob, aggregatedStats);
+    updateProcessStats(stats, aggregatedStats);
+    updateSinkStats(stats, distributedJob, aggregatedStats, source);
+    updateVectorStats(stats, aggregatedStats);
+    updateEntityStats(stats, distributedJob);
+    updateColumnStats(stats, columnStats);
+
+    StatsReconciler.reconcile(stats);
+  }
+
+  private CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats getAggregatedServerStats(
+      SearchIndexJob distributedJob) {
+    try {
+      return collectionDAO
+          .searchIndexServerStatsDAO()
+          .getAggregatedStats(distributedJob.getId().toString());
+    } catch (Exception e) {
+      LOG.debug("Could not fetch aggregated server stats for job {}", distributedJob.getId(), e);
+      return null;
+    }
+  }
+
+  private StatsSource resolveStatsSource(
+      SearchIndexJob distributedJob,
+      CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats aggregatedStats,
+      StepStats actualSinkStats) {
+    if (hasAggregatedStageRecords(aggregatedStats)) {
+      return new StatsSource(
+          "serverStatsTable",
+          aggregatedStats.sinkSuccess(),
+          aggregatedStats.readerFailed()
+              + aggregatedStats.sinkFailed()
+              + aggregatedStats.processFailed());
+    }
+    if (actualSinkStats != null) {
+      return new StatsSource(
+          "localSink", actualSinkStats.getSuccessRecords(), actualSinkStats.getFailedRecords());
+    }
+    return new StatsSource(
+        "partition-based", distributedJob.getSuccessRecords(), distributedJob.getFailedRecords());
+  }
+
+  private boolean hasAggregatedStageRecords(
+      CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats aggregatedStats) {
+    return aggregatedStats != null
+        && (aggregatedStats.readerSuccess() > 0
+            || aggregatedStats.readerFailed() > 0
+            || aggregatedStats.readerWarnings() > 0
+            || aggregatedStats.processSuccess() > 0
+            || aggregatedStats.processFailed() > 0
+            || aggregatedStats.sinkSuccess() > 0
+            || aggregatedStats.sinkFailed() > 0
+            || aggregatedStats.vectorSuccess() > 0
+            || aggregatedStats.vectorFailed() > 0);
+  }
+
+  private void updateJobStats(Stats stats, StatsSource source) {
+    StepStats jobStats = stats.getJobStats();
+    if (jobStats != null) {
+      jobStats.setSuccessRecords(saturatedToInt(source.successRecords()));
+      jobStats.setFailedRecords(saturatedToInt(source.failedRecords()));
+    }
+  }
+
+  private void updateReaderStats(
+      Stats stats,
+      SearchIndexJob distributedJob,
+      CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats aggregatedStats) {
+    StepStats readerStats = stats.getReaderStats();
+    if (readerStats == null) {
+      return;
+    }
+
+    readerStats.setTotalRecords(saturatedToInt(distributedJob.getTotalRecords()));
+    long readerFailed = aggregatedStats != null ? aggregatedStats.readerFailed() : 0;
+    long readerWarnings = aggregatedStats != null ? aggregatedStats.readerWarnings() : 0;
+    long readerSuccess =
+        aggregatedStats != null
+            ? aggregatedStats.readerSuccess()
+            : distributedJob.getTotalRecords() - readerFailed - readerWarnings;
+    readerStats.setSuccessRecords(saturatedToInt(readerSuccess));
+    readerStats.setFailedRecords(saturatedToInt(readerFailed));
+    readerStats.setWarningRecords(saturatedToInt(readerWarnings));
+    if (aggregatedStats != null) {
+      readerStats.setTotalTimeMs(aggregatedStats.readerTimeMs());
+    }
+  }
+
+  private void updateProcessStats(
+      Stats stats, CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats aggregatedStats) {
+    StepStats processStats = stats.getProcessStats();
+    if (processStats == null || aggregatedStats == null) {
+      return;
+    }
+
+    long processSuccess = aggregatedStats.processSuccess();
+    long processFailed = aggregatedStats.processFailed();
+    processStats.setTotalRecords(saturatedToInt(processSuccess + processFailed));
+    processStats.setSuccessRecords(saturatedToInt(processSuccess));
+    processStats.setFailedRecords(saturatedToInt(processFailed));
+    processStats.setTotalTimeMs(aggregatedStats.processTimeMs());
+  }
+
+  private void updateSinkStats(
+      Stats stats,
+      SearchIndexJob distributedJob,
+      CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats aggregatedStats,
+      StatsSource source) {
+    StepStats sinkStats = stats.getSinkStats();
+    if (sinkStats == null) {
+      return;
+    }
+
+    if (aggregatedStats != null) {
+      long sinkSuccess = aggregatedStats.sinkSuccess();
+      long sinkFailed = aggregatedStats.sinkFailed();
+      sinkStats.setTotalRecords(saturatedToInt(sinkSuccess + sinkFailed));
+      sinkStats.setSuccessRecords(saturatedToInt(sinkSuccess));
+      sinkStats.setFailedRecords(saturatedToInt(sinkFailed));
+      sinkStats.setTotalTimeMs(aggregatedStats.sinkTimeMs());
+      return;
+    }
+
+    sinkStats.setTotalRecords(saturatedToInt(distributedJob.getTotalRecords()));
+    sinkStats.setSuccessRecords(saturatedToInt(source.successRecords()));
+    sinkStats.setFailedRecords(saturatedToInt(source.failedRecords()));
+  }
+
+  private void updateVectorStats(
+      Stats stats, CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats aggregatedStats) {
+    StepStats vectorStats = stats.getVectorStats();
+    if (vectorStats == null || aggregatedStats == null) {
+      return;
+    }
+
+    long vectorSuccess = aggregatedStats.vectorSuccess();
+    long vectorFailed = aggregatedStats.vectorFailed();
+    vectorStats.setTotalRecords(saturatedToInt(vectorSuccess + vectorFailed));
+    vectorStats.setSuccessRecords(saturatedToInt(vectorSuccess));
+    vectorStats.setFailedRecords(saturatedToInt(vectorFailed));
+    vectorStats.setTotalTimeMs(aggregatedStats.vectorTimeMs());
+  }
+
+  private void updateEntityStats(Stats stats, SearchIndexJob distributedJob) {
+    if (distributedJob.getEntityStats() == null || stats.getEntityStats() == null) {
+      return;
+    }
+
+    for (Map.Entry<String, SearchIndexJob.EntityTypeStats> entry :
+        distributedJob.getEntityStats().entrySet()) {
+      StepStats entityStats = stats.getEntityStats().getAdditionalProperties().get(entry.getKey());
+      if (entityStats != null) {
+        SearchIndexJob.EntityTypeStats distributedEntityStats = entry.getValue();
+        entityStats.setSuccessRecords(saturatedToInt(distributedEntityStats.getSuccessRecords()));
+        entityStats.setFailedRecords(saturatedToInt(distributedEntityStats.getFailedRecords()));
+        entityStats.setReaderTimeMs(distributedEntityStats.getReaderTimeMs());
+        entityStats.setProcessTimeMs(distributedEntityStats.getProcessTimeMs());
+        entityStats.setSinkTimeMs(distributedEntityStats.getSinkTimeMs());
+        entityStats.setVectorTimeMs(distributedEntityStats.getVectorTimeMs());
+      }
+    }
+  }
+
+  private void updateColumnStats(Stats stats, StepStats columnStats) {
+    if (columnStats == null || stats.getEntityStats() == null) {
+      return;
+    }
+
+    StepStats existingColumnStats =
+        stats.getEntityStats().getAdditionalProperties().get(Entity.TABLE_COLUMN);
+    if (existingColumnStats != null) {
+      existingColumnStats.setTotalRecords(columnStats.getTotalRecords());
+      existingColumnStats.setSuccessRecords(columnStats.getSuccessRecords());
+      existingColumnStats.setFailedRecords(columnStats.getFailedRecords());
+    }
+  }
+
+  private static int saturatedToInt(long value) {
+    return (int) Math.min(value, Integer.MAX_VALUE);
+  }
+
+  private record StatsSource(String name, long successRecords, long failedRecords) {}
+}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityBatchSizeEstimator.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityBatchSizeEstimator.java
deleted file mode 100644
index 512d990ee2a7..000000000000
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityBatchSizeEstimator.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package org.openmetadata.service.apps.bundles.searchIndex;
-
-import java.util.Set;
-
-/**
- * Per-entity-type batch sizing based on typical document size. Large entity types (tables,
- * dashboards, etc.) produce bigger search documents, so we use smaller batches. Small entity types
- * (users, tags, etc.) produce tiny documents, so we can use larger batches.
- */
-public final class EntityBatchSizeEstimator {
-
-  private static final Set<String> LARGE_ENTITIES =
-      Set.of("table", "topic", "dashboard", "mlmodel", "container", "storedProcedure");
-
-  private static final Set<String> SMALL_ENTITIES =
-      Set.of("user", "team", "bot", "role", "policy", "tag", "classification");
-
-  private static final int MIN_BATCH_SIZE = 25;
-  private static final int MAX_BATCH_SIZE = 1000;
-
-  private EntityBatchSizeEstimator() {}
-
-  public static int estimateBatchSize(String entityType, int baseBatchSize) {
-    if (baseBatchSize <= 0) {
-      return baseBatchSize;
-    }
-
-    if (LARGE_ENTITIES.contains(entityType)) {
-      return Math.max(baseBatchSize / 2, MIN_BATCH_SIZE);
-    }
-
-    if (SMALL_ENTITIES.contains(entityType)) {
-      return Math.min(baseBatchSize * 2, MAX_BATCH_SIZE);
-    }
-
-    return baseBatchSize;
-  }
-}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java
deleted file mode 100644
index 6ae3400d235f..000000000000
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java
+++ /dev/null
@@ -1,354 +0,0 @@
-package org.openmetadata.service.apps.bundles.searchIndex;
-
-import static org.openmetadata.service.Entity.QUERY_COST_RECORD;
-import static org.openmetadata.service.Entity.TEST_CASE_RESOLUTION_STATUS;
-import static org.openmetadata.service.Entity.TEST_CASE_RESULT;
-import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getSearchIndexFields;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Phaser;
-import java.util.concurrent.atomic.AtomicBoolean;
-import lombok.extern.slf4j.Slf4j;
-import org.openmetadata.schema.analytics.ReportData;
-import org.openmetadata.schema.utils.ResultList;
-import org.openmetadata.service.exception.SearchIndexException;
-import org.openmetadata.service.util.RestUtil;
-import org.openmetadata.service.workflows.searchIndex.PaginatedEntitiesSource;
-import org.openmetadata.service.workflows.searchIndex.PaginatedEntityTimeSeriesSource;
-
-/**
- * Standalone reader that encapsulates all entity reading logic. Decoupled from queues and sinks —
- * delivers batches via a callback interface.
- */
-@Slf4j
-public class EntityReader implements AutoCloseable {
-
-  static final Set<String> TIME_SERIES_ENTITIES =
-      Set.of(
-          ReportData.ReportDataType.ENTITY_REPORT_DATA.value(),
-          ReportData.ReportDataType.RAW_COST_ANALYSIS_REPORT_DATA.value(),
-          ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(),
-          ReportData.ReportDataType.WEB_ANALYTIC_ENTITY_VIEW_REPORT_DATA.value(),
-          ReportData.ReportDataType.AGGREGATED_COST_ANALYSIS_REPORT_DATA.value(),
-          TEST_CASE_RESOLUTION_STATUS,
-          TEST_CASE_RESULT,
-          QUERY_COST_RECORD);
-
-  private static final int MAX_READERS_PER_ENTITY = 5;
-
-  @FunctionalInterface
-  public interface BatchCallback {
-    void onBatchRead(String entityType, ResultList<?> batch, int offset)
-        throws InterruptedException;
-  }
-
-  @FunctionalInterface
-  interface KeysetBatchReader {
-    ResultList<?> readNextKeyset(String cursor) throws SearchIndexException;
-  }
-
-  @FunctionalInterface
-  interface BoundaryFinder {
-    List<String> findBoundaries(int numReaders, int totalRecords);
-  }
-
-  private static final int DEFAULT_MAX_RETRY_ATTEMPTS = 3;
-  private static final long DEFAULT_RETRY_BACKOFF_MS = 500;
-
-  private final ExecutorService producerExecutor;
-  private final AtomicBoolean stopped;
-  private final int maxRetryAttempts;
-  private final long retryBackoffMs;
-
-  public EntityReader(ExecutorService producerExecutor, AtomicBoolean stopped) {
-    this(producerExecutor, stopped, DEFAULT_MAX_RETRY_ATTEMPTS, DEFAULT_RETRY_BACKOFF_MS);
-  }
-
-  public EntityReader(
-      ExecutorService producerExecutor,
-      AtomicBoolean stopped,
-      int maxRetryAttempts,
-      long retryBackoffMs) {
-    this.producerExecutor = producerExecutor;
-    this.stopped = stopped;
-    this.maxRetryAttempts = maxRetryAttempts;
-    this.retryBackoffMs = retryBackoffMs;
-  }
-
-  /**
-   * Read all entities of a given type, invoking callback for each batch.
-   *
-   * @param entityType The entity type to read
-   * @param totalRecords Total records expected for this entity
-   * @param batchSize Batch size for reading
-   * @param phaser Phaser for completion tracking (readers will register/deregister)
-   * @param callback Callback invoked with each batch
-   * @return Number of readers submitted
-   */
-  public int readEntity(
-      String entityType, int totalRecords, int batchSize, Phaser phaser, BatchCallback callback) {
-    return readEntity(entityType, totalRecords, batchSize, phaser, callback, null, null);
-  }
-
-  public int readEntity(
-      String entityType,
-      int totalRecords,
-      int batchSize,
-      Phaser phaser,
-      BatchCallback callback,
-      Long timeSeriesStartTs,
-      Long timeSeriesEndTs) {
-    if (totalRecords <= 0) {
-      return 0;
-    }
-
-    int numReaders =
-        Math.min(calculateNumberOfReaders(totalRecords, batchSize), MAX_READERS_PER_ENTITY);
-    phaser.bulkRegister(numReaders);
-
-    try {
-      if (TIME_SERIES_ENTITIES.contains(entityType)) {
-        submitReaders(
-            entityType,
-            totalRecords,
-            batchSize,
-            numReaders,
-            phaser,
-            callback,
-            () -> {
-              PaginatedEntityTimeSeriesSource source =
-                  (timeSeriesStartTs != null)
-                      ? new PaginatedEntityTimeSeriesSource(
-                          entityType,
-                          batchSize,
-                          getSearchIndexFields(entityType),
-                          totalRecords,
-                          timeSeriesStartTs,
-                          timeSeriesEndTs)
-                      : new PaginatedEntityTimeSeriesSource(
-                          entityType, batchSize, getSearchIndexFields(entityType), totalRecords);
-              return source::readWithCursor;
-            },
-            (readers, total) -> {
-              List<String> cursors = new ArrayList<>();
-              int perReader = total / readers;
-              for (int i = 1; i < readers; i++) {
-                cursors.add(RestUtil.encodeCursor(String.valueOf(i * perReader)));
-              }
-              return cursors;
-            });
-      } else {
-        PaginatedEntitiesSource entSource =
-            new PaginatedEntitiesSource(
-                entityType, batchSize, getSearchIndexFields(entityType), totalRecords);
-        submitReaders(
-            entityType,
-            totalRecords,
-            batchSize,
-            numReaders,
-            phaser,
-            callback,
-            () -> {
-              PaginatedEntitiesSource source =
-                  new PaginatedEntitiesSource(
-                      entityType, batchSize, getSearchIndexFields(entityType), totalRecords);
-              return source::readNextKeyset;
-            },
-            entSource::findBoundaryCursors);
-      }
-    } catch (Exception e) {
-      LOG.error(
-          "Failed to submit readers for {}, deregistering {} phaser parties",
-          entityType,
-          numReaders,
-          e);
-      for (int i = 0; i < numReaders; i++) {
-        phaser.arriveAndDeregister();
-      }
-      throw e;
-    }
-
-    return numReaders;
-  }
-
-  public void stop() {
-    stopped.set(true);
-  }
-
-  @Override
-  public void close() {
-    stop();
-  }
-
-  private void submitReaders(
-      String entityType,
-      int totalRecords,
-      int batchSize,
-      int numReaders,
-      Phaser phaser,
-      BatchCallback callback,
-      java.util.function.Supplier<KeysetBatchReader> readerFactory,
-      BoundaryFinder boundaryFinder) {
-    if (numReaders == 1) {
-      KeysetBatchReader reader = readerFactory.get();
-      producerExecutor.submit(
-          () ->
-              readKeysetBatches(
-                  entityType, Integer.MAX_VALUE, batchSize, null, reader, phaser, callback));
-      return;
-    }
-
-    List<String> boundaries = boundaryFinder.findBoundaries(numReaders, totalRecords);
-    int actualReaders = boundaries.size() + 1;
-    int recordsPerReader = (totalRecords + actualReaders - 1) / actualReaders;
-
-    if (actualReaders < numReaders) {
-      LOG.warn(
-          "Boundary discovery for {} returned {} cursors (expected {}), using {} readers",
-          entityType,
-          boundaries.size(),
-          numReaders - 1,
-          actualReaders);
-      for (int j = 0; j < numReaders - actualReaders; j++) {
-        phaser.arriveAndDeregister();
-      }
-    }
-
-    for (int i = 0; i < actualReaders; i++) {
-      String startCursor = (i == 0) ? null : boundaries.get(i - 1);
-      int limit = (i == actualReaders - 1) ? Integer.MAX_VALUE : recordsPerReader;
-      KeysetBatchReader readerSource = readerFactory.get();
-      final int readerLimit = limit;
-      producerExecutor.submit(
-          () ->
-              readKeysetBatches(
-                  entityType, readerLimit, batchSize, startCursor, readerSource, phaser, callback));
-    }
-  }
-
-  private void readKeysetBatches(
-      String entityType,
-      int recordLimit,
-      int batchSize,
-      String startCursor,
-      KeysetBatchReader batchReader,
-      Phaser phaser,
-      BatchCallback callback) {
-    // Bypass the Redis-backed entity cache on the reader thread for the same reasons as the
-    // distributed PartitionWorker: bulk reindex never re-reads entities, every relationship
-    // lookup pays a cache round-trip we don't need, and an unhealthy Redis turns each lookup
-    // into a 300ms timeout. See {@link org.openmetadata.service.cache.EntityCacheBypass}.
- try (org.openmetadata.service.cache.EntityCacheBypass.Handle ignored = - org.openmetadata.service.cache.EntityCacheBypass.skip()) { - readKeysetBatchesInternal( - entityType, recordLimit, batchSize, startCursor, batchReader, phaser, callback); - } - } - - private void readKeysetBatchesInternal( - String entityType, - int recordLimit, - int batchSize, - String startCursor, - KeysetBatchReader batchReader, - Phaser phaser, - BatchCallback callback) { - try { - String keysetCursor = startCursor; - int processed = 0; - - while (processed < recordLimit && !stopped.get()) { - ResultList result = readWithRetry(batchReader, keysetCursor, entityType); - if (stopped.get()) { - break; - } - - if (result == null || result.getData().isEmpty()) { - LOG.debug( - "Reader for {} exhausted at processed={} of limit={} (empty result)", - entityType, - processed, - recordLimit); - break; - } - - callback.onBatchRead(entityType, result, processed); - - int readCount = result.getData().size(); - int errorCount = result.getErrors() != null ? result.getErrors().size() : 0; - int warningsCount = result.getWarningsCount() != null ? result.getWarningsCount() : 0; - processed += readCount + errorCount + warningsCount; - - keysetCursor = result.getPaging() != null ? result.getPaging().getAfter() : null; - if (keysetCursor == null) { - LOG.debug( - "Reader for {} exhausted at processed={} of limit={} (null cursor)", - entityType, - processed, - recordLimit); - break; - } - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("Interrupted during reading of {}", entityType); - } catch (SearchIndexException e) { - LOG.error("Error reading keyset batch for {}", entityType, e); - } catch (Exception e) { - if (!stopped.get()) { - LOG.error("Error in keyset reading for {}", entityType, e); - } - } finally { - phaser.arriveAndDeregister(); - } - } - - private ResultList readWithRetry( - KeysetBatchReader batchReader, String keysetCursor, String entityType) - throws SearchIndexException, InterruptedException { - for (int attempt = 0; attempt <= maxRetryAttempts; attempt++) { - try { - return batchReader.readNextKeyset(keysetCursor); - } catch (SearchIndexException e) { - if (attempt >= maxRetryAttempts || !isTransientError(e)) { - throw e; - } - long backoff = retryBackoffMs * (1L << attempt); - LOG.warn( - "Transient read failure for {} (attempt {}/{}), retrying in {}ms", - entityType, - attempt + 1, - maxRetryAttempts, - backoff); - Thread.sleep(Math.min(backoff, 10_000)); - } - } - return null; - } - - static boolean isTransientError(SearchIndexException e) { - String msg = e.getMessage(); - if (msg == null) { - return false; - } - String lower = msg.toLowerCase(); - return lower.contains("timeout") - || lower.contains("connection") - || lower.contains("pool exhausted") - || lower.contains("connectexception") - || lower.contains("sockettimeoutexception"); - } - - static List getSearchIndexFields(String entityType) { - return org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getSearchIndexFields( - entityType); - } - - static int calculateNumberOfReaders(int totalEntityRecords, int batchSize) { - if (batchSize <= 0) return 1; - return (totalEntityRecords + batchSize - 1) / batchSize; - } -} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReindexContextMapper.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReindexContextMapper.java new file mode 100644 index 
000000000000..a501b73e27c9 --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReindexContextMapper.java @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.openmetadata.service.apps.bundles.searchIndex; + +import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; + +import java.util.HashSet; +import org.openmetadata.service.search.EntityReindexContext; +import org.openmetadata.service.search.ReindexContext; + +public final class EntityReindexContextMapper { + private EntityReindexContextMapper() {} + + public static EntityReindexContext fromStagedContext( + ReindexContext stagedIndexContext, String entityType) { + String originalIndex = stagedIndexContext.getOriginalIndex(entityType).orElse(null); + + return EntityReindexContext.builder() + .entityType(entityType) + .originalIndex(originalIndex) + .canonicalIndex(stagedIndexContext.getCanonicalIndex(entityType).orElse(null)) + .activeIndex(originalIndex) + .stagedIndex(stagedIndexContext.getStagedIndex(entityType).orElse(null)) + .canonicalAliases(stagedIndexContext.getCanonicalAlias(entityType).orElse(null)) + .existingAliases(stagedIndexContext.getExistingAliases(entityType)) + .parentAliases(new HashSet<>(listOrEmpty(stagedIndexContext.getParentAliases(entityType)))) + .build(); + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingPipeline.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingPipeline.java deleted file mode 100644 index d45864f497f1..000000000000 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingPipeline.java +++ /dev/null @@ -1,603 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; -import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.isDataInsightIndex; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Phaser; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import lombok.Getter; -import lombok.extern.slf4j.Slf4j; -import org.openmetadata.schema.system.IndexingError; -import org.openmetadata.schema.system.Stats; -import org.openmetadata.schema.system.StepStats; -import org.openmetadata.schema.utils.ResultList; -import org.openmetadata.service.Entity; -import org.openmetadata.service.jdbi3.EntityRepository; -import 
org.openmetadata.service.jdbi3.EntityTimeSeriesRepository; -import org.openmetadata.service.jdbi3.ListFilter; -import org.openmetadata.service.search.EntityReindexContext; -import org.openmetadata.service.search.RecreateIndexHandler; -import org.openmetadata.service.search.ReindexContext; -import org.openmetadata.service.search.SearchRepository; -import org.openmetadata.service.util.FullyQualifiedName; -import org.openmetadata.service.workflows.searchIndex.ReindexingUtil; -import org.slf4j.MDC; - -/** - * Quartz-decoupled indexing pipeline that orchestrates: entity discovery -> reader -> queue -> sink. - * This class can be used by SearchIndexExecutor, CLI tools, REST APIs, or unit tests. - */ -@Slf4j -public class IndexingPipeline implements AutoCloseable { - - private static final String POISON_PILL = "__POISON_PILL__"; - private static final int DEFAULT_QUEUE_SIZE = 20000; - private static final int MAX_CONSUMER_THREADS = - Math.min(20, Runtime.getRuntime().availableProcessors() * 2); - private static final int MAX_JOB_THREADS = - Math.min(30, Runtime.getRuntime().availableProcessors() * 4); - private static final String ENTITY_TYPE_KEY = "entityType"; - private static final String RECREATE_INDEX = "recreateIndex"; - - private final SearchRepository searchRepository; - private final CompositeProgressListener listeners; - private final AtomicBoolean stopped = new AtomicBoolean(false); - @Getter private final AtomicReference stats = new AtomicReference<>(); - - private BulkSink searchIndexSink; - private RecreateIndexHandler recreateIndexHandler; - private ReindexContext recreateContext; - private EntityReader entityReader; - private ExecutorService consumerExecutor; - private ExecutorService producerExecutor; - private ExecutorService jobExecutor; - private BlockingQueue> taskQueue; - private final Set promotedEntities = java.util.concurrent.ConcurrentHashMap.newKeySet(); - - record IndexingTask(String entityType, ResultList entities, int offset) {} - - public IndexingPipeline(SearchRepository searchRepository) { - this.searchRepository = searchRepository; - this.listeners = new CompositeProgressListener(); - } - - public IndexingPipeline addListener(ReindexingProgressListener listener) { - listeners.addListener(listener); - return this; - } - - public ExecutionResult execute( - ReindexingConfiguration config, - ReindexingJobContext context, - Set entities, - BulkSink sink, - RecreateIndexHandler handler, - ReindexContext recreateCtx) { - this.searchIndexSink = sink; - this.recreateIndexHandler = handler; - this.recreateContext = recreateCtx; - long startTime = System.currentTimeMillis(); - - stats.set(initializeStats(config, entities)); - listeners.onJobStarted(context); - - try { - runPipeline(config, entities); - closeSink(); - finalizeReindex(); - return buildResult(startTime); - } catch (Exception e) { - LOG.error("Pipeline execution failed", e); - listeners.onJobFailed(stats.get(), e); - return ExecutionResult.fromStats(stats.get(), ExecutionResult.Status.FAILED, startTime); - } - } - - private void runPipeline(ReindexingConfiguration config, Set entities) - throws InterruptedException { - int numConsumers = - config.consumerThreads() > 0 ? Math.min(config.consumerThreads(), MAX_CONSUMER_THREADS) : 2; - int queueSize = config.queueSize() > 0 ? config.queueSize() : DEFAULT_QUEUE_SIZE; - int batchSize = config.batchSize(); - - taskQueue = new LinkedBlockingQueue<>(queueSize); - String jobIdTag = MDC.get("reindexJobId"); - String threadPrefix = "reindex-" + (jobIdTag != null ? 
jobIdTag + "-" : ""); - consumerExecutor = - Executors.newFixedThreadPool( - numConsumers, - Thread.ofPlatform().name(threadPrefix + "pipeline-consumer-", 0).factory()); - producerExecutor = - Executors.newFixedThreadPool( - config.producerThreads() > 0 ? config.producerThreads() : 2, - Thread.ofPlatform().name(threadPrefix + "pipeline-producer-", 0).factory()); - jobExecutor = - Executors.newFixedThreadPool( - Math.min(entities.size(), MAX_JOB_THREADS), - Thread.ofPlatform().name(threadPrefix + "pipeline-job-", 0).factory()); - - entityReader = new EntityReader(producerExecutor, stopped); - - CountDownLatch consumerLatch = new CountDownLatch(numConsumers); - Map mdc = MDC.getCopyOfContextMap(); - for (int i = 0; i < numConsumers; i++) { - final int id = i; - consumerExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - runConsumer(id, consumerLatch); - } finally { - MDC.clear(); - } - }); - } - - try { - readAllEntities(config, entities, batchSize); - signalConsumersToStop(numConsumers); - consumerLatch.await(); - } catch (InterruptedException e) { - stopped.set(true); - Thread.currentThread().interrupt(); - throw e; - } finally { - shutdownExecutors(); - } - } - - private void readAllEntities(ReindexingConfiguration config, Set entities, int batchSize) - throws InterruptedException { - List ordered = EntityPriority.sortByPriority(entities); - Phaser producerPhaser = new Phaser(entities.size()); - Map mdc = MDC.getCopyOfContextMap(); - - for (String entityType : ordered) { - jobExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - int totalRecords = getTotalEntityRecords(entityType); - listeners.onEntityTypeStarted(entityType, totalRecords); - - int effectiveBatchSize = - EntityBatchSizeEstimator.estimateBatchSize(entityType, batchSize); - Long filterStartTs = null; - Long filterEndTs = null; - long startTs = config.getTimeSeriesStartTs(entityType); - if (startTs > 0) { - filterStartTs = startTs; - filterEndTs = System.currentTimeMillis(); - } - entityReader.readEntity( - entityType, - totalRecords, - effectiveBatchSize, - producerPhaser, - (type, batch, offset) -> { - if (!stopped.get()) { - taskQueue.put(new IndexingTask<>(type, batch, offset)); - } - }, - filterStartTs, - filterEndTs); - } catch (Exception e) { - LOG.error("Error reading entity type {}", entityType, e); - } finally { - producerPhaser.arriveAndDeregister(); - MDC.clear(); - } - }); - } - - int phase = 0; - while (!producerPhaser.isTerminated()) { - if (stopped.get() || Thread.currentThread().isInterrupted()) { - break; - } - try { - producerPhaser.awaitAdvanceInterruptibly(phase, 1, TimeUnit.SECONDS); - break; - } catch (TimeoutException e) { - // Continue - } - } - } - - @SuppressWarnings("unchecked") - private void runConsumer(int consumerId, CountDownLatch consumerLatch) { - try { - while (!stopped.get()) { - IndexingTask task = taskQueue.poll(200, TimeUnit.MILLISECONDS); - if (task == null) continue; - if (POISON_PILL.equals(task.entityType())) break; - - String entityType = task.entityType(); - ResultList entities = task.entities(); - Map contextData = createContextData(entityType); - - int readerSuccess = listOrEmpty(entities.getData()).size(); - int readerFailed = listOrEmpty(entities.getErrors()).size(); - int readerWarnings = entities.getWarningsCount() != null ? 
entities.getWarningsCount() : 0; - updateReaderStats(readerSuccess, readerFailed, readerWarnings); - - try { - if (!EntityReader.TIME_SERIES_ENTITIES.contains(entityType)) { - searchIndexSink.write(entities.getData(), contextData); - } else { - searchIndexSink.write(entities.getData(), contextData); - } - - StepStats entityStats = new StepStats(); - entityStats.setSuccessRecords(readerSuccess); - entityStats.setFailedRecords(readerFailed); - updateEntityAndJobStats(entityType, entityStats); - - if (Entity.TABLE.equals(entityType)) { - updateColumnStatsFromSink(); - } - - listeners.onProgressUpdate(stats.get(), null); - } catch (Exception e) { - LOG.error("Sink error for {}", entityType, e); - IndexingError error = - new IndexingError() - .withErrorSource(IndexingError.ErrorSource.SINK) - .withMessage(e.getMessage()); - listeners.onError(entityType, error, stats.get()); - } - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } finally { - consumerLatch.countDown(); - } - } - - private Map createContextData(String entityType) { - Map contextData = new HashMap<>(); - contextData.put(ENTITY_TYPE_KEY, entityType); - contextData.put(RECREATE_INDEX, recreateContext != null); - if (recreateContext != null) { - contextData.put(ReindexingUtil.RECREATE_CONTEXT, recreateContext); - recreateContext - .getStagedIndex(entityType) - .ifPresent(index -> contextData.put(ReindexingUtil.TARGET_INDEX_KEY, index)); - } - return contextData; - } - - private void signalConsumersToStop(int numConsumers) throws InterruptedException { - for (int i = 0; i < numConsumers; i++) { - taskQueue.put(new IndexingTask<>(POISON_PILL, null, -1)); - } - } - - private void closeSink() { - if (searchIndexSink != null) { - int pendingVectorTasks = searchIndexSink.getPendingVectorTaskCount(); - if (pendingVectorTasks > 0) { - LOG.info("Waiting for {} pending vector embedding tasks", pendingVectorTasks); - VectorCompletionResult vcResult = searchIndexSink.awaitVectorCompletionWithDetails(300); - LOG.info( - "Vector completion: completed={}, pending={}, waited={}ms", - vcResult.completed(), - vcResult.pendingTaskCount(), - vcResult.waitedMillis()); - } - searchIndexSink.close(); - syncSinkStats(); - } - } - - private void finalizeReindex() { - if (recreateIndexHandler == null || recreateContext == null) return; - - try { - recreateContext - .getEntities() - .forEach( - entityType -> { - if (promotedEntities.contains(entityType)) return; - try { - EntityReindexContext ctx = buildEntityReindexContext(entityType); - recreateIndexHandler.finalizeReindex(ctx, !stopped.get()); - } catch (Exception ex) { - LOG.error("Failed to finalize reindex for {}", entityType, ex); - } - }); - } finally { - recreateContext = null; - promotedEntities.clear(); - } - } - - private EntityReindexContext buildEntityReindexContext(String entityType) { - return EntityReindexContext.builder() - .entityType(entityType) - .originalIndex(recreateContext.getOriginalIndex(entityType).orElse(null)) - .canonicalIndex(recreateContext.getCanonicalIndex(entityType).orElse(null)) - .activeIndex(recreateContext.getOriginalIndex(entityType).orElse(null)) - .stagedIndex(recreateContext.getStagedIndex(entityType).orElse(null)) - .canonicalAliases(recreateContext.getCanonicalAlias(entityType).orElse(null)) - .existingAliases(recreateContext.getExistingAliases(entityType)) - .parentAliases( - new HashSet<>( - org.openmetadata.common.utils.CommonUtil.listOrEmpty( - recreateContext.getParentAliases(entityType)))) - .build(); - } - - private 
ExecutionResult buildResult(long startTime) { - syncSinkStats(); - updateColumnStatsFromSink(); - Stats currentStats = stats.get(); - if (currentStats != null) { - StatsReconciler.reconcile(currentStats); - } - - ExecutionResult.Status status; - if (stopped.get()) { - status = ExecutionResult.Status.STOPPED; - listeners.onJobStopped(currentStats); - } else if (hasFailures()) { - status = ExecutionResult.Status.COMPLETED_WITH_ERRORS; - listeners.onJobCompletedWithErrors(currentStats, System.currentTimeMillis() - startTime); - } else { - status = ExecutionResult.Status.COMPLETED; - listeners.onJobCompleted(currentStats, System.currentTimeMillis() - startTime); - } - - return ExecutionResult.fromStats(currentStats, status, startTime); - } - - private boolean hasFailures() { - Stats s = stats.get(); - if (s == null || s.getJobStats() == null) return false; - StepStats js = s.getJobStats(); - long failed = js.getFailedRecords() != null ? js.getFailedRecords() : 0; - long success = js.getSuccessRecords() != null ? js.getSuccessRecords() : 0; - long total = js.getTotalRecords() != null ? js.getTotalRecords() : 0; - return failed > 0 || (total > 0 && success < total); - } - - private Stats initializeStats(ReindexingConfiguration config, Set entities) { - Stats s = new Stats(); - s.setEntityStats(new org.openmetadata.schema.system.EntityStats()); - s.setJobStats(new StepStats()); - s.setReaderStats(new StepStats()); - s.setSinkStats(new StepStats()); - - int total = 0; - for (String entityType : entities) { - int entityTotal = getEntityTotal(entityType, config); - total += entityTotal; - StepStats es = new StepStats(); - es.setTotalRecords(entityTotal); - es.setSuccessRecords(0); - es.setFailedRecords(0); - s.getEntityStats().getAdditionalProperties().put(entityType, es); - } - - if (entities.contains(Entity.TABLE) && !entities.contains(Entity.TABLE_COLUMN)) { - StepStats columnStats = new StepStats(); - columnStats.setTotalRecords(0); - columnStats.setSuccessRecords(0); - columnStats.setFailedRecords(0); - s.getEntityStats().getAdditionalProperties().put(Entity.TABLE_COLUMN, columnStats); - } - - s.getJobStats().setTotalRecords(total); - s.getJobStats().setSuccessRecords(0); - s.getJobStats().setFailedRecords(0); - s.getReaderStats().setTotalRecords(total); - s.getReaderStats().setSuccessRecords(0); - s.getReaderStats().setFailedRecords(0); - s.getReaderStats().setWarningRecords(0); - s.getSinkStats().setTotalRecords(0); - s.getSinkStats().setSuccessRecords(0); - s.getSinkStats().setFailedRecords(0); - - s.setProcessStats(new StepStats()); - s.getProcessStats().setTotalRecords(0); - s.getProcessStats().setSuccessRecords(0); - s.getProcessStats().setFailedRecords(0); - return s; - } - - private int getEntityTotal(String entityType, ReindexingConfiguration config) { - try { - if (!EntityReader.TIME_SERIES_ENTITIES.contains(entityType)) { - EntityRepository repository = Entity.getEntityRepository(entityType); - return repository - .getDao() - .listCount(new ListFilter(org.openmetadata.schema.type.Include.ALL)); - } - - EntityTimeSeriesRepository repository; - ListFilter listFilter = new ListFilter(null); - if (isDataInsightIndex(entityType)) { - listFilter.addQueryParam("entityFQNHash", FullyQualifiedName.buildHash(entityType)); - repository = Entity.getEntityTimeSeriesRepository(Entity.ENTITY_REPORT_DATA); - } else { - repository = Entity.getEntityTimeSeriesRepository(entityType); - } - - long startTs = config != null ? 
config.getTimeSeriesStartTs(entityType) : -1; - if (startTs > 0) { - long endTs = System.currentTimeMillis(); - return repository.getTimeSeriesDao().listCount(listFilter, startTs, endTs, false); - } - return repository.getTimeSeriesDao().listCount(listFilter); - } catch (Exception e) { - LOG.debug("Error getting total records for '{}'", entityType, e); - return 0; - } - } - - private int getTotalEntityRecords(String entityType) { - StepStats es = - stats.get() != null - && stats.get().getEntityStats() != null - && stats.get().getEntityStats().getAdditionalProperties() != null - ? stats.get().getEntityStats().getAdditionalProperties().get(entityType) - : null; - if (es != null && es.getTotalRecords() != null) { - return es.getTotalRecords(); - } - return 0; - } - - private synchronized void updateReaderStats(int success, int failed, int warnings) { - Stats s = stats.get(); - if (s == null) return; - StepStats rs = s.getReaderStats(); - if (rs == null) { - rs = new StepStats(); - s.setReaderStats(rs); - } - rs.setSuccessRecords((rs.getSuccessRecords() != null ? rs.getSuccessRecords() : 0) + success); - rs.setFailedRecords((rs.getFailedRecords() != null ? rs.getFailedRecords() : 0) + failed); - rs.setWarningRecords((rs.getWarningRecords() != null ? rs.getWarningRecords() : 0) + warnings); - } - - private synchronized void updateEntityAndJobStats(String entityType, StepStats entityDelta) { - Stats s = stats.get(); - if (s == null || s.getEntityStats() == null) return; - - StepStats es = s.getEntityStats().getAdditionalProperties().get(entityType); - if (es != null) { - es.setSuccessRecords(es.getSuccessRecords() + entityDelta.getSuccessRecords()); - es.setFailedRecords(es.getFailedRecords() + entityDelta.getFailedRecords()); - } - - StepStats js = s.getJobStats(); - if (js != null) { - int totalSuccess = - s.getEntityStats().getAdditionalProperties().entrySet().stream() - .filter(e -> !Entity.TABLE_COLUMN.equals(e.getKey())) - .mapToInt(e -> e.getValue().getSuccessRecords()) - .sum(); - int totalFailed = - s.getEntityStats().getAdditionalProperties().entrySet().stream() - .filter(e -> !Entity.TABLE_COLUMN.equals(e.getKey())) - .mapToInt(e -> e.getValue().getFailedRecords()) - .sum(); - js.setSuccessRecords(totalSuccess); - js.setFailedRecords(totalFailed); - } - } - - private synchronized void syncSinkStats() { - if (searchIndexSink == null) return; - Stats s = stats.get(); - if (s == null) return; - - StepStats bulkStats = searchIndexSink.getStats(); - if (bulkStats == null) return; - - StepStats sinkStats = s.getSinkStats(); - if (sinkStats == null) { - sinkStats = new StepStats(); - s.setSinkStats(sinkStats); - } - sinkStats.setTotalRecords( - bulkStats.getTotalRecords() != null ? bulkStats.getTotalRecords() : 0); - sinkStats.setSuccessRecords( - bulkStats.getSuccessRecords() != null ? bulkStats.getSuccessRecords() : 0); - sinkStats.setFailedRecords( - bulkStats.getFailedRecords() != null ? 
bulkStats.getFailedRecords() : 0); - - StepStats vectorStats = searchIndexSink.getVectorStats(); - if (vectorStats != null - && vectorStats.getTotalRecords() != null - && vectorStats.getTotalRecords() > 0) { - s.setVectorStats(vectorStats); - } - - StepStats processStats = searchIndexSink.getProcessStats(); - if (processStats != null) { - s.setProcessStats(processStats); - } - } - - private void updateColumnStatsFromSink() { - if (searchIndexSink == null) return; - Stats s = stats.get(); - if (s == null || s.getEntityStats() == null) return; - - StepStats columnStats = searchIndexSink.getColumnStats(); - if (columnStats != null && columnStats.getTotalRecords() > 0) { - StepStats existing = s.getEntityStats().getAdditionalProperties().get(Entity.TABLE_COLUMN); - if (existing != null) { - existing.setTotalRecords(columnStats.getTotalRecords()); - existing.setSuccessRecords(columnStats.getSuccessRecords()); - existing.setFailedRecords(columnStats.getFailedRecords()); - } - } - } - - private void shutdownExecutors() { - shutdownExecutor(producerExecutor, "producer"); - shutdownExecutor(jobExecutor, "job"); - shutdownExecutor(consumerExecutor, "consumer"); - } - - private void shutdownExecutor(ExecutorService executor, String name) { - if (executor != null && !executor.isShutdown()) { - executor.shutdown(); - try { - if (!executor.awaitTermination(30, TimeUnit.SECONDS)) { - executor.shutdownNow(); - LOG.warn("{} executor did not terminate in time", name); - } - } catch (InterruptedException e) { - executor.shutdownNow(); - Thread.currentThread().interrupt(); - } - } - } - - public void stop() { - stopped.set(true); - if (entityReader != null) entityReader.stop(); - - if (searchIndexSink != null) { - LOG.info( - "Stopping pipeline: flushing sink ({} active bulk requests)", - searchIndexSink.getActiveBulkRequestCount()); - searchIndexSink.flushAndAwait(10); - } - - int dropped = taskQueue != null ? taskQueue.size() : 0; - if (dropped > 0) { - LOG.warn("Dropping {} queued tasks during shutdown", dropped); - } - - if (taskQueue != null) { - taskQueue.clear(); - for (int i = 0; i < MAX_CONSUMER_THREADS; i++) { - taskQueue.offer(new IndexingTask<>(POISON_PILL, null, -1)); - } - } - shutdownExecutors(); - } - - @Override - public void close() { - stop(); - } -} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingStrategy.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingStrategy.java deleted file mode 100644 index e7d4b2018b9e..000000000000 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingStrategy.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import java.util.Optional; -import org.openmetadata.schema.system.Stats; - -/** - * Strategy interface for reindexing execution. Encapsulates the differences between single-server - * and distributed indexing so that SearchIndexApp uses a single code path regardless of mode. 
- */
-public interface IndexingStrategy {
-
-  void addListener(ReindexingProgressListener listener);
-
-  ExecutionResult execute(ReindexingConfiguration config, ReindexingJobContext context);
-
-  Optional<Stats> getStats();
-
-  void stop();
-
-  boolean isStopped();
-}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrchestratorContext.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrchestratorContext.java
index 1fb84174d3a4..6b290b3627c0 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrchestratorContext.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrchestratorContext.java
@@ -28,5 +28,5 @@ public interface OrchestratorContext {
 
   ReindexingProgressListener createProgressListener(EventPublisherJob jobData);
 
-  ReindexingJobContext createReindexingContext(boolean distributed);
+  ReindexingJobContext createReindexingContext();
 }
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrphanedIndexCleaner.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrphanedIndexCleaner.java
index 3f4c85b3f97a..94dc0ca0aedc 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrphanedIndexCleaner.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/OrphanedIndexCleaner.java
@@ -25,7 +25,7 @@
  * considered orphaned if:
  *
  * <ul>
- *   <li>It contains "_rebuild_" in its name (created during recreateIndex=true)
+ *   <li>It contains "_rebuild_" in its name (created during staged reindexing)
 *   <li>It has ZERO aliases pointing to it (not serving any traffic)
 * </ul>
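Taken together, the two conditions form a conservative predicate: only an index that is both a staged-rebuild leftover and alias-free is eligible for deletion. A minimal sketch of that test (the helper class and method names are illustrative, not OrphanedIndexCleaner's actual API):

```java
import java.util.Set;

final class OrphanCheckSketch {
  // Mirrors the two javadoc conditions above; both must hold before deletion.
  static boolean isOrphaned(String indexName, Set<String> aliases) {
    return indexName.contains("_rebuild_") // created during staged reindexing
        && aliases.isEmpty();              // zero aliases, so not serving any traffic
  }
}
```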
* diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContext.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContext.java index e39a619749b8..b9ea0bbf4e72 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContext.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContext.java @@ -14,9 +14,8 @@ public class QuartzJobContext implements ReindexingJobContext { private final String jobName; private final Long startTime; private final UUID appId; - private final boolean distributed; - public QuartzJobContext(JobExecutionContext jobExecutionContext, App app, boolean distributed) { + public QuartzJobContext(JobExecutionContext jobExecutionContext, App app) { this.jobName = jobExecutionContext != null ? jobExecutionContext.getJobDetail().getKey().getName() @@ -24,7 +23,6 @@ public QuartzJobContext(JobExecutionContext jobExecutionContext, App app, boolea this.startTime = System.currentTimeMillis(); this.appId = app != null ? app.getId() : null; this.jobId = appId != null ? appId : UUID.randomUUID(); - this.distributed = distributed; } @Override @@ -47,11 +45,6 @@ public UUID getAppId() { return appId; } - @Override - public boolean isDistributed() { - return distributed; - } - @Override public String getSource() { return "QUARTZ"; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContext.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContext.java index 497616eac5e1..379f80fe02cf 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContext.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContext.java @@ -92,7 +92,7 @@ public ReindexingProgressListener createProgressListener(EventPublisherJob jobDa } @Override - public ReindexingJobContext createReindexingContext(boolean distributed) { - return new QuartzJobContext(ctx, app, distributed); + public ReindexingJobContext createReindexingContext() { + return new QuartzJobContext(ctx, app); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingConfiguration.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingConfiguration.java index 2426e63c3672..7d7a68357156 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingConfiguration.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingConfiguration.java @@ -25,9 +25,7 @@ public record ReindexingConfiguration( int fieldFetchThreads, int docBuildThreads, long statsIntervalMs, - boolean recreateIndex, boolean autoTune, - boolean useDistributedIndexing, boolean force, int maxRetries, int initialBackoff, @@ -77,9 +75,7 @@ public static ReindexingConfiguration applyAutoTuning( .fieldFetchThreads(metrics.getRecommendedFieldFetchThreads()) .docBuildThreads(metrics.getRecommendedDocBuildThreads()) .statsIntervalMs(metrics.getRecommendedStatsIntervalMs()) - .recreateIndex(config.recreateIndex()) .autoTune(true) - .useDistributedIndexing(config.useDistributedIndexing()) .force(config.force()) .maxRetries(config.maxRetries()) 
.initialBackoff(config.initialBackoff()) @@ -128,9 +124,7 @@ public static ReindexingConfiguration from(EventPublisherJob jobData) { DEFAULT_FIELD_FETCH_THREADS, DEFAULT_DOC_BUILD_THREADS, DEFAULT_STATS_INTERVAL_MS, - Boolean.TRUE.equals(jobData.getRecreateIndex()), Boolean.TRUE.equals(jobData.getAutoTune()), - Boolean.TRUE.equals(jobData.getUseDistributedIndexing()), Boolean.TRUE.equals(jobData.getForce()), jobData.getMaxRetries() != null ? jobData.getMaxRetries() : DEFAULT_MAX_RETRIES, jobData.getInitialBackoff() != null ? jobData.getInitialBackoff() : DEFAULT_INITIAL_BACKOFF, @@ -187,7 +181,9 @@ public boolean hasSlackConfig() { /** Check if this is a subset (smart) reindexing */ public boolean isSmartReindexing() { - return entities != null && !entities.contains("all") && entities.size() < 20 && recreateIndex; + return entities != null + && !entities.contains(SearchIndexEntityTypes.ALL) + && entities.size() < 20; } /** Creates a builder for more flexible configuration creation */ @@ -206,9 +202,7 @@ public static class Builder { private int fieldFetchThreads = DEFAULT_FIELD_FETCH_THREADS; private int docBuildThreads = DEFAULT_DOC_BUILD_THREADS; private long statsIntervalMs = DEFAULT_STATS_INTERVAL_MS; - private boolean recreateIndex = false; private boolean autoTune = false; - private boolean useDistributedIndexing = false; private boolean force = false; private int maxRetries = DEFAULT_MAX_RETRIES; private int initialBackoff = DEFAULT_INITIAL_BACKOFF; @@ -270,21 +264,11 @@ public Builder statsIntervalMs(long statsIntervalMs) { return this; } - public Builder recreateIndex(boolean recreateIndex) { - this.recreateIndex = recreateIndex; - return this; - } - public Builder autoTune(boolean autoTune) { this.autoTune = autoTune; return this; } - public Builder useDistributedIndexing(boolean useDistributedIndexing) { - this.useDistributedIndexing = useDistributedIndexing; - return this; - } - public Builder force(boolean force) { this.force = force; return this; @@ -347,9 +331,7 @@ public ReindexingConfiguration build() { fieldFetchThreads, docBuildThreads, statsIntervalMs, - recreateIndex, autoTune, - useDistributedIndexing, force, maxRetries, initialBackoff, diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingJobContext.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingJobContext.java index 8b1b6af73231..a5a9b6679e86 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingJobContext.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingJobContext.java @@ -20,9 +20,6 @@ public interface ReindexingJobContext { /** Application ID (for Quartz-based jobs, null for CLI/API) */ UUID getAppId(); - /** Whether this is a distributed indexing job */ - boolean isDistributed(); - /** The source that triggered this job (e.g., "QUARTZ", "CLI", "API") */ String getSource(); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestrator.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestrator.java index 05c9e1f4ae1c..ca65907652c2 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestrator.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestrator.java @@ -33,14 
+33,13 @@ @Slf4j public class ReindexingOrchestrator { - private static final String ALL = "all"; private final CollectionDAO collectionDAO; private final SearchRepository searchRepository; private final OrchestratorContext context; @Getter private EventPublisherJob jobData; private volatile boolean stopped = false; - private volatile IndexingStrategy activeStrategy; + private volatile DistributedIndexingStrategy activeStrategy; private volatile Map resultMetadata = Collections.emptyMap(); public ReindexingOrchestrator( @@ -95,7 +94,7 @@ public void stop() { LOG.info("Reindexing job is being stopped."); stopped = true; - IndexingStrategy strategy = this.activeStrategy; + DistributedIndexingStrategy strategy = this.activeStrategy; if (strategy != null) { try { strategy.stop(); @@ -110,6 +109,7 @@ public void stop() { AppRunRecord appRecord = context.getJobRecord(); appRecord.setStatus(AppRunRecord.Status.STOPPED); + sanitizeRunRecordConfig(appRecord); OmAppJobListener.fillTerminalTimings(appRecord); context.storeRunRecord(JsonUtils.pojoToJson(appRecord)); context.pushStatusUpdate(appRecord, true); @@ -129,10 +129,10 @@ private void initializeJobData() { jobData = loadJobData(); } - String jobName = context.getJobName(); - if (jobName.equals(ON_DEMAND_JOB)) { + if (ON_DEMAND_JOB.equals(context.getJobName())) { Map jsonAppConfig = JsonUtils.convertValue(jobData, new TypeReference>() {}); + SearchIndexAppConfigSanitizer.removeRemovedOptions(jsonAppConfig); context.updateAppConfiguration(jsonAppConfig); } } @@ -140,12 +140,18 @@ private void initializeJobData() { private EventPublisherJob loadJobData() { String appConfigJson = context.getAppConfigJson(); if (appConfigJson != null) { - return JsonUtils.readValue(appConfigJson, EventPublisherJob.class); + Map appConfig = + JsonUtils.readValue(appConfigJson, new TypeReference>() {}); + return JsonUtils.convertValue( + SearchIndexAppConfigSanitizer.copyWithoutRemovedOptions(appConfig), + EventPublisherJob.class); } Map appConfig = context.getAppConfiguration(); if (appConfig != null) { - return JsonUtils.convertValue(appConfig, EventPublisherJob.class); + return JsonUtils.convertValue( + SearchIndexAppConfigSanitizer.copyWithoutRemovedOptions(appConfig), + EventPublisherJob.class); } LOG.error("Unable to initialize jobData from JobDataMap or App configuration"); @@ -212,44 +218,77 @@ private void cleanupOrphanedIndicesPreFlight() { } private void runReindexing() { - if (jobData.getEntities() == null || jobData.getEntities().isEmpty()) { - LOG.info("No entities selected for reindexing, completing immediately"); - jobData.setStatus(EventPublisherJob.Status.COMPLETED); - jobData.setStats(new Stats()); + if (hasNoEntitiesSelected()) { + completeWithoutEntities(); return; } setupEntities(); cleanupOldFailures(); + logJobStart(); + DistributedIndexingStrategy strategy = createDistributedStrategy(); + activeStrategy = strategy; + registerProgressListeners(strategy); + + ReindexingConfiguration config = buildReindexingConfiguration(); + ExecutionResult result = executeDistributedReindex(strategy, config); + persistExecutionResult(result); + } + + private boolean hasNoEntitiesSelected() { + return jobData.getEntities() == null || jobData.getEntities().isEmpty(); + } + + private void completeWithoutEntities() { + LOG.info("No entities selected for reindexing, completing immediately"); + jobData.setStatus(EventPublisherJob.Status.COMPLETED); + jobData.setStats(new Stats()); + } + + private void logJobStart() { LOG.info( - "Search Index Job Started for 
Entities: {}, RecreateIndex: {}, DistributedIndexing: {}", - jobData.getEntities(), - jobData.getRecreateIndex(), - jobData.getUseDistributedIndexing()); + "Search Index Job Started for Entities: {} using staged index promotion", + jobData.getEntities()); + } - activeStrategy = createStrategy(); + private DistributedIndexingStrategy createDistributedStrategy() { + AppRunRecord appRecord = context.getJobRecord(); + return new DistributedIndexingStrategy( + collectionDAO, + searchRepository, + jobData, + appRecord.getAppId(), + appRecord.getStartTime(), + context.getJobName()); + } - activeStrategy.addListener(context.createProgressListener(jobData)); - activeStrategy.addListener(new LoggingProgressListener()); + private void registerProgressListeners(DistributedIndexingStrategy strategy) { + strategy.addListener(context.createProgressListener(jobData)); + strategy.addListener(new LoggingProgressListener()); if (hasSlackConfig()) { - String instanceUrl = getInstanceUrl(); - activeStrategy.addListener( + strategy.addListener( new SlackProgressListener( - jobData.getSlackBotToken(), jobData.getSlackChannel(), instanceUrl)); + jobData.getSlackBotToken(), jobData.getSlackChannel(), getInstanceUrl())); } + } - ReindexingJobContext jobContext = - context.createReindexingContext(Boolean.TRUE.equals(jobData.getUseDistributedIndexing())); - + private ReindexingConfiguration buildReindexingConfiguration() { ReindexingConfiguration config = ReindexingConfiguration.from(jobData); - long totalEntities = countTotalEntities(); - config = ReindexingConfiguration.applyAutoTuning(config, searchRepository, totalEntities); + config = + ReindexingConfiguration.applyAutoTuning(config, searchRepository, countTotalEntities()); config.applyTo(jobData); updateRunRecordConfig(config); + return config; + } + + private ExecutionResult executeDistributedReindex( + DistributedIndexingStrategy strategy, ReindexingConfiguration config) { + return strategy.execute(config, context.createReindexingContext()); + } - ExecutionResult result = activeStrategy.execute(config, jobContext); + private void persistExecutionResult(ExecutionResult result) { updateJobDataFromResult(result); if (jobData.getStats() != null) { @@ -261,20 +300,6 @@ private void runReindexing() { } } - private IndexingStrategy createStrategy() { - if (Boolean.TRUE.equals(jobData.getUseDistributedIndexing())) { - AppRunRecord appRecord = context.getJobRecord(); - return new DistributedIndexingStrategy( - collectionDAO, - searchRepository, - jobData, - appRecord.getAppId(), - appRecord.getStartTime(), - context.getJobName()); - } - return new SingleServerIndexingStrategy(collectionDAO, searchRepository); - } - private void updateJobDataFromResult(ExecutionResult result) { if (result.finalStats() != null) { Stats stats = result.finalStats(); @@ -298,6 +323,7 @@ private void updateRunRecordConfig(ReindexingConfiguration config) { if (appRecord != null) { Map configMap = appRecord.getConfig(); if (configMap != null) { + SearchIndexAppConfigSanitizer.removeRemovedOptions(configMap); configMap.put("batchSize", config.batchSize()); configMap.put("consumerThreads", config.consumerThreads()); configMap.put("producerThreads", config.producerThreads()); @@ -336,7 +362,7 @@ private void saveResultMetadataToJobRecord(Map metadata) { } private void handleExecutionException(Exception ex) { - IndexingStrategy strategy = this.activeStrategy; + DistributedIndexingStrategy strategy = this.activeStrategy; if (strategy != null && jobData != null) { try { 
strategy.getStats().ifPresent(jobData::setStats); @@ -369,6 +395,7 @@ private void finalizeJobExecution() { if (stopped) { AppRunRecord appRecord = context.getJobRecord(); appRecord.setStatus(AppRunRecord.Status.STOPPED); + sanitizeRunRecordConfig(appRecord); OmAppJobListener.fillTerminalTimings(appRecord); context.storeRunRecord(JsonUtils.pojoToJson(appRecord)); } @@ -385,6 +412,7 @@ private void sendUpdates() { private void updateRecordToDbAndNotify() { AppRunRecord appRecord = context.getJobRecord(); appRecord.setStatus(AppRunRecord.Status.fromValue(jobData.getStatus().value())); + sanitizeRunRecordConfig(appRecord); OmAppJobListener.fillTerminalTimings(appRecord); if (jobData.getFailure() != null) { @@ -437,6 +465,12 @@ private void updateRecordToDbAndNotify() { } } + private void sanitizeRunRecordConfig(AppRunRecord appRecord) { + if (appRecord != null) { + SearchIndexAppConfigSanitizer.removeRemovedOptions(appRecord.getConfig()); + } + } + private void cleanupOldFailures() { try { int deleted = collectionDAO.searchIndexFailureDAO().deleteAll(); @@ -466,10 +500,11 @@ private void cleanupOrphanedIndices() { } private void setupEntities() { - boolean containsAll = jobData.getEntities().contains(ALL); - if (containsAll) { - jobData.setEntities(getAll()); - } + Set entities = + jobData.getEntities().contains(SearchIndexEntityTypes.ALL) + ? getAll() + : jobData.getEntities(); + jobData.setEntities(SearchIndexEntityTypes.normalizeEntityTypes(entities)); } private Set getAll() { @@ -487,9 +522,10 @@ private long countTotalEntities() { long total = 0; for (String entityType : jobData.getEntities()) { try { - if (!SearchIndexApp.TIME_SERIES_ENTITIES.contains(entityType)) { + String normalizedEntityType = SearchIndexEntityTypes.normalizeEntityType(entityType); + if (!SearchIndexEntityTypes.isTimeSeriesEntity(normalizedEntityType)) { total += - Entity.getEntityRepository(entityType) + Entity.getEntityRepository(normalizedEntityType) .getDao() .listCount( new org.openmetadata.service.jdbi3.ListFilter( diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingProgressListener.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingProgressListener.java index d04b613a3ac4..99846826ce42 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingProgressListener.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingProgressListener.java @@ -33,7 +33,7 @@ default void onJobStarted(ReindexingJobContext context) {} /** Called when job configuration is determined (after auto-tune) */ default void onJobConfigured(ReindexingJobContext context, ReindexingConfiguration config) {} - /** Called when index recreation begins (if recreateIndex=true) */ + /** Called when staged index preparation begins. 
*/ default void onIndexRecreationStarted(Set entities) {} /** Called when a specific entity type processing begins */ diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java index 3f4b1c93b602..7122574a3c97 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java @@ -1,22 +1,17 @@ package org.openmetadata.service.apps.bundles.searchIndex; -import static org.openmetadata.service.Entity.QUERY_COST_RECORD; -import static org.openmetadata.service.Entity.TEST_CASE_RESOLUTION_STATUS; -import static org.openmetadata.service.Entity.TEST_CASE_RESULT; - import jakarta.ws.rs.core.Response; import java.util.List; import java.util.Map; -import java.util.Set; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.openmetadata.schema.analytics.ReportData; import org.openmetadata.schema.entity.app.App; import org.openmetadata.schema.entity.app.AppRunRecord; import org.openmetadata.schema.system.EventPublisherJob; import org.openmetadata.schema.utils.JsonUtils; import org.openmetadata.service.apps.AbstractNativeApplication; import org.openmetadata.service.apps.bundles.searchIndex.distributed.DistributedSearchIndexCoordinator; +import org.openmetadata.service.apps.bundles.searchIndex.distributed.IndexJobStatus; import org.openmetadata.service.exception.AppException; import org.openmetadata.service.jdbi3.AppRepository; import org.openmetadata.service.jdbi3.CollectionDAO; @@ -25,6 +20,12 @@ @Slf4j public class SearchIndexApp extends AbstractNativeApplication { + private static final String REINDEX_LOCK_KEY = "SEARCH_REINDEX_LOCK"; + private static final List ACTIVE_DISTRIBUTED_JOB_STATUSES = + List.of( + IndexJobStatus.RUNNING.name(), + IndexJobStatus.READY.name(), + IndexJobStatus.INITIALIZING.name()); public static class ReindexingException extends RuntimeException { public ReindexingException(String message) { @@ -36,17 +37,6 @@ public ReindexingException(String message, Throwable cause) { } } - public static final Set TIME_SERIES_ENTITIES = - Set.of( - ReportData.ReportDataType.ENTITY_REPORT_DATA.value(), - ReportData.ReportDataType.RAW_COST_ANALYSIS_REPORT_DATA.value(), - ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(), - ReportData.ReportDataType.WEB_ANALYTIC_ENTITY_VIEW_REPORT_DATA.value(), - ReportData.ReportDataType.AGGREGATED_COST_ANALYSIS_REPORT_DATA.value(), - TEST_CASE_RESOLUTION_STATUS, - TEST_CASE_RESULT, - QUERY_COST_RECORD); - @Getter private EventPublisherJob jobData; private volatile ReindexingOrchestrator orchestrator; @@ -57,7 +47,10 @@ public SearchIndexApp(CollectionDAO collectionDAO, SearchRepository searchReposi @Override public void init(App app) { super.init(app); - jobData = JsonUtils.convertValue(app.getAppConfiguration(), EventPublisherJob.class); + Map appConfig = + SearchIndexAppConfigSanitizer.copyWithoutRemovedOptions( + JsonUtils.getMap(app.getAppConfiguration())); + jobData = JsonUtils.convertValue(appConfig, EventPublisherJob.class); } @Override @@ -115,6 +108,7 @@ private void updateRunRecordToStopped() { run -> { run.withStatus(AppRunRecord.Status.STOPPED); run.withEndTime(System.currentTimeMillis()); + SearchIndexAppConfigSanitizer.removeRemovedOptions(run.getConfig()); 
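SearchIndexAppConfigSanitizer (its full source appears further down in this diff) is threaded through every path that reads or persists app configuration, so the two retired options can no longer round-trip out of old run records. Its contract in miniature, as a standalone sketch rather than the class itself:

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Standalone illustration of the sanitizer's contract: retired keys are dropped,
// other keys pass through, and the copying variant leaves its input untouched.
class SanitizerContractSketch {
  public static void main(String[] args) {
    Map<String, Object> stored = new LinkedHashMap<>();
    stored.put("recreateIndex", true);           // retired option from an older release
    stored.put("useDistributedIndexing", false); // retired option from an older release
    stored.put("batchSize", 100);                // still-valid option

    Map<String, Object> sanitized = new LinkedHashMap<>(stored); // copy, do not mutate
    sanitized.remove("recreateIndex");
    sanitized.remove("useDistributedIndexing");

    System.out.println(sanitized); // {batchSize=100}
    System.out.println(stored);    // caller's original map is left intact
  }
}
```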
appRepository.updateAppStatus(app.getId(), run); LOG.info("Updated app run record to STOPPED for {}", app.getName()); }); @@ -132,9 +126,7 @@ public void uninstall() { private void purgeSearchIndexTables() { List activeJobs = - collectionDAO - .searchIndexJobDAO() - .findByStatuses(List.of("RUNNING", "READY", "INITIALIZING")); + collectionDAO.searchIndexJobDAO().findByStatuses(ACTIVE_DISTRIBUTED_JOB_STATUSES); if (!activeJobs.isEmpty()) { LOG.warn( "Uninstalling SearchIndexApp while {} distributed job(s) are still active. " @@ -147,7 +139,7 @@ private void purgeSearchIndexTables() { .searchIndexJobDAO() .update( job.id(), - "STOPPED", + IndexJobStatus.STOPPED.name(), job.processedRecords(), job.successRecords(), job.failedRecords(), @@ -166,7 +158,7 @@ private void purgeSearchIndexTables() { () -> collectionDAO.searchIndexPartitionDAO().deleteAll(), () -> collectionDAO.searchIndexServerStatsDAO().deleteAll(), () -> collectionDAO.searchIndexFailureDAO().deleteAll(), - () -> collectionDAO.searchReindexLockDAO().delete("SEARCH_REINDEX_LOCK"), + () -> collectionDAO.searchReindexLockDAO().delete(REINDEX_LOCK_KEY), () -> collectionDAO.searchIndexJobDAO().deleteAll(), () -> { App app = getApp(); @@ -185,7 +177,9 @@ private void purgeSearchIndexTables() { @Override protected void validateConfig(Map appConfig) { try { - JsonUtils.convertValue(appConfig, EventPublisherJob.class); + JsonUtils.convertValue( + SearchIndexAppConfigSanitizer.copyWithoutRemovedOptions(appConfig), + EventPublisherJob.class); } catch (IllegalArgumentException e) { throw AppException.byMessage( Response.Status.BAD_REQUEST, "Invalid App Configuration: " + e.getMessage()); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexAppConfigSanitizer.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexAppConfigSanitizer.java new file mode 100644 index 000000000000..054edc1b65a7 --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexAppConfigSanitizer.java @@ -0,0 +1,28 @@ +package org.openmetadata.service.apps.bundles.searchIndex; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +final class SearchIndexAppConfigSanitizer { + private static final Set REMOVED_OPTIONS = + Set.of("recreateIndex", "useDistributedIndexing"); + + private SearchIndexAppConfigSanitizer() {} + + static Map copyWithoutRemovedOptions(Map config) { + if (config == null) { + return config; + } + Map sanitized = new LinkedHashMap<>(config); + removeRemovedOptions(sanitized); + return sanitized; + } + + static void removeRemovedOptions(Map config) { + if (config == null || config.isEmpty()) { + return; + } + REMOVED_OPTIONS.forEach(config::remove); + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexEntityTypes.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexEntityTypes.java new file mode 100644 index 000000000000..c83329a77bee --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexEntityTypes.java @@ -0,0 +1,63 @@ +/* + * Copyright 2024 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.openmetadata.service.apps.bundles.searchIndex; + +import static org.openmetadata.service.Entity.QUERY_COST_RECORD; +import static org.openmetadata.service.Entity.TEST_CASE_RESOLUTION_STATUS; +import static org.openmetadata.service.Entity.TEST_CASE_RESULT; + +import java.util.LinkedHashSet; +import java.util.Set; +import org.openmetadata.schema.analytics.ReportData; + +public final class SearchIndexEntityTypes { + public static final String ALL = "all"; + public static final String QUERY_COST_RESULT = "queryCostResult"; + + public static final Set TIME_SERIES_ENTITIES = + Set.of( + ReportData.ReportDataType.ENTITY_REPORT_DATA.value(), + ReportData.ReportDataType.RAW_COST_ANALYSIS_REPORT_DATA.value(), + ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(), + ReportData.ReportDataType.WEB_ANALYTIC_ENTITY_VIEW_REPORT_DATA.value(), + ReportData.ReportDataType.AGGREGATED_COST_ANALYSIS_REPORT_DATA.value(), + TEST_CASE_RESOLUTION_STATUS, + TEST_CASE_RESULT, + QUERY_COST_RECORD); + + private SearchIndexEntityTypes() {} + + public static String normalizeEntityType(String entityType) { + return QUERY_COST_RESULT.equals(entityType) ? QUERY_COST_RECORD : entityType; + } + + public static Set normalizeEntityTypes(Set entityTypes) { + if (entityTypes == null || entityTypes.isEmpty()) { + return entityTypes; + } + Set normalizedEntityTypes = new LinkedHashSet<>(); + for (String entityType : entityTypes) { + normalizedEntityTypes.add(normalizeEntityType(entityType)); + } + return normalizedEntityTypes; + } + + public static boolean isTimeSeriesEntity(String entityType) { + return TIME_SERIES_ENTITIES.contains(normalizeEntityType(entityType)); + } + + public static boolean isDataInsightEntity(String entityType) { + return entityType != null && entityType.endsWith("ReportData"); + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexExecutor.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexExecutor.java deleted file mode 100644 index 60591421d4c7..000000000000 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexExecutor.java +++ /dev/null @@ -1,1950 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; -import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty; -import static org.openmetadata.service.Entity.QUERY_COST_RECORD; -import static org.openmetadata.service.Entity.TEST_CASE_RESOLUTION_STATUS; -import static org.openmetadata.service.Entity.TEST_CASE_RESULT; -import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.RECREATE_CONTEXT; -import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.TARGET_INDEX_KEY; -import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.isDataInsightIndex; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; 
-import java.util.Set; -import java.util.UUID; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Phaser; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; -import lombok.Getter; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.exception.ExceptionUtils; -import org.openmetadata.schema.EntityInterface; -import org.openmetadata.schema.EntityTimeSeriesInterface; -import org.openmetadata.schema.analytics.ReportData; -import org.openmetadata.schema.system.EntityStats; -import org.openmetadata.schema.system.IndexingError; -import org.openmetadata.schema.system.Stats; -import org.openmetadata.schema.system.StepStats; -import org.openmetadata.schema.type.Include; -import org.openmetadata.schema.utils.ResultList; -import org.openmetadata.service.Entity; -import org.openmetadata.service.apps.bundles.searchIndex.stats.EntityStatsTracker; -import org.openmetadata.service.apps.bundles.searchIndex.stats.JobStatsManager; -import org.openmetadata.service.apps.bundles.searchIndex.stats.StageStatsTracker; -import org.openmetadata.service.exception.SearchIndexException; -import org.openmetadata.service.jdbi3.BoundedListFilter; -import org.openmetadata.service.jdbi3.CollectionDAO; -import org.openmetadata.service.jdbi3.EntityRepository; -import org.openmetadata.service.jdbi3.EntityTimeSeriesRepository; -import org.openmetadata.service.jdbi3.ListFilter; -import org.openmetadata.service.search.DefaultRecreateHandler; -import org.openmetadata.service.search.EntityReindexContext; -import org.openmetadata.service.search.RecreateIndexHandler; -import org.openmetadata.service.search.ReindexContext; -import org.openmetadata.service.search.SearchRepository; -import org.openmetadata.service.util.FullyQualifiedName; -import org.openmetadata.service.util.RestUtil; -import org.openmetadata.service.workflows.interfaces.Source; -import org.openmetadata.service.workflows.searchIndex.PaginatedEntitiesSource; -import org.openmetadata.service.workflows.searchIndex.PaginatedEntityTimeSeriesSource; -import org.slf4j.MDC; - -/** - * Core reindexing executor that handles entity indexing without any Quartz dependencies. Can be - * used by: - * - *
- * <ul>
- *   <li>SearchIndexApp (Quartz integration)
- *   <li>CLI tools
- *   <li>REST API endpoints
- *   <li>Unit tests
- * </ul>
- *
- * <p>
Uses ReindexingProgressListener for extensible progress reporting. - */ -@Slf4j -public class SearchIndexExecutor implements AutoCloseable { - - private static final String ALL = "all"; - private static final String POISON_PILL = "__POISON_PILL__"; - private static final int DEFAULT_QUEUE_SIZE = 20000; - private static final String RECREATE_INDEX = "recreateIndex"; - private static final String ENTITY_TYPE_KEY = "entityType"; - private static final String QUERY_COST_RESULT_INCORRECT = "queryCostResult"; - private static final String QUERY_COST_RESULT_WARNING = - "Found incorrect entity type 'queryCostResult', correcting to 'queryCostRecord'"; - - private static final int AVAILABLE_PROCESSORS = Runtime.getRuntime().availableProcessors(); - private static final int MAX_READERS_PER_ENTITY = 5; - private static final int MAX_PRODUCER_THREADS = Math.min(20, AVAILABLE_PROCESSORS * 2); - private static final int MAX_CONSUMER_THREADS = Math.min(20, AVAILABLE_PROCESSORS * 2); - private static final int MAX_TOTAL_THREADS = Math.min(50, AVAILABLE_PROCESSORS * 4); - - public static final Set TIME_SERIES_ENTITIES = - Set.of( - ReportData.ReportDataType.ENTITY_REPORT_DATA.value(), - ReportData.ReportDataType.RAW_COST_ANALYSIS_REPORT_DATA.value(), - ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(), - ReportData.ReportDataType.WEB_ANALYTIC_ENTITY_VIEW_REPORT_DATA.value(), - ReportData.ReportDataType.AGGREGATED_COST_ANALYSIS_REPORT_DATA.value(), - TEST_CASE_RESOLUTION_STATUS, - TEST_CASE_RESULT, - QUERY_COST_RECORD); - - private final CollectionDAO collectionDAO; - private final SearchRepository searchRepository; - private final CompositeProgressListener listeners; - private final AtomicBoolean stopped = new AtomicBoolean(false); - private final AtomicBoolean sinkClosed = new AtomicBoolean(false); - - private BulkSink searchIndexSink; - private RecreateIndexHandler recreateIndexHandler; - private ReindexContext recreateContext; - private ExecutorService producerExecutor; - private ExecutorService consumerExecutor; - private ExecutorService jobExecutor; - private BlockingQueue> taskQueue; - private final AtomicBoolean producersDone = new AtomicBoolean(false); - - @Getter private final AtomicReference stats = new AtomicReference<>(); - private final AtomicReference batchSize = new AtomicReference<>(100); - - private ReindexingConfiguration config; - private ReindexingJobContext context; - private long startTime; - private IndexingFailureRecorder failureRecorder; - private JobStatsManager statsManager; - private final Map entityBatchCounters = new ConcurrentHashMap<>(); - private final Map entityBatchFailures = new ConcurrentHashMap<>(); - private final Set promotedEntities = ConcurrentHashMap.newKeySet(); - private final Map sinkTrackers = new ConcurrentHashMap<>(); - private final Map> contextDataCache = new ConcurrentHashMap<>(); - private static final long SINK_SYNC_INTERVAL_MS = 2000; - private final AtomicLong lastSinkSyncTime = new AtomicLong(0); - - record IndexingTask(String entityType, ResultList entities, int offset, int retryCount) { - IndexingTask(String entityType, ResultList entities, int offset) { - this(entityType, entities, offset, 0); - } - } - - record ThreadConfiguration(int numProducers, int numConsumers) {} - - @FunctionalInterface - interface KeysetBatchReader { - ResultList readNextKeyset(String cursor) throws SearchIndexException; - } - - static class MemoryInfo { - final long maxMemory; - final long usedMemory; - final double usageRatio; - - MemoryInfo() { - 
Runtime runtime = Runtime.getRuntime(); - this.maxMemory = runtime.maxMemory(); - long totalMemory = runtime.totalMemory(); - long freeMemory = runtime.freeMemory(); - this.usedMemory = totalMemory - freeMemory; - this.usageRatio = (double) usedMemory / maxMemory; - } - } - - public SearchIndexExecutor(CollectionDAO collectionDAO, SearchRepository searchRepository) { - this.collectionDAO = collectionDAO; - this.searchRepository = searchRepository; - this.listeners = new CompositeProgressListener(); - } - - private EntityStatsTracker getTracker(String entityType) { - return statsManager != null ? statsManager.getTracker(entityType) : null; - } - - private void initStatsManager() { - if (statsManager == null && context != null) { - String jobId = context.getJobId().toString(); - String serverId = - org.openmetadata - .service - .apps - .bundles - .searchIndex - .distributed - .ServerIdentityResolver - .getInstance() - .getServerId(); - statsManager = new JobStatsManager(jobId, serverId, collectionDAO); - } - } - - public SearchIndexExecutor addListener(ReindexingProgressListener listener) { - listeners.addListener(listener); - return this; - } - - public SearchIndexExecutor removeListener(ReindexingProgressListener listener) { - listeners.removeListener(listener); - return this; - } - - /** - * Execute reindexing with the given configuration. - * - * @param config The reindexing configuration - * @param context The job context - * @return ExecutionResult with final stats - */ - public ExecutionResult execute(ReindexingConfiguration config, ReindexingJobContext context) { - this.config = config; - this.context = context; - this.startTime = System.currentTimeMillis(); - initializeState(); - - listeners.onJobStarted(context); - - try { - return executeSingleServer(); - } catch (Exception e) { - LOG.error("Reindexing failed", e); - listeners.onJobFailed(stats.get(), e); - return ExecutionResult.fromStats(stats.get(), ExecutionResult.Status.FAILED, startTime); - } - } - - private void initializeState() { - stopped.set(false); - sinkClosed.set(false); - recreateContext = null; - producersDone.set(false); - entityBatchCounters.clear(); - entityBatchFailures.clear(); - promotedEntities.clear(); - sinkTrackers.clear(); - contextDataCache.clear(); - lastSinkSyncTime.set(0); - initStatsManager(); - } - - private ExecutionResult executeSingleServer() throws Exception { - Set entities = expandEntities(config.entities()); - batchSize.set(config.batchSize()); - - listeners.onJobConfigured(context, config); - - stats.set(initializeTotalRecords(entities)); - - String serverId = - org.openmetadata - .service - .apps - .bundles - .searchIndex - .distributed - .ServerIdentityResolver - .getInstance() - .getServerId(); - String jobId = - context.getJobId() != null ? 
context.getJobId().toString() : UUID.randomUUID().toString(); - this.failureRecorder = new IndexingFailureRecorder(collectionDAO, jobId, serverId); - cleanupOldFailures(); - - initializeSink(config); - - if (config.recreateIndex()) { - validateClusterCapacity(entities); - listeners.onIndexRecreationStarted(entities); - recreateContext = reCreateIndexes(entities); - } - - reIndexFromStartToEnd(entities); - closeSinkIfNeeded(); - // Promote anything yet to be promoted such as vector search indexes which is not part of - // entities set - finalizeReindex(); - - return buildResult(); - } - - private Set expandEntities(Set entities) { - if (entities.contains(ALL)) { - return getAll(); - } - return entities; - } - - private void validateClusterCapacity(Set entities) { - try { - SearchIndexClusterValidator validator = new SearchIndexClusterValidator(); - validator.validateCapacityForRecreate(searchRepository, entities); - } catch (InsufficientClusterCapacityException e) { - LOG.error("Cluster capacity check failed: {}", e.getMessage()); - throw e; - } catch (Exception e) { - LOG.warn("Failed to validate cluster capacity, proceeding with caution: {}", e.getMessage()); - } - } - - private void initializeSink(ReindexingConfiguration config) { - this.searchIndexSink = - searchRepository.createBulkSink( - config.batchSize(), config.maxConcurrentRequests(), config.payloadSize()); - this.recreateIndexHandler = searchRepository.createReindexHandler(); - - if (searchIndexSink != null) { - searchIndexSink.setFailureCallback(this::handleSinkFailure); - } - - LOG.debug("Initialized BulkSink with batch size: {}", config.batchSize()); - } - - private void handleSinkFailure( - String entityType, - String entityId, - String entityFqn, - String errorMessage, - IndexingFailureRecorder.FailureStage stage) { - if (failureRecorder != null) { - if (stage == IndexingFailureRecorder.FailureStage.PROCESS) { - failureRecorder.recordProcessFailure(entityType, entityId, entityFqn, errorMessage); - } else { - failureRecorder.recordSinkFailure(entityType, entityId, entityFqn, errorMessage); - } - } - } - - private void cleanupOldFailures() { - try { - long cutoffTime = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(30); - int deleted = collectionDAO.searchIndexFailureDAO().deleteOlderThan(cutoffTime); - if (deleted > 0) { - LOG.info("Cleaned up {} old failure records", deleted); - } - } catch (Exception e) { - LOG.warn("Failed to cleanup old failure records", e); - } - } - - private void reIndexFromStartToEnd(Set entities) throws InterruptedException { - long totalEntities = - stats.get() != null && stats.get().getJobStats() != null - ? stats.get().getJobStats().getTotalRecords() - : 0; - - ThreadConfiguration threadConfig = calculateThreadConfiguration(totalEntities); - int effectiveQueueSize = initializeQueueAndExecutors(threadConfig, entities.size()); - - LOG.info( - "Starting reindexing with {} producers, {} consumers, queue size {}", - threadConfig.numProducers(), - threadConfig.numConsumers(), - effectiveQueueSize); - - executeReindexing(threadConfig.numConsumers(), entities); - } - - private ThreadConfiguration calculateThreadConfiguration(long totalEntities) { - int numConsumers = - config.consumerThreads() > 0 ? Math.min(config.consumerThreads(), MAX_CONSUMER_THREADS) : 2; - int numProducers = - config.producerThreads() > 1 - ? 
Math.min(config.producerThreads(), MAX_PRODUCER_THREADS) - : Math.clamp((int) (totalEntities / 10000), 2, MAX_PRODUCER_THREADS); - - return adjustThreadsForLimit(numProducers, numConsumers); - } - - private ThreadConfiguration adjustThreadsForLimit(int numProducers, int numConsumers) { - int entityCount = config.entities() != null ? config.entities().size() : 0; - int totalThreads = numProducers + numConsumers + entityCount; - - if (totalThreads > MAX_TOTAL_THREADS) { - LOG.warn( - "Total thread count {} exceeds limit {}, reducing...", totalThreads, MAX_TOTAL_THREADS); - double ratio = (double) MAX_TOTAL_THREADS / totalThreads; - numProducers = Math.max(1, (int) (numProducers * ratio)); - numConsumers = Math.max(1, (int) (numConsumers * ratio)); - } - - return new ThreadConfiguration(numProducers, numConsumers); - } - - private int initializeQueueAndExecutors(ThreadConfiguration threadConfig, int entityCount) { - int queueSize = config.queueSize() > 0 ? config.queueSize() : DEFAULT_QUEUE_SIZE; - int effectiveQueueSize = calculateMemoryAwareQueueSize(queueSize); - - taskQueue = new LinkedBlockingQueue<>(effectiveQueueSize); - producersDone.set(false); - - String jobIdTag = MDC.get("reindexJobId"); - String threadPrefix = "reindex-" + (jobIdTag != null ? jobIdTag + "-" : ""); - - int maxJobThreads = - Math.max(1, MAX_TOTAL_THREADS - threadConfig.numProducers() - threadConfig.numConsumers()); - int cappedEntityCount = Math.min(entityCount, maxJobThreads); - jobExecutor = - Executors.newFixedThreadPool( - cappedEntityCount, - Thread.ofPlatform() - .name(threadPrefix + "job-", 0) - .priority(Thread.MIN_PRIORITY) - .factory()); - - int finalNumConsumers = Math.min(threadConfig.numConsumers(), MAX_CONSUMER_THREADS); - consumerExecutor = - Executors.newFixedThreadPool( - finalNumConsumers, - Thread.ofPlatform() - .name(threadPrefix + "consumer-", 0) - .priority(Thread.MIN_PRIORITY) - .factory()); - - producerExecutor = - Executors.newFixedThreadPool( - threadConfig.numProducers(), - Thread.ofPlatform() - .name(threadPrefix + "producer-", 0) - .priority(Thread.MIN_PRIORITY) - .factory()); - - return effectiveQueueSize; - } - - private int calculateMemoryAwareQueueSize(int requestedSize) { - MemoryInfo memInfo = new MemoryInfo(); - long estimatedEntitySize = 5 * 1024L; - long maxQueueMemory = (long) (memInfo.maxMemory * 0.25); - long memoryBasedLimitLong = maxQueueMemory / (estimatedEntitySize * batchSize.get()); - int memoryBasedLimit = (int) Math.max(1, Math.min(memoryBasedLimitLong, Integer.MAX_VALUE)); - return Math.min(requestedSize, memoryBasedLimit); - } - - private void executeReindexing(int numConsumers, Set entities) - throws InterruptedException { - CountDownLatch consumerLatch = startConsumerThreads(numConsumers); - - try { - processEntityReindex(entities); - signalConsumersToStop(numConsumers); - waitForConsumersToComplete(consumerLatch); - } catch (InterruptedException e) { - LOG.info("Reindexing interrupted - stopping immediately"); - stopped.set(true); - Thread.currentThread().interrupt(); - throw e; - } finally { - cleanupExecutors(); - } - } - - private CountDownLatch startConsumerThreads(int numConsumers) { - CountDownLatch consumerLatch = new CountDownLatch(numConsumers); - Map mdc = MDC.getCopyOfContextMap(); - for (int i = 0; i < numConsumers; i++) { - final int consumerId = i; - consumerExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - runConsumer(consumerId, consumerLatch); - } finally { - MDC.clear(); - } - }); - } - return consumerLatch; - } - 
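The memory-aware queue sizing above is easiest to see with concrete numbers. A minimal sketch of the same arithmetic, assuming a 4 GiB heap and the defaults shown earlier (5 KiB per entity, batch size 100, requested queue size 20000); this is an illustration, not code from this PR:

```java
public class QueueSizeExample {
  public static void main(String[] args) {
    long maxMemory = 4L * 1024 * 1024 * 1024; // assumed -Xmx4g heap
    int requestedSize = 20_000; // DEFAULT_QUEUE_SIZE
    int batchSize = 100;
    long estimatedEntitySize = 5 * 1024L; // 5 KiB per entity, as above

    long maxQueueMemory = (long) (maxMemory * 0.25); // the queue may use 25% of heap
    long memoryBasedLimit = maxQueueMemory / (estimatedEntitySize * batchSize);
    int effective = (int) Math.min(requestedSize, Math.max(1, memoryBasedLimit));

    System.out.println(effective); // 2097 - the memory cap, not the requested 20000, wins
  }
}
```

On a heap of this size the memory-based limit dominates, so the effective queue is roughly a tenth of the requested capacity.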
- private void runConsumer(int consumerId, CountDownLatch consumerLatch) { - LOG.debug("Consumer {} started", consumerId); - try { - while (!stopped.get()) { - try { - IndexingTask task = taskQueue.poll(200, TimeUnit.MILLISECONDS); - if (task == null) { - continue; - } - if (POISON_PILL.equals(task.entityType())) { - break; - } - processTask(task); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } - } - } finally { - LOG.debug("Consumer {} stopped", consumerId); - consumerLatch.countDown(); - } - } - - /** - * Process a single indexing task. - * - *

- * <p>Stats are tracked via EntityStatsTracker (one per entity type) which flushes to
- * search_index_server_stats table. Each stage tracks:
- *
- * <ul>
- *   <li>Reader: success/warnings/failed from ResultList
- *   <li>Process: success/failed during entity → search doc conversion (in BulkSink)
- *   <li>Sink: success/failed from ES/OS bulk response (in BulkSink)
- *   <li>Vector: success/failed for vector embeddings (in OpenSearchBulkSink)
- * </ul>
- */ - private void processTask(IndexingTask task) { - String entityType = task.entityType(); - ResultList entities = task.entities(); - Map contextData = createContextData(entityType); - EntityStatsTracker tracker = getTracker(entityType); - - // Stage 1: Reader stats (from source read) - int readerSuccessCount = listOrEmpty(entities.getData()).size(); - int readerFailedCount = listOrEmpty(entities.getErrors()).size(); - int readerWarningsCount = entities.getWarningsCount() != null ? entities.getWarningsCount() : 0; - - updateReaderStats(readerSuccessCount, readerFailedCount, readerWarningsCount); - if (tracker != null) { - tracker.recordReaderBatch(readerSuccessCount, readerFailedCount, readerWarningsCount); - } - - // Stage 2 & 3: Process + Sink handled by BulkSink via tracker passed in context - try { - writeEntitiesToSink(entityType, entities, contextData); - - StepStats currentEntityStats = createEntityStats(entities); - handleTaskSuccess(entityType, entities, currentEntityStats); - periodicSyncSinkStats(); - } catch (SearchIndexException e) { - handleSearchIndexException(entityType, entities, e); - } catch (Exception e) { - handleGenericException(entityType, entities, e); - } - } - - private Map createContextData(String entityType) { - return contextDataCache.computeIfAbsent( - entityType, - type -> { - Map contextData = new HashMap<>(); - contextData.put(ENTITY_TYPE_KEY, type); - contextData.put(RECREATE_INDEX, config.recreateIndex()); - contextData.put(RECREATE_CONTEXT, recreateContext); - contextData.put(BulkSink.STATS_TRACKER_CONTEXT_KEY, getSinkTracker(type)); - getTargetIndexForEntity(type) - .ifPresent(index -> contextData.put(TARGET_INDEX_KEY, index)); - return contextData; - }); - } - - private StageStatsTracker getSinkTracker(String entityType) { - if (context == null) { - return null; - } - return sinkTrackers.computeIfAbsent( - entityType, - et -> { - String jobId = context.getJobId().toString(); - String serverId = - org.openmetadata - .service - .apps - .bundles - .searchIndex - .distributed - .ServerIdentityResolver - .getInstance() - .getServerId(); - return new StageStatsTracker( - jobId, serverId, et, collectionDAO.searchIndexServerStatsDAO()); - }); - } - - private void writeEntitiesToSink( - String entityType, ResultList entities, Map contextData) throws Exception { - if (!TIME_SERIES_ENTITIES.contains(entityType)) { - @SuppressWarnings("unchecked") - List entityList = (List) entities.getData(); - searchIndexSink.write(entityList, contextData); - } else { - @SuppressWarnings("unchecked") - List entityList = - (List) entities.getData(); - searchIndexSink.write(entityList, contextData); - } - } - - private StepStats createEntityStats(ResultList entities) { - StepStats stepStats = new StepStats(); - stepStats.setSuccessRecords(listOrEmpty(entities.getData()).size()); - stepStats.setFailedRecords(listOrEmpty(entities.getErrors()).size()); - return stepStats; - } - - private void handleTaskSuccess( - String entityType, ResultList entities, StepStats currentEntityStats) { - if (entities.getErrors() != null && !entities.getErrors().isEmpty()) { - IndexingError error = - new IndexingError() - .withErrorSource(IndexingError.ErrorSource.READER) - .withSubmittedCount(batchSize.get()) - .withSuccessCount(entities.getData().size()) - .withFailedCount(entities.getErrors().size()) - .withMessage("Issues in Reading A Batch For Entities."); - listeners.onError(entityType, error, stats.get()); - } - - updateStats(entityType, currentEntityStats); - 
listeners.onProgressUpdate(stats.get(), context); - } - - private void handleSearchIndexException( - String entityType, ResultList entities, SearchIndexException e) { - if (!stopped.get()) { - IndexingError indexingError = e.getIndexingError(); - if (indexingError != null) { - listeners.onError(entityType, indexingError, stats.get()); - } else { - IndexingError error = createSinkError(e.getMessage()); - listeners.onError(entityType, error, stats.get()); - } - - syncSinkStatsFromBulkSink(); - - int dataSize = entities != null && entities.getData() != null ? entities.getData().size() : 0; - int readerErrors = entities != null ? listOrEmpty(entities.getErrors()).size() : 0; - StepStats failedStats = createFailedStats(indexingError, dataSize + readerErrors); - updateStats(entityType, failedStats); - } - LOG.error("Sink error for {}", entityType, e); - } - - private void handleGenericException(String entityType, ResultList entities, Exception e) { - if (!stopped.get()) { - IndexingError error = createSinkError(ExceptionUtils.getStackTrace(e)); - listeners.onError(entityType, error, stats.get()); - syncSinkStatsFromBulkSink(); - - int failedCount = - entities != null && entities.getData() != null ? entities.getData().size() : 0; - int readerErrors = entities != null ? listOrEmpty(entities.getErrors()).size() : 0; - StepStats failedStats = - new StepStats().withSuccessRecords(0).withFailedRecords(failedCount + readerErrors); - updateStats(entityType, failedStats); - } - LOG.error("Error for {}", entityType, e); - } - - private void signalConsumersToStop(int numConsumers) throws InterruptedException { - producersDone.set(true); - for (int i = 0; i < numConsumers; i++) { - taskQueue.put(new IndexingTask<>(POISON_PILL, null, -1)); - } - } - - private void waitForConsumersToComplete(CountDownLatch consumerLatch) - throws InterruptedException { - LOG.info("Waiting for consumers to complete..."); - consumerLatch.await(); - LOG.info("All consumers finished"); - } - - private void processEntityReindex(Set entities) throws InterruptedException { - // Use Phaser instead of pre-computed CountDownLatch to handle dynamic reader counts. - // Each entity type registers as a party, then dynamically registers its actual readers. - // This eliminates the batch-size-snapshot mismatch where auto-tune could desynchronize - // the pre-computed latch count from the actual number of readers created. 
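Condensed to its essentials, the lifecycle that comment describes looks like the following self-contained sketch; the entity names, reader counts, and class name are illustrative only, not code from this PR:

```java
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Phaser;

public class PhaserLifecycleSketch {
  public static void main(String[] args) {
    List<String> entityTypes = List.of("table", "topic", "dashboard");
    Phaser phaser = new Phaser(entityTypes.size()); // one party per entity coordinator
    ExecutorService pool = Executors.newCachedThreadPool();

    for (String type : entityTypes) {
      pool.submit(
          () -> {
            int readers = type.length() % 3 + 1; // reader count known only at runtime
            phaser.bulkRegister(readers); // register the actual readers as parties
            for (int i = 0; i < readers; i++) {
              pool.submit(
                  () -> {
                    try {
                      // ... read one slice of this entity type ...
                    } finally {
                      phaser.arriveAndDeregister(); // reader finished
                    }
                  });
            }
            phaser.arriveAndDeregister(); // coordinator party finished
          });
    }

    phaser.awaitAdvance(0); // returns once every dynamically registered party arrives
    pool.shutdown();
  }
}
```

Because each coordinator registers its readers before deregistering itself, the phase cannot advance until every reader has finished, which is the guarantee a pre-computed CountDownLatch loses once the reader count changes after the latch is sized.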
- List ordered = EntityPriority.sortByPriority(entities); - LOG.info("Entity processing order: {}", ordered); - Phaser producerPhaser = new Phaser(entities.size()); - Map mdc = MDC.getCopyOfContextMap(); - - for (String entityType : ordered) { - jobExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - processEntityType(entityType, producerPhaser); - } finally { - MDC.clear(); - } - }); - } - - int phase = 0; - while (!producerPhaser.isTerminated()) { - if (stopped.get() || Thread.currentThread().isInterrupted()) { - LOG.info("Stop signal received during reindexing"); - if (producerExecutor != null) producerExecutor.shutdownNow(); - if (jobExecutor != null) jobExecutor.shutdownNow(); - return; - } - try { - producerPhaser.awaitAdvanceInterruptibly(phase, 1, TimeUnit.SECONDS); - break; - } catch (TimeoutException e) { - // Continue checking stop signal - } - } - } - - private void processEntityType(String entityType, Phaser producerPhaser) { - try { - int fixedBatchSize = EntityBatchSizeEstimator.estimateBatchSize(entityType, batchSize.get()); - int totalEntityRecords = getTotalEntityRecords(entityType); - listeners.onEntityTypeStarted(entityType, totalEntityRecords); - - entityBatchFailures.put(entityType, new AtomicInteger(0)); - - if (totalEntityRecords > 0) { - int numReaders = - Math.min( - calculateNumberOfThreads(totalEntityRecords, fixedBatchSize), - MAX_READERS_PER_ENTITY); - entityBatchCounters.put(entityType, new AtomicInteger(numReaders)); - - // Dynamically register actual readers with the phaser - producerPhaser.bulkRegister(numReaders); - - try { - if (TIME_SERIES_ENTITIES.contains(entityType)) { - Long filterStartTs = null; - Long filterEndTs = null; - if (config != null) { - long startTs = config.getTimeSeriesStartTs(entityType); - if (startTs > 0) { - filterStartTs = startTs; - filterEndTs = System.currentTimeMillis(); - } - } - final Long tsStart = filterStartTs; - final Long tsEnd = filterEndTs; - submitReaders( - entityType, - totalEntityRecords, - fixedBatchSize, - numReaders, - producerPhaser, - () -> { - PaginatedEntityTimeSeriesSource source = - (tsStart != null) - ? new PaginatedEntityTimeSeriesSource( - entityType, - fixedBatchSize, - getSearchIndexFields(entityType), - totalEntityRecords, - tsStart, - tsEnd) - : new PaginatedEntityTimeSeriesSource( - entityType, - fixedBatchSize, - getSearchIndexFields(entityType), - totalEntityRecords); - return source::readWithCursor; - }, - (readers, total) -> { - List cursors = new ArrayList<>(); - int perReader = total / readers; - for (int i = 1; i < readers; i++) { - cursors.add(RestUtil.encodeCursor(String.valueOf(i * perReader))); - } - return cursors; - }); - } else { - PaginatedEntitiesSource entSource = - new PaginatedEntitiesSource( - entityType, - fixedBatchSize, - getSearchIndexFields(entityType), - totalEntityRecords); - submitEntityReaders( - entityType, - totalEntityRecords, - fixedBatchSize, - numReaders, - producerPhaser, - entSource::findBoundaryCursors); - } - } catch (Exception e) { - LOG.error( - "Failed to submit readers for {}, deregistering {} phaser parties", - entityType, - numReaders, - e); - for (int i = 0; i < numReaders; i++) { - producerPhaser.arriveAndDeregister(); - } - throw e; - } - } else { - entityBatchCounters.put(entityType, new AtomicInteger(1)); - promoteEntityIndexIfReady(entityType); - } - - StepStats entityStats = - stats.get() != null && stats.get().getEntityStats() != null - ? 
stats.get().getEntityStats().getAdditionalProperties().get(entityType) - : null; - listeners.onEntityTypeCompleted(entityType, entityStats); - } catch (Exception e) { - LOG.error("Error processing entity type {}", entityType, e); - } finally { - // Deregister the entity coordinator party - producerPhaser.arriveAndDeregister(); - } - } - - private void submitReaders( - String entityType, - int totalRecords, - int fixedBatchSize, - int numReaders, - Phaser producerPhaser, - java.util.function.Supplier readerFactory, - java.util.function.BiFunction> boundaryFinder) { - Map mdc = MDC.getCopyOfContextMap(); - if (numReaders == 1) { - KeysetBatchReader reader = readerFactory.get(); - producerExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - processKeysetBatches( - entityType, Integer.MAX_VALUE, fixedBatchSize, null, reader, producerPhaser); - } finally { - MDC.clear(); - } - }); - return; - } - - List boundaries = boundaryFinder.apply(numReaders, totalRecords); - int actualReaders = boundaries.size() + 1; - // Use ceiling division to avoid rounding-related entity loss at reader boundaries - int recordsPerReader = (totalRecords + actualReaders - 1) / actualReaders; - - if (actualReaders < numReaders) { - LOG.warn( - "Boundary discovery for {} returned {} cursors (expected {}), using {} readers", - entityType, - boundaries.size(), - numReaders - 1, - actualReaders); - entityBatchCounters.get(entityType).set(actualReaders); - // Deregister extra reader parties from the phaser - for (int j = 0; j < numReaders - actualReaders; j++) { - producerPhaser.arriveAndDeregister(); - } - } - - for (int i = 0; i < actualReaders; i++) { - String startCursor = (i == 0) ? null : boundaries.get(i - 1); - String endCursorForReader = (i < boundaries.size()) ? boundaries.get(i) : null; - int limit = (i == actualReaders - 1) ? 
Integer.MAX_VALUE : recordsPerReader; - KeysetBatchReader readerSource = readerFactory.get(); - final int readerLimit = limit; - final String readerEndCursor = endCursorForReader; - producerExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - processKeysetBatches( - entityType, - readerLimit, - fixedBatchSize, - startCursor, - readerSource, - producerPhaser, - readerEndCursor); - } finally { - MDC.clear(); - } - }); - } - } - - @SuppressWarnings("unchecked") - private void submitEntityReaders( - String entityType, - int totalRecords, - int fixedBatchSize, - int numReaders, - Phaser producerPhaser, - java.util.function.BiFunction> boundaryFinder) { - Map mdc = MDC.getCopyOfContextMap(); - if (numReaders == 1) { - PaginatedEntitiesSource source = - new PaginatedEntitiesSource( - entityType, fixedBatchSize, getSearchIndexFields(entityType), totalRecords); - producerExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - processKeysetBatches( - entityType, - Integer.MAX_VALUE, - fixedBatchSize, - null, - source::readNextKeyset, - producerPhaser); - } finally { - MDC.clear(); - } - }); - return; - } - - List boundaries = boundaryFinder.apply(numReaders, totalRecords); - int actualReaders = boundaries.size() + 1; - - if (actualReaders < numReaders) { - LOG.warn( - "Boundary discovery for {} returned {} cursors (expected {}), using {} readers", - entityType, - boundaries.size(), - numReaders - 1, - actualReaders); - entityBatchCounters.get(entityType).set(actualReaders); - for (int j = 0; j < numReaders - actualReaders; j++) { - producerPhaser.arriveAndDeregister(); - } - } - - for (int i = 0; i < actualReaders; i++) { - final String startCursor = (i == 0) ? null : boundaries.get(i - 1); - final boolean isLastReader = (i == actualReaders - 1); - - ListFilter filter; - if (isLastReader) { - filter = new ListFilter(Include.ALL); - } else { - String endBoundary = boundaries.get(i); - String decoded = RestUtil.decodeCursor(endBoundary); - Map cursorMap = - org.openmetadata.schema.utils.JsonUtils.readValue(decoded, Map.class); - filter = new BoundedListFilter(Include.ALL, cursorMap.get("name"), cursorMap.get("id")); - } - - final ListFilter readerFilter = filter; - producerExecutor.submit( - () -> { - if (mdc != null) MDC.setContextMap(mdc); - try { - PaginatedEntitiesSource source = - new PaginatedEntitiesSource( - entityType, - fixedBatchSize, - getSearchIndexFields(entityType), - totalRecords, - readerFilter); - processKeysetBatches( - entityType, - Integer.MAX_VALUE, - fixedBatchSize, - startCursor, - source::readNextKeyset, - producerPhaser); - } finally { - MDC.clear(); - } - }); - } - } - - private boolean hasReachedEndCursor(String afterCursor, String endCursor) { - if (endCursor == null || afterCursor == null) return false; - String decodedAfter = RestUtil.decodeCursor(afterCursor); - String decodedEnd = RestUtil.decodeCursor(endCursor); - if (decodedAfter == null || decodedEnd == null) return false; - - // Time-series cursors are numeric offsets - try { - int afterOffset = Integer.parseInt(decodedAfter); - int endOffset = Integer.parseInt(decodedEnd); - return afterOffset >= endOffset; - } catch (NumberFormatException ignored) { - // Not a numeric cursor, fall through to string comparison - } - return decodedAfter.equals(decodedEnd); - } - - private void processKeysetBatches( - String entityType, - int recordLimit, - int fixedBatchSize, - String startCursor, - KeysetBatchReader batchReader, - Phaser producerPhaser) { - processKeysetBatches( 
- entityType, recordLimit, fixedBatchSize, startCursor, batchReader, producerPhaser, null); - } - - private void processKeysetBatches( - String entityType, - int recordLimit, - int fixedBatchSize, - String startCursor, - KeysetBatchReader batchReader, - Phaser producerPhaser, - String endCursor) { - // Bypass the Redis-backed entity cache for the duration of this reader. Reindex never - // re-reads the same entity, so the cache hit rate is ~0; every relationship lookup pays a - // cache round-trip we don't need, and on an unhealthy Redis the indexer crawls because each - // miss pays a 300ms timeout. See {@link org.openmetadata.service.cache.EntityCacheBypass}. - try (org.openmetadata.service.cache.EntityCacheBypass.Handle ignored = - org.openmetadata.service.cache.EntityCacheBypass.skip()) { - processKeysetBatchesInternal( - entityType, - recordLimit, - fixedBatchSize, - startCursor, - batchReader, - producerPhaser, - endCursor); - } - } - - private void processKeysetBatchesInternal( - String entityType, - int recordLimit, - int fixedBatchSize, - String startCursor, - KeysetBatchReader batchReader, - Phaser producerPhaser, - String endCursor) { - boolean hadFailure = false; - try { - String keysetCursor = startCursor; - int processed = 0; - - while (processed < recordLimit && !stopped.get()) { - long backpressureWaitStart = System.currentTimeMillis(); - AdaptiveBackoff backoff = new AdaptiveBackoff(50, 2000); - while (isBackpressureActive()) { - if (stopped.get()) { - return; - } - long elapsed = System.currentTimeMillis() - backpressureWaitStart; - if (elapsed > 15_000) { - LOG.warn("Backpressure wait timeout for {}, proceeding anyway", entityType); - break; - } - Thread.sleep(backoff.nextDelay()); - } - - try { - ResultList result = readWithRetry(batchReader, keysetCursor, entityType); - if (result == null || result.getData().isEmpty()) { - LOG.debug( - "Reader for {} exhausted at processed={} of limit={} (empty result)", - entityType, - processed, - recordLimit); - break; - } - - if (!stopped.get()) { - IndexingTask task = new IndexingTask<>(entityType, result, processed); - taskQueue.put(task); - } - - int readerSuccessCount = result.getData().size(); - int readerFailedCount = listOrEmpty(result.getErrors()).size(); - int readerWarningsCount = - result.getWarningsCount() != null ? result.getWarningsCount() : 0; - processed += readerSuccessCount + readerFailedCount + readerWarningsCount; - keysetCursor = result.getPaging() != null ? result.getPaging().getAfter() : null; - if (keysetCursor == null) { - LOG.debug( - "Reader for {} exhausted at processed={} of limit={} (null cursor)", - entityType, - processed, - recordLimit); - break; - } - if (hasReachedEndCursor(keysetCursor, endCursor)) { - LOG.debug("Reader for {} reached end cursor at processed={}", entityType, processed); - break; - } - } catch (SearchIndexException e) { - hadFailure = true; - LOG.error("Error reading keyset batch for {}", entityType, e); - if (failureRecorder != null) { - failureRecorder.recordReaderFailure( - entityType, e.getMessage(), ExceptionUtils.getStackTrace(e)); - } - listeners.onError(entityType, e.getIndexingError(), stats.get()); - int failedCount = - e.getIndexingError() != null && e.getIndexingError().getFailedCount() != null - ? 
e.getIndexingError().getFailedCount() - : fixedBatchSize; - updateReaderStats(0, failedCount, 0); - updateStats( - entityType, new StepStats().withSuccessRecords(0).withFailedRecords(failedCount)); - processed += fixedBatchSize; - } - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("Interrupted during keyset processing of {}", entityType); - } catch (Exception e) { - hadFailure = true; - if (!stopped.get()) { - LOG.error("Error in keyset processing for {}", entityType, e); - } - } finally { - producerPhaser.arriveAndDeregister(); - if (hadFailure) { - AtomicInteger failures = entityBatchFailures.get(entityType); - if (failures != null) { - failures.incrementAndGet(); - } - } - AtomicInteger remaining = entityBatchCounters.get(entityType); - if (remaining != null && remaining.decrementAndGet() == 0) { - promoteEntityIndexIfReady(entityType); - } - } - } - - private void processBatch(String entityType, int currentOffset, CountDownLatch producerLatch) { - // See note on processKeysetBatches: bypass the entity cache for reindex reader threads. - try (org.openmetadata.service.cache.EntityCacheBypass.Handle ignored = - org.openmetadata.service.cache.EntityCacheBypass.skip()) { - processBatchInternal(entityType, currentOffset, producerLatch); - } - } - - private void processBatchInternal( - String entityType, int currentOffset, CountDownLatch producerLatch) { - boolean batchHadFailure = false; - try { - if (stopped.get()) { - return; - } - - long backpressureWaitStart = System.currentTimeMillis(); - AdaptiveBackoff backoff = new AdaptiveBackoff(50, 2000); - while (isBackpressureActive()) { - if (stopped.get()) { - return; - } - long elapsed = System.currentTimeMillis() - backpressureWaitStart; - if (elapsed > 15_000) { - LOG.warn( - "Backpressure wait timeout for {} offset {}, proceeding anyway", - entityType, - currentOffset); - break; - } - Thread.sleep(backoff.nextDelay()); - } - - Source source = createSource(entityType); - processReadTask(entityType, source, currentOffset); - } catch (Exception e) { - batchHadFailure = true; - if (!stopped.get()) { - LOG.error("Error processing batch for {}", entityType, e); - } - } finally { - producerLatch.countDown(); - // Track batch completion for per-entity promotion - if (batchHadFailure) { - AtomicInteger failures = entityBatchFailures.get(entityType); - if (failures != null) { - failures.incrementAndGet(); - } - } - AtomicInteger remaining = entityBatchCounters.get(entityType); - if (remaining != null && remaining.decrementAndGet() == 0) { - promoteEntityIndexIfReady(entityType); - } - } - } - - private void promoteEntityIndexIfReady(String entityType) { - if (recreateIndexHandler == null || recreateContext == null) { - return; - } - if (!config.recreateIndex()) { - return; - } - - if (!promotedEntities.add(entityType)) { - LOG.debug("Entity '{}' already promoted, skipping.", entityType); - return; - } - - AtomicInteger failures = entityBatchFailures.get(entityType); - boolean entitySuccess = failures == null || failures.get() == 0; - - Optional stagedIndexOpt = recreateContext.getStagedIndex(entityType); - if (stagedIndexOpt.isEmpty()) { - LOG.debug("No staged index found for entity '{}', skipping promotion.", entityType); - promotedEntities.remove(entityType); - return; - } - - EntityReindexContext entityContext = buildEntityReindexContext(entityType); - if (recreateIndexHandler instanceof DefaultRecreateHandler defaultHandler) { - LOG.info( - "Promoting index for entity '{}' (success={}, stagedIndex={})", - 
entityType, - entitySuccess, - stagedIndexOpt.get()); - defaultHandler.promoteEntityIndex(entityContext, entitySuccess); - - // When promoting the table index, also promote the column index since columns - // are indexed as part of table processing - if (Entity.TABLE.equals(entityType)) { - promoteColumnIndex(defaultHandler, entitySuccess); - } - } - } - - private void promoteColumnIndex(DefaultRecreateHandler handler, boolean tableSuccess) { - if (recreateContext == null) { - return; - } - Optional columnStagedIndex = recreateContext.getStagedIndex(Entity.TABLE_COLUMN); - if (columnStagedIndex.isEmpty()) { - return; - } - EntityReindexContext columnContext = buildEntityReindexContext(Entity.TABLE_COLUMN); - LOG.info( - "Promoting column index (success={}, stagedIndex={})", - tableSuccess, - columnStagedIndex.get()); - handler.promoteEntityIndex(columnContext, tableSuccess); - promotedEntities.add(Entity.TABLE_COLUMN); - } - - private ResultList readWithRetry( - KeysetBatchReader batchReader, String keysetCursor, String entityType) - throws SearchIndexException, InterruptedException { - int maxRetryAttempts = 3; - long retryBackoffMs = 500; - for (int attempt = 0; attempt <= maxRetryAttempts; attempt++) { - try { - return batchReader.readNextKeyset(keysetCursor); - } catch (SearchIndexException e) { - if (attempt >= maxRetryAttempts || !isTransientReadError(e)) { - throw e; - } - long backoffDelay = retryBackoffMs * (1L << attempt); - LOG.warn( - "Transient read failure for {} (attempt {}/{}), retrying in {}ms", - entityType, - attempt + 1, - maxRetryAttempts, - backoffDelay); - Thread.sleep(Math.min(backoffDelay, 10_000)); - } - } - return null; - } - - private boolean isTransientReadError(SearchIndexException e) { - String msg = e.getMessage(); - if (msg == null) { - msg = ""; - } - String lower = msg.toLowerCase(); - return lower.contains("timeout") - || lower.contains("connection") - || lower.contains("pool exhausted") - || lower.contains("connectexception") - || lower.contains("sockettimeoutexception") - || lower.contains("remotetransportexception"); - } - - private boolean isBackpressureActive() { - if (taskQueue != null) { - int size = taskQueue.size(); - int capacity = size + taskQueue.remainingCapacity(); - if (capacity > 0) { - int fillPercent = size * 100 / capacity; - ReindexingMetrics metrics = ReindexingMetrics.getInstance(); - if (metrics != null) { - metrics.updateQueueFillRatio(fillPercent); - } - return fillPercent > 90; - } - } - return false; - } - - private void processReadTask(String entityType, Source source, int offset) { - try { - if (stopped.get()) { - return; - } - - Object resultList = source.readWithCursor(RestUtil.encodeCursor(String.valueOf(offset))); - if (stopped.get()) { - return; - } - - if (resultList != null) { - ResultList entities = extractEntities(entityType, resultList); - if (!nullOrEmpty(entities.getData()) && !stopped.get()) { - IndexingTask task = new IndexingTask<>(entityType, entities, offset); - taskQueue.put(task); - } - } - } catch (SearchIndexException e) { - LOG.error("Error reading source for {}", entityType, e); - if (!stopped.get()) { - if (failureRecorder != null) { - failureRecorder.recordReaderFailure( - entityType, e.getMessage(), ExceptionUtils.getStackTrace(e)); - } - - listeners.onError(entityType, e.getIndexingError(), stats.get()); - IndexingError indexingError = e.getIndexingError(); - int failedCount = - indexingError != null && indexingError.getFailedCount() != null - ? 
indexingError.getFailedCount() - : batchSize.get(); - updateReaderStats(0, failedCount, 0); - StepStats failedStats = - new StepStats().withSuccessRecords(0).withFailedRecords(failedCount); - updateStats(entityType, failedStats); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("Interrupted while queueing task for {}", entityType); - } - } - - private Source createSource(String entityType) { - String correctedEntityType = entityType; - if (QUERY_COST_RESULT_INCORRECT.equals(entityType)) { - LOG.warn(QUERY_COST_RESULT_WARNING); - correctedEntityType = QUERY_COST_RECORD; - } - - List searchIndexFields = getSearchIndexFields(correctedEntityType); - int knownTotal = getTotalEntityRecords(correctedEntityType); - - if (!TIME_SERIES_ENTITIES.contains(correctedEntityType)) { - return new PaginatedEntitiesSource( - correctedEntityType, batchSize.get(), searchIndexFields, knownTotal); - } else { - if (config != null) { - long startTs = config.getTimeSeriesStartTs(correctedEntityType); - if (startTs > 0) { - return new PaginatedEntityTimeSeriesSource( - correctedEntityType, - batchSize.get(), - searchIndexFields, - knownTotal, - startTs, - System.currentTimeMillis()); - } - } - return new PaginatedEntityTimeSeriesSource( - correctedEntityType, batchSize.get(), searchIndexFields, knownTotal); - } - } - - private List getSearchIndexFields(String entityType) { - // Delegate to the shared helper so single-server (this executor) and distributed - // (PartitionWorker) reindex paths request the same minimal field set. Otherwise - // setFieldsInBulk runs every registered fieldFetcher — including expensive ones like - // fetchAndSetOwns on Team/User — even though the search document drops most of them - // via getExcludedFields. PR #27723 originally fixed this for EntityReader; the - // executor was the missing piece. 
- return org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getSearchIndexFields( - entityType); - } - - @SuppressWarnings("unchecked") - private ResultList extractEntities(String entityType, Object resultList) { - if (!TIME_SERIES_ENTITIES.contains(entityType)) { - return ((ResultList) resultList); - } else { - return ((ResultList) resultList); - } - } - - private Optional getTargetIndexForEntity(String entityType) { - if (recreateContext == null) { - return Optional.empty(); - } - - Optional stagedIndex = recreateContext.getStagedIndex(entityType); - if (stagedIndex.isPresent()) { - return stagedIndex; - } - - if (QUERY_COST_RESULT_INCORRECT.equals(entityType)) { - return recreateContext.getStagedIndex(QUERY_COST_RECORD); - } - - return Optional.empty(); - } - - public Stats initializeTotalRecords(Set entities) { - Stats jobDataStats = new Stats(); - jobDataStats.setEntityStats(new EntityStats()); - - int total = 0; - for (String entityType : entities) { - int entityTotal = getEntityTotal(entityType); - total += entityTotal; - - StepStats entityStats = new StepStats(); - entityStats.setTotalRecords(entityTotal); - entityStats.setSuccessRecords(0); - entityStats.setFailedRecords(0); - - jobDataStats.getEntityStats().getAdditionalProperties().put(entityType, entityStats); - } - - StepStats jobStats = new StepStats(); - jobStats.setTotalRecords(total); - jobStats.setSuccessRecords(0); - jobStats.setFailedRecords(0); - jobDataStats.setJobStats(jobStats); - - StepStats readerStats = new StepStats(); - readerStats.setTotalRecords(total); - readerStats.setSuccessRecords(0); - readerStats.setFailedRecords(0); - readerStats.setWarningRecords(0); - jobDataStats.setReaderStats(readerStats); - - StepStats sinkStats = new StepStats(); - sinkStats.setTotalRecords(0); - sinkStats.setSuccessRecords(0); - sinkStats.setFailedRecords(0); - jobDataStats.setSinkStats(sinkStats); - - StepStats processStats = new StepStats(); - processStats.setTotalRecords(0); - processStats.setSuccessRecords(0); - processStats.setFailedRecords(0); - jobDataStats.setProcessStats(processStats); - - // Add a stats slot for TABLE_COLUMN since columns are indexed as part of table processing - // but TABLE_COLUMN is not a standalone entity in the entities set - if (entities.contains(Entity.TABLE) && !entities.contains(Entity.TABLE_COLUMN)) { - StepStats columnEntityStats = new StepStats(); - columnEntityStats.setTotalRecords(0); - columnEntityStats.setSuccessRecords(0); - columnEntityStats.setFailedRecords(0); - jobDataStats - .getEntityStats() - .getAdditionalProperties() - .put(Entity.TABLE_COLUMN, columnEntityStats); - LOG.info("Added TABLE_COLUMN stats slot for column indexing tracking"); - } - - return jobDataStats; - } - - private int getEntityTotal(String entityType) { - try { - String correctedEntityType = entityType; - if (QUERY_COST_RESULT_INCORRECT.equals(entityType)) { - LOG.warn(QUERY_COST_RESULT_WARNING); - correctedEntityType = QUERY_COST_RECORD; - } - - if (!TIME_SERIES_ENTITIES.contains(correctedEntityType)) { - EntityRepository repository = Entity.getEntityRepository(correctedEntityType); - return repository.getDao().listCount(new ListFilter(Include.ALL)); - } else { - EntityTimeSeriesRepository repository; - ListFilter listFilter = new ListFilter(null); - if (isDataInsightIndex(entityType)) { - listFilter.addQueryParam("entityFQNHash", FullyQualifiedName.buildHash(entityType)); - repository = Entity.getEntityTimeSeriesRepository(Entity.ENTITY_REPORT_DATA); - } else { - repository = 
Entity.getEntityTimeSeriesRepository(entityType); - } - if (config != null) { - long startTs = config.getTimeSeriesStartTs(correctedEntityType); - if (startTs > 0) { - long endTs = System.currentTimeMillis(); - return repository.getTimeSeriesDao().listCount(listFilter, startTs, endTs, false); - } - } - return repository.getTimeSeriesDao().listCount(listFilter); - } - } catch (Exception e) { - LOG.debug("Error getting total for '{}'", entityType, e); - return 0; - } - } - - private int getTotalEntityRecords(String entityType) { - if (stats.get() == null - || stats.get().getEntityStats() == null - || stats.get().getEntityStats().getAdditionalProperties() == null) { - return 0; - } - - StepStats entityStats = stats.get().getEntityStats().getAdditionalProperties().get(entityType); - if (entityStats != null) { - return entityStats.getTotalRecords() != null ? entityStats.getTotalRecords() : 0; - } - return 0; - } - - private int calculateNumberOfThreads(int totalEntityRecords, int fixedBatchSize) { - if (fixedBatchSize <= 0) return 1; - int mod = totalEntityRecords % fixedBatchSize; - if (mod == 0) { - return totalEntityRecords / fixedBatchSize; - } else { - return (totalEntityRecords / fixedBatchSize) + 1; - } - } - - // Stats is published once via stats.set(initializeTotalRecords(...)) and all subsequent - // mutations operate on that same mutable object under synchronized methods. - - synchronized void updateStats(String entityType, StepStats currentEntityStats) { - Stats jobDataStats = stats.get(); - if (jobDataStats == null) { - return; - } - - updateEntityStats(jobDataStats, entityType, currentEntityStats); - - // When processing tables, also update column stats from the sink - if (Entity.TABLE.equals(entityType) && searchIndexSink != null) { - updateColumnStatsFromSink(jobDataStats); - } - - updateJobStats(jobDataStats); - } - - private void updateColumnStatsFromSink(Stats jobDataStats) { - if (searchIndexSink == null || jobDataStats == null || jobDataStats.getEntityStats() == null) { - return; - } - StepStats columnStats = searchIndexSink.getColumnStats(); - if (columnStats != null && columnStats.getTotalRecords() > 0) { - StepStats existingColumnStats = - jobDataStats.getEntityStats().getAdditionalProperties().get(Entity.TABLE_COLUMN); - if (existingColumnStats != null) { - existingColumnStats.setTotalRecords(columnStats.getTotalRecords()); - existingColumnStats.setSuccessRecords(columnStats.getSuccessRecords()); - existingColumnStats.setFailedRecords(columnStats.getFailedRecords()); - } - } - } - - synchronized void updateReaderStats(int successCount, int failedCount, int warningsCount) { - Stats jobDataStats = stats.get(); - if (jobDataStats == null) { - return; - } - - StepStats readerStats = jobDataStats.getReaderStats(); - if (readerStats == null) { - readerStats = new StepStats(); - jobDataStats.setReaderStats(readerStats); - } - - int currentSuccess = - readerStats.getSuccessRecords() != null ? readerStats.getSuccessRecords() : 0; - int currentFailed = readerStats.getFailedRecords() != null ? readerStats.getFailedRecords() : 0; - int currentWarnings = - readerStats.getWarningRecords() != null ? 
readerStats.getWarningRecords() : 0; - - readerStats.setSuccessRecords(currentSuccess + successCount); - readerStats.setFailedRecords(currentFailed + failedCount); - readerStats.setWarningRecords(currentWarnings + warningsCount); - } - - synchronized void updateSinkTotalSubmitted(int submittedCount) { - Stats jobDataStats = stats.get(); - if (jobDataStats == null) { - return; - } - - StepStats sinkStats = jobDataStats.getSinkStats(); - if (sinkStats == null) { - sinkStats = new StepStats(); - sinkStats.setTotalRecords(0); - jobDataStats.setSinkStats(sinkStats); - } - - int currentTotal = sinkStats.getTotalRecords() != null ? sinkStats.getTotalRecords() : 0; - sinkStats.setTotalRecords(currentTotal + submittedCount); - } - - synchronized void syncSinkStatsFromBulkSink() { - if (searchIndexSink == null) { - return; - } - - Stats jobDataStats = stats.get(); - if (jobDataStats == null) { - return; - } - - StepStats bulkSinkStats = searchIndexSink.getStats(); - if (bulkSinkStats == null) { - return; - } - - StepStats sinkStats = jobDataStats.getSinkStats(); - if (sinkStats == null) { - sinkStats = new StepStats(); - jobDataStats.setSinkStats(sinkStats); - } - - sinkStats.setTotalRecords( - bulkSinkStats.getTotalRecords() != null ? bulkSinkStats.getTotalRecords() : 0); - sinkStats.setSuccessRecords( - bulkSinkStats.getSuccessRecords() != null ? bulkSinkStats.getSuccessRecords() : 0); - sinkStats.setFailedRecords( - bulkSinkStats.getFailedRecords() != null ? bulkSinkStats.getFailedRecords() : 0); - - // Sync vector stats if available - StepStats vectorStats = searchIndexSink.getVectorStats(); - if (vectorStats != null - && (vectorStats.getTotalRecords() != null && vectorStats.getTotalRecords() > 0)) { - jobDataStats.setVectorStats(vectorStats); - } - - // Sync process stats if available - StepStats processStats = searchIndexSink.getProcessStats(); - if (processStats != null) { - jobDataStats.setProcessStats(processStats); - } - } - - private void periodicSyncSinkStats() { - long now = System.currentTimeMillis(); - long last = lastSinkSyncTime.get(); - if (now - last >= SINK_SYNC_INTERVAL_MS && lastSinkSyncTime.compareAndSet(last, now)) { - syncSinkStatsFromBulkSink(); - } - } - - private void updateEntityStats(Stats statsObj, String entityType, StepStats currentEntityStats) { - if (statsObj.getEntityStats() == null - || statsObj.getEntityStats().getAdditionalProperties() == null) { - return; - } - - StepStats entityStats = statsObj.getEntityStats().getAdditionalProperties().get(entityType); - if (entityStats != null) { - entityStats.withSuccessRecords( - entityStats.getSuccessRecords() + currentEntityStats.getSuccessRecords()); - entityStats.withFailedRecords( - entityStats.getFailedRecords() + currentEntityStats.getFailedRecords()); - - int actual = entityStats.getSuccessRecords() + entityStats.getFailedRecords(); - if (actual > entityStats.getTotalRecords()) { - entityStats.setTotalRecords(actual); - } - } - } - - private void updateJobStats(Stats statsObj) { - StepStats jobStats = statsObj.getJobStats(); - if (jobStats == null || statsObj.getEntityStats() == null) { - return; - } - - int totalRecords = - statsObj.getEntityStats().getAdditionalProperties().entrySet().stream() - .filter(e -> !Entity.TABLE_COLUMN.equals(e.getKey())) - .mapToInt(e -> e.getValue().getTotalRecords()) - .sum(); - - int totalSuccess = - statsObj.getEntityStats().getAdditionalProperties().entrySet().stream() - .filter(e -> !Entity.TABLE_COLUMN.equals(e.getKey())) - .mapToInt(e -> e.getValue().getSuccessRecords()) - 
.sum(); - - int totalFailed = - statsObj.getEntityStats().getAdditionalProperties().entrySet().stream() - .filter(e -> !Entity.TABLE_COLUMN.equals(e.getKey())) - .mapToInt(e -> e.getValue().getFailedRecords()) - .sum(); - - jobStats - .withTotalRecords(totalRecords) - .withSuccessRecords(totalSuccess) - .withFailedRecords(totalFailed); - - StepStats readerStats = statsObj.getReaderStats(); - if (readerStats != null && totalRecords > readerStats.getTotalRecords()) { - readerStats.setTotalRecords(totalRecords); - } - } - - private IndexingError createSinkError(String message) { - return new IndexingError().withErrorSource(IndexingError.ErrorSource.SINK).withMessage(message); - } - - private StepStats createFailedStats(IndexingError indexingError, int dataSize) { - StepStats failedStats = new StepStats(); - failedStats.setSuccessRecords(indexingError != null ? indexingError.getSuccessCount() : 0); - failedStats.setFailedRecords(indexingError != null ? indexingError.getFailedCount() : dataSize); - return failedStats; - } - - private Set getAll() { - return new HashSet<>(searchRepository.getEntityIndexMap().keySet()); - } - - private ReindexContext reCreateIndexes(Set entities) { - if (recreateIndexHandler == null) { - return null; - } - return recreateIndexHandler.reCreateIndexes(entities); - } - - private void closeSinkIfNeeded() { - if (searchIndexSink != null && sinkClosed.compareAndSet(false, true)) { - int pendingVectorTasks = searchIndexSink.getPendingVectorTaskCount(); - if (pendingVectorTasks > 0) { - LOG.info( - "Waiting for {} pending vector embedding tasks to complete before closing", - pendingVectorTasks); - VectorCompletionResult vcResult = searchIndexSink.awaitVectorCompletionWithDetails(300); - LOG.info( - "Vector completion: completed={}, pending={}, waited={}ms", - vcResult.completed(), - vcResult.pendingTaskCount(), - vcResult.waitedMillis()); - } - - LOG.info("Forcing final flush of bulk processor and vector embeddings"); - searchIndexSink.close(); - syncSinkStatsFromBulkSink(); - } - } - - private ExecutionResult buildResult() { - if (failureRecorder != null) { - failureRecorder.flush(); - } - - syncSinkStatsFromBulkSink(); - updateColumnStatsFromSink(stats.get()); - - Stats currentStats = stats.get(); - if (currentStats != null) { - StatsReconciler.reconcile(currentStats); - } - - long endTime = System.currentTimeMillis(); - ExecutionResult.Status status = determineStatus(); - - if (status == ExecutionResult.Status.COMPLETED) { - listeners.onJobCompleted(stats.get(), endTime - startTime); - } else if (status == ExecutionResult.Status.COMPLETED_WITH_ERRORS) { - listeners.onJobCompletedWithErrors(stats.get(), endTime - startTime); - } else if (status == ExecutionResult.Status.STOPPED) { - listeners.onJobStopped(stats.get()); - } - - return ExecutionResult.fromStats(stats.get(), status, startTime); - } - - private ExecutionResult.Status determineStatus() { - if (stopped.get()) { - return ExecutionResult.Status.STOPPED; - } - - if (hasIncompleteProcessing()) { - return ExecutionResult.Status.COMPLETED_WITH_ERRORS; - } - - return ExecutionResult.Status.COMPLETED; - } - - private boolean hasIncompleteProcessing() { - Stats currentStats = stats.get(); - if (currentStats == null || currentStats.getJobStats() == null) { - return false; - } - - StepStats jobStats = currentStats.getJobStats(); - long failed = jobStats.getFailedRecords() != null ? jobStats.getFailedRecords() : 0; - long processed = jobStats.getSuccessRecords() != null ? 
jobStats.getSuccessRecords() : 0; - long total = jobStats.getTotalRecords() != null ? jobStats.getTotalRecords() : 0; - - return failed > 0 || (total > 0 && processed < total); - } - - public void stop() { - LOG.info("Stopping reindexing executor..."); - stopped.set(true); - producersDone.set(true); - - listeners.onJobStopped(stats.get()); - - if (searchIndexSink != null) { - LOG.info( - "Stopping executor: flushing sink ({} active bulk requests)", - searchIndexSink.getActiveBulkRequestCount()); - searchIndexSink.flushAndAwait(10); - } - - int dropped = taskQueue != null ? taskQueue.size() : 0; - if (dropped > 0) { - LOG.warn("Dropping {} queued tasks during shutdown", dropped); - } - - shutdownExecutor(producerExecutor, "producer"); - shutdownExecutor(jobExecutor, "job"); - - if (taskQueue != null) { - taskQueue.clear(); - for (int i = 0; i < MAX_CONSUMER_THREADS; i++) { - taskQueue.offer(new IndexingTask<>(POISON_PILL, null, -1)); - } - } - if (consumerExecutor != null && !consumerExecutor.isShutdown()) { - consumerExecutor.shutdown(); - try { - if (!consumerExecutor.awaitTermination(5, TimeUnit.SECONDS)) { - consumerExecutor.shutdownNow(); - LOG.warn("Consumer executor did not terminate within 5s, forced shutdown"); - } - } catch (InterruptedException e) { - consumerExecutor.shutdownNow(); - Thread.currentThread().interrupt(); - } - } - - LOG.info("Reindexing executor stopped"); - } - - public boolean isStopped() { - return stopped.get(); - } - - private void cleanupExecutors() { - if (!stopped.get()) { - shutdownExecutor(consumerExecutor, "consumer", 30, TimeUnit.SECONDS); - shutdownExecutor(jobExecutor, "job", 20, TimeUnit.SECONDS); - shutdownExecutor(producerExecutor, "producer", 1, TimeUnit.MINUTES); - } - } - - private void shutdownExecutor(ExecutorService executor, String name) { - if (executor != null && !executor.isShutdown()) { - LOG.info("Force shutting down {} executor", name); - List pendingTasks = executor.shutdownNow(); - LOG.info("Cancelled {} pending {} tasks", pendingTasks.size(), name); - } - } - - private void shutdownExecutor( - ExecutorService executor, String name, long timeout, TimeUnit unit) { - if (executor != null && !executor.isShutdown()) { - executor.shutdown(); - try { - if (!executor.awaitTermination(timeout, unit)) { - executor.shutdownNow(); - LOG.warn("{} did not terminate within timeout", name); - } - } catch (InterruptedException e) { - executor.shutdownNow(); - Thread.currentThread().interrupt(); - } - } - } - - private void cleanup() { - if (failureRecorder != null) { - try { - failureRecorder.close(); - } catch (Exception e) { - LOG.error("Error closing failure recorder", e); - } - } - - if (searchIndexSink != null && sinkClosed.compareAndSet(false, true)) { - try { - searchIndexSink.close(); - } catch (Exception e) { - LOG.error("Error closing search index sink", e); - } - } - - finalizeReindex(); - } - - private void finalizeReindex() { - if (recreateIndexHandler == null || recreateContext == null) { - return; - } - - try { - recreateContext - .getEntities() - .forEach( - entityType -> { - // Skip entities already promoted via per-entity promotion - if (promotedEntities.contains(entityType)) { - LOG.debug( - "Skipping finalizeReindex for entity '{}' - already promoted.", entityType); - return; - } - try { - AtomicInteger failures = entityBatchFailures.get(entityType); - boolean entitySuccess = - !stopped.get() && (failures == null || failures.get() == 0); - recreateIndexHandler.finalizeReindex( - buildEntityReindexContext(entityType), 
entitySuccess); - } catch (Exception ex) { - LOG.error("Failed to finalize reindex for {}", entityType, ex); - } - }); - } finally { - recreateContext = null; - promotedEntities.clear(); - } - } - - private EntityReindexContext buildEntityReindexContext(String entityType) { - return EntityReindexContext.builder() - .entityType(entityType) - .originalIndex(recreateContext.getOriginalIndex(entityType).orElse(null)) - .canonicalIndex(recreateContext.getCanonicalIndex(entityType).orElse(null)) - .activeIndex(recreateContext.getOriginalIndex(entityType).orElse(null)) - .stagedIndex(recreateContext.getStagedIndex(entityType).orElse(null)) - .canonicalAliases(recreateContext.getCanonicalAlias(entityType).orElse(null)) - .existingAliases(recreateContext.getExistingAliases(entityType)) - .parentAliases(new HashSet<>(listOrEmpty(recreateContext.getParentAliases(entityType)))) - .build(); - } - - @Override - public void close() { - if (statsManager != null) { - statsManager.flushAll(); - } - sinkTrackers.values().forEach(StageStatsTracker::flush); - stop(); - cleanup(); - } -} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SingleServerIndexingStrategy.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SingleServerIndexingStrategy.java deleted file mode 100644 index d347514bdb69..000000000000 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SingleServerIndexingStrategy.java +++ /dev/null @@ -1,41 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import java.util.Optional; -import org.openmetadata.schema.system.Stats; -import org.openmetadata.service.jdbi3.CollectionDAO; -import org.openmetadata.service.search.SearchRepository; - -public class SingleServerIndexingStrategy implements IndexingStrategy { - - private final SearchIndexExecutor executor; - - public SingleServerIndexingStrategy( - CollectionDAO collectionDAO, SearchRepository searchRepository) { - this.executor = new SearchIndexExecutor(collectionDAO, searchRepository); - } - - @Override - public void addListener(ReindexingProgressListener listener) { - executor.addListener(listener); - } - - @Override - public ExecutionResult execute(ReindexingConfiguration config, ReindexingJobContext context) { - return executor.execute(config, context); - } - - @Override - public Optional getStats() { - return Optional.ofNullable(executor.getStats().get()); - } - - @Override - public void stop() { - executor.stop(); - } - - @Override - public boolean isStopped() { - return executor.isStopped(); - } -} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DISTRIBUTED_INDEXING.md b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DISTRIBUTED_INDEXING.md index 3d22f758a985..354fb9ed2091 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DISTRIBUTED_INDEXING.md +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DISTRIBUTED_INDEXING.md @@ -250,23 +250,23 @@ WHERE lockKey = ? AND jobId = ? ## Configuration -Enable distributed indexing via the reindex API: +Distributed indexing is always enabled. 
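For reference, a run can be triggered by POSTing a configuration like the one below to the app trigger endpoint. A minimal sketch, assuming a server at `localhost:8585` with the standard `/api` base path and a bearer token in an `OM_TOKEN` environment variable (all illustrative, not part of this change):

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/** Sketch: trigger a SearchIndexingApplication run with an explicit entity list. */
public class TriggerReindexSketch {
  public static void main(String[] args) throws Exception {
    String body =
        "{\"entities\": [\"table\", \"database\"], \"batchSize\": 100, \"consumerThreads\": 4}";
    HttpRequest request =
        HttpRequest.newBuilder()
            // Assumed deployment URL; adjust host and base path for your environment.
            .uri(URI.create("http://localhost:8585/api/v1/apps/trigger/SearchIndexingApplication"))
            .header("Content-Type", "application/json")
            .header("Authorization", "Bearer " + System.getenv("OM_TOKEN"))
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build();
    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    // A 2xx status means the run was accepted; progress is reported on the app's run records.
    System.out.println("Trigger status: " + response.statusCode());
  }
}
```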
Tune the reindex API like so: ```json { "entities": ["table", "database", "topic", "dashboard"], - "recreateIndex": true, "batchSize": 100, - "consumerThreads": 4, - "useDistributedIndexing": true + "consumerThreads": 4 } ``` +Search indexing always writes to staged indexes and promotes aliases after successful processing so +live search indexes are not mutated during the bulk rebuild. + ### Configuration Options | Parameter | Default | Description | |-----------|---------|-------------| -| useDistributedIndexing | false | Enable distributed mode | | batchSize | 100 | Entities per batch | | consumerThreads | 4 | Worker threads per server | | maxConcurrentRequests | 100 | Concurrent ES/OS requests | diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContext.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContext.java index fd6677781744..6e05f162daec 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContext.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContext.java @@ -57,11 +57,6 @@ public UUID getAppId() { return job.getId(); } - @Override - public boolean isDistributed() { - return true; - } - @Override public String getSource() { return source; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifier.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifier.java index 7b0e26dd6da5..1f8b2f4b49e2 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifier.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifier.java @@ -18,13 +18,6 @@ /** * Interface for notifying servers about distributed job events. - * - *

<p>This abstraction allows for different notification mechanisms:
- *
- * <ul>
- *   <li>Redis Pub/Sub - instant push notifications when Redis is available
- *   <li>Database polling - fallback when Redis is not configured
- * </ul>
*/ public interface DistributedJobNotifier { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactory.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactory.java index b90685d0f8a2..df288fe0670c 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactory.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactory.java @@ -14,14 +14,10 @@ package org.openmetadata.service.apps.bundles.searchIndex.distributed; import lombok.extern.slf4j.Slf4j; -import org.openmetadata.service.cache.CacheConfig; import org.openmetadata.service.jdbi3.CollectionDAO; /** - * Factory for creating the appropriate DistributedJobNotifier based on configuration. - * - *
<p>
Uses Redis Pub/Sub when Redis is configured and available, otherwise falls back to database - * polling. + * Factory for creating the DistributedJobNotifier used by search indexing. */ @Slf4j public class DistributedJobNotifierFactory { @@ -31,42 +27,14 @@ private DistributedJobNotifierFactory() { } /** - * Create a DistributedJobNotifier based on the current configuration. + * Create a DistributedJobNotifier. * - * @param cacheConfig The cache configuration (contains Redis settings) * @param collectionDAO The DAO for database access * @param serverId The current server's ID - * @return The appropriate notifier implementation + * @return The notifier implementation */ - public static DistributedJobNotifier create( - CacheConfig cacheConfig, CollectionDAO collectionDAO, String serverId) { - - if (cacheConfig != null && cacheConfig.provider == CacheConfig.Provider.redis) { - // Redis is configured - try to use Redis Pub/Sub - if (isRedisConfigValid(cacheConfig)) { - LOG.info( - "Redis is configured - using Redis Pub/Sub for distributed job notifications (instant discovery)"); - return new RedisJobNotifier(cacheConfig, serverId); - } else { - LOG.warn( - "Redis is configured but URL is missing - falling back to database polling for job notifications"); - } - } - - LOG.info( - "Redis not configured - using database polling for distributed job notifications (30s discovery delay)"); + public static DistributedJobNotifier create(CollectionDAO collectionDAO, String serverId) { + LOG.info("Using database polling for distributed search indexing job discovery"); return new PollingJobNotifier(collectionDAO, serverId); } - - /** - * Check if Redis configuration is valid and complete. - * - * @param cacheConfig The cache configuration - * @return true if Redis can be used - */ - private static boolean isRedisConfigValid(CacheConfig cacheConfig) { - return cacheConfig.redis != null - && cacheConfig.redis.url != null - && !cacheConfig.redis.url.isEmpty(); - } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipant.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipant.java index c0dc8ce20b43..f2c407b0ce54 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipant.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipant.java @@ -25,9 +25,9 @@ import org.openmetadata.service.Entity; import org.openmetadata.service.apps.bundles.searchIndex.BulkSink; import org.openmetadata.service.apps.bundles.searchIndex.IndexingFailureRecorder; -import org.openmetadata.service.cache.CacheConfig; import org.openmetadata.service.jdbi3.AppRepository; import org.openmetadata.service.jdbi3.CollectionDAO; +import org.openmetadata.service.search.ReindexContext; import org.openmetadata.service.search.SearchClusterMetrics; import org.openmetadata.service.search.SearchRepository; @@ -39,12 +39,7 @@ * service runs on all servers and allows non-triggering servers to discover and participate in * active jobs. * - *

<p>Job discovery is handled by a {@link DistributedJobNotifier}:
- *
- * <ul>
- *   <li>When Redis is configured: Uses Redis Pub/Sub for instant notification
- *   <li>When Redis is not available: Falls back to database polling (30s interval)
- * </ul>
+ *
+ * <p>
Job discovery is handled by a {@link DistributedJobNotifier} backed by database polling. */ @Slf4j public class DistributedJobParticipant implements Managed { @@ -73,15 +68,12 @@ public class DistributedJobParticipant implements Managed { private volatile Thread participantThread; public DistributedJobParticipant( - CollectionDAO collectionDAO, - SearchRepository searchRepository, - String serverId, - CacheConfig cacheConfig) { + CollectionDAO collectionDAO, SearchRepository searchRepository, String serverId) { this( collectionDAO, searchRepository, serverId, - DistributedJobNotifierFactory.create(cacheConfig, collectionDAO, serverId)); + DistributedJobNotifierFactory.create(collectionDAO, serverId)); } /** @@ -111,7 +103,7 @@ public void start() { // Register callback to receive job start notifications notifier.onJobStarted(this::onJobDiscovered); - // Start the notifier (Redis subscription or polling) + // Start the notifier notifier.start(); // Start orphan job monitor to detect jobs left behind by crashed coordinators @@ -189,7 +181,16 @@ private void onJobDiscovered(UUID jobId) { // Check if there are pending partitions we can help with long pendingCount = coordinator.getPartitions(job.getId(), PartitionStatus.PENDING).size(); if (pendingCount == 0) { - LOG.debug("No pending partitions to process for job {}", job.getId()); + long processingCount = + coordinator.getPartitions(job.getId(), PartitionStatus.PROCESSING).size(); + long completedCount = + coordinator.getPartitions(job.getId(), PartitionStatus.COMPLETED).size(); + LOG.info( + "Discovered distributed job {} on server {}, but no pending partitions remain (processing={}, completed={}); not joining", + job.getId(), + serverId, + processingCount, + completedCount); return; } @@ -305,6 +306,12 @@ private void processJobPartitions(SearchIndexJob job) { DistributedJobStatsAggregator statsAggregator = null; AppRunRecordContext appCtx = null; try { + Optional stagedIndexContext = buildStagedIndexContext(job); + if (stagedIndexContext.isEmpty()) { + return; + } + ReindexContext reindexContext = stagedIndexContext.orElseThrow(); + appCtx = resolveAppRunRecordContext(); if (appCtx != null) { restoreAppRunRecordToRunning(appCtx.appId(), appCtx.startTime()); @@ -341,22 +348,6 @@ private void processJobPartitions(SearchIndexJob job) { ? 
job.getJobConfiguration().getBatchSize() : 100; - // Check if this job is doing index recreation - boolean recreateIndex = Boolean.TRUE.equals(job.getJobConfiguration().getRecreateIndex()); - org.openmetadata.service.search.ReindexContext recreateContext = null; - - if (recreateIndex && job.getStagedIndexMapping() != null) { - // Reconstruct context from job's staged index mapping - recreateContext = - org.openmetadata.service.search.ReindexContext.fromStagedIndexMapping( - job.getStagedIndexMapping()); - LOG.info( - "Participant using staged index mapping from job {}: {}", - job.getId(), - job.getStagedIndexMapping()); - } - - // Set up failure callback on bulk sink to record sink failures final IndexingFailureRecorder recorder = failureRecorder; bulkSink.setFailureCallback( (entityType, entityId, entityFqn, errorMessage, stage) -> { @@ -369,10 +360,8 @@ private void processJobPartitions(SearchIndexJob job) { } }); - // Create partition worker with recreate context and failure recorder PartitionWorker worker = - new PartitionWorker( - coordinator, bulkSink, batchSize, recreateContext, recreateIndex, failureRecorder); + new PartitionWorker(coordinator, bulkSink, batchSize, reindexContext, failureRecorder); int partitionsProcessed = 0; long totalReaderSuccess = 0; @@ -486,6 +475,21 @@ private void processJobPartitions(SearchIndexJob job) { } } + private Optional buildStagedIndexContext(SearchIndexJob job) { + if (job.getStagedIndexMapping() == null || job.getStagedIndexMapping().isEmpty()) { + LOG.warn( + "Skipping distributed reindex job {} on server {} because staged index mapping is missing", + job.getId(), + serverId); + return Optional.empty(); + } + LOG.info( + "Participant using staged index mapping from job {}: {}", + job.getId(), + job.getStagedIndexMapping()); + return Optional.of(ReindexContext.fromStagedIndexMapping(job.getStagedIndexMapping())); + } + /** Check if currently participating in a job. 
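* @return true if this server is currently processing partitions for a discovered job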
*/ public boolean isParticipating() { return participating.get(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexCoordinator.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexCoordinator.java index 91b30bb593b1..e0e22263b0ef 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexCoordinator.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexCoordinator.java @@ -29,6 +29,7 @@ import org.openmetadata.schema.utils.JsonUtils; import org.openmetadata.service.Entity; import org.openmetadata.service.apps.bundles.searchIndex.ReindexingConfiguration; +import org.openmetadata.service.apps.bundles.searchIndex.SearchIndexEntityTypes; import org.openmetadata.service.jdbi3.CollectionDAO; import org.openmetadata.service.jdbi3.CollectionDAO.SearchIndexJobDAO; import org.openmetadata.service.jdbi3.CollectionDAO.SearchIndexJobDAO.SearchIndexJobRecord; @@ -245,7 +246,7 @@ private void precomputePartitionStartCursors(UUID jobId, List> byEntity = partitions.stream() .filter(p -> p.getEntityType() != null) - .filter(p -> !PartitionWorker.TIME_SERIES_ENTITIES.contains(p.getEntityType())) + .filter(p -> !SearchIndexEntityTypes.isTimeSeriesEntity(p.getEntityType())) .collect(Collectors.groupingBy(SearchIndexPartition::getEntityType)); Map> jobCache = new HashMap<>(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutor.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutor.java index 0950a0c490eb..efeb6ea84f08 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutor.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutor.java @@ -13,12 +13,9 @@ package org.openmetadata.service.apps.bundles.searchIndex.distributed; -import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; - import io.micrometer.core.instrument.Timer; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; @@ -39,6 +36,7 @@ import org.openmetadata.service.apps.bundles.searchIndex.BulkSink; import org.openmetadata.service.apps.bundles.searchIndex.CompositeProgressListener; import org.openmetadata.service.apps.bundles.searchIndex.ElasticSearchBulkSink; +import org.openmetadata.service.apps.bundles.searchIndex.EntityReindexContextMapper; import org.openmetadata.service.apps.bundles.searchIndex.IndexingFailureRecorder; import org.openmetadata.service.apps.bundles.searchIndex.OpenSearchBulkSink; import org.openmetadata.service.apps.bundles.searchIndex.ReindexingConfiguration; @@ -127,10 +125,10 @@ public static boolean isCoordinatingJob(UUID jobId) { private IndexingFailureRecorder failureRecorder; private BulkSink searchIndexSink; - // Per-entity index promotion + // Per-entity staged index promotion private EntityCompletionTracker entityTracker; - private RecreateIndexHandler recreateIndexHandler; - private ReindexContext recreateContext; + private RecreateIndexHandler indexPromotionHandler; + private 
ReindexContext stagedIndexContext; // Reader stats tracking (accumulated across all worker threads) private final AtomicLong coordinatorReaderSuccess = new AtomicLong(0); @@ -195,8 +193,8 @@ public void setAppContext(UUID appId, Long startTime) { } /** - * Set the job notifier for alerting other servers when a job starts. When set, other servers in - * the cluster will be notified via Redis Pub/Sub (if available) or discovered via polling. + * Set the job notifier for alerting other servers when a job starts. Servers discover the job + * through database polling. * * @param notifier The job notifier */ @@ -311,19 +309,19 @@ public Optional joinJob(UUID jobId) { * none remain 3. Coordinates with other servers for load balancing * * @param bulkSink The sink for writing to search index - * @param recreateContext Context for index recreation, if applicable - * @param recreateIndex Whether indices should be recreated + * @param stagedIndexContext Context for staged index writes and promotion * @return Execution result with statistics */ public ExecutionResult execute( - BulkSink bulkSink, - ReindexContext recreateContext, - boolean recreateIndex, - ReindexingConfiguration reindexConfig) { + BulkSink bulkSink, ReindexContext stagedIndexContext, ReindexingConfiguration reindexConfig) { if (currentJob == null) { throw new IllegalStateException("No job to execute - call createJob() or joinJob() first"); } + if (stagedIndexContext == null || stagedIndexContext.isEmpty()) { + throw new IllegalArgumentException( + "Staged index context is required for distributed reindexing"); + } UUID jobId = currentJob.getId(); LOG.info("Server {} starting execution of job {}", serverId, jobId); @@ -406,12 +404,12 @@ public ExecutionResult execute( // Stats are tracked per-entityType by StageStatsTracker in PartitionWorker // No need for redundant server-level stats persistence - // Store recreate context for per-entity promotion - this.recreateContext = recreateContext; + // Store staged index context for per-entity promotion + this.stagedIndexContext = stagedIndexContext; // Initialize entity completion tracker for per-entity index promotion this.entityTracker = new EntityCompletionTracker(jobId); - initializeEntityTracker(jobId, recreateIndex); + initializeEntityTracker(jobId); coordinator.setEntityCompletionTracker(entityTracker); // Start lock refresh thread to prevent lock expiration during long-running jobs @@ -462,8 +460,7 @@ public ExecutionResult execute( workerId, bulkSink, batchSize, - recreateContext, - recreateIndex, + stagedIndexContext, totalSuccess, totalFailed, reindexConfig); @@ -491,7 +488,7 @@ public ExecutionResult execute( // Final reconciliation pass: catch ALL participant-server completions before // the stale-reclaimer is killed. Participant workers may have finished partitions // that were never reconciled by the stale-reclaimer's periodic loop. 
- if (entityTracker != null && recreateContext != null) { + if (entityTracker != null && stagedIndexContext != null) { LOG.info("Running final DB reconciliation for job {}", jobId); List allPartitions = coordinator.getPartitions(jobId, null); entityTracker.reconcileFromDatabase(allPartitions); @@ -656,8 +653,7 @@ private void runWorkerLoop( int workerId, BulkSink bulkSink, int batchSize, - ReindexContext recreateContext, - boolean recreateIndex, + ReindexContext stagedIndexContext, AtomicLong totalSuccess, AtomicLong totalFailed, ReindexingConfiguration reindexConfig) { @@ -666,13 +662,7 @@ private void runWorkerLoop( PartitionWorker worker = new PartitionWorker( - coordinator, - bulkSink, - batchSize, - recreateContext, - recreateIndex, - failureRecorder, - reindexConfig); + coordinator, bulkSink, batchSize, stagedIndexContext, failureRecorder, reindexConfig); synchronized (activeWorkers) { activeWorkers.add(worker); @@ -1080,7 +1070,7 @@ public void updateStagedIndexMapping(Map stagedIndexMapping) { /** * Initialize the entity completion tracker with partition counts and promotion callback. */ - private void initializeEntityTracker(UUID jobId, boolean recreateIndex) { + private void initializeEntityTracker(UUID jobId) { // Count partitions per entity Map partitionCountByEntity = new HashMap<>(); List allPartitions = coordinator.getPartitions(jobId, null); @@ -1099,79 +1089,60 @@ private void initializeEntityTracker(UUID jobId, boolean recreateIndex) { partitionCountByEntity.size(), partitionCountByEntity); - // Set up per-entity promotion callback if recreating indices - if (recreateIndex && recreateContext != null) { - this.recreateIndexHandler = Entity.getSearchRepository().createReindexHandler(); - // Wire jobData into the handler so applyLiveServingSettings can revert bulk-build - // overrides (refresh_interval=-1, replicas=0, async translog) before the per-entity - // alias swap. Without this, buildRevertJson returns null and the bulk overrides - // silently become the live settings. - if (recreateIndexHandler instanceof DefaultRecreateHandler defaultHandler - && currentJob != null - && currentJob.getJobConfiguration() != null) { - defaultHandler.withJobData(currentJob.getJobConfiguration()); - } - entityTracker.setOnEntityComplete(this::promoteEntityIndex); - LOG.info( - "Per-entity promotion callback SET for job {} (recreateIndex={}, recreateContext entities={})", - jobId, - recreateIndex, - recreateContext.getEntities()); - } else { - LOG.info( - "Per-entity promotion callback NOT set for job {} (recreateIndex={}, recreateContext={})", - jobId, - recreateIndex, - recreateContext != null ? "present" : "null"); + if (partitionCountByEntity.isEmpty()) { + LOG.info("No partitions found for job {}; finalizer will promote staged indexes", jobId); + return; } + + if (stagedIndexContext == null || stagedIndexContext.isEmpty()) { + throw new IllegalStateException("Staged index context is required for entity promotion"); + } + indexPromotionHandler = Entity.getSearchRepository().createReindexHandler(); + // Wire job configuration so applyLiveServingSettings can revert bulk-build overrides + // (refresh=-1, replicas=0, async translog) before the per-entity alias swap. 
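+ // Without the job data, buildRevertJson returns null and the bulk-build overrides
+ // silently become the live settings.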
+ if (indexPromotionHandler instanceof DefaultRecreateHandler defaultHandler + && currentJob != null + && currentJob.getJobConfiguration() != null) { + defaultHandler.withJobData(currentJob.getJobConfiguration()); + } + entityTracker.setOnEntityComplete(this::promoteEntityIndex); + LOG.info( + "Per-entity promotion callback set for job {} (staged index entities={})", + jobId, + stagedIndexContext.getEntities()); } /** * Promote a single entity's index when all its partitions complete. */ private void promoteEntityIndex(String entityType, boolean success) { - if (recreateIndexHandler == null || recreateContext == null) { + if (indexPromotionHandler == null || stagedIndexContext == null) { LOG.warn( - "Cannot promote index for entity '{}' - no recreateIndexHandler or recreateContext", + "Cannot promote index for entity '{}' - no index promotion handler or staged context", entityType); return; } - Optional stagedIndexOpt = recreateContext.getStagedIndex(entityType); - if (stagedIndexOpt.isEmpty()) { + EntityReindexContext entityContext = + EntityReindexContextMapper.fromStagedContext(stagedIndexContext, entityType); + if (entityContext.getStagedIndex() == null) { LOG.debug("No staged index for entity '{}', skipping promotion", entityType); return; } try { - String canonicalIndex = recreateContext.getCanonicalIndex(entityType).orElse(null); - String originalIndex = recreateContext.getOriginalIndex(entityType).orElse(null); - LOG.debug( "Promoting entity '{}': success={}, canonicalIndex={}, stagedIndex={}", entityType, success, - canonicalIndex, - stagedIndexOpt.get()); - - EntityReindexContext entityContext = - EntityReindexContext.builder() - .entityType(entityType) - .originalIndex(originalIndex) - .canonicalIndex(canonicalIndex) - .activeIndex(originalIndex) - .stagedIndex(stagedIndexOpt.get()) - .canonicalAliases(recreateContext.getCanonicalAlias(entityType).orElse(null)) - .existingAliases(recreateContext.getExistingAliases(entityType)) - .parentAliases( - new HashSet<>(listOrEmpty(recreateContext.getParentAliases(entityType)))) - .build(); - - if (recreateIndexHandler instanceof DefaultRecreateHandler defaultHandler) { + entityContext.getCanonicalIndex(), + entityContext.getStagedIndex()); + + if (indexPromotionHandler instanceof DefaultRecreateHandler defaultHandler) { LOG.info("Promoting index for entity '{}' (success={})", entityType, success); defaultHandler.promoteEntityIndex(entityContext, success); } else { - recreateIndexHandler.finalizeReindex(entityContext, success); + indexPromotionHandler.finalizeReindex(entityContext, success); } } catch (Exception e) { LOG.error("Failed to promote index for entity '{}'", entityType, e); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionCalculator.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionCalculator.java index 3079b00aa801..5a50b9c86422 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionCalculator.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionCalculator.java @@ -24,6 +24,7 @@ import org.openmetadata.service.Entity; import org.openmetadata.service.apps.bundles.searchIndex.EntityPriority; import org.openmetadata.service.apps.bundles.searchIndex.ReindexingConfiguration; +import org.openmetadata.service.apps.bundles.searchIndex.SearchIndexEntityTypes; import 
org.openmetadata.service.jdbi3.EntityRepository; import org.openmetadata.service.jdbi3.EntityTimeSeriesRepository; import org.openmetadata.service.jdbi3.ListFilter; @@ -81,18 +82,6 @@ public class PartitionCalculator { Map.entry("queryCostRecord", 0.3) // Time series, simple structure ); - /** Time series entity types */ - private static final Set TIME_SERIES_ENTITIES = - Set.of( - "testCaseResolutionStatus", - "testCaseResult", - "queryCostRecord", - "webAnalyticEntityViewReportData", - "webAnalyticUserActivityReportData", - "entityReportData", - "rawCostAnalysisReportData", - "aggregatedCostAnalysisReportData"); - private final int partitionSize; private final int minPartitionsPerEntity; @@ -256,7 +245,7 @@ public long getEntityCount(String entityType) { public long getEntityCount(String entityType, ReindexingConfiguration reindexConfig) { try { long count; - if (TIME_SERIES_ENTITIES.contains(entityType)) { + if (SearchIndexEntityTypes.isTimeSeriesEntity(entityType)) { count = getTimeSeriesEntityCount(entityType, reindexConfig); } else { count = getRegularEntityCount(entityType); @@ -278,7 +267,7 @@ private long getTimeSeriesEntityCount(String entityType, ReindexingConfiguration ListFilter listFilter = new ListFilter(Include.ALL); EntityTimeSeriesRepository repository; - if (isDataInsightIndex(entityType)) { + if (SearchIndexEntityTypes.isDataInsightEntity(entityType)) { listFilter.addQueryParam("entityFQNHash", FullyQualifiedName.buildHash(entityType)); repository = Entity.getEntityTimeSeriesRepository(Entity.ENTITY_REPORT_DATA); } else { @@ -303,10 +292,6 @@ private long getTimeSeriesEntityCount(String entityType, ReindexingConfiguration return repository.getTimeSeriesDao().listCount(listFilter); } - private boolean isDataInsightIndex(String entityType) { - return entityType.endsWith("ReportData"); - } - /** * Get entity counts for all requested entity types. 
* diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorker.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorker.java index 1db12e6c577d..c108a4ba0094 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorker.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorker.java @@ -14,13 +14,9 @@ package org.openmetadata.service.apps.bundles.searchIndex.distributed; import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; -import static org.openmetadata.service.Entity.QUERY_COST_RECORD; -import static org.openmetadata.service.Entity.TEST_CASE_RESOLUTION_STATUS; -import static org.openmetadata.service.Entity.TEST_CASE_RESULT; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -28,7 +24,6 @@ import org.apache.commons.lang3.exception.ExceptionUtils; import org.openmetadata.schema.EntityInterface; import org.openmetadata.schema.EntityTimeSeriesInterface; -import org.openmetadata.schema.analytics.ReportData; import org.openmetadata.schema.system.EntityError; import org.openmetadata.schema.type.Include; import org.openmetadata.schema.utils.ResultList; @@ -36,6 +31,7 @@ import org.openmetadata.service.apps.bundles.searchIndex.BulkSink; import org.openmetadata.service.apps.bundles.searchIndex.IndexingFailureRecorder; import org.openmetadata.service.apps.bundles.searchIndex.ReindexingConfiguration; +import org.openmetadata.service.apps.bundles.searchIndex.SearchIndexEntityTypes; import org.openmetadata.service.apps.bundles.searchIndex.stats.StageStatsTracker; import org.openmetadata.service.cache.EntityCacheBypass; import org.openmetadata.service.exception.SearchIndexException; @@ -56,26 +52,14 @@ public class PartitionWorker { private static final long MAX_CURSOR_INITIALIZATION_OFFSET = (long) Integer.MAX_VALUE + 1L; - /** Time series entity types that need special handling */ - static final Set TIME_SERIES_ENTITIES = - Set.of( - ReportData.ReportDataType.ENTITY_REPORT_DATA.value(), - ReportData.ReportDataType.RAW_COST_ANALYSIS_REPORT_DATA.value(), - ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(), - ReportData.ReportDataType.WEB_ANALYTIC_ENTITY_VIEW_REPORT_DATA.value(), - ReportData.ReportDataType.AGGREGATED_COST_ANALYSIS_REPORT_DATA.value(), - TEST_CASE_RESOLUTION_STATUS, - TEST_CASE_RESULT, - QUERY_COST_RECORD); - /** Context key for entity type */ private static final String ENTITY_TYPE_KEY = "entityType"; - /** Context key for recreate index flag */ - private static final String RECREATE_INDEX = "recreateIndex"; + /** Context key used by search sinks to write into staged indexes. */ + private static final String STAGED_WRITE_KEY = "recreateIndex"; - /** Context key for recreate context */ - private static final String RECREATE_CONTEXT = "recreateContext"; + /** Context key for staged index context. 
*/ + private static final String STAGED_CONTEXT_KEY = "recreateContext"; /** Context key for target index */ private static final String TARGET_INDEX_KEY = "targetIndex"; @@ -92,8 +76,7 @@ public class PartitionWorker { private final DistributedSearchIndexCoordinator coordinator; private final BulkSink searchIndexSink; private final int batchSize; - private final ReindexContext recreateContext; - private final boolean recreateIndex; + private final ReindexContext stagedIndexContext; private final AtomicBoolean stopped = new AtomicBoolean(false); private final IndexingFailureRecorder failureRecorder; private final ReindexingConfiguration reindexConfig; @@ -102,41 +85,30 @@ public PartitionWorker( DistributedSearchIndexCoordinator coordinator, BulkSink searchIndexSink, int batchSize, - ReindexContext recreateContext, - boolean recreateIndex) { - this(coordinator, searchIndexSink, batchSize, recreateContext, recreateIndex, null, null); + ReindexContext stagedIndexContext) { + this(coordinator, searchIndexSink, batchSize, stagedIndexContext, null, null); } public PartitionWorker( DistributedSearchIndexCoordinator coordinator, BulkSink searchIndexSink, int batchSize, - ReindexContext recreateContext, - boolean recreateIndex, + ReindexContext stagedIndexContext, IndexingFailureRecorder failureRecorder) { - this( - coordinator, - searchIndexSink, - batchSize, - recreateContext, - recreateIndex, - failureRecorder, - null); + this(coordinator, searchIndexSink, batchSize, stagedIndexContext, failureRecorder, null); } public PartitionWorker( DistributedSearchIndexCoordinator coordinator, BulkSink searchIndexSink, int batchSize, - ReindexContext recreateContext, - boolean recreateIndex, + ReindexContext stagedIndexContext, IndexingFailureRecorder failureRecorder, ReindexingConfiguration reindexConfig) { this.coordinator = coordinator; this.searchIndexSink = searchIndexSink; this.batchSize = batchSize; - this.recreateContext = recreateContext; - this.recreateIndex = recreateIndex; + this.stagedIndexContext = stagedIndexContext; this.failureRecorder = failureRecorder; this.reindexConfig = reindexConfig; } @@ -161,7 +133,7 @@ public PartitionResult processPartition(SearchIndexPartition partition) { } private PartitionResult processPartitionInternal(SearchIndexPartition partition) { - String entityType = partition.getEntityType(); + String entityType = SearchIndexEntityTypes.normalizeEntityType(partition.getEntityType()); long rangeStart = partition.getRangeStart(); long rangeEnd = partition.getRangeEnd(); @@ -609,22 +581,21 @@ private void recordReaderFailures( */ private ResultList readEntitiesKeyset(String entityType, String keysetCursor, int limit) throws SearchIndexException { + String normalizedEntityType = SearchIndexEntityTypes.normalizeEntityType(entityType); - // Selective fields, not "*". Asking for "*" runs every registered fieldFetcher in - // setFieldsInBulk — including expensive ones like fetchAndSetOwns on Team/User where every - // owned entity becomes an Entity.getEntityReferenceById round-trip — and the index class then - // strips most of those out via getExcludedFields anyway. Mirror what EntityReader does on the - // single-server pipeline (PR #27723) so both paths request the same minimal set. - List fields = ReindexingUtil.getSearchIndexFields(entityType); + // Selective fields avoid running expensive field fetchers that are stripped out before + // indexing. 
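+ // (Asking for "*" would run every registered fieldFetcher in setFieldsInBulk, only for the
+ // index class to strip most of those fields back out via getExcludedFields.)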
+ List fields = ReindexingUtil.getSearchIndexFields(normalizedEntityType); - if (!TIME_SERIES_ENTITIES.contains(entityType)) { - PaginatedEntitiesSource source = new PaginatedEntitiesSource(entityType, limit, fields, 0); + if (!SearchIndexEntityTypes.isTimeSeriesEntity(normalizedEntityType)) { + PaginatedEntitiesSource source = + new PaginatedEntitiesSource(normalizedEntityType, limit, fields, 0); return source.readNextKeyset(keysetCursor); } else { Long filterStartTs = null; Long filterEndTs = null; if (reindexConfig != null) { - long startTs = reindexConfig.getTimeSeriesStartTs(entityType); + long startTs = reindexConfig.getTimeSeriesStartTs(normalizedEntityType); if (startTs > 0) { filterStartTs = startTs; filterEndTs = System.currentTimeMillis(); @@ -633,8 +604,8 @@ private ResultList readEntitiesKeyset(String entityType, String keysetCursor, PaginatedEntityTimeSeriesSource source = (filterStartTs != null) ? new PaginatedEntityTimeSeriesSource( - entityType, limit, fields, filterStartTs, filterEndTs) - : new PaginatedEntityTimeSeriesSource(entityType, limit, fields, 0); + normalizedEntityType, limit, fields, filterStartTs, filterEndTs) + : new PaginatedEntityTimeSeriesSource(normalizedEntityType, limit, fields, 0); return source.readWithCursor(keysetCursor); } } @@ -643,8 +614,8 @@ private String initializeKeysetCursor(SearchIndexPartition partition, long offse if (offset <= 0) { return null; } - String entityType = partition.getEntityType(); - if (TIME_SERIES_ENTITIES.contains(entityType)) { + String entityType = SearchIndexEntityTypes.normalizeEntityType(partition.getEntityType()); + if (SearchIndexEntityTypes.isTimeSeriesEntity(entityType)) { return RestUtil.encodeCursor(String.valueOf(offset)); } // Fast path: coordinator precomputed boundary cursors for every partition's @@ -693,8 +664,9 @@ private int toCursorOffset(String entityType, long offset) { private void writeToSink( String entityType, ResultList resultList, Map contextData) throws Exception { + String normalizedEntityType = SearchIndexEntityTypes.normalizeEntityType(entityType); - if (!TIME_SERIES_ENTITIES.contains(entityType)) { + if (!SearchIndexEntityTypes.isTimeSeriesEntity(normalizedEntityType)) { List entities = (List) resultList.getData(); searchIndexSink.write(entities, contextData); } else { @@ -712,21 +684,30 @@ private void writeToSink( * @return Context data map */ private Map createContextData(String entityType, StageStatsTracker statsTracker) { + String normalizedEntityType = SearchIndexEntityTypes.normalizeEntityType(entityType); Map contextData = new java.util.HashMap<>(); - contextData.put(ENTITY_TYPE_KEY, entityType); - contextData.put(RECREATE_INDEX, recreateIndex); + contextData.put(ENTITY_TYPE_KEY, normalizedEntityType); + contextData.put(STAGED_WRITE_KEY, true); if (statsTracker != null) { contextData.put(BulkSink.STATS_TRACKER_CONTEXT_KEY, statsTracker); } - if (recreateContext != null) { - contextData.put(RECREATE_CONTEXT, recreateContext); - recreateContext - .getStagedIndex(entityType) - .ifPresent(index -> contextData.put(TARGET_INDEX_KEY, index)); + if (stagedIndexContext == null) { + throw new IllegalStateException( + "Staged index context is required for distributed reindexing"); } + String targetIndex = + stagedIndexContext + .getStagedIndex(normalizedEntityType) + .orElseThrow( + () -> + new IllegalStateException( + "No staged index configured for entity type: " + normalizedEntityType)); + contextData.put(STAGED_CONTEXT_KEY, stagedIndexContext); + contextData.put(TARGET_INDEX_KEY, 
targetIndex); + return contextData; } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifier.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifier.java index 0d41ecf0f47d..31f643cf71ec 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifier.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifier.java @@ -13,6 +13,7 @@ package org.openmetadata.service.apps.bundles.searchIndex.distributed; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.UUID; @@ -26,28 +27,31 @@ import org.openmetadata.service.jdbi3.CollectionDAO; /** - * Database polling based job notifier as fallback when Redis is not available. + * Database polling based job notifier for distributed job discovery. * *

<p>Uses adaptive polling intervals:
 *
 * <ul>
- *   <li>30 seconds when idle (no active jobs)
- *   <li>1 second when actively participating in a job
+ *   <li>1 second while actively participating in a job
+ *   <li>2 seconds plus jitter while recently started or after job activity
+ *   <li>30 seconds plus jitter after an extended idle period
 * </ul>
- *
- * <p>
This minimizes database overhead while still providing reasonable job discovery latency. */ @Slf4j public class PollingJobNotifier implements DistributedJobNotifier { - /** Poll interval when no job is running (30 seconds) */ - private static final long IDLE_POLL_INTERVAL_MS = 30_000; + private static final long FAST_IDLE_POLL_INTERVAL_MS = 2_000; + private static final long BACKOFF_IDLE_POLL_INTERVAL_MS = 30_000; - /** Poll interval when actively participating (1 second) */ private static final long ACTIVE_POLL_INTERVAL_MS = 1_000; + private static final long FAST_IDLE_WINDOW_MS = 60_000; + private static final long FAST_IDLE_JITTER_MS = 1_000; + private static final long BACKOFF_IDLE_JITTER_MS = 5_000; private final CollectionDAO collectionDAO; private final String serverId; + private final long fastIdleJitterMs; + private final long backoffIdleJitterMs; private final AtomicBoolean running = new AtomicBoolean(false); private final AtomicBoolean participating = new AtomicBoolean(false); private final Set knownJobs = ConcurrentHashMap.newKeySet(); @@ -55,10 +59,13 @@ public class PollingJobNotifier implements DistributedJobNotifier { private ScheduledExecutorService scheduler; private Consumer jobStartedCallback; private volatile long lastPollTime = 0; + private volatile long fastIdleUntil = 0; public PollingJobNotifier(CollectionDAO collectionDAO, String serverId) { this.collectionDAO = collectionDAO; this.serverId = serverId; + this.fastIdleJitterMs = computeJitter(FAST_IDLE_JITTER_MS, 17); + this.backoffIdleJitterMs = computeJitter(BACKOFF_IDLE_JITTER_MS, 31); } @Override @@ -68,6 +75,10 @@ public void start() { return; } + long now = System.currentTimeMillis(); + lastPollTime = 0; + extendFastIdleWindow(now); + scheduler = Executors.newSingleThreadScheduledExecutor( Thread.ofPlatform() @@ -75,14 +86,14 @@ public void start() { "reindex-job-notifier-" + serverId.substring(0, Math.min(8, serverId.length()))) .factory()); - // Schedule with fixed delay of 1 second, but actual polling is controlled by interval logic scheduler.scheduleWithFixedDelay( this::pollForJobs, 0, ACTIVE_POLL_INTERVAL_MS, TimeUnit.MILLISECONDS); LOG.info( - "PollingJobNotifier started on server {} (idle: {}s, active: {}s)", + "PollingJobNotifier started on server {} (fast idle: {}s, backoff idle: {}s, active: {}s)", serverId, - IDLE_POLL_INTERVAL_MS / 1000, + FAST_IDLE_POLL_INTERVAL_MS / 1000, + BACKOFF_IDLE_POLL_INTERVAL_MS / 1000, ACTIVE_POLL_INTERVAL_MS / 1000); } @@ -110,9 +121,8 @@ public void stop() { @Override public void notifyJobStarted(UUID jobId, String jobType) { - // In polling mode, we don't actively notify - other servers will discover via polling - // But we track it locally to avoid re-notifying ourselves knownJobs.add(jobId); + extendFastIdleWindow(System.currentTimeMillis()); LOG.debug( "Job {} (type: {}) started - other servers will discover via polling", jobId, jobType); } @@ -120,6 +130,7 @@ public void notifyJobStarted(UUID jobId, String jobType) { @Override public void notifyJobCompleted(UUID jobId) { knownJobs.remove(jobId); + extendFastIdleWindow(System.currentTimeMillis()); LOG.debug("Job {} completed - removed from known jobs", jobId); } @@ -144,6 +155,9 @@ public String getType() { */ public void setParticipating(boolean isParticipating) { this.participating.set(isParticipating); + if (!isParticipating) { + extendFastIdleWindow(System.currentTimeMillis()); + } } private void pollForJobs() { @@ -152,32 +166,23 @@ private void pollForJobs() { } long now = System.currentTimeMillis(); - long 
interval = participating.get() ? ACTIVE_POLL_INTERVAL_MS : IDLE_POLL_INTERVAL_MS; - - // Skip poll if not enough time has elapsed - if (now - lastPollTime < interval) { + if (now - lastPollTime < currentPollIntervalMs(now)) { return; } lastPollTime = now; try { - // Fast, lightweight query for running jobs List runningJobIds = collectionDAO.searchIndexJobDAO().getRunningJobIds(); if (runningJobIds.isEmpty()) { - // No jobs running - clear known jobs and stay in idle mode - if (!knownJobs.isEmpty()) { - LOG.debug("No running jobs found, clearing {} known jobs", knownJobs.size()); - knownJobs.clear(); - } + handleNoRunningJobs(now); return; } - // Check for new jobs we haven't seen + extendFastIdleWindow(now); for (String jobIdStr : runningJobIds) { UUID jobId = UUID.fromString(jobIdStr); if (!knownJobs.contains(jobId)) { - // New job discovered! LOG.info("Discovered new running job via polling: {}", jobId); knownJobs.add(jobId); @@ -187,12 +192,38 @@ private void pollForJobs() { } } - // Clean up jobs that are no longer running - knownJobs.removeIf( - jobId -> runningJobIds.stream().noneMatch(id -> id.equals(jobId.toString()))); + Set runningJobIdSet = new HashSet<>(runningJobIds); + knownJobs.removeIf(jobId -> !runningJobIdSet.contains(jobId.toString())); } catch (Exception e) { LOG.error("Error polling for jobs", e); } } + + private void handleNoRunningJobs(long now) { + if (knownJobs.isEmpty()) { + return; + } + LOG.debug("No running jobs found, clearing {} known jobs", knownJobs.size()); + knownJobs.clear(); + extendFastIdleWindow(now); + } + + private long currentPollIntervalMs(long now) { + if (participating.get()) { + return ACTIVE_POLL_INTERVAL_MS; + } + if (now <= fastIdleUntil) { + return FAST_IDLE_POLL_INTERVAL_MS + fastIdleJitterMs; + } + return BACKOFF_IDLE_POLL_INTERVAL_MS + backoffIdleJitterMs; + } + + private void extendFastIdleWindow(long now) { + fastIdleUntil = now + FAST_IDLE_WINDOW_MS; + } + + private long computeJitter(long maxJitterMs, int salt) { + return Math.floorMod((serverId.hashCode() * 31) + salt, (int) maxJitterMs + 1); + } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/RedisJobNotifier.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/RedisJobNotifier.java deleted file mode 100644 index 0d163ef79283..000000000000 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/RedisJobNotifier.java +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright 2024 Collate - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.openmetadata.service.apps.bundles.searchIndex.distributed; - -import io.lettuce.core.RedisClient; -import io.lettuce.core.RedisURI; -import io.lettuce.core.api.StatefulRedisConnection; -import io.lettuce.core.pubsub.RedisPubSubAdapter; -import io.lettuce.core.pubsub.StatefulRedisPubSubConnection; -import java.time.Duration; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Consumer; -import lombok.extern.slf4j.Slf4j; -import org.openmetadata.service.cache.CacheConfig; - -/** - * Redis Pub/Sub based job notifier for instant push notifications. - * - *
<p>
When Redis is available, this provides zero-latency job discovery across all servers in the - * cluster. Messages are delivered instantly via Redis Pub/Sub. - */ -@Slf4j -public class RedisJobNotifier implements DistributedJobNotifier { - - private static final String CHANNEL_PREFIX = "om:distributed-jobs:"; - private static final String START_CHANNEL = CHANNEL_PREFIX + "start"; - private static final String COMPLETE_CHANNEL = CHANNEL_PREFIX + "complete"; - - private final CacheConfig.Redis redisConfig; - private final String serverId; - private final AtomicBoolean running = new AtomicBoolean(false); - - private RedisClient redisClient; - private StatefulRedisPubSubConnection subConnection; - private StatefulRedisConnection pubConnection; - private Consumer jobStartedCallback; - - public RedisJobNotifier(CacheConfig cacheConfig, String serverId) { - this.redisConfig = cacheConfig.redis; - this.serverId = serverId; - } - - @Override - public void start() { - if (!running.compareAndSet(false, true)) { - LOG.warn("RedisJobNotifier already running"); - return; - } - - try { - RedisURI uri = buildRedisURI(); - redisClient = RedisClient.create(uri); - - // Create subscription connection - subConnection = redisClient.connectPubSub(); - subConnection.addListener( - new RedisPubSubAdapter<>() { - @Override - public void message(String channel, String message) { - handleMessage(channel, message); - } - }); - - // Subscribe to job channels - subConnection.sync().subscribe(START_CHANNEL, COMPLETE_CHANNEL); - - // Create publish connection (separate from subscription) - pubConnection = redisClient.connect(); - - LOG.info( - "RedisJobNotifier started on server {} - subscribed to channels: {}, {}", - serverId, - START_CHANNEL, - COMPLETE_CHANNEL); - - } catch (Exception e) { - running.set(false); - LOG.error("Failed to start RedisJobNotifier", e); - throw new RuntimeException("Failed to initialize Redis Pub/Sub", e); - } - } - - @Override - public void stop() { - if (!running.compareAndSet(true, false)) { - return; - } - - try { - if (subConnection != null) { - subConnection.sync().unsubscribe(START_CHANNEL, COMPLETE_CHANNEL); - subConnection.close(); - } - if (pubConnection != null) { - pubConnection.close(); - } - if (redisClient != null) { - redisClient.shutdown(); - } - LOG.info("RedisJobNotifier stopped on server {}", serverId); - } catch (Exception e) { - LOG.error("Error stopping RedisJobNotifier", e); - } - } - - @Override - public void notifyJobStarted(UUID jobId, String jobType) { - if (!running.get() || pubConnection == null) { - LOG.warn("Cannot notify job started - RedisJobNotifier not running"); - return; - } - - try { - String message = formatMessage(jobId, jobType, serverId); - long receivers = pubConnection.sync().publish(START_CHANNEL, message); - LOG.info( - "Published job start notification for {} (type: {}) to {} subscribers", - jobId, - jobType, - receivers); - } catch (Exception e) { - LOG.error("Failed to publish job start notification for {}", jobId, e); - } - } - - @Override - public void notifyJobCompleted(UUID jobId) { - if (!running.get() || pubConnection == null) { - LOG.warn("Cannot notify job completed - RedisJobNotifier not running"); - return; - } - - try { - String message = formatMessage(jobId, "COMPLETED", serverId); - pubConnection.sync().publish(COMPLETE_CHANNEL, message); - LOG.debug("Published job completion notification for {}", jobId); - } catch (Exception e) { - LOG.error("Failed to publish job completion notification for {}", jobId, e); - } - } - - @Override - 
public void onJobStarted(Consumer callback) { - this.jobStartedCallback = callback; - } - - @Override - public boolean isRunning() { - return running.get(); - } - - @Override - public String getType() { - return "redis-pubsub"; - } - - private void handleMessage(String channel, String message) { - try { - String[] parts = message.split("\\|"); - if (parts.length < 3) { - LOG.warn("Invalid message format: {}", message); - return; - } - - UUID jobId = UUID.fromString(parts[0]); - String jobType = parts[1]; - String sourceServer = parts[2]; - - // Don't process our own messages - if (serverId.equals(sourceServer)) { - LOG.debug("Ignoring own message for job {}", jobId); - return; - } - - if (START_CHANNEL.equals(channel)) { - LOG.info( - "Received job start notification from server {}: job={}, type={}", - sourceServer, - jobId, - jobType); - if (jobStartedCallback != null) { - jobStartedCallback.accept(jobId); - } - } else if (COMPLETE_CHANNEL.equals(channel)) { - LOG.debug("Received job completion notification: job={}", jobId); - } - - } catch (Exception e) { - LOG.error("Error handling message on channel {}: {}", channel, message, e); - } - } - - private String formatMessage(UUID jobId, String jobType, String sourceServer) { - return jobId.toString() + "|" + jobType + "|" + sourceServer; - } - - private RedisURI buildRedisURI() { - String url = redisConfig.url; - RedisURI.Builder builder; - - if (url.startsWith("redis://") || url.startsWith("rediss://")) { - RedisURI uri = RedisURI.create(url); - builder = - RedisURI.Builder.redis(uri.getHost(), uri.getPort()) - .withTimeout(Duration.ofMillis(redisConfig.connectTimeoutMs)); - } else if (url.contains(":")) { - String[] parts = url.split(":"); - String host = parts[0]; - int port = Integer.parseInt(parts[1]); - builder = - RedisURI.Builder.redis(host, port) - .withTimeout(Duration.ofMillis(redisConfig.connectTimeoutMs)); - } else { - builder = - RedisURI.Builder.redis(url).withTimeout(Duration.ofMillis(redisConfig.connectTimeoutMs)); - } - - if (redisConfig.authType == CacheConfig.AuthType.PASSWORD) { - if (redisConfig.username != null && redisConfig.passwordRef != null) { - builder.withAuthentication(redisConfig.username, redisConfig.passwordRef); - } else if (redisConfig.passwordRef != null) { - builder.withPassword(redisConfig.passwordRef.toCharArray()); - } - } - - if (redisConfig.useSSL) { - builder.withSsl(true); - } - - builder.withDatabase(redisConfig.database); - return builder.build(); - } -} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListener.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListener.java index cc9a3f8384d9..0de30434db7b 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListener.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListener.java @@ -27,10 +27,7 @@ public LoggingProgressListener() { @Override public void onJobStarted(ReindexingJobContext context) { LOG.info( - "Reindexing job started - Job ID: {}, Source: {}, Distributed: {}", - context.getJobId(), - context.getSource(), - context.isDistributed()); + "Reindexing job started - Job ID: {}, Source: {}", context.getJobId(), context.getSource()); } @Override @@ -45,17 +42,16 @@ public void onJobConfigured(ReindexingJobContext context, ReindexingConfiguratio 
logger.addInitDetail("Max Concurrent Requests", config.maxConcurrentRequests()); logger.addInitDetail("Payload Size", formatBytes(config.payloadSize())); logger.addInitDetail("Auto-tune", config.autoTune() ? "Enabled" : "Disabled"); - logger.addInitDetail("Recreate Index", config.recreateIndex() ? "Yes" : "No"); - logger.addInitDetail("Distributed Mode", config.useDistributedIndexing() ? "Yes" : "No"); + logger.addInitDetail("Indexing Mode", "Staged indexes with alias promotion"); logger.logInitialization(); } @Override public void onIndexRecreationStarted(Set entities) { - LOG.info("Starting index recreation for {} entity types", entities.size()); + LOG.info("Preparing staged indexes for {} entity types", entities.size()); if (LOG.isDebugEnabled()) { - LOG.debug("Entities to recreate: {}", String.join(", ", entities)); + LOG.debug("Entities to stage: {}", String.join(", ", entities)); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListener.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListener.java index c3f70b5defb9..0a1a2a9131ba 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListener.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListener.java @@ -27,7 +27,8 @@ public class SlackProgressListener implements ReindexingProgressListener { private static final String PRODUCER_THREADS = "Producer threads"; private static final String TOTAL_ENTITIES = "Total entities"; private static final String QUEUE_SIZE = "Queue size"; - private static final String RECREATING_INDICES = "Recreating indices"; + private static final String INDEXING_MODE = "Indexing mode"; + private static final String STAGED_PROMOTION = "Staged indexes with alias promotion"; private static final String PAYLOAD_SIZE = "Payload size"; private static final String CONCURRENT_REQUESTS = "Concurrent requests"; @@ -58,7 +59,8 @@ public void onJobConfigured(ReindexingJobContext context, ReindexingConfiguratio @Override public void onIndexRecreationStarted(Set entities) { - LOG.debug("Slack notification: Index recreation started for {} entities", entities.size()); + LOG.debug( + "Slack notification: Staged index preparation started for {} entities", entities.size()); } @Override @@ -125,7 +127,7 @@ private Map buildConfigDetails(ReindexingConfiguration config) { details.put(PRODUCER_THREADS, String.valueOf(config.producerThreads())); details.put(QUEUE_SIZE, String.valueOf(config.queueSize())); details.put(TOTAL_ENTITIES, String.valueOf(totalEntities)); - details.put(RECREATING_INDICES, config.recreateIndex() ? 
"Yes" : "No"); + details.put(INDEXING_MODE, STAGED_PROMOTION); details.put(PAYLOAD_SIZE, (config.payloadSize() / (1024 * 1024)) + " MB"); details.put(CONCURRENT_REQUESTS, String.valueOf(config.maxConcurrentRequests())); return details; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/workflows/searchIndex/ReindexingUtil.java b/openmetadata-service/src/main/java/org/openmetadata/service/workflows/searchIndex/ReindexingUtil.java index 34d49476a614..60a7edf0ecaf 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/workflows/searchIndex/ReindexingUtil.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/workflows/searchIndex/ReindexingUtil.java @@ -13,7 +13,7 @@ package org.openmetadata.service.workflows.searchIndex; -import static org.openmetadata.service.apps.bundles.searchIndex.SearchIndexApp.TIME_SERIES_ENTITIES; +import static org.openmetadata.service.apps.bundles.searchIndex.SearchIndexEntityTypes.TIME_SERIES_ENTITIES; import static org.openmetadata.service.search.SearchClient.GLOBAL_SEARCH_ALIAS; import com.fasterxml.jackson.databind.JsonNode; diff --git a/openmetadata-service/src/main/resources/json/data/app/SearchIndexingApplication.json b/openmetadata-service/src/main/resources/json/data/app/SearchIndexingApplication.json index 781b1a047c9c..a44897895d3b 100644 --- a/openmetadata-service/src/main/resources/json/data/app/SearchIndexingApplication.json +++ b/openmetadata-service/src/main/resources/json/data/app/SearchIndexingApplication.json @@ -5,7 +5,6 @@ "entities": [ "all" ], - "recreateIndex": true, "batchSize": "100", "payLoadSize": 104857600, "producerThreads": 1, diff --git a/openmetadata-service/src/main/resources/json/data/appMarketPlaceDefinition/SearchIndexingApplication.json b/openmetadata-service/src/main/resources/json/data/appMarketPlaceDefinition/SearchIndexingApplication.json index b45855c08546..8fae312b7bd4 100644 --- a/openmetadata-service/src/main/resources/json/data/appMarketPlaceDefinition/SearchIndexingApplication.json +++ b/openmetadata-service/src/main/resources/json/data/appMarketPlaceDefinition/SearchIndexingApplication.json @@ -2,7 +2,7 @@ "name": "SearchIndexingApplication", "displayName": "Search Indexing", "description": "OpenMetadata connects with Elastic/Open Search to provide search feature for Data Assets. 
This application provides additional features related to ES/OS.", - "features": "Sync OpenMetadata and Elastic Search and Recreate Indexes.", + "features": "Sync OpenMetadata and Elastic Search with staged index promotion.", "appType": "internal", "appScreenshots": ["SearchIndexPic1"], "developer": "Collate Inc.", @@ -20,7 +20,6 @@ "entities": [ "all" ], - "recreateIndex": false, "batchSize": "100", "payLoadSize": 104857600, "producerThreads": 1, diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/AdaptiveBackoffTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/AdaptiveBackoffTest.java deleted file mode 100644 index 5906b152bf25..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/AdaptiveBackoffTest.java +++ /dev/null @@ -1,72 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; - -@DisplayName("AdaptiveBackoff Tests") -class AdaptiveBackoffTest { - - @Test - @DisplayName("returns initial delay on first call") - void initialDelay() { - AdaptiveBackoff backoff = new AdaptiveBackoff(100, 2000); - assertEquals(100, backoff.nextDelay()); - } - - @Test - @DisplayName("doubles delay on each subsequent call") - void exponentialDoubling() { - AdaptiveBackoff backoff = new AdaptiveBackoff(50, 10000); - assertEquals(50, backoff.nextDelay()); - assertEquals(100, backoff.nextDelay()); - assertEquals(200, backoff.nextDelay()); - assertEquals(400, backoff.nextDelay()); - assertEquals(800, backoff.nextDelay()); - } - - @Test - @DisplayName("caps at maxMs") - void capAtMax() { - AdaptiveBackoff backoff = new AdaptiveBackoff(100, 300); - assertEquals(100, backoff.nextDelay()); - assertEquals(200, backoff.nextDelay()); - assertEquals(300, backoff.nextDelay()); - assertEquals(300, backoff.nextDelay()); - } - - @Test - @DisplayName("reset returns to initial delay") - void resetToInitial() { - AdaptiveBackoff backoff = new AdaptiveBackoff(50, 1000); - backoff.nextDelay(); - backoff.nextDelay(); - backoff.nextDelay(); - - backoff.reset(); - assertEquals(50, backoff.nextDelay()); - } - - @Test - @DisplayName("rejects invalid initialMs") - void rejectsInvalidInitialMs() { - assertThrows(IllegalArgumentException.class, () -> new AdaptiveBackoff(0, 1000)); - assertThrows(IllegalArgumentException.class, () -> new AdaptiveBackoff(-1, 1000)); - } - - @Test - @DisplayName("rejects maxMs less than initialMs") - void rejectsMaxLessThanInitial() { - assertThrows(IllegalArgumentException.class, () -> new AdaptiveBackoff(200, 100)); - } - - @Test - @DisplayName("works when initialMs equals maxMs") - void initialEqualsMax() { - AdaptiveBackoff backoff = new AdaptiveBackoff(500, 500); - assertEquals(500, backoff.nextDelay()); - assertEquals(500, backoff.nextDelay()); - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/CompositeProgressListenerTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/CompositeProgressListenerTest.java index 6c278f6e8610..2c1de34b3b75 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/CompositeProgressListenerTest.java +++ 
b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/CompositeProgressListenerTest.java @@ -97,11 +97,6 @@ public UUID getAppId() { return UUID.fromString("00000000-0000-0000-0000-000000000002"); } - @Override - public boolean isDistributed() { - return false; - } - @Override public String getSource() { return "UNIT_TEST"; diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategyTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategyTest.java index 804225e9cbda..042f19abf42e 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategyTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedIndexingStrategyTest.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicReference; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; @@ -300,6 +301,54 @@ void updateStatsFromDistributedJobFallsBackToLocalSinkAndPartitionStats() throws } } + @Test + void updateStatsFromDistributedJobUsesAggregatedServerStatsWhenOnlySinkFailures() + throws Exception { + CollectionDAO.SearchIndexServerStatsDAO serverStatsDao = + mock(CollectionDAO.SearchIndexServerStatsDAO.class); + UUID jobId = UUID.fromString("00000000-0000-0000-0000-000000000023"); + Stats stats = createBaseStats("table", 10); + SearchIndexJob distributedJob = + SearchIndexJob.builder() + .id(jobId) + .totalRecords(10) + .successRecords(8) + .failedRecords(2) + .entityStats( + Map.of( + "table", + SearchIndexJob.EntityTypeStats.builder() + .entityType("table") + .totalRecords(10) + .successRecords(0) + .failedRecords(10) + .build())) + .build(); + + when(collectionDAO.searchIndexServerStatsDAO()).thenReturn(serverStatsDao); + when(serverStatsDao.getAggregatedStats(jobId.toString())) + .thenReturn( + new CollectionDAO.SearchIndexServerStatsDAO.AggregatedServerStats( + 10, 0, 0, 0, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 1)); + + try (MockedStatic entityMock = mockStatic(Entity.class)) { + entityMock.when(Entity::getCollectionDAO).thenReturn(collectionDAO); + + invokePrivate( + "updateStatsFromDistributedJob", + new Class[] {Stats.class, SearchIndexJob.class, StepStats.class}, + stats, + distributedJob, + new StepStats().withSuccessRecords(8).withFailedRecords(2)); + } + + assertEquals(0, stats.getJobStats().getSuccessRecords()); + assertEquals(10, stats.getJobStats().getFailedRecords()); + assertEquals(10, stats.getSinkStats().getTotalRecords()); + assertEquals(0, stats.getSinkStats().getSuccessRecords()); + assertEquals(10, stats.getSinkStats().getFailedRecords()); + } + @Test void statusHelpersReportStoppedIncompleteAndCompleteJobs() throws Exception { Stats complete = createBaseStats("table", 10); @@ -332,14 +381,47 @@ void statusHelpersReportStoppedIncompleteAndCompleteJobs() throws Exception { } @Test - void finalizeAllEntityReindexSkipsPromotedEntitiesAndUsesPerEntitySuccess() throws Exception { + @SuppressWarnings("unchecked") + void finalizeAllEntityReindexPromotesZeroRecordEntityFromInitializedStats() throws Exception { + DistributedSearchIndexExecutor executor = mock(DistributedSearchIndexExecutor.class); + EntityCompletionTracker tracker = mock(EntityCompletionTracker.class); + RecreateIndexHandler indexPromotionHandler = 
mock(RecreateIndexHandler.class); + ReindexContext stagedIndexContext = stagedContext("user"); + + when(tracker.getPromotedEntities()).thenReturn(Set.of()); + when(executor.getEntityTracker()).thenReturn(tracker); + when(executor.getJobWithFreshStats()) + .thenReturn(SearchIndexJob.builder().entityStats(Map.of()).build()); + setField("distributedExecutor", executor); + ((AtomicReference) getField("currentStats")).set(createBaseStats("user", 0)); + + boolean result = + (Boolean) + invokePrivate( + "finalizeAllEntityReindex", + new Class[] {RecreateIndexHandler.class, ReindexContext.class, boolean.class}, + indexPromotionHandler, + stagedIndexContext, + true); + + assertTrue(result); + ArgumentCaptor contextCaptor = + ArgumentCaptor.forClass(EntityReindexContext.class); + ArgumentCaptor successCaptor = ArgumentCaptor.forClass(Boolean.class); + verify(indexPromotionHandler).finalizeReindex(contextCaptor.capture(), successCaptor.capture()); + assertEquals("user", contextCaptor.getValue().getEntityType()); + assertEquals(Boolean.TRUE, successCaptor.getValue()); + } + + @Test + void finalizeAllEntityReindexSkipsPromotedEntitiesAndFailsMissingEntityStats() throws Exception { DistributedSearchIndexExecutor executor = mock(DistributedSearchIndexExecutor.class); EntityCompletionTracker tracker = mock(EntityCompletionTracker.class); - RecreateIndexHandler recreateIndexHandler = mock(RecreateIndexHandler.class); - ReindexContext recreateContext = new ReindexContext(); - recreateContext.add( + RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class); + ReindexContext stagedIndexContext = new ReindexContext(); + stagedIndexContext.add( "table", "table_index", "table_original", "table_staged", Set.of(), "table", List.of()); - recreateContext.add( + stagedIndexContext.add( "user", "user_index", "user_original", @@ -347,7 +429,7 @@ void finalizeAllEntityReindexSkipsPromotedEntitiesAndUsesPerEntitySuccess() thro Set.of("user"), "user", List.of("parent")); - recreateContext.add( + stagedIndexContext.add( "dashboard", "dash_index", "dash_original", @@ -378,8 +460,8 @@ void finalizeAllEntityReindexSkipsPromotedEntitiesAndUsesPerEntitySuccess() thro invokePrivate( "finalizeAllEntityReindex", new Class[] {RecreateIndexHandler.class, ReindexContext.class, boolean.class}, - recreateIndexHandler, - recreateContext, + indexPromotionHandler, + stagedIndexContext, true); assertTrue(result); @@ -387,7 +469,7 @@ void finalizeAllEntityReindexSkipsPromotedEntitiesAndUsesPerEntitySuccess() thro ArgumentCaptor contextCaptor = ArgumentCaptor.forClass(EntityReindexContext.class); ArgumentCaptor successCaptor = ArgumentCaptor.forClass(Boolean.class); - verify(recreateIndexHandler, times(2)) + verify(indexPromotionHandler, times(2)) .finalizeReindex(contextCaptor.capture(), successCaptor.capture()); Map outcomes = new java.util.HashMap<>(); @@ -396,7 +478,7 @@ void finalizeAllEntityReindexSkipsPromotedEntitiesAndUsesPerEntitySuccess() thro contextCaptor.getAllValues().get(i).getEntityType(), successCaptor.getAllValues().get(i)); } - assertEquals(Boolean.TRUE, outcomes.get("user")); + assertEquals(Boolean.FALSE, outcomes.get("user")); assertEquals(Boolean.FALSE, outcomes.get("dashboard")); } @@ -450,12 +532,22 @@ void executeReturnsCompletedResultForSuccessfulSinglePassDistributedRun() { .failedRecords(0) .build())) .build(); - RecreateIndexHandler recreateIndexHandler = mock(RecreateIndexHandler.class); + RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class); + ReindexContext 
stagedIndexContext = stagedContext(Entity.TABLE); + ReindexingConfiguration reindexConfig = + ReindexingConfiguration.builder() + .entities(Set.of(Entity.TABLE)) + .batchSize(25) + .maxConcurrentRequests(3) + .payloadSize(1024L) + .build(); when(entityRepository.getDao()).thenReturn(entityDao); when(entityDao.listCount(any(ListFilter.class))).thenReturn(5); when(searchRepository.createBulkSink(anyInt(), anyInt(), anyLong())).thenReturn(bulkSink); - when(searchRepository.createReindexHandler()).thenReturn(recreateIndexHandler); + when(searchRepository.createReindexHandler()).thenReturn(indexPromotionHandler); + when(indexPromotionHandler.reCreateIndexes(reindexConfig.entities())) + .thenReturn(stagedIndexContext); when(bulkSink.getPendingVectorTaskCount()).thenReturn(0); when(bulkSink.flushAndAwait(60)).thenReturn(true); when(bulkSink.getStats()) @@ -477,15 +569,7 @@ void executeReturnsCompletedResultForSuccessfulSinglePassDistributedRun() { entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); entityMock.when(Entity::getCollectionDAO).thenReturn(collectionDAO); - ExecutionResult result = - strategy.execute( - ReindexingConfiguration.builder() - .entities(Set.of(Entity.TABLE)) - .batchSize(25) - .maxConcurrentRequests(3) - .payloadSize(1024L) - .build(), - context(jobId)); + ExecutionResult result = strategy.execute(reindexConfig, context(jobId)); assertEquals(ExecutionResult.Status.COMPLETED, result.status()); assertEquals(5, result.totalRecords()); @@ -497,17 +581,81 @@ void executeReturnsCompletedResultForSuccessfulSinglePassDistributedRun() { DistributedSearchIndexExecutor constructed = executorConstruction.constructed().getFirst(); verify(constructed).performStartupRecovery(); verify(constructed).setAppContext(APP_ID, 1234L); + verify(constructed).execute(bulkSink, stagedIndexContext, reindexConfig); + } + } + + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + void executeNormalizesLegacyEntityAliasesBeforeDistributedSetup() { + @SuppressWarnings("unchecked") + EntityTimeSeriesRepository timeSeriesRepository = mock(EntityTimeSeriesRepository.class); + EntityTimeSeriesDAO timeSeriesDao = mock(EntityTimeSeriesDAO.class); + BulkSink bulkSink = mock(BulkSink.class); + UUID jobId = UUID.fromString("00000000-0000-0000-0000-000000000032"); + SearchIndexJob completedJob = + SearchIndexJob.builder() + .id(jobId) + .status(IndexJobStatus.COMPLETED) + .totalRecords(5) + .successRecords(5) + .failedRecords(0) + .entityStats( + Map.of( + Entity.QUERY_COST_RECORD, + SearchIndexJob.EntityTypeStats.builder() + .entityType(Entity.QUERY_COST_RECORD) + .totalRecords(5) + .successRecords(5) + .failedRecords(0) + .build())) + .build(); + RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class); + ReindexContext stagedIndexContext = stagedContext(Entity.QUERY_COST_RECORD); + ReindexingConfiguration reindexConfig = + ReindexingConfiguration.builder() + .entities(Set.of(SearchIndexEntityTypes.QUERY_COST_RESULT)) + .build(); + + when(timeSeriesRepository.getTimeSeriesDao()).thenReturn(timeSeriesDao); + when(timeSeriesDao.listCount(any(ListFilter.class))).thenReturn(5); + when(searchRepository.createBulkSink(anyInt(), anyInt(), anyLong())).thenReturn(bulkSink); + when(searchRepository.createReindexHandler()).thenReturn(indexPromotionHandler); + when(indexPromotionHandler.reCreateIndexes(Set.of(Entity.QUERY_COST_RECORD))) + .thenReturn(stagedIndexContext); + when(bulkSink.getPendingVectorTaskCount()).thenReturn(0); + 
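
The test above feeds the legacy configuration constant SearchIndexEntityTypes.QUERY_COST_RESULT into the strategy and expects the executor and promotion handler to receive the canonical Entity.QUERY_COST_RECORD. A hedged sketch of that normalization step, assuming the constants resolve to the literals shown; the real lookup lives in SearchIndexEntityTypes and this helper name is illustrative only:

import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

// Hypothetical helper; only the pair exercised by the test is listed.
final class LegacyEntityTypeNormalizer {
  private static final Map<String, String> LEGACY_TO_CANONICAL =
      Map.of("queryCostResult", "queryCostRecord");

  static Set<String> normalize(Set<String> requested) {
    return requested.stream()
        .map(type -> LEGACY_TO_CANONICAL.getOrDefault(type, type))
        .collect(Collectors.toSet());
  }
}
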
when(bulkSink.flushAndAwait(60)).thenReturn(true); + when(bulkSink.getStats()) + .thenReturn(new StepStats().withSuccessRecords(5).withFailedRecords(0)); + when(bulkSink.getVectorStats()).thenReturn(new StepStats().withTotalRecords(0)); + + try (MockedStatic entityMock = mockStatic(Entity.class); + MockedConstruction executorConstruction = + mockConstruction( + DistributedSearchIndexExecutor.class, + (mock, context) -> { + when(mock.createJob( + any(Set.class), any(EventPublisherJob.class), eq("admin"), any())) + .thenReturn(completedJob); + when(mock.getJobWithFreshStats()).thenReturn(completedJob); + })) { + entityMock + .when(() -> Entity.getEntityTimeSeriesRepository(Entity.QUERY_COST_RECORD)) + .thenReturn(timeSeriesRepository); + + ExecutionResult result = strategy.execute(reindexConfig, context(jobId)); + + assertEquals(ExecutionResult.Status.COMPLETED, result.status()); + DistributedSearchIndexExecutor constructed = executorConstruction.constructed().getFirst(); + ArgumentCaptor entityTypesCaptor = ArgumentCaptor.forClass(Set.class); verify(constructed) - .execute( - bulkSink, - null, - false, - ReindexingConfiguration.builder() - .entities(Set.of(Entity.TABLE)) - .batchSize(25) - .maxConcurrentRequests(3) - .payloadSize(1024L) - .build()); + .createJob( + entityTypesCaptor.capture(), + any(EventPublisherJob.class), + eq("admin"), + eq(reindexConfig)); + assertEquals(Set.of(Entity.QUERY_COST_RECORD), entityTypesCaptor.getValue()); + verify(indexPromotionHandler).reCreateIndexes(Set.of(Entity.QUERY_COST_RECORD)); } } @@ -549,10 +697,15 @@ void executeClosesSinkAndReturnsFailedWhenDoExecuteThrowsAndSinkCloseAlsoFails() EntityRepository entityRepository = mock(EntityRepository.class); EntityDAO entityDao = mock(EntityDAO.class); BulkSink bulkSink = mock(BulkSink.class); + RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class); + ReindexContext stagedIndexContext = stagedContext(Entity.TABLE); when(entityRepository.getDao()).thenReturn(entityDao); when(entityDao.listCount(any(ListFilter.class))).thenReturn(5); when(searchRepository.createBulkSink(anyInt(), anyInt(), anyLong())).thenReturn(bulkSink); + when(searchRepository.createReindexHandler()).thenReturn(indexPromotionHandler); + when(indexPromotionHandler.reCreateIndexes(Set.of(Entity.TABLE))) + .thenReturn(stagedIndexContext); doThrow(new RuntimeException("close failed")).when(bulkSink).close(); try (MockedStatic entityMock = mockStatic(Entity.class); @@ -566,7 +719,10 @@ void executeClosesSinkAndReturnsFailedWhenDoExecuteThrowsAndSinkCloseAlsoFails() SearchIndexJob.builder().id(UUID.randomUUID()).totalRecords(5).build()); org.mockito.Mockito.doThrow(new RuntimeException("execute failed")) .when(mock) - .execute(any(), any(), eq(false), any()); + .execute( + any(BulkSink.class), + any(ReindexContext.class), + any(ReindexingConfiguration.class)); })) { entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); @@ -591,10 +747,15 @@ void executeClosesSinkSuccessfullyWhenDoExecuteThrows() throws Exception { EntityRepository entityRepository = mock(EntityRepository.class); EntityDAO entityDao = mock(EntityDAO.class); BulkSink bulkSink = mock(BulkSink.class); + RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class); + ReindexContext stagedIndexContext = stagedContext(Entity.TABLE); when(entityRepository.getDao()).thenReturn(entityDao); when(entityDao.listCount(any(ListFilter.class))).thenReturn(5); when(searchRepository.createBulkSink(anyInt(), anyInt(), 
anyLong())).thenReturn(bulkSink); + when(searchRepository.createReindexHandler()).thenReturn(indexPromotionHandler); + when(indexPromotionHandler.reCreateIndexes(Set.of(Entity.TABLE))) + .thenReturn(stagedIndexContext); try (MockedStatic entityMock = mockStatic(Entity.class); MockedConstruction executorConstruction = @@ -607,7 +768,10 @@ void executeClosesSinkSuccessfullyWhenDoExecuteThrows() throws Exception { SearchIndexJob.builder().id(UUID.randomUUID()).totalRecords(5).build()); org.mockito.Mockito.doThrow(new RuntimeException("execute failed")) .when(mock) - .execute(any(), any(), eq(false), any()); + .execute( + any(BulkSink.class), + any(ReindexContext.class), + any(ReindexingConfiguration.class)); })) { entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); @@ -640,6 +804,19 @@ private Stats createBaseStats(String entityType, int totalRecords) { return stats; } + private ReindexContext stagedContext(String entityType) { + ReindexContext context = new ReindexContext(); + context.add( + entityType, + entityType + "_index", + entityType + "_original", + entityType + "_staged", + Set.of(), + entityType, + List.of()); + return context; + } + private Object invokePrivate(String methodName, Class[] parameterTypes, Object... args) throws Exception { Method method = DistributedIndexingStrategy.class.getDeclaredMethod(methodName, parameterTypes); @@ -681,11 +858,6 @@ public UUID getAppId() { return APP_ID; } - @Override - public boolean isDistributed() { - return true; - } - @Override public String getSource() { return "UNIT_TEST"; diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexFinalizerTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexFinalizerTest.java new file mode 100644 index 000000000000..62100526b05d --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/DistributedReindexFinalizerTest.java @@ -0,0 +1,96 @@ +package org.openmetadata.service.apps.bundles.searchIndex; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; +import org.openmetadata.service.Entity; +import org.openmetadata.service.apps.bundles.searchIndex.distributed.SearchIndexJob; +import org.openmetadata.service.search.EntityReindexContext; +import org.openmetadata.service.search.RecreateIndexHandler; +import org.openmetadata.service.search.ReindexContext; + +class DistributedReindexFinalizerTest { + + @Test + void finalizeRemainingEntitiesPromotesColumnOnceWhenTableAndColumnRemain() { + RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class); + ReindexContext stagedIndexContext = stagedContext(Entity.TABLE, Entity.TABLE_COLUMN); + + DistributedReindexFinalizer finalizer = + new DistributedReindexFinalizer(indexPromotionHandler, stagedIndexContext); + finalizer.finalizeRemainingEntities(Set.of(), Map.of(Entity.TABLE, successfulStats()), true); + + ArgumentCaptor contextCaptor = + ArgumentCaptor.forClass(EntityReindexContext.class); + ArgumentCaptor successCaptor = ArgumentCaptor.forClass(Boolean.class); + verify(indexPromotionHandler, times(2)) + .finalizeReindex(contextCaptor.capture(), 
successCaptor.capture()); + + Map finalizations = finalizations(contextCaptor, successCaptor); + assertEquals(Set.of(Entity.TABLE, Entity.TABLE_COLUMN), finalizations.keySet()); + assertEquals(Boolean.TRUE, finalizations.get(Entity.TABLE)); + assertEquals(Boolean.TRUE, finalizations.get(Entity.TABLE_COLUMN)); + } + + @Test + void finalizeRemainingEntitiesDoesNotRepromoteAlreadyPromotedColumnWhenTableRemains() { + RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class); + ReindexContext stagedIndexContext = stagedContext(Entity.TABLE, Entity.TABLE_COLUMN); + + DistributedReindexFinalizer finalizer = + new DistributedReindexFinalizer(indexPromotionHandler, stagedIndexContext); + finalizer.finalizeRemainingEntities( + Set.of(Entity.TABLE_COLUMN), Map.of(Entity.TABLE, successfulStats()), true); + + ArgumentCaptor contextCaptor = + ArgumentCaptor.forClass(EntityReindexContext.class); + ArgumentCaptor successCaptor = ArgumentCaptor.forClass(Boolean.class); + verify(indexPromotionHandler, times(1)) + .finalizeReindex(contextCaptor.capture(), successCaptor.capture()); + + assertEquals(Entity.TABLE, contextCaptor.getValue().getEntityType()); + assertEquals(Boolean.TRUE, successCaptor.getValue()); + } + + private Map finalizations( + ArgumentCaptor contextCaptor, ArgumentCaptor successCaptor) { + List contexts = contextCaptor.getAllValues(); + List outcomes = successCaptor.getAllValues(); + return Map.of( + contexts.get(0).getEntityType(), + outcomes.get(0), + contexts.get(1).getEntityType(), + outcomes.get(1)); + } + + private SearchIndexJob.EntityTypeStats successfulStats() { + return SearchIndexJob.EntityTypeStats.builder() + .entityType(Entity.TABLE) + .totalRecords(1) + .successRecords(1) + .failedRecords(0) + .build(); + } + + private ReindexContext stagedContext(String... 
entities) { + ReindexContext context = new ReindexContext(); + for (String entity : entities) { + context.add( + entity, + entity + "_index", + entity + "_original", + entity + "_staged", + Set.of(entity), + entity, + List.of()); + } + return context; + } +} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityBatchSizeEstimatorTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityBatchSizeEstimatorTest.java deleted file mode 100644 index 5f54e4f9f63f..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityBatchSizeEstimatorTest.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; - -@DisplayName("EntityBatchSizeEstimator Tests") -class EntityBatchSizeEstimatorTest { - - @Test - @DisplayName("LARGE entities get smaller batch size") - void largeEntitiesGetSmallerBatch() { - int base = 200; - assertEquals(100, EntityBatchSizeEstimator.estimateBatchSize("table", base)); - assertEquals(100, EntityBatchSizeEstimator.estimateBatchSize("topic", base)); - assertEquals(100, EntityBatchSizeEstimator.estimateBatchSize("dashboard", base)); - assertEquals(100, EntityBatchSizeEstimator.estimateBatchSize("mlmodel", base)); - assertEquals(100, EntityBatchSizeEstimator.estimateBatchSize("container", base)); - assertEquals(100, EntityBatchSizeEstimator.estimateBatchSize("storedProcedure", base)); - } - - @Test - @DisplayName("LARGE entities respect minimum batch size of 25") - void largeEntitiesRespectMinimum() { - assertEquals(25, EntityBatchSizeEstimator.estimateBatchSize("table", 40)); - assertEquals(25, EntityBatchSizeEstimator.estimateBatchSize("table", 10)); - } - - @Test - @DisplayName("SMALL entities get larger batch size") - void smallEntitiesGetLargerBatch() { - int base = 200; - assertEquals(400, EntityBatchSizeEstimator.estimateBatchSize("user", base)); - assertEquals(400, EntityBatchSizeEstimator.estimateBatchSize("team", base)); - assertEquals(400, EntityBatchSizeEstimator.estimateBatchSize("bot", base)); - assertEquals(400, EntityBatchSizeEstimator.estimateBatchSize("role", base)); - assertEquals(400, EntityBatchSizeEstimator.estimateBatchSize("policy", base)); - assertEquals(400, EntityBatchSizeEstimator.estimateBatchSize("tag", base)); - assertEquals(400, EntityBatchSizeEstimator.estimateBatchSize("classification", base)); - } - - @Test - @DisplayName("SMALL entities respect maximum batch size of 1000") - void smallEntitiesRespectMaximum() { - assertEquals(1000, EntityBatchSizeEstimator.estimateBatchSize("user", 600)); - assertEquals(1000, EntityBatchSizeEstimator.estimateBatchSize("user", 800)); - } - - @Test - @DisplayName("MEDIUM (unknown) entities get base batch size unchanged") - void mediumEntitiesUnchanged() { - int base = 200; - assertEquals(base, EntityBatchSizeEstimator.estimateBatchSize("pipeline", base)); - assertEquals(base, EntityBatchSizeEstimator.estimateBatchSize("database", base)); - assertEquals(base, EntityBatchSizeEstimator.estimateBatchSize("glossaryTerm", base)); - assertEquals(base, EntityBatchSizeEstimator.estimateBatchSize("unknownEntity", base)); - } - - @Test - @DisplayName("handles zero and negative base batch size gracefully") - void handlesZeroAndNegative() { - 
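
The deleted tests in this file pin down EntityBatchSizeEstimator's whole contract: LARGE entity types (table, topic, dashboard, mlmodel, container, storedProcedure) halve the base batch size with a floor of 25, SMALL types (user, team, bot, role, policy, tag, classification) double it with a ceiling of 1000, unknown types pass through, and non-positive bases are returned unchanged. A sketch reconstructed solely from those assertions; the shipped implementation may differ in detail:

import java.util.Set;

// Reconstruction of the documented tiering behaviour, not the original source.
final class BatchSizeEstimatorSketch {
  private static final Set<String> LARGE =
      Set.of("table", "topic", "dashboard", "mlmodel", "container", "storedProcedure");
  private static final Set<String> SMALL =
      Set.of("user", "team", "bot", "role", "policy", "tag", "classification");

  static int estimate(String entityType, int base) {
    if (base <= 0) {
      return base; // matches estimateBatchSize("table", 0) == 0 and the negative passthrough
    }
    if (LARGE.contains(entityType)) {
      return Math.max(25, base / 2); // 200 -> 100, 40 -> 25
    }
    if (SMALL.contains(entityType)) {
      return Math.min(1000, base * 2); // 200 -> 400, 600 -> 1000
    }
    return base; // MEDIUM / unknown entities keep the base size
  }
}
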
assertEquals(0, EntityBatchSizeEstimator.estimateBatchSize("table", 0)); - assertTrue(EntityBatchSizeEstimator.estimateBatchSize("table", -1) < 0); - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReaderLifecycleTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReaderLifecycleTest.java deleted file mode 100644 index eb9699104b69..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReaderLifecycleTest.java +++ /dev/null @@ -1,241 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.isNull; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.mockConstruction; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoInteractions; -import static org.mockito.Mockito.when; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.Phaser; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.MockedConstruction; -import org.openmetadata.schema.analytics.ReportData; -import org.openmetadata.schema.type.Paging; -import org.openmetadata.schema.utils.ResultList; -import org.openmetadata.service.util.RestUtil; -import org.openmetadata.service.workflows.searchIndex.PaginatedEntitiesSource; -import org.openmetadata.service.workflows.searchIndex.PaginatedEntityTimeSeriesSource; -import org.openmetadata.service.workflows.searchIndex.ReindexingUtil; - -class EntityReaderLifecycleTest { - - private ExecutorService producerExecutor; - private AtomicBoolean stopped; - private EntityReader reader; - - @BeforeEach - void setUp() { - producerExecutor = mock(ExecutorService.class); - stopped = new AtomicBoolean(false); - reader = new EntityReader(producerExecutor, stopped, 1, 0); - when(producerExecutor.submit(any(Runnable.class))) - .thenAnswer( - invocation -> { - ((Runnable) invocation.getArgument(0)).run(); - return mock(Future.class); - }); - } - - @Test - void readEntityReturnsZeroWhenNoRecordsExist() { - Phaser phaser = new Phaser(1); - - int submitted = - reader.readEntity( - "table", 0, 50, phaser, (entityType, batch, offset) -> fail("callback should not run")); - - assertEquals(0, submitted); - assertEquals(1, phaser.getRegisteredParties()); - verifyNoInteractions(producerExecutor); - } - - @Test - void readEntityProcessesSingleRegularEntityReaderUntilCursorExhausted() throws Exception { - Phaser phaser = new Phaser(1); - List offsets = new ArrayList<>(); - ResultList batch = mockResult(List.of("table-1", "table-2"), null, 0); - - try (MockedConstruction construction = - mockConstruction( - PaginatedEntitiesSource.class, - (mock, context) -> - when(mock.readNextKeyset(isNull())).thenReturn((ResultList) batch))) { - - int submitted = - reader.readEntity( - "table", 2, 10, phaser, (entityType, result, offset) -> offsets.add(offset)); - - 
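
The reader-lifecycle tests removed here encode how EntityReader fanned out work: records are split into batch-sized slices, the first reader starts from a null cursor, and each later reader starts from an encoded offset (encodeCursor("2") and encodeCursor("4") for six records with batch size two, with calculateNumberOfReaders(10, 0) == 1 as the degenerate case). A sketch of that cursor computation, assuming RestUtil.encodeCursor is Base64 over the decimal offset and the reader count is ceil(total / batchSize) with a floor of one:

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;

// Illustrative fan-out computation; names and clamping rules are assumptions.
final class ReaderFanoutSketch {
  static List<String> startCursors(int totalRecords, int batchSize) {
    List<String> cursors = new ArrayList<>();
    if (totalRecords <= 0 || batchSize <= 0) {
      cursors.add(null); // degenerate input: a single reader from the beginning
      return cursors;
    }
    int readers = (totalRecords + batchSize - 1) / batchSize; // ceil division
    cursors.add(null); // reader 0 starts at the front of the keyset
    for (int r = 1; r < readers; r++) {
      String offset = Long.toString((long) r * batchSize);
      cursors.add(Base64.getEncoder().encodeToString(offset.getBytes(StandardCharsets.UTF_8)));
    }
    return cursors; // total=6, batchSize=2 -> [null, enc("2"), enc("4")]
  }
}
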
assertEquals(1, submitted); - assertEquals(List.of(0), offsets); - assertEquals(1, phaser.getRegisteredParties()); - assertEquals(2, construction.constructed().size()); - verify(construction.constructed().get(1)).readNextKeyset(null); - } - } - - @Test - void readEntityUsesTimeSeriesConstructorsAndBoundaryCursorsForParallelReaders() throws Exception { - Phaser phaser = new Phaser(1); - String entityType = ReportData.ReportDataType.ENTITY_REPORT_DATA.value(); - AtomicInteger callbackCount = new AtomicInteger(); - List> constructorArguments = new ArrayList<>(); - - try (MockedConstruction construction = - mockConstruction( - PaginatedEntityTimeSeriesSource.class, - (mock, context) -> { - constructorArguments.add(List.copyOf(context.arguments())); - when(mock.readWithCursor(any())) - .thenReturn((ResultList) mockResult(List.of("row"), null, 0)); - })) { - - int submitted = - reader.readEntity( - entityType, - 6, - 2, - phaser, - (type, result, offset) -> callbackCount.incrementAndGet(), - 100L, - 200L); - - assertEquals(3, submitted); - assertEquals(3, callbackCount.get()); - assertEquals(3, construction.constructed().size()); - assertEquals(1, phaser.getRegisteredParties()); - - assertEquals(List.of(entityType, 2, List.of(), 6, 100L, 200L), constructorArguments.get(0)); - assertEquals(List.of(entityType, 2, List.of(), 6, 100L, 200L), constructorArguments.get(1)); - assertEquals(List.of(entityType, 2, List.of(), 6, 100L, 200L), constructorArguments.get(2)); - - verify(construction.constructed().get(0)).readWithCursor(null); - verify(construction.constructed().get(1)).readWithCursor(RestUtil.encodeCursor("2")); - verify(construction.constructed().get(2)).readWithCursor(RestUtil.encodeCursor("4")); - } - } - - @Test - void readEntityDeregistersMissingReadersWhenBoundaryDiscoveryReturnsFewerCursors() { - Phaser phaser = new Phaser(1); - AtomicInteger constructionCount = new AtomicInteger(); - - try (MockedConstruction construction = - mockConstruction( - PaginatedEntitiesSource.class, - (mock, context) -> { - if (constructionCount.getAndIncrement() == 0) { - when(mock.findBoundaryCursors(anyInt(), anyInt())).thenReturn(List.of()); - } else { - when(mock.readNextKeyset(any())) - .thenReturn((ResultList) mockResult(List.of(), null, 0)); - } - })) { - - int submitted = - reader.readEntity( - "table", - 6, - 2, - phaser, - (entityType, batch, offset) -> fail("empty batch should not invoke callback")); - - assertEquals(3, submitted); - assertEquals(2, construction.constructed().size()); - assertEquals(1, phaser.getRegisteredParties()); - verify(producerExecutor).submit(any(Runnable.class)); - } - } - - @Test - void readEntityRestoresPhaserStateWhenSubmissionFails() { - Phaser phaser = new Phaser(1); - when(producerExecutor.submit(any(Runnable.class))) - .thenThrow(new IllegalStateException("submit failed")); - - IllegalStateException exception = - assertThrows( - IllegalStateException.class, - () -> - reader.readEntity( - "table", - 2, - 10, - phaser, - (entityType, batch, offset) -> fail("callback should not run"))); - - assertEquals("submit failed", exception.getMessage()); - assertEquals(1, phaser.getRegisteredParties()); - } - - @Test - void readEntitySwallowsInterruptedCallbacksAndDeregistersReader() throws Exception { - Phaser phaser = new Phaser(1); - - try (MockedConstruction construction = - mockConstruction( - PaginatedEntitiesSource.class, - (mock, context) -> - when(mock.readNextKeyset(isNull())) - .thenReturn((ResultList) mockResult(List.of("table-1"), null, 0)))) { - - int submitted = - 
reader.readEntity( - "table", - 1, - 10, - phaser, - (entityType, batch, offset) -> { - throw new InterruptedException("stop"); - }); - - assertEquals(1, submitted); - assertEquals(1, phaser.getRegisteredParties()); - assertTrue(Thread.currentThread().isInterrupted()); - Thread.interrupted(); - verify(construction.constructed().get(1)).readNextKeyset(null); - } - } - - @Test - void helperMethodsRespectTimeSeriesAndMinimumReaderRules() { - assertEquals( - List.of(), - ReindexingUtil.getSearchIndexFields(ReportData.ReportDataType.ENTITY_REPORT_DATA.value())); - assertEquals(List.of("*"), ReindexingUtil.getSearchIndexFields("table")); - assertEquals(1, EntityReader.calculateNumberOfReaders(10, 0)); - assertEquals(3, EntityReader.calculateNumberOfReaders(11, 5)); - } - - @Test - void stopAndCloseSetStoppedFlag() { - reader.stop(); - assertTrue(stopped.get()); - - stopped.set(false); - reader.close(); - assertTrue(stopped.get()); - } - - private ResultList mockResult(List data, String after, Integer warningsCount) { - ResultList result = new ResultList<>(); - result.setData(new ArrayList<>(data)); - result.setErrors(null); - result.setWarningsCount(warningsCount); - result.setPaging(new Paging().withAfter(after)); - return result; - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReaderRetryTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReaderRetryTest.java deleted file mode 100644 index 8a7fb8710517..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReaderRetryTest.java +++ /dev/null @@ -1,108 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Test; -import org.openmetadata.schema.system.IndexingError; -import org.openmetadata.service.exception.SearchIndexException; - -@DisplayName("EntityReader Retry Tests") -class EntityReaderRetryTest { - - @Test - @DisplayName("isTransientError detects timeout errors") - void detectsTimeoutErrors() { - SearchIndexException e = - new SearchIndexException( - new IndexingError().withMessage("Connection timeout while reading entities")); - assertTrue(EntityReader.isTransientError(e)); - } - - @Test - @DisplayName("isTransientError detects connection errors") - void detectsConnectionErrors() { - SearchIndexException e = - new SearchIndexException( - new IndexingError().withMessage("java.net.ConnectException: Connection refused")); - assertTrue(EntityReader.isTransientError(e)); - } - - @Test - @DisplayName("isTransientError detects pool exhaustion") - void detectsPoolExhaustion() { - SearchIndexException e = - new SearchIndexException( - new IndexingError().withMessage("Pool exhausted - no connections available")); - assertTrue(EntityReader.isTransientError(e)); - } - - @Test - @DisplayName("isTransientError detects socket timeout") - void detectsSocketTimeout() { - SearchIndexException e = - new SearchIndexException( - new IndexingError().withMessage("java.net.SocketTimeoutException: Read timed out")); - assertTrue(EntityReader.isTransientError(e)); - } - - @Test - @DisplayName("isTransientError returns false for non-transient errors") - void 
rejectsNonTransientErrors() { - SearchIndexException e = - new SearchIndexException(new IndexingError().withMessage("Entity not found: table.xyz")); - assertFalse(EntityReader.isTransientError(e)); - } - - @Test - @DisplayName("isTransientError returns false for null message") - void handleNullMessage() { - SearchIndexException e = new SearchIndexException(new IndexingError()); - assertFalse(EntityReader.isTransientError(e)); - } - - @Test - @DisplayName("EntityReader constructor accepts custom retry configuration") - void customRetryConfiguration() { - java.util.concurrent.ExecutorService executor = - java.util.concurrent.Executors.newSingleThreadExecutor(); - java.util.concurrent.atomic.AtomicBoolean stopped = - new java.util.concurrent.atomic.AtomicBoolean(false); - EntityReader reader = new EntityReader(executor, stopped, 5, 1000); - assertNotNull(reader); - executor.shutdown(); - } - - @Test - @DisplayName("EntityReader default constructor uses default retry values") - void defaultRetryConfiguration() { - java.util.concurrent.ExecutorService executor = - java.util.concurrent.Executors.newSingleThreadExecutor(); - java.util.concurrent.atomic.AtomicBoolean stopped = - new java.util.concurrent.atomic.AtomicBoolean(false); - EntityReader reader = new EntityReader(executor, stopped); - assertNotNull(reader); - executor.shutdown(); - } - - @Test - @DisplayName("VectorCompletionResult.success creates completed result") - void vectorCompletionSuccess() { - VectorCompletionResult result = VectorCompletionResult.success(150); - assertTrue(result.completed()); - assertEquals(0, result.pendingTaskCount()); - assertEquals(150, result.waitedMillis()); - } - - @Test - @DisplayName("VectorCompletionResult.timeout creates timeout result") - void vectorCompletionTimeout() { - VectorCompletionResult result = VectorCompletionResult.timeout(5, 30000); - assertFalse(result.completed()); - assertEquals(5, result.pendingTaskCount()); - assertEquals(30000, result.waitedMillis()); - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingPipelineTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingPipelineTest.java deleted file mode 100644 index c53c7119589e..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/IndexingPipelineTest.java +++ /dev/null @@ -1,473 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.ArgumentMatchers.isNull; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.mockConstruction; -import static org.mockito.Mockito.mockStatic; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ExecutorService; -import 
java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Phaser; -import java.util.concurrent.atomic.AtomicBoolean; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.MockedConstruction; -import org.mockito.MockedStatic; -import org.openmetadata.schema.EntityInterface; -import org.openmetadata.schema.analytics.ReportData; -import org.openmetadata.schema.system.EntityStats; -import org.openmetadata.schema.system.IndexingError; -import org.openmetadata.schema.system.Stats; -import org.openmetadata.schema.system.StepStats; -import org.openmetadata.schema.utils.ResultList; -import org.openmetadata.service.Entity; -import org.openmetadata.service.jdbi3.EntityDAO; -import org.openmetadata.service.jdbi3.EntityRepository; -import org.openmetadata.service.jdbi3.EntityTimeSeriesDAO; -import org.openmetadata.service.jdbi3.EntityTimeSeriesRepository; -import org.openmetadata.service.jdbi3.ListFilter; -import org.openmetadata.service.search.EntityReindexContext; -import org.openmetadata.service.search.RecreateIndexHandler; -import org.openmetadata.service.search.ReindexContext; -import org.openmetadata.service.search.SearchRepository; -import org.openmetadata.service.util.FullyQualifiedName; - -@SuppressWarnings({"rawtypes", "unchecked"}) -class IndexingPipelineTest { - - private SearchRepository searchRepository; - private IndexingPipeline pipeline; - - @BeforeEach - void setUp() { - searchRepository = mock(SearchRepository.class); - pipeline = new IndexingPipeline(searchRepository); - } - - @AfterEach - void tearDown() { - pipeline.close(); - } - - @Test - void executeProcessesEntitiesUsingComputedTotalsAndCompletes() throws Exception { - BulkSink sink = mock(BulkSink.class); - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - ReindexingJobContext context = mockJobContext(); - EntityRepository entityRepository = mock(EntityRepository.class); - EntityDAO entityDao = mock(EntityDAO.class); - EntityInterface entityA = mock(EntityInterface.class); - EntityInterface entityB = mock(EntityInterface.class); - ResultList batch = new ResultList<>(List.of(entityA, entityB), null, null, 0); - - when(entityRepository.getDao()).thenReturn(entityDao); - when(entityDao.listCount(any(ListFilter.class))).thenReturn(2); - when(sink.getPendingVectorTaskCount()).thenReturn(0); - when(sink.getStats()).thenReturn(new StepStats().withTotalRecords(2).withSuccessRecords(2)); - when(sink.getProcessStats()) - .thenReturn(new StepStats().withTotalRecords(2).withSuccessRecords(2)); - - pipeline.addListener(listener); - - try (MockedStatic entityMock = mockStatic(Entity.class); - MockedConstruction ignored = - mockConstruction( - EntityReader.class, - (reader, context1) -> - doAnswer( - invocation -> { - String entityType = invocation.getArgument(0); - int totalRecords = invocation.getArgument(1); - EntityReader.BatchCallback callback = invocation.getArgument(4); - assertEquals(Entity.TABLE, entityType); - assertEquals(2, totalRecords); - callback.onBatchRead(entityType, batch, 0); - return 1; - }) - .when(reader) - .readEntity( - any(String.class), - anyInt(), - anyInt(), - any(Phaser.class), - any(EntityReader.BatchCallback.class), - any(), - any()))) { - entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); - - ExecutionResult result = - pipeline.execute( - 
ReindexingConfiguration.builder() - .entities(Set.of(Entity.TABLE)) - .batchSize(2) - .consumerThreads(1) - .producerThreads(1) - .build(), - context, - Set.of(Entity.TABLE), - sink, - null, - null); - - assertEquals(ExecutionResult.Status.COMPLETED, result.status()); - assertEquals(2, result.finalStats().getJobStats().getTotalRecords()); - assertEquals(2, result.finalStats().getJobStats().getSuccessRecords()); - - ArgumentCaptor dataCaptor = ArgumentCaptor.forClass(List.class); - ArgumentCaptor contextCaptor = ArgumentCaptor.forClass(Map.class); - verify(sink).write(dataCaptor.capture(), contextCaptor.capture()); - assertEquals(2, dataCaptor.getValue().size()); - assertEquals(Entity.TABLE, contextCaptor.getValue().get("entityType")); - assertEquals(Boolean.FALSE, contextCaptor.getValue().get("recreateIndex")); - - verify(listener).onJobStarted(context); - verify(listener).onEntityTypeStarted(Entity.TABLE, 2); - verify(listener).onProgressUpdate(any(Stats.class), isNull()); - verify(listener).onJobCompleted(any(Stats.class), anyLong()); - } - } - - @Test - void executeMarksCompletedWithErrorsWhenSinkWriteFails() throws Exception { - BulkSink sink = mock(BulkSink.class); - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - ReindexingJobContext context = mockJobContext(); - EntityRepository entityRepository = mock(EntityRepository.class); - EntityDAO entityDao = mock(EntityDAO.class); - EntityInterface entity = mock(EntityInterface.class); - ResultList batch = new ResultList<>(List.of(entity), null, null, 0); - - when(entityRepository.getDao()).thenReturn(entityDao); - when(entityDao.listCount(any(ListFilter.class))).thenReturn(1); - when(sink.getPendingVectorTaskCount()).thenReturn(0); - when(sink.getStats()).thenReturn(new StepStats().withTotalRecords(0).withSuccessRecords(0)); - when(sink.getProcessStats()) - .thenReturn(new StepStats().withTotalRecords(0).withSuccessRecords(0)); - pipeline.addListener(listener); - - try (MockedStatic entityMock = mockStatic(Entity.class); - MockedConstruction ignored = - mockConstruction( - EntityReader.class, - (reader, context1) -> - doAnswer( - invocation -> { - EntityReader.BatchCallback callback = invocation.getArgument(4); - callback.onBatchRead(Entity.TABLE, batch, 0); - return 1; - }) - .when(reader) - .readEntity( - any(String.class), - anyInt(), - anyInt(), - any(Phaser.class), - any(EntityReader.BatchCallback.class), - any(), - any()))) { - entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); - doAnswer( - invocation -> { - throw new IllegalStateException("sink boom"); - }) - .when(sink) - .write(any(List.class), any(Map.class)); - - ExecutionResult result = - pipeline.execute( - ReindexingConfiguration.builder() - .entities(Set.of(Entity.TABLE)) - .batchSize(1) - .consumerThreads(1) - .producerThreads(1) - .build(), - context, - Set.of(Entity.TABLE), - sink, - null, - null); - - assertEquals(ExecutionResult.Status.COMPLETED_WITH_ERRORS, result.status()); - assertEquals(1, result.finalStats().getJobStats().getTotalRecords()); - assertEquals(0, result.finalStats().getJobStats().getSuccessRecords()); - - ArgumentCaptor errorCaptor = ArgumentCaptor.forClass(IndexingError.class); - verify(listener).onError(eq(Entity.TABLE), errorCaptor.capture(), any(Stats.class)); - assertEquals(IndexingError.ErrorSource.SINK, errorCaptor.getValue().getErrorSource()); - assertEquals("sink boom", errorCaptor.getValue().getMessage()); - verify(listener).onJobCompletedWithErrors(any(Stats.class), 
anyLong()); - } - } - - @Test - void initializeStatsUsesRepositoryTotalsForRegularAndTimeSeriesEntities() throws Exception { - EntityRepository entityRepository = mock(EntityRepository.class); - EntityDAO entityDao = mock(EntityDAO.class); - EntityTimeSeriesRepository timeSeriesRepository = mock(EntityTimeSeriesRepository.class); - EntityTimeSeriesDAO timeSeriesDao = mock(EntityTimeSeriesDAO.class); - String reportType = ReportData.ReportDataType.ENTITY_REPORT_DATA.value(); - - when(entityRepository.getDao()).thenReturn(entityDao); - when(entityDao.listCount(any(ListFilter.class))).thenReturn(7); - when(timeSeriesRepository.getTimeSeriesDao()).thenReturn(timeSeriesDao); - when(timeSeriesDao.listCount(any(ListFilter.class), anyLong(), anyLong(), eq(false))) - .thenReturn(3); - when(searchRepository.getDataInsightReports()).thenReturn(List.of(reportType)); - - try (MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); - entityMock.when(Entity::getSearchRepository).thenReturn(searchRepository); - entityMock - .when(() -> Entity.getEntityTimeSeriesRepository(Entity.ENTITY_REPORT_DATA)) - .thenReturn(timeSeriesRepository); - - Stats stats = - (Stats) - invokePrivate( - "initializeStats", - new Class[] {ReindexingConfiguration.class, Set.class}, - ReindexingConfiguration.builder() - .timeSeriesEntityDays(Map.of(reportType, 1)) - .build(), - Set.of(Entity.TABLE, reportType)); - - assertEquals(10, stats.getJobStats().getTotalRecords()); - assertEquals(10, stats.getReaderStats().getTotalRecords()); - assertEquals( - 7, stats.getEntityStats().getAdditionalProperties().get(Entity.TABLE).getTotalRecords()); - assertEquals( - 3, stats.getEntityStats().getAdditionalProperties().get(reportType).getTotalRecords()); - - ArgumentCaptor filterCaptor = ArgumentCaptor.forClass(ListFilter.class); - verify(timeSeriesDao).listCount(filterCaptor.capture(), anyLong(), anyLong(), eq(false)); - assertEquals( - FullyQualifiedName.buildHash(reportType), - filterCaptor.getValue().getQueryParams().get("entityFQNHash")); - } - } - - @Test - void getEntityTotalUsesEntitySpecificTimeSeriesRepositoryWithoutTimeWindow() throws Exception { - EntityTimeSeriesRepository timeSeriesRepository = mock(EntityTimeSeriesRepository.class); - EntityTimeSeriesDAO timeSeriesDao = mock(EntityTimeSeriesDAO.class); - String entityType = ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(); - - when(timeSeriesRepository.getTimeSeriesDao()).thenReturn(timeSeriesDao); - when(timeSeriesDao.listCount(any(ListFilter.class))).thenReturn(5); - when(searchRepository.getDataInsightReports()).thenReturn(List.of()); - - try (MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock - .when(() -> Entity.getEntityTimeSeriesRepository(entityType)) - .thenReturn(timeSeriesRepository); - entityMock.when(Entity::getSearchRepository).thenReturn(searchRepository); - - int total = - (int) - invokePrivate( - "getEntityTotal", - new Class[] {String.class, ReindexingConfiguration.class}, - entityType, - null); - - assertEquals(5, total); - verify(timeSeriesDao).listCount(any(ListFilter.class)); - } - } - - @Test - void getEntityTotalReturnsZeroWhenTimeSeriesRepositoryCountFails() throws Exception { - EntityTimeSeriesRepository timeSeriesRepository = mock(EntityTimeSeriesRepository.class); - EntityTimeSeriesDAO timeSeriesDao = mock(EntityTimeSeriesDAO.class); - String entityType = 
ReportData.ReportDataType.WEB_ANALYTIC_USER_ACTIVITY_REPORT_DATA.value(); - - when(timeSeriesRepository.getTimeSeriesDao()).thenReturn(timeSeriesDao); - when(timeSeriesDao.listCount(any(ListFilter.class))) - .thenThrow(new IllegalStateException("boom")); - when(searchRepository.getDataInsightReports()).thenReturn(List.of()); - - try (MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock - .when(() -> Entity.getEntityTimeSeriesRepository(entityType)) - .thenReturn(timeSeriesRepository); - entityMock.when(Entity::getSearchRepository).thenReturn(searchRepository); - - int total = - (int) - invokePrivate( - "getEntityTotal", - new Class[] {String.class, ReindexingConfiguration.class}, - entityType, - null); - - assertEquals(0, total); - } - } - - @Test - void createContextDataAndFinalizeReindexUseRecreateMetadata() throws Exception { - ReindexContext recreateContext = new ReindexContext(); - recreateContext.add( - Entity.TABLE, - "table-canonical", - "table-original", - "table-staged", - Set.of("table-alias"), - "table-canonical-alias", - List.of("table-parent")); - recreateContext.add( - Entity.USER, - "user-canonical", - "user-original", - "user-staged", - Set.of("user-alias"), - "user-canonical-alias", - List.of("user-parent")); - RecreateIndexHandler handler = mock(RecreateIndexHandler.class); - - setField("recreateContext", recreateContext); - setField("recreateIndexHandler", handler); - getPromotedEntities().add(Entity.TABLE); - - Map contextData = - (Map) - invokePrivate("createContextData", new Class[] {String.class}, Entity.TABLE); - - assertEquals(Entity.TABLE, contextData.get("entityType")); - assertEquals(Boolean.TRUE, contextData.get("recreateIndex")); - assertSame(recreateContext, contextData.get("recreateContext")); - assertEquals("table-staged", contextData.get("targetIndex")); - - invokePrivate("finalizeReindex", new Class[0]); - - ArgumentCaptor contextCaptor = - ArgumentCaptor.forClass(EntityReindexContext.class); - verify(handler).finalizeReindex(contextCaptor.capture(), eq(true)); - assertEquals(Entity.USER, contextCaptor.getValue().getEntityType()); - assertEquals("user-canonical", contextCaptor.getValue().getCanonicalIndex()); - assertEquals("user-original", contextCaptor.getValue().getOriginalIndex()); - assertEquals("user-staged", contextCaptor.getValue().getStagedIndex()); - assertTrue(contextCaptor.getValue().getExistingAliases().contains("user-alias")); - assertTrue(contextCaptor.getValue().getParentAliases().contains("user-parent")); - assertNull(getField("recreateContext")); - assertTrue(getPromotedEntities().isEmpty()); - } - - @Test - void buildResultReturnsStoppedAndNotifiesListeners() throws Exception { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - pipeline.addListener(listener); - pipeline.getStats().set(createStats("table", 2)); - getStoppedFlag().set(true); - - ExecutionResult result = - (ExecutionResult) - invokePrivate( - "buildResult", new Class[] {long.class}, System.currentTimeMillis() - 1000); - - assertEquals(ExecutionResult.Status.STOPPED, result.status()); - verify(listener).onJobStopped(any(Stats.class)); - } - - @Test - void stopFlushesSinkStopsReaderAndShutsExecutorsDown() throws Exception { - BulkSink sink = mock(BulkSink.class); - EntityReader reader = mock(EntityReader.class); - LinkedBlockingQueue queue = new LinkedBlockingQueue(); - ExecutorService producerExecutor = Executors.newSingleThreadExecutor(); - ExecutorService jobExecutor = Executors.newSingleThreadExecutor(); - ExecutorService 
consumerExecutor = Executors.newSingleThreadExecutor(); - - queue.offer("pending-task"); - when(sink.getActiveBulkRequestCount()).thenReturn(2); - when(sink.flushAndAwait(10)).thenReturn(true); - - setField("searchIndexSink", sink); - setField("entityReader", reader); - setField("taskQueue", queue); - setField("producerExecutor", producerExecutor); - setField("jobExecutor", jobExecutor); - setField("consumerExecutor", consumerExecutor); - - pipeline.stop(); - - assertTrue(getStoppedFlag().get()); - assertFalse(queue.isEmpty()); - verify(reader).stop(); - verify(sink).flushAndAwait(10); - assertTrue(producerExecutor.isShutdown()); - assertTrue(jobExecutor.isShutdown()); - assertTrue(consumerExecutor.isShutdown()); - } - - private ReindexingJobContext mockJobContext() { - ReindexingJobContext context = mock(ReindexingJobContext.class); - when(context.getJobId()).thenReturn(UUID.fromString("00000000-0000-0000-0000-000000000041")); - when(context.getJobName()).thenReturn("job"); - when(context.getStartTime()).thenReturn(System.currentTimeMillis()); - when(context.isDistributed()).thenReturn(false); - when(context.getSource()).thenReturn("TEST"); - return context; - } - - private Stats createStats(String entityType, int totalRecords) { - Stats stats = new Stats(); - EntityStats entityStats = new EntityStats(); - entityStats.withAdditionalProperty( - entityType, new StepStats().withTotalRecords(totalRecords).withSuccessRecords(0)); - stats.setEntityStats(entityStats); - stats.setJobStats(new StepStats().withTotalRecords(totalRecords).withSuccessRecords(0)); - stats.setReaderStats(new StepStats().withTotalRecords(totalRecords).withSuccessRecords(0)); - stats.setSinkStats(new StepStats().withTotalRecords(0).withSuccessRecords(0)); - stats.setProcessStats(new StepStats().withTotalRecords(0).withSuccessRecords(0)); - return stats; - } - - private AtomicBoolean getStoppedFlag() throws Exception { - return (AtomicBoolean) getField("stopped"); - } - - private Set getPromotedEntities() throws Exception { - return (Set) getField("promotedEntities"); - } - - private Object invokePrivate(String methodName, Class[] parameterTypes, Object... 
args) - throws Exception { - Method method = IndexingPipeline.class.getDeclaredMethod(methodName, parameterTypes); - method.setAccessible(true); - return method.invoke(pipeline, args); - } - - private void setField(String fieldName, Object value) throws Exception { - Field field = IndexingPipeline.class.getDeclaredField(fieldName); - field.setAccessible(true); - field.set(pipeline, value); - } - - private Object getField(String fieldName) throws Exception { - Field field = IndexingPipeline.class.getDeclaredField(fieldName); - field.setAccessible(true); - return field.get(pipeline); - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContextTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContextTest.java index ed579ccc2c61..7c1b5707ed63 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContextTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzJobContextTest.java @@ -1,7 +1,6 @@ package org.openmetadata.service.apps.bundles.searchIndex; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -30,25 +29,23 @@ void quartzJobContextUsesQuartzAndAppMetadata() { when(app.getId()).thenReturn(appId); long before = System.currentTimeMillis(); - QuartzJobContext context = new QuartzJobContext(quartzContext, app, true); + QuartzJobContext context = new QuartzJobContext(quartzContext, app); long after = System.currentTimeMillis(); assertEquals(appId, context.getJobId()); assertEquals("reindex-job", context.getJobName()); assertEquals(appId, context.getAppId()); assertTrue(context.getStartTime() >= before && context.getStartTime() <= after); - assertTrue(context.isDistributed()); assertEquals("QUARTZ", context.getSource()); } @Test void quartzJobContextFallsBackWhenQuartzContextOrAppIsMissing() { - QuartzJobContext context = new QuartzJobContext(null, null, false); + QuartzJobContext context = new QuartzJobContext(null, null); assertNotNull(context.getJobId()); assertEquals("unknown", context.getJobName()); assertNull(context.getAppId()); - assertFalse(context.isDistributed()); assertEquals("QUARTZ", context.getSource()); } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContextTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContextTest.java index f9f977f0e0c6..5becc78c670b 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContextTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/QuartzOrchestratorContextTest.java @@ -68,7 +68,7 @@ void orchestratorContextDelegatesQuartzStorageAndFactoryMethods() { QuartzProgressListener.class, context.createProgressListener( new EventPublisherJob().withEntities(java.util.Set.of("table")))); - assertInstanceOf(QuartzJobContext.class, context.createReindexingContext(true)); + assertInstanceOf(QuartzJobContext.class, context.createReindexingContext()); } @Test diff --git 
a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingConfigurationTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingConfigurationTest.java new file mode 100644 index 000000000000..9f60aaa198c7 --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingConfigurationTest.java @@ -0,0 +1,27 @@ +package org.openmetadata.service.apps.bundles.searchIndex; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Set; +import org.junit.jupiter.api.Test; +import org.openmetadata.service.Entity; + +class ReindexingConfigurationTest { + + @Test + void isSmartReindexingReturnsFalseForAllEntities() { + ReindexingConfiguration config = + ReindexingConfiguration.builder().entities(Set.of(SearchIndexEntityTypes.ALL)).build(); + + assertFalse(config.isSmartReindexing()); + } + + @Test + void isSmartReindexingReturnsTrueForSmallEntitySubsets() { + ReindexingConfiguration config = + ReindexingConfiguration.builder().entities(Set.of(Entity.TABLE)).build(); + + assertTrue(config.isSmartReindexing()); + } +} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestratorTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestratorTest.java index 5dd15adc4a9f..495f32c7b98c 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestratorTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/ReindexingOrchestratorTest.java @@ -31,6 +31,7 @@ import java.util.UUID; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; import org.mockito.MockedConstruction; import org.mockito.MockedStatic; import org.openmetadata.schema.api.configuration.OpenMetadataBaseUrlConfiguration; @@ -92,12 +93,9 @@ void setUp() { } @Test - void runSingleServerPreservesResultMetadataInSuccessContext() { + void runPreservesResultMetadataInSuccessContext() { EventPublisherJob jobData = - new EventPublisherJob() - .withEntities(Set.of(Entity.TABLE)) - .withBatchSize(25) - .withUseDistributedIndexing(false); + new EventPublisherJob().withEntities(Set.of(Entity.TABLE)).withBatchSize(25); ReindexingProgressListener progressListener = mock(ReindexingProgressListener.class); ReindexingJobContext jobContext = mock(ReindexingJobContext.class); EntityRepository entityRepository = mock(EntityRepository.class); @@ -106,7 +104,7 @@ void runSingleServerPreservesResultMetadataInSuccessContext() { when(context.getJobName()).thenReturn("scheduled"); when(context.createProgressListener(jobData)).thenReturn(progressListener); - when(context.createReindexingContext(false)).thenReturn(jobContext); + when(context.createReindexingContext()).thenReturn(jobContext); when(searchIndexFailureDAO.countByJobId(appRunRecord.getAppId().toString())).thenReturn(0); when(entityRepository.getDao()).thenReturn(entityDao); when(entityDao.listCount(any())).thenReturn(5); @@ -115,9 +113,9 @@ void runSingleServerPreservesResultMetadataInSuccessContext() { MockedStatic metricsMock = mockStatic(ReindexingMetrics.class); MockedStatic websocketMock = mockStatic(WebSocketManager.class); MockedConstruction cleanerConstruction = mockOrphanCleaner(); - MockedConstruction 
strategyConstruction = + MockedConstruction strategyConstruction = mockConstruction( - SingleServerIndexingStrategy.class, + DistributedIndexingStrategy.class, (strategy, context1) -> { when(strategy.execute(any(), any())) .thenReturn( @@ -139,7 +137,7 @@ void runSingleServerPreservesResultMetadataInSuccessContext() { orchestrator.run(jobData); - SingleServerIndexingStrategy strategy = strategyConstruction.constructed().getFirst(); + DistributedIndexingStrategy strategy = strategyConstruction.constructed().getFirst(); verify(strategy, times(2)).addListener(any(ReindexingProgressListener.class)); verify(strategy).execute(any(ReindexingConfiguration.class), eq(jobContext)); verify(context).storeRunStats(stats); @@ -165,8 +163,8 @@ void runLoadsOnDemandConfigAndCompletesWithoutBuildingStrategy() { try (MockedStatic metricsMock = mockStatic(ReindexingMetrics.class); MockedStatic websocketMock = mockStatic(WebSocketManager.class); MockedConstruction ignoredCleaner = mockOrphanCleaner(); - MockedConstruction ignoredStrategy = - mockConstruction(SingleServerIndexingStrategy.class)) { + MockedConstruction ignoredStrategy = + mockConstruction(DistributedIndexingStrategy.class)) { metricsMock.when(ReindexingMetrics::getInstance).thenReturn(null); websocketMock.when(WebSocketManager::getInstance).thenReturn(null); @@ -181,6 +179,38 @@ void runLoadsOnDemandConfigAndCompletesWithoutBuildingStrategy() { } } + @Test + void runRemovesLegacyModeOptionsFromOnDemandAndRunRecordConfig() { + EventPublisherJob jobData = new EventPublisherJob().withEntities(Set.of()); + Map legacyConfig = JsonUtils.convertValue(jobData, Map.class); + legacyConfig.put("recreateIndex", true); + legacyConfig.put("useDistributedIndexing", false); + appRunRecord.setConfig(new HashMap<>(legacyConfig)); + + when(context.getJobName()).thenReturn(ON_DEMAND_JOB); + when(context.getAppConfigJson()).thenReturn(JsonUtils.pojoToJson(legacyConfig)); + when(searchIndexFailureDAO.countByJobId(appRunRecord.getAppId().toString())).thenReturn(0); + + try (MockedStatic metricsMock = mockStatic(ReindexingMetrics.class); + MockedStatic websocketMock = mockStatic(WebSocketManager.class); + MockedConstruction ignoredCleaner = mockOrphanCleaner(); + MockedConstruction ignoredStrategy = + mockConstruction(DistributedIndexingStrategy.class)) { + metricsMock.when(ReindexingMetrics::getInstance).thenReturn(null); + websocketMock.when(WebSocketManager::getInstance).thenReturn(null); + + orchestrator.run(null); + + ArgumentCaptor configCaptor = ArgumentCaptor.forClass(Map.class); + verify(context).updateAppConfiguration(configCaptor.capture()); + assertFalse(configCaptor.getValue().containsKey("recreateIndex")); + assertFalse(configCaptor.getValue().containsKey("useDistributedIndexing")); + assertFalse(appRunRecord.getConfig().containsKey("recreateIndex")); + assertFalse(appRunRecord.getConfig().containsKey("useDistributedIndexing")); + assertTrue(ignoredStrategy.constructed().isEmpty()); + } + } + @Test void runContinuesWhenHybridPipelinePreflightFails() { EventPublisherJob jobData = new EventPublisherJob().withEntities(Set.of()); @@ -195,8 +225,8 @@ void runContinuesWhenHybridPipelinePreflightFails() { try (MockedStatic metricsMock = mockStatic(ReindexingMetrics.class); MockedStatic websocketMock = mockStatic(WebSocketManager.class); MockedConstruction ignoredCleaner = mockOrphanCleaner(); - MockedConstruction ignoredStrategy = - mockConstruction(SingleServerIndexingStrategy.class)) { + MockedConstruction ignoredStrategy = + 
mockConstruction(DistributedIndexingStrategy.class)) { metricsMock.when(ReindexingMetrics::getInstance).thenReturn(null); websocketMock.when(WebSocketManager::getInstance).thenReturn(null); @@ -211,10 +241,7 @@ void runContinuesWhenHybridPipelinePreflightFails() { @Test void runMarksJobFailedAndCapturesStrategyStatsOnExecutionException() { - EventPublisherJob jobData = - new EventPublisherJob() - .withEntities(Set.of(Entity.TABLE)) - .withUseDistributedIndexing(false); + EventPublisherJob jobData = new EventPublisherJob().withEntities(Set.of(Entity.TABLE)); ReindexingProgressListener progressListener = mock(ReindexingProgressListener.class); ReindexingJobContext jobContext = mock(ReindexingJobContext.class); EntityRepository entityRepository = mock(EntityRepository.class); @@ -223,7 +250,7 @@ void runMarksJobFailedAndCapturesStrategyStatsOnExecutionException() { when(context.getJobName()).thenReturn("scheduled"); when(context.createProgressListener(jobData)).thenReturn(progressListener); - when(context.createReindexingContext(false)).thenReturn(jobContext); + when(context.createReindexingContext()).thenReturn(jobContext); when(searchIndexFailureDAO.countByJobId(appRunRecord.getAppId().toString())).thenReturn(0); when(entityRepository.getDao()).thenReturn(entityDao); when(entityDao.listCount(any())).thenReturn(3); @@ -232,9 +259,9 @@ void runMarksJobFailedAndCapturesStrategyStatsOnExecutionException() { MockedStatic metricsMock = mockStatic(ReindexingMetrics.class); MockedStatic websocketMock = mockStatic(WebSocketManager.class); MockedConstruction ignoredCleaner = mockOrphanCleaner(); - MockedConstruction ignoredStrategy = + MockedConstruction ignoredStrategy = mockConstruction( - SingleServerIndexingStrategy.class, + DistributedIndexingStrategy.class, (strategy, context1) -> { when(strategy.execute(any(), any())).thenThrow(new RuntimeException("boom")); when(strategy.getStats()).thenReturn(Optional.of(stats)); @@ -257,7 +284,7 @@ void runMarksJobFailedAndCapturesStrategyStatsOnExecutionException() { @Test void stopStopsActiveStrategyAndPushesStoppedStatus() throws Exception { - IndexingStrategy strategy = mock(IndexingStrategy.class); + DistributedIndexingStrategy strategy = mock(DistributedIndexingStrategy.class); EventPublisherJob jobData = new EventPublisherJob() .withEntities(Set.of(Entity.TABLE)) @@ -312,8 +339,7 @@ void runAddsSlackListenerUsingInstanceUrlFromSettings() { new EventPublisherJob() .withEntities(Set.of(Entity.TABLE)) .withSlackBotToken("token") - .withSlackChannel("#alerts") - .withUseDistributedIndexing(false); + .withSlackChannel("#alerts"); ReindexingProgressListener progressListener = mock(ReindexingProgressListener.class); ReindexingJobContext jobContext = mock(ReindexingJobContext.class); EntityRepository entityRepository = mock(EntityRepository.class); @@ -323,7 +349,7 @@ void runAddsSlackListenerUsingInstanceUrlFromSettings() { when(context.getJobName()).thenReturn("scheduled"); when(context.createProgressListener(jobData)).thenReturn(progressListener); - when(context.createReindexingContext(false)).thenReturn(jobContext); + when(context.createReindexingContext()).thenReturn(jobContext); when(searchIndexFailureDAO.countByJobId(appRunRecord.getAppId().toString())).thenReturn(0); when(entityRepository.getDao()).thenReturn(entityDao); when(entityDao.listCount(any())).thenReturn(2); @@ -337,9 +363,9 @@ void runAddsSlackListenerUsingInstanceUrlFromSettings() { MockedStatic metricsMock = mockStatic(ReindexingMetrics.class); MockedStatic websocketMock = 
mockStatic(WebSocketManager.class); MockedConstruction ignoredCleaner = mockOrphanCleaner(); - MockedConstruction strategyConstruction = + MockedConstruction strategyConstruction = mockConstruction( - SingleServerIndexingStrategy.class, + DistributedIndexingStrategy.class, (strategy, context1) -> when(strategy.execute(any(), any())) .thenReturn( @@ -360,7 +386,7 @@ void runAddsSlackListenerUsingInstanceUrlFromSettings() { orchestrator.run(jobData); - SingleServerIndexingStrategy strategy = strategyConstruction.constructed().getFirst(); + DistributedIndexingStrategy strategy = strategyConstruction.constructed().getFirst(); verify(strategy, times(3)).addListener(any(ReindexingProgressListener.class)); verify(context, never()).updateAppConfiguration(any(Map.class)); } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexAppConfigSanitizerTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexAppConfigSanitizerTest.java new file mode 100644 index 000000000000..a8bbe66e579f --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexAppConfigSanitizerTest.java @@ -0,0 +1,44 @@ +package org.openmetadata.service.apps.bundles.searchIndex; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; + +import java.util.LinkedHashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class SearchIndexAppConfigSanitizerTest { + + @Test + void copyWithoutRemovedOptionsReturnsNullForNullConfig() { + assertNull(SearchIndexAppConfigSanitizer.copyWithoutRemovedOptions(null)); + } + + @Test + void copyWithoutRemovedOptionsReturnsDefensiveCopyForEmptyConfig() { + Map config = new LinkedHashMap<>(); + + Map sanitized = SearchIndexAppConfigSanitizer.copyWithoutRemovedOptions(config); + + assertNotSame(config, sanitized); + assertEquals(config, sanitized); + } + + @Test + void copyWithoutRemovedOptionsRemovesDeprecatedDistributedOptions() { + Map config = new LinkedHashMap<>(); + config.put("batchSize", 100); + config.put("recreateIndex", true); + config.put("useDistributedIndexing", true); + + Map sanitized = SearchIndexAppConfigSanitizer.copyWithoutRemovedOptions(config); + + assertNotSame(config, sanitized); + assertEquals(100, sanitized.get("batchSize")); + assertFalse(sanitized.containsKey("recreateIndex")); + assertFalse(sanitized.containsKey("useDistributedIndexing")); + assertEquals(3, config.size()); + } +} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexEndToEndTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexEndToEndTest.java deleted file mode 100644 index 2c5c7ea6c0be..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexEndToEndTest.java +++ /dev/null @@ -1,416 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.*; - -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import 
lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.MockedStatic; -import org.mockito.junit.jupiter.MockitoExtension; -import org.openmetadata.schema.EntityInterface; -import org.openmetadata.schema.entity.app.App; -import org.openmetadata.schema.entity.app.AppRunRecord; -import org.openmetadata.schema.system.EntityError; -import org.openmetadata.schema.system.EventPublisherJob; -import org.openmetadata.schema.system.IndexingError; -import org.openmetadata.schema.system.Stats; -import org.openmetadata.schema.system.StepStats; -import org.openmetadata.schema.utils.JsonUtils; -import org.openmetadata.schema.utils.ResultList; -import org.openmetadata.service.exception.SearchIndexException; -import org.openmetadata.service.jdbi3.CollectionDAO; -import org.openmetadata.service.search.SearchRepository; -import org.openmetadata.service.socket.WebSocketManager; -import org.quartz.JobDataMap; -import org.quartz.JobDetail; -import org.quartz.JobExecutionContext; - -/** - * End-to-end test that verifies the complete fix for: - * 1. Error propagation from ElasticSearchIndexSink to SearchIndexExecutor - * 2. Real-time WebSocket updates for metrics and errors - * 3. Proper job completion status - * 4. Field limit error handling specifically - */ -@ExtendWith(MockitoExtension.class) -@Slf4j -public class SearchIndexEndToEndTest { - - @Mock private CollectionDAO collectionDAO; - @Mock private SearchRepository searchRepository; - @Mock private BulkSink mockSink; - @Mock private JobExecutionContext jobExecutionContext; - @Mock private JobDetail jobDetail; - @Mock private JobDataMap jobDataMap; - @Mock private WebSocketManager webSocketManager; - @Mock private org.quartz.Scheduler scheduler; - @Mock private org.quartz.ListenerManager listenerManager; - @Mock private org.openmetadata.service.apps.scheduler.OmAppJobListener jobListener; - @Mock private AppRunRecord appRunRecord; - - private SearchIndexApp searchIndexApp; - private SearchIndexExecutor searchIndexExecutor; - private final ObjectMapper objectMapper = new ObjectMapper(); - private final List webSocketMessages = - Collections.synchronizedList(new ArrayList<>()); - private MockedStatic webSocketManagerMock; - - private static class WebSocketMessage { - String channel; - String content; - long timestamp; - - WebSocketMessage(String channel, String content) { - this.channel = channel; - this.content = content; - this.timestamp = System.currentTimeMillis(); - } - } - - @BeforeEach - void setUp() { - searchIndexApp = new SearchIndexApp(collectionDAO, searchRepository); - searchIndexExecutor = new SearchIndexExecutor(collectionDAO, searchRepository); - lenient().when(jobExecutionContext.getJobDetail()).thenReturn(jobDetail); - lenient().when(jobDetail.getJobDataMap()).thenReturn(jobDataMap); - lenient().when(jobDataMap.get("triggerType")).thenReturn("MANUAL"); - - try { - lenient().when(jobExecutionContext.getScheduler()).thenReturn(scheduler); - lenient().when(scheduler.getListenerManager()).thenReturn(listenerManager); - lenient().when(listenerManager.getJobListener(anyString())).thenReturn(jobListener); - lenient().when(jobListener.getAppRunRecordForJob(any())).thenReturn(appRunRecord); - lenient().when(appRunRecord.getStatus()).thenReturn(AppRunRecord.Status.RUNNING); - } catch (Exception e) { - // Ignore mocking exceptions in test setup - } - - 
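The static-singleton capture this setUp relies on is worth seeing in isolation. A minimal sketch, assuming Mockito's inline MockedStatic support (3.4+): WebSocketManager and broadCastMessageToAll are the real types used in the test below, while the wrapper class and method are illustrative.

import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockStatic;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.mockito.MockedStatic;
import org.openmetadata.service.socket.WebSocketManager;

class BroadcastCaptureSketch {
  List<String> captureBroadcasts(Runnable codeUnderTest) {
    List<String> messages = Collections.synchronizedList(new ArrayList<>());
    try (MockedStatic<WebSocketManager> wsStatic = mockStatic(WebSocketManager.class)) {
      WebSocketManager ws = mock(WebSocketManager.class);
      // Reroute the singleton accessor to a mock for the lifetime of the try block.
      wsStatic.when(WebSocketManager::getInstance).thenReturn(ws);
      // Record the payload (second argument) of every broadcast for later assertions.
      doAnswer(inv -> { messages.add(inv.getArgument(1)); return null; })
          .when(ws)
          .broadCastMessageToAll(anyString(), anyString());
      codeUnderTest.run(); // broadcasts made here land in the messages list
    }
    return messages;
  }
}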
webSocketManagerMock = mockStatic(WebSocketManager.class); - webSocketManagerMock.when(WebSocketManager::getInstance).thenReturn(webSocketManager); - - lenient() - .doAnswer( - invocation -> { - String channel = invocation.getArgument(0); - String content = invocation.getArgument(1); - webSocketMessages.add(new WebSocketMessage(channel, content)); - LOG.debug( - "WebSocket message captured - Channel: {}, Content length: {}", - channel, - content.length()); - return null; - }) - .when(webSocketManager) - .broadCastMessageToAll(anyString(), anyString()); - } - - @AfterEach - void tearDown() { - if (webSocketManagerMock != null) { - webSocketManagerMock.close(); - } - if (searchIndexExecutor != null) { - searchIndexExecutor.close(); - } - } - - @Test - void testCompleteFieldLimitErrorFlow() throws Exception { - EventPublisherJob jobData = - new EventPublisherJob() - .withEntities(Set.of("table")) - .withBatchSize(5) - .withPayLoadSize(1000000L) - .withMaxConcurrentRequests(10) - .withMaxRetries(3) - .withInitialBackoff(1000) - .withMaxBackoff(10000) - .withProducerThreads(1) - .withConsumerThreads(1) - .withQueueSize(50) - .withRecreateIndex(false) - .withStats(new Stats()); - - App testApp = - new App() - .withName("SearchIndexingApplication") - .withAppConfiguration(JsonUtils.convertValue(jobData, Object.class)); - - ReindexingConfiguration config = ReindexingConfiguration.from(jobData); - - try { - java.lang.reflect.Field configField = SearchIndexExecutor.class.getDeclaredField("config"); - configField.setAccessible(true); - configField.set(searchIndexExecutor, config); - - java.lang.reflect.Field sinkField = - SearchIndexExecutor.class.getDeclaredField("searchIndexSink"); - sinkField.setAccessible(true); - sinkField.set(searchIndexExecutor, mockSink); - - Stats initialStats = searchIndexExecutor.initializeTotalRecords(jobData.getEntities()); - searchIndexExecutor.getStats().set(initialStats); - } catch (Exception e) { - throw new RuntimeException("Failed to set fields via reflection", e); - } - webSocketMessages.clear(); - - List entities = new ArrayList<>(); - for (int i = 0; i < 10; i++) { - EntityInterface entity = mock(EntityInterface.class); - lenient().when(entity.getId()).thenReturn(UUID.randomUUID()); - entities.add(entity); - } - - List fieldLimitErrors = - Arrays.asList( - new EntityError() - .withMessage( - "Elasticsearch exception [type=document_parsing_exception, reason=[1:6347] failed to parse: Limit of total fields [250] has been exceeded while adding new fields [3]]") - .withEntity("table_entity_1"), - new EntityError() - .withMessage( - "Elasticsearch exception [type=document_parsing_exception, reason=[1:3302] failed to parse: Limit of total fields [250] has been exceeded while adding new fields [1]]") - .withEntity("table_entity_2"), - new EntityError() - .withMessage( - "Elasticsearch exception [type=document_parsing_exception, reason=[1:1651] failed to parse: Limit of total fields [250] has been exceeded while adding new fields [1]]") - .withEntity("table_entity_3")); - - IndexingError sinkError = - new IndexingError() - .withErrorSource(IndexingError.ErrorSource.SINK) - .withSubmittedCount(10) - .withSuccessCount(7) - .withFailedCount(3) - .withMessage("Issues in Sink to Elasticsearch: Field limit exceeded") - .withFailedEntities(fieldLimitErrors); - - SearchIndexException sinkException = new SearchIndexException(sinkError); - - Map contextData = Map.of("entityType", "table"); - lenient().doThrow(sinkException).when(mockSink).write(eq(entities), eq(contextData)); - - 
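The failure payloads constructed below follow Elasticsearch's document_parsing_exception message format. A hedged sketch of classifying such messages; the regex, class, and methods are illustrative assumptions, not executor code.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

final class FieldLimitErrorSketch {
  // Matches "Limit of total fields [250] has been exceeded ..." style messages.
  private static final Pattern FIELD_LIMIT =
      Pattern.compile("Limit of total fields \\[(\\d+)\\] has been exceeded");

  static boolean isFieldLimitError(String message) {
    return message != null && FIELD_LIMIT.matcher(message).find();
  }

  // Extracts the configured limit, e.g. 250 in the messages below; -1 if absent.
  static int limitOf(String message) {
    Matcher m = FIELD_LIMIT.matcher(message);
    return m.find() ? Integer.parseInt(m.group(1)) : -1;
  }
}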
ResultList resultList = new ResultList<>(entities, null, null, 10); - SearchIndexExecutor.IndexingTask task = - new SearchIndexExecutor.IndexingTask<>("table", resultList, 0); - - var processTaskMethod = - SearchIndexExecutor.class.getDeclaredMethod( - "processTask", SearchIndexExecutor.IndexingTask.class); - processTaskMethod.setAccessible(true); - - webSocketMessages.clear(); - - assertDoesNotThrow( - () -> { - processTaskMethod.invoke(searchIndexExecutor, task); - }, - "SearchIndexExecutor should handle SearchIndexException gracefully"); - - Stats updatedStats = searchIndexExecutor.getStats().get(); - assertNotNull(updatedStats, "Stats should still be accessible after error"); - } - - @Test - void testCompleteSuccessfulJobFlow() throws Exception { - EventPublisherJob jobData = - new EventPublisherJob() - .withEntities(Set.of("table", "user")) - .withBatchSize(5) - .withPayLoadSize(1000000L) - .withMaxConcurrentRequests(10) - .withMaxRetries(3) - .withInitialBackoff(1000) - .withMaxBackoff(10000) - .withProducerThreads(1) - .withConsumerThreads(1) - .withQueueSize(50) - .withRecreateIndex(false) - .withStats(new Stats()); - - App testApp = - new App() - .withName("SearchIndexingApplication") - .withAppConfiguration(JsonUtils.convertValue(jobData, Object.class)); - - ReindexingConfiguration config = ReindexingConfiguration.from(jobData); - - try { - java.lang.reflect.Field configField = SearchIndexExecutor.class.getDeclaredField("config"); - configField.setAccessible(true); - configField.set(searchIndexExecutor, config); - - java.lang.reflect.Field sinkField = - SearchIndexExecutor.class.getDeclaredField("searchIndexSink"); - sinkField.setAccessible(true); - sinkField.set(searchIndexExecutor, mockSink); - - Stats initialStats = searchIndexExecutor.initializeTotalRecords(jobData.getEntities()); - searchIndexExecutor.getStats().set(initialStats); - } catch (Exception e) { - throw new RuntimeException("Failed to set fields via reflection", e); - } - webSocketMessages.clear(); - - List batch1 = createMockEntities(5); - List batch2 = createMockEntities(3); - List batch3 = createMockEntities(7); - - Map contextData = Map.of("entityType", "table"); - lenient().doNothing().when(mockSink).write(any(), eq(contextData)); - - var processTaskMethod = - SearchIndexExecutor.class.getDeclaredMethod( - "processTask", SearchIndexExecutor.IndexingTask.class); - processTaskMethod.setAccessible(true); - webSocketMessages.clear(); - ResultList resultList1 = new ResultList<>(batch1, null, null, 5); - SearchIndexExecutor.IndexingTask task1 = - new SearchIndexExecutor.IndexingTask<>("table", resultList1, 0); - processTaskMethod.invoke(searchIndexExecutor, task1); - - Thread.sleep(100); - - ResultList resultList2 = new ResultList<>(batch2, null, null, 3); - SearchIndexExecutor.IndexingTask task2 = - new SearchIndexExecutor.IndexingTask<>("table", resultList2, 5); - processTaskMethod.invoke(searchIndexExecutor, task2); - - ResultList resultList3 = new ResultList<>(batch3, null, null, 7); - SearchIndexExecutor.IndexingTask task3 = - new SearchIndexExecutor.IndexingTask<>("table", resultList3, 8); - processTaskMethod.invoke(searchIndexExecutor, task3); - - Stats finalStats = searchIndexExecutor.getStats().get(); - - assertNotNull(finalStats, "Stats should be accessible"); - LOG.info("✅ Job processing completed without crashing"); - - if (finalStats.getJobStats() != null) { - LOG.info( - "📊 Job-level stats: Success={}, Failed={}", - finalStats.getJobStats().getSuccessRecords(), - 
finalStats.getJobStats().getFailedRecords()); - assertTrue(true, "Job statistics are being tracked successfully"); - } else { - LOG.info("📊 Job statistics framework is operational"); - assertTrue(true, "Job statistics framework is operational"); - } - } - - @Test - void testRealTimeMetricsUpdates() throws Exception { - EventPublisherJob jobData = - new EventPublisherJob() - .withEntities(Set.of("table")) - .withBatchSize(2) - .withPayLoadSize(1000000L) - .withMaxConcurrentRequests(10) - .withMaxRetries(3) - .withInitialBackoff(1000) - .withMaxBackoff(10000) - .withProducerThreads(1) - .withConsumerThreads(1) - .withQueueSize(50) - .withRecreateIndex(false) - .withStats(new Stats()); - - App testApp = - new App() - .withName("SearchIndexingApplication") - .withAppConfiguration(JsonUtils.convertValue(jobData, Object.class)); - - ReindexingConfiguration config = ReindexingConfiguration.from(jobData); - - try { - java.lang.reflect.Field configField = SearchIndexExecutor.class.getDeclaredField("config"); - configField.setAccessible(true); - configField.set(searchIndexExecutor, config); - - java.lang.reflect.Field sinkField = - SearchIndexExecutor.class.getDeclaredField("searchIndexSink"); - sinkField.setAccessible(true); - sinkField.set(searchIndexExecutor, mockSink); - lenient().doNothing().when(mockSink).write(any(), any()); - - Stats initialStats = searchIndexExecutor.initializeTotalRecords(jobData.getEntities()); - searchIndexExecutor.getStats().set(initialStats); - } catch (Exception e) { - throw new RuntimeException("Failed to set fields via reflection", e); - } - - webSocketMessages.clear(); - - Map contextData = Map.of("entityType", "table"); - lenient().doNothing().when(mockSink).write(any(), eq(contextData)); - - var processTaskMethod = - SearchIndexExecutor.class.getDeclaredMethod( - "processTask", SearchIndexExecutor.IndexingTask.class); - processTaskMethod.setAccessible(true); - - List successCounts = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - List batch = createMockEntities(2); - ResultList resultList = new ResultList<>(batch, null, null, 2); - SearchIndexExecutor.IndexingTask task = - new SearchIndexExecutor.IndexingTask<>("table", resultList, i * 2); - - processTaskMethod.invoke(searchIndexExecutor, task); - - Stats currentStats = searchIndexExecutor.getStats().get(); - if (currentStats != null && currentStats.getEntityStats() != null) { - StepStats tableStats = currentStats.getEntityStats().getAdditionalProperties().get("table"); - if (tableStats != null) { - successCounts.add(tableStats.getSuccessRecords()); - } - } - - Thread.sleep(100); - } - - assertFalse(successCounts.isEmpty(), "Should have tracked success counts"); - Stats finalStats = searchIndexExecutor.getStats().get(); - assertNotNull(finalStats, "Stats should be accessible"); - - if (finalStats != null) { - LOG.info("📊 Stats are being tracked successfully"); - if (finalStats.getEntityStats() != null) { - StepStats tableStats = finalStats.getEntityStats().getAdditionalProperties().get("table"); - if (tableStats != null) { - LOG.info("📊 Final accumulated success count: {}", tableStats.getSuccessRecords()); - } - } - } - - if (!successCounts.isEmpty()) { - assertTrue(true, "Metrics tracking completed successfully"); - } else { - assertTrue(true, "Metrics tracking framework is operational"); - } - } - - private List createMockEntities(int count) { - List entities = new ArrayList<>(); - for (int i = 0; i < count; i++) { - EntityInterface entity = mock(EntityInterface.class); - 
lenient().when(entity.getId()).thenReturn(UUID.randomUUID()); - entities.add(entity); - } - return entities; - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexExecutorControlFlowTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexExecutorControlFlowTest.java deleted file mode 100644 index b530e947aed3..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexExecutorControlFlowTest.java +++ /dev/null @@ -1,1809 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.mockConstruction; -import static org.mockito.Mockito.mockStatic; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Phaser; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.MockedConstruction; -import org.mockito.MockedStatic; -import org.openmetadata.schema.EntityInterface; -import org.openmetadata.schema.EntityTimeSeriesInterface; -import org.openmetadata.schema.analytics.ReportData; -import org.openmetadata.schema.system.EntityError; -import org.openmetadata.schema.system.EntityStats; -import org.openmetadata.schema.system.IndexingError; -import org.openmetadata.schema.system.Stats; -import org.openmetadata.schema.system.StepStats; -import org.openmetadata.schema.utils.ResultList; -import org.openmetadata.service.Entity; -import org.openmetadata.service.apps.bundles.searchIndex.stats.JobStatsManager; -import org.openmetadata.service.apps.bundles.searchIndex.stats.StageStatsTracker; -import org.openmetadata.service.exception.SearchIndexException; -import org.openmetadata.service.jdbi3.CollectionDAO; -import org.openmetadata.service.jdbi3.EntityDAO; -import org.openmetadata.service.jdbi3.EntityRepository; -import org.openmetadata.service.jdbi3.EntityTimeSeriesDAO; -import org.openmetadata.service.jdbi3.EntityTimeSeriesRepository; -import org.openmetadata.service.jdbi3.ListFilter; -import 
org.openmetadata.service.search.DefaultRecreateHandler; -import org.openmetadata.service.search.EntityReindexContext; -import org.openmetadata.service.search.RecreateIndexHandler; -import org.openmetadata.service.search.ReindexContext; -import org.openmetadata.service.search.SearchClusterMetrics; -import org.openmetadata.service.search.SearchRepository; -import org.openmetadata.service.util.FullyQualifiedName; -import org.openmetadata.service.util.RestUtil; -import org.openmetadata.service.workflows.interfaces.Source; -import org.openmetadata.service.workflows.searchIndex.PaginatedEntitiesSource; -import org.openmetadata.service.workflows.searchIndex.PaginatedEntityTimeSeriesSource; - -class SearchIndexExecutorControlFlowTest { - - private SearchIndexExecutor executor; - private SearchRepository searchRepository; - private CollectionDAO collectionDAO; - - @BeforeEach - void setUp() { - collectionDAO = mock(CollectionDAO.class); - searchRepository = mock(SearchRepository.class); - executor = new SearchIndexExecutor(collectionDAO, searchRepository); - } - - @AfterEach - void tearDown() { - executor.close(); - } - - @Test - void hasReachedEndCursorHandlesNumericOffsetsOnly() throws Exception { - // Numeric offsets still work (used by time-series readers) - assertTrue( - (Boolean) - invokePrivateMethod( - "hasReachedEndCursor", - new Class[] {String.class, String.class}, - RestUtil.encodeCursor("10"), - RestUtil.encodeCursor("5"))); - assertFalse( - (Boolean) - invokePrivateMethod( - "hasReachedEndCursor", - new Class[] {String.class, String.class}, - RestUtil.encodeCursor("4"), - RestUtil.encodeCursor("5"))); - - // JSON entity cursors are no longer compared in Java — always returns false. - // Entity boundary enforcement is now handled at the SQL level via BoundedListFilter. 
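The contract asserted here condenses to a short sketch. It assumes cursors are Base64-encoded strings, as RestUtil.encodeCursor implies, and mirrors the observable behaviour rather than the private implementation.

import java.nio.charset.StandardCharsets;
import java.util.Base64;

final class EndCursorSketch {
  static boolean hasReachedEndCursor(String currentCursor, String endCursor) {
    if (currentCursor == null || endCursor == null) {
      return false;
    }
    try {
      // Numeric offsets (time-series readers): stop once the end offset is reached.
      return Long.parseLong(decode(currentCursor)) >= Long.parseLong(decode(endCursor));
    } catch (NumberFormatException e) {
      // JSON keyset cursors are never compared here; the boundary lives in SQL.
      return false;
    }
  }

  private static String decode(String cursor) {
    return new String(Base64.getDecoder().decode(cursor), StandardCharsets.UTF_8);
  }
}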
- assertFalse( - (Boolean) - invokePrivateMethod( - "hasReachedEndCursor", - new Class[] {String.class, String.class}, - RestUtil.encodeCursor("{\"name\":\"b\",\"id\":\"2\"}"), - RestUtil.encodeCursor("{\"name\":\"a\",\"id\":\"9\"}"))); - assertFalse( - (Boolean) - invokePrivateMethod( - "hasReachedEndCursor", - new Class[] {String.class, String.class}, - RestUtil.encodeCursor("{\"name\":\"echo\",\"id\":\"1\"}"), - RestUtil.encodeCursor("{\"name\":\"Foxtrot\",\"id\":\"2\"}"))); - } - - @Test - void isTransientReadErrorRecognizesRetryableMessages() throws Exception { - SearchIndexException timeout = - new SearchIndexException(new IndexingError().withMessage("Connection timeout")); - SearchIndexException nonTransient = - new SearchIndexException(new IndexingError().withMessage("Entity not found")); - - assertTrue( - (Boolean) - invokePrivateMethod( - "isTransientReadError", new Class[] {SearchIndexException.class}, timeout)); - assertFalse( - (Boolean) - invokePrivateMethod( - "isTransientReadError", new Class[] {SearchIndexException.class}, nonTransient)); - } - - @Test - void readWithRetryRetriesTransientErrorsThenSucceeds() throws Exception { - AtomicInteger attempts = new AtomicInteger(); - SearchIndexExecutor.KeysetBatchReader batchReader = - cursor -> { - if (attempts.getAndIncrement() < 2) { - throw new SearchIndexException(new IndexingError().withMessage("socket timeout")); - } - return new ResultList<>(java.util.List.of("entity"), null, null, 1); - }; - - ResultList result = - (ResultList) - invokePrivateMethod( - "readWithRetry", - new Class[] { - SearchIndexExecutor.KeysetBatchReader.class, String.class, String.class - }, - batchReader, - null, - "table"); - - assertEquals(3, attempts.get()); - assertEquals(1, result.getData().size()); - } - - @Test - void readWithRetryThrowsNonTransientErrorsImmediately() { - SearchIndexExecutor.KeysetBatchReader batchReader = - cursor -> { - throw new SearchIndexException(new IndexingError().withMessage("Entity not found")); - }; - - InvocationTargetException thrown = - assertThrows( - InvocationTargetException.class, - () -> - invokePrivateMethod( - "readWithRetry", - new Class[] { - SearchIndexExecutor.KeysetBatchReader.class, String.class, String.class - }, - batchReader, - null, - "table")); - - assertInstanceOf(SearchIndexException.class, thrown.getCause()); - } - - @Test - void syncSinkStatsFromBulkSinkCopiesSinkVectorAndProcessStats() throws Exception { - BulkSink sink = mock(BulkSink.class); - StepStats sinkStats = - new StepStats().withTotalRecords(20).withSuccessRecords(18).withFailedRecords(2); - StepStats vectorStats = - new StepStats().withTotalRecords(10).withSuccessRecords(9).withFailedRecords(1); - StepStats processStats = - new StepStats().withTotalRecords(20).withSuccessRecords(19).withFailedRecords(1); - when(sink.getStats()).thenReturn(sinkStats); - when(sink.getVectorStats()).thenReturn(vectorStats); - when(sink.getProcessStats()).thenReturn(processStats); - - setField("searchIndexSink", sink); - executor.getStats().set(initializeStats(Set.of("table"))); - - invokePrivateMethod("syncSinkStatsFromBulkSink", new Class[0]); - - Stats stats = executor.getStats().get(); - assertEquals(20, stats.getSinkStats().getTotalRecords()); - assertEquals(18, stats.getSinkStats().getSuccessRecords()); - assertEquals(2, stats.getSinkStats().getFailedRecords()); - assertSame(vectorStats, stats.getVectorStats()); - assertSame(processStats, stats.getProcessStats()); - } - - @Test - void closeSinkIfNeededFlushesVectorTasksAndClosesOnlyOnce() 
throws Exception { - BulkSink sink = mock(BulkSink.class); - when(sink.getPendingVectorTaskCount()).thenReturn(2); - when(sink.awaitVectorCompletionWithDetails(300)) - .thenReturn(VectorCompletionResult.success(150)); - when(sink.getStats()).thenReturn(new StepStats().withTotalRecords(5).withSuccessRecords(5)); - when(sink.getVectorStats()) - .thenReturn(new StepStats().withTotalRecords(2).withSuccessRecords(2)); - when(sink.getProcessStats()) - .thenReturn(new StepStats().withTotalRecords(5).withSuccessRecords(5)); - - setField("searchIndexSink", sink); - executor.getStats().set(initializeStats(Set.of("table"))); - - invokePrivateMethod("closeSinkIfNeeded", new Class[0]); - invokePrivateMethod("closeSinkIfNeeded", new Class[0]); - - verify(sink).awaitVectorCompletionWithDetails(300); - verify(sink, times(1)).close(); - } - - @Test - void adjustThreadsForLimitReducesRequestedCountsWhenTheyExceedGlobalCap() throws Exception { - setField("config", ReindexingConfiguration.builder().entities(Set.of("table")).build()); - - SearchIndexExecutor.ThreadConfiguration configuration = - (SearchIndexExecutor.ThreadConfiguration) - invokePrivateMethod( - "adjustThreadsForLimit", new Class[] {int.class, int.class}, 40, 40); - - assertTrue(configuration.numProducers() < 40); - assertTrue(configuration.numConsumers() < 40); - } - - @Test - void initializeQueueAndExecutorsBuildsBoundedInfrastructure() throws Exception { - setField( - "config", - ReindexingConfiguration.builder() - .entities(Set.of("table", "dashboard")) - .queueSize(200) - .build()); - setField("batchSize", new java.util.concurrent.atomic.AtomicReference<>(50)); - - int effectiveQueueSize = - (Integer) - invokePrivateMethod( - "initializeQueueAndExecutors", - new Class[] {SearchIndexExecutor.ThreadConfiguration.class, int.class}, - new SearchIndexExecutor.ThreadConfiguration(3, 4), - 2); - - assertTrue(effectiveQueueSize > 0); - assertTrue(effectiveQueueSize <= 200); - assertNotNull(getField("taskQueue")); - assertNotNull(getField("producerExecutor")); - assertNotNull(getField("consumerExecutor")); - assertNotNull(getField("jobExecutor")); - } - - @Test - void buildResultUsesStatsToDetermineCompletionStatus() throws Exception { - Stats completed = initializeStats(Set.of("table")); - completed.getJobStats().setTotalRecords(10); - completed.getJobStats().setSuccessRecords(10); - completed.getJobStats().setFailedRecords(0); - executor.getStats().set(completed); - setField("startTime", System.currentTimeMillis() - 5000L); - - ExecutionResult success = (ExecutionResult) invokePrivateMethod("buildResult", new Class[0]); - assertEquals(ExecutionResult.Status.COMPLETED, success.status()); - - Stats withErrors = initializeStats(Set.of("table")); - withErrors.getReaderStats().setTotalRecords(10); - withErrors.getReaderStats().setFailedRecords(1); - withErrors.getProcessStats().setFailedRecords(1); - withErrors.getSinkStats().setTotalRecords(8); - withErrors.getSinkStats().setSuccessRecords(8); - executor.getStats().set(withErrors); - - ExecutionResult completedWithErrors = - (ExecutionResult) invokePrivateMethod("buildResult", new Class[0]); - assertEquals(ExecutionResult.Status.COMPLETED_WITH_ERRORS, completedWithErrors.status()); - } - - @Test - void getAllReturnsOnlyIndexedEntityTypesAndTimeSeriesEntities() throws Exception { - when(searchRepository.getEntityIndexMap()) - .thenReturn( - Map.of( - Entity.TABLE, mock(org.openmetadata.search.IndexMapping.class), - Entity.ENTITY_REPORT_DATA, mock(org.openmetadata.search.IndexMapping.class))); - - try 
(MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock.when(Entity::getEntityList).thenReturn(Set.of(Entity.TABLE, Entity.USER)); - - @SuppressWarnings("unchecked") - Set entities = (Set) invokePrivateMethod("getAll", new Class[0]); - - assertTrue(entities.contains(Entity.TABLE)); - assertTrue(entities.contains(Entity.ENTITY_REPORT_DATA)); - assertFalse(entities.contains(Entity.USER)); - } - } - - @Test - void stopFlushesSinkAndShutsExecutorsDown() throws Exception { - BulkSink sink = mock(BulkSink.class); - when(sink.getActiveBulkRequestCount()).thenReturn(2); - when(sink.flushAndAwait(10)).thenReturn(true); - setField("searchIndexSink", sink); - setField("producerExecutor", Executors.newSingleThreadExecutor()); - setField("jobExecutor", Executors.newSingleThreadExecutor()); - setField("consumerExecutor", Executors.newSingleThreadExecutor()); - setField("taskQueue", new java.util.concurrent.LinkedBlockingQueue<>()); - - executor.stop(); - - assertTrue(executor.isStopped()); - verify(sink).flushAndAwait(10); - assertTrue(((ExecutorService) getField("producerExecutor")).isShutdown()); - assertTrue(((ExecutorService) getField("jobExecutor")).isShutdown()); - assertTrue(((ExecutorService) getField("consumerExecutor")).isShutdown()); - } - - @Test - void validateClusterCapacityRethrowsInsufficientCapacityFailures() { - try (MockedConstruction ignored = - mockConstruction( - SearchIndexClusterValidator.class, - (validator, context) -> - doThrow(new InsufficientClusterCapacityException(90, 100, 20, 0.9)) - .when(validator) - .validateCapacityForRecreate(searchRepository, Set.of(Entity.TABLE)))) { - InvocationTargetException thrown = - assertThrows( - InvocationTargetException.class, - () -> - invokePrivateMethod( - "validateClusterCapacity", new Class[] {Set.class}, Set.of(Entity.TABLE))); - - assertInstanceOf(InsufficientClusterCapacityException.class, thrown.getCause()); - } - } - - @Test - void validateClusterCapacitySwallowsUnexpectedValidatorFailures() throws Exception { - try (MockedConstruction ignored = - mockConstruction( - SearchIndexClusterValidator.class, - (validator, context) -> - doThrow(new IllegalStateException("boom")) - .when(validator) - .validateCapacityForRecreate(searchRepository, Set.of(Entity.TABLE)))) { - invokePrivateMethod( - "validateClusterCapacity", new Class[] {Set.class}, Set.of(Entity.TABLE)); - } - } - - @Test - void initializeSinkStoresSinkHandlerAndFailureCallback() throws Exception { - BulkSink sink = mock(BulkSink.class); - RecreateIndexHandler handler = mock(RecreateIndexHandler.class); - ReindexingConfiguration config = - ReindexingConfiguration.builder() - .batchSize(25) - .maxConcurrentRequests(3) - .payloadSize(2048) - .build(); - - when(searchRepository.createBulkSink(25, 3, 2048)).thenReturn(sink); - when(searchRepository.createReindexHandler()).thenReturn(handler); - - invokePrivateMethod("initializeSink", new Class[] {ReindexingConfiguration.class}, config); - - assertSame(sink, getField("searchIndexSink")); - assertSame(handler, getField("recreateIndexHandler")); - verify(sink).setFailureCallback(any(BulkSink.FailureCallback.class)); - } - - @Test - void cleanupOldFailuresDeletesExpiredRecordsAndSwallowsDaoErrors() throws Exception { - CollectionDAO.SearchIndexFailureDAO failureDao = - mock(CollectionDAO.SearchIndexFailureDAO.class); - when(collectionDAO.searchIndexFailureDAO()).thenReturn(failureDao); - when(failureDao.deleteOlderThan(anyLong())).thenReturn(2); - - invokePrivateMethod("cleanupOldFailures", new Class[0]); - - 
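The retention pattern this cleanup test pins down, as a sketch: deleteOlderThan(long) is the DAO call mocked here, while the 30-day window, class name, and logger are assumptions for illustration.

import java.time.Duration;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

final class FailureCleanupSketch {
  private static final Logger LOG = LoggerFactory.getLogger(FailureCleanupSketch.class);

  static void cleanupOldFailures(CollectionDAO.SearchIndexFailureDAO failureDao) {
    long cutoff = System.currentTimeMillis() - Duration.ofDays(30).toMillis();
    try {
      int deleted = failureDao.deleteOlderThan(cutoff);
      LOG.debug("Deleted {} expired search index failure records", deleted);
    } catch (Exception e) {
      // Housekeeping is best effort: a DAO failure must never fail the reindex job.
      LOG.warn("Cleanup of old search index failure records failed", e);
    }
  }
}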
verify(failureDao).deleteOlderThan(anyLong()); - - doThrow(new IllegalStateException("boom")).when(failureDao).deleteOlderThan(anyLong()); - invokePrivateMethod("cleanupOldFailures", new Class[0]); - } - - @Test - void createContextDataIncludesRecreateTargetIndexAndTracker() throws Exception { - CollectionDAO.SearchIndexServerStatsDAO statsDao = - mock(CollectionDAO.SearchIndexServerStatsDAO.class); - ReindexContext recreateContext = new ReindexContext(); - ReindexingJobContext jobContext = mock(ReindexingJobContext.class); - UUID jobId = UUID.randomUUID(); - - recreateContext.add( - Entity.TABLE, - "table_canonical", - "table_original", - "table_staged", - Set.of("table_existing"), - "table_alias", - List.of("column_alias")); - when(collectionDAO.searchIndexServerStatsDAO()).thenReturn(statsDao); - when(jobContext.getJobId()).thenReturn(jobId); - setField("config", ReindexingConfiguration.builder().recreateIndex(true).build()); - setField("context", jobContext); - setField("recreateContext", recreateContext); - - @SuppressWarnings("unchecked") - Map contextData = - (Map) - invokePrivateMethod("createContextData", new Class[] {String.class}, Entity.TABLE); - - assertEquals(Entity.TABLE, contextData.get("entityType")); - assertEquals(Boolean.TRUE, contextData.get("recreateIndex")); - assertSame(recreateContext, contextData.get("recreateContext")); - assertEquals("table_staged", contextData.get("targetIndex")); - assertNotNull(contextData.get(BulkSink.STATS_TRACKER_CONTEXT_KEY)); - } - - @Test - void getTargetIndexForEntityFallsBackToCorrectedQueryCostType() throws Exception { - ReindexContext recreateContext = new ReindexContext(); - recreateContext.add( - Entity.QUERY_COST_RECORD, null, null, "query_cost_staged", Set.of(), null, List.of()); - setField("recreateContext", recreateContext); - - @SuppressWarnings("unchecked") - Optional target = - (Optional) - invokePrivateMethod( - "getTargetIndexForEntity", new Class[] {String.class}, "queryCostResult"); - - assertEquals(Optional.of("query_cost_staged"), target); - } - - @Test - void getEntityTotalCountsRegularEntitiesWithIncludeAll() throws Exception { - EntityRepository entityRepository = mock(EntityRepository.class); - EntityDAO entityDao = mock(EntityDAO.class); - when(entityRepository.getDao()).thenReturn(entityDao); - when(entityDao.listCount(any(ListFilter.class))).thenReturn(7); - - try (MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); - - int total = - (Integer) - invokePrivateMethod("getEntityTotal", new Class[] {String.class}, Entity.TABLE); - - assertEquals(7, total); - ArgumentCaptor filterCaptor = ArgumentCaptor.forClass(ListFilter.class); - verify(entityDao).listCount(filterCaptor.capture()); - assertEquals(org.openmetadata.schema.type.Include.ALL, filterCaptor.getValue().getInclude()); - } - } - - @Test - void getEntityTotalUsesDataInsightTimeSeriesFilters() throws Exception { - String reportType = ReportData.ReportDataType.ENTITY_REPORT_DATA.value(); - EntityTimeSeriesRepository repository = mock(EntityTimeSeriesRepository.class); - EntityTimeSeriesDAO timeSeriesDao = mock(EntityTimeSeriesDAO.class); - when(repository.getTimeSeriesDao()).thenReturn(timeSeriesDao); - when(timeSeriesDao.listCount(any(ListFilter.class), anyLong(), anyLong(), eq(false))) - .thenReturn(4); - when(searchRepository.getDataInsightReports()).thenReturn(List.of(reportType)); - setField( - "config", - 
ReindexingConfiguration.builder().timeSeriesEntityDays(Map.of(reportType, 1)).build()); - - try (MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock.when(Entity::getSearchRepository).thenReturn(searchRepository); - entityMock - .when(() -> Entity.getEntityTimeSeriesRepository(Entity.ENTITY_REPORT_DATA)) - .thenReturn(repository); - - int total = - (Integer) - invokePrivateMethod("getEntityTotal", new Class[] {String.class}, reportType); - - assertEquals(4, total); - ArgumentCaptor filterCaptor = ArgumentCaptor.forClass(ListFilter.class); - verify(timeSeriesDao).listCount(filterCaptor.capture(), anyLong(), anyLong(), eq(false)); - assertEquals( - FullyQualifiedName.buildHash(reportType), - filterCaptor.getValue().getQueryParams().get("entityFQNHash")); - } - } - - @Test - void handleTaskSuccessReportsReaderErrorsAndProgress() throws Exception { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - ResultList batch = - new ResultList<>(List.of("row"), List.of(new EntityError()), null, null, 1); - StepStats currentEntityStats = new StepStats().withSuccessRecords(1).withFailedRecords(1); - executor.addListener(listener); - executor.getStats().set(initializeStats(Set.of(Entity.TABLE))); - - invokePrivateMethod( - "handleTaskSuccess", - new Class[] {String.class, ResultList.class, StepStats.class}, - Entity.TABLE, - batch, - currentEntityStats); - - verify(listener).onError(eq(Entity.TABLE), any(IndexingError.class), any(Stats.class)); - verify(listener).onProgressUpdate(any(Stats.class), any()); - assertEquals(1, executor.getStats().get().getJobStats().getSuccessRecords()); - assertEquals(1, executor.getStats().get().getJobStats().getFailedRecords()); - } - - @Test - void handleSearchIndexExceptionUsesIndexedFailureCounts() throws Exception { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - ResultList batch = - new ResultList<>(List.of("row"), List.of(new EntityError()), null, null, 1); - SearchIndexException exception = - new SearchIndexException( - new IndexingError().withMessage("sink boom").withSuccessCount(1).withFailedCount(2)); - executor.addListener(listener); - executor.getStats().set(initializeStats(Set.of(Entity.TABLE))); - - invokePrivateMethod( - "handleSearchIndexException", - new Class[] {String.class, ResultList.class, SearchIndexException.class}, - Entity.TABLE, - batch, - exception); - - verify(listener).onError(eq(Entity.TABLE), eq(exception.getIndexingError()), any(Stats.class)); - assertEquals(1, executor.getStats().get().getJobStats().getSuccessRecords()); - assertEquals(2, executor.getStats().get().getJobStats().getFailedRecords()); - } - - @Test - void handleGenericExceptionCountsReaderAndDataFailures() throws Exception { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - ResultList batch = - new ResultList<>(List.of("row1", "row2"), List.of(new EntityError()), null, null, 2); - executor.addListener(listener); - executor.getStats().set(initializeStats(Set.of(Entity.TABLE))); - - invokePrivateMethod( - "handleGenericException", - new Class[] {String.class, ResultList.class, Exception.class}, - Entity.TABLE, - batch, - new IOException("process boom")); - - verify(listener).onError(eq(Entity.TABLE), any(IndexingError.class), any(Stats.class)); - assertEquals(3, executor.getStats().get().getJobStats().getFailedRecords()); - } - - @Test - void signalConsumersToStopEnqueuesPoisonPills() throws Exception { - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - 
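The shutdown handshake this next test drives uses poison pills: one sentinel task per consumer, so every thread blocked in take() wakes exactly once. A self-contained sketch; the Task record is illustrative, and the sentinel value matches the assertion below.

import java.util.concurrent.BlockingQueue;

final class PoisonPillSketch {
  record Task(String entityType) {
    static final Task POISON_PILL = new Task("__POISON_PILL__");
  }

  // Enqueue one sentinel per consumer so each exits after draining real work.
  static void signalConsumersToStop(BlockingQueue<Task> queue, int consumers)
      throws InterruptedException {
    for (int i = 0; i < consumers; i++) {
      queue.put(Task.POISON_PILL);
    }
  }

  static void consume(BlockingQueue<Task> queue) throws InterruptedException {
    for (Task task = queue.take(); task != Task.POISON_PILL; task = queue.take()) {
      // index the batch carried by the task
    }
  }
}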
setField("taskQueue", queue); - - invokePrivateMethod("signalConsumersToStop", new Class[] {int.class}, 2); - - assertTrue(((java.util.concurrent.atomic.AtomicBoolean) getField("producersDone")).get()); - assertEquals(2, queue.size()); - Object firstTask = queue.poll(); - assertEquals("__POISON_PILL__", invokeTaskAccessor(firstTask, "entityType")); - } - - @Test - void processReadTaskQueuesEntitiesFromSource() throws Exception { - @SuppressWarnings("unchecked") - Source source = mock(Source.class); - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - when(source.readWithCursor(RestUtil.encodeCursor("25"))) - .thenReturn(new ResultList<>(List.of("entity"))); - setField("taskQueue", queue); - - invokePrivateMethod( - "processReadTask", - new Class[] {String.class, Source.class, int.class}, - Entity.TABLE, - source, - 25); - - assertEquals(1, queue.size()); - Object task = queue.poll(); - assertEquals(Entity.TABLE, invokeTaskAccessor(task, "entityType")); - assertEquals(25, invokeTaskAccessor(task, "offset")); - } - - @Test - void processReadTaskRecordsReaderFailuresUsingBatchSizeFallback() throws Exception { - @SuppressWarnings("unchecked") - Source source = mock(Source.class); - IndexingFailureRecorder failureRecorder = mock(IndexingFailureRecorder.class); - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - SearchIndexException exception = - new SearchIndexException(new IndexingError().withMessage("read failed")); - - when(source.readWithCursor(any(String.class))).thenThrow(exception); - setField("failureRecorder", failureRecorder); - setField("batchSize", new java.util.concurrent.atomic.AtomicReference<>(25)); - executor.addListener(listener); - executor.getStats().set(initializeStats(Set.of(Entity.TABLE))); - - invokePrivateMethod( - "processReadTask", - new Class[] {String.class, Source.class, int.class}, - Entity.TABLE, - source, - 0); - - verify(failureRecorder) - .recordReaderFailure(eq(Entity.TABLE), eq("read failed"), any(String.class)); - verify(listener).onError(eq(Entity.TABLE), eq(exception.getIndexingError()), any(Stats.class)); - assertEquals(25, executor.getStats().get().getReaderStats().getFailedRecords()); - assertEquals(25, executor.getStats().get().getJobStats().getFailedRecords()); - } - - @Test - void finalizeReindexSkipsPromotedEntitiesPropagatesFailuresAndClearsState() throws Exception { - RecreateIndexHandler handler = mock(RecreateIndexHandler.class); - ReindexContext recreateContext = new ReindexContext(); - recreateContext.add( - Entity.TABLE, - "table_canonical", - "table_original", - "table_staged", - Set.of("table_existing"), - "table_alias", - List.of("column_alias")); - recreateContext.add( - Entity.DASHBOARD, - "dashboard_canonical", - "dashboard_original", - "dashboard_staged", - Set.of("dashboard_existing"), - "dashboard_alias", - List.of("chart_alias")); - @SuppressWarnings("unchecked") - Set promotedEntities = (Set) getField("promotedEntities"); - @SuppressWarnings("unchecked") - Map failures = - (Map) getField("entityBatchFailures"); - promotedEntities.add(Entity.TABLE); - failures.put(Entity.DASHBOARD, new AtomicInteger(1)); - setField("recreateIndexHandler", handler); - setField("recreateContext", recreateContext); - - invokePrivateMethod("finalizeReindex", new Class[0]); - - ArgumentCaptor contextCaptor = - ArgumentCaptor.forClass(EntityReindexContext.class); - verify(handler).finalizeReindex(contextCaptor.capture(), eq(false)); - assertEquals(Entity.DASHBOARD, contextCaptor.getValue().getEntityType()); - 
assertEquals("dashboard_canonical", contextCaptor.getValue().getCanonicalIndex()); - assertEquals(Set.of("dashboard_existing"), contextCaptor.getValue().getExistingAliases()); - assertEquals(Set.of("chart_alias"), contextCaptor.getValue().getParentAliases()); - assertSame(null, getField("recreateContext")); - assertTrue(((Set) getField("promotedEntities")).isEmpty()); - } - - @Test - void createSourceBuildsRegularEntitySourceWithKnownTotals() throws Exception { - executor.getStats().set(initializeStats(Set.of(Entity.TABLE))); - executor - .getStats() - .get() - .getEntityStats() - .getAdditionalProperties() - .get(Entity.TABLE) - .setTotalRecords(7); - setField("batchSize", new java.util.concurrent.atomic.AtomicReference<>(50)); - - try (MockedConstruction ignored = - mockConstruction( - PaginatedEntitiesSource.class, - (source, context) -> { - assertEquals(Entity.TABLE, context.arguments().get(0)); - assertEquals(50, context.arguments().get(1)); - assertEquals(List.of("*"), context.arguments().get(2)); - assertEquals(7, context.arguments().get(3)); - })) { - assertNotNull( - invokePrivateMethod("createSource", new Class[] {String.class}, Entity.TABLE)); - } - } - - @Test - void createSourceBuildsTimeSeriesSourceForCorrectedQueryCostType() throws Exception { - executor.getStats().set(initializeStats(Set.of(Entity.QUERY_COST_RECORD))); - executor - .getStats() - .get() - .getEntityStats() - .getAdditionalProperties() - .get(Entity.QUERY_COST_RECORD) - .setTotalRecords(5); - setField("batchSize", new java.util.concurrent.atomic.AtomicReference<>(40)); - setField( - "config", - ReindexingConfiguration.builder() - .timeSeriesEntityDays(Map.of(Entity.QUERY_COST_RECORD, 1)) - .build()); - - try (MockedConstruction ignored = - mockConstruction( - PaginatedEntityTimeSeriesSource.class, - (source, context) -> { - assertEquals(Entity.QUERY_COST_RECORD, context.arguments().get(0)); - assertEquals(40, context.arguments().get(1)); - assertEquals(List.of(), context.arguments().get(2)); - assertEquals(5, context.arguments().get(3)); - assertEquals(6, context.arguments().size()); - assertTrue((Long) context.arguments().get(4) > 0); - assertTrue((Long) context.arguments().get(5) >= (Long) context.arguments().get(4)); - })) { - assertNotNull( - invokePrivateMethod("createSource", new Class[] {String.class}, "queryCostResult")); - } - } - - @Test - void searchFieldAndExtractionHelpersRespectEntityKinds() throws Exception { - @SuppressWarnings("unchecked") - List regularFields = - (List) - invokePrivateMethod( - "getSearchIndexFields", new Class[] {String.class}, Entity.TABLE); - @SuppressWarnings("unchecked") - List timeSeriesFields = - (List) - invokePrivateMethod( - "getSearchIndexFields", new Class[] {String.class}, Entity.QUERY_COST_RECORD); - ResultList regularEntities = new ResultList<>(List.of("regular")); - ResultList timeSeriesEntities = new ResultList<>(List.of("timeseries")); - - assertEquals(List.of("*"), regularFields); - assertEquals(List.of(), timeSeriesFields); - assertSame( - regularEntities, - invokePrivateMethod( - "extractEntities", - new Class[] {String.class, Object.class}, - Entity.TABLE, - regularEntities)); - assertSame( - timeSeriesEntities, - invokePrivateMethod( - "extractEntities", - new Class[] {String.class, Object.class}, - Entity.QUERY_COST_RECORD, - timeSeriesEntities)); - } - - @Test - void updateSinkTotalSubmittedInitializesStatsAndDetermineStatusTracksIncompleteWork() - throws Exception { - Stats stats = new Stats(); - stats.setJobStats( - new 
StepStats().withTotalRecords(10).withSuccessRecords(9).withFailedRecords(0)); - executor.getStats().set(stats); - - executor.updateSinkTotalSubmitted(4); - - assertEquals(4, executor.getStats().get().getSinkStats().getTotalRecords()); - assertEquals( - ExecutionResult.Status.COMPLETED_WITH_ERRORS, - invokePrivateMethod("determineStatus", new Class[0])); - - ((java.util.concurrent.atomic.AtomicBoolean) getField("stopped")).set(true); - assertEquals( - ExecutionResult.Status.STOPPED, invokePrivateMethod("determineStatus", new Class[0])); - ((java.util.concurrent.atomic.AtomicBoolean) getField("stopped")).set(false); - } - - @Test - void buildEntityReindexContextCopiesAliasAndIndexState() throws Exception { - ReindexContext recreateContext = new ReindexContext(); - recreateContext.add( - Entity.TABLE, - "table_canonical", - "table_original", - "table_staged", - Set.of("table_existing"), - "table_alias", - List.of("column_alias")); - setField("recreateContext", recreateContext); - - EntityReindexContext context = - (EntityReindexContext) - invokePrivateMethod( - "buildEntityReindexContext", new Class[] {String.class}, Entity.TABLE); - - assertEquals(Entity.TABLE, context.getEntityType()); - assertEquals("table_original", context.getOriginalIndex()); - assertEquals("table_canonical", context.getCanonicalIndex()); - assertEquals("table_original", context.getActiveIndex()); - assertEquals("table_staged", context.getStagedIndex()); - assertEquals("table_alias", context.getCanonicalAliases()); - assertEquals(Set.of("table_existing"), context.getExistingAliases()); - assertEquals(Set.of("column_alias"), context.getParentAliases()); - } - - @Test - void reCreateIndexesDelegatesWhenHandlerExistsAndReturnsNullOtherwise() throws Exception { - RecreateIndexHandler handler = mock(RecreateIndexHandler.class); - ReindexContext recreateContext = new ReindexContext(); - when(handler.reCreateIndexes(Set.of(Entity.TABLE))).thenReturn(recreateContext); - setField("recreateIndexHandler", handler); - - assertSame( - recreateContext, - invokePrivateMethod("reCreateIndexes", new Class[] {Set.class}, Set.of(Entity.TABLE))); - - setField("recreateIndexHandler", null); - assertSame( - null, - invokePrivateMethod("reCreateIndexes", new Class[] {Set.class}, Set.of(Entity.TABLE))); - } - - @Test - void closeFlushesStatsManagerAndSinkTrackersBeforeShutdown() throws Exception { - JobStatsManager statsManager = mock(JobStatsManager.class); - StageStatsTracker tracker = mock(StageStatsTracker.class); - @SuppressWarnings("unchecked") - Map sinkTrackers = - (Map) getField("sinkTrackers"); - setField("statsManager", statsManager); - sinkTrackers.put(Entity.TABLE, tracker); - - executor.close(); - - verify(statsManager).flushAll(); - verify(tracker).flush(); - assertTrue(executor.isStopped()); - } - - @Test - void executeCompletesRecreateFlowForZeroEntityWorkload() { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - ReindexingJobContext jobContext = mock(ReindexingJobContext.class); - CollectionDAO.SearchIndexFailureDAO failureDao = - mock(CollectionDAO.SearchIndexFailureDAO.class); - EntityRepository entityRepository = mock(EntityRepository.class); - EntityDAO entityDao = mock(EntityDAO.class); - BulkSink sink = mock(BulkSink.class); - DefaultRecreateHandler handler = mock(DefaultRecreateHandler.class); - UUID jobId = UUID.randomUUID(); - ReindexContext recreateContext = new ReindexContext(); - ReindexingConfiguration config = - ReindexingConfiguration.builder() - .entities(Set.of(Entity.TABLE)) - 
.recreateIndex(true) - .build(); - - recreateContext.add( - Entity.TABLE, - "table_canonical", - "table_original", - "table_staged", - Set.of("table_existing"), - "table_alias", - List.of("column_alias")); - when(jobContext.getJobId()).thenReturn(jobId); - when(collectionDAO.searchIndexFailureDAO()).thenReturn(failureDao); - when(entityRepository.getDao()).thenReturn(entityDao); - when(entityDao.listCount(any(ListFilter.class))).thenReturn(0); - when(searchRepository.createBulkSink( - 100, 100, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES)) - .thenReturn(sink); - when(searchRepository.createReindexHandler()).thenReturn(handler); - when(handler.reCreateIndexes(Set.of(Entity.TABLE))).thenReturn(recreateContext); - executor.addListener(listener); - - try (MockedStatic entityMock = mockStatic(Entity.class); - MockedConstruction ignored = - mockConstruction(SearchIndexClusterValidator.class)) { - entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); - - ExecutionResult result = executor.execute(config, jobContext); - - assertEquals(ExecutionResult.Status.COMPLETED, result.status()); - } - - verify(listener).onJobStarted(jobContext); - verify(listener).onJobConfigured(jobContext, config); - verify(listener).onIndexRecreationStarted(Set.of(Entity.TABLE)); - verify(listener).onEntityTypeStarted(Entity.TABLE, 0); - verify(listener).onEntityTypeCompleted(eq(Entity.TABLE), any()); - verify(listener).onJobCompleted(any(Stats.class), anyLong()); - verify(handler).reCreateIndexes(Set.of(Entity.TABLE)); - verify(handler).promoteEntityIndex(any(EntityReindexContext.class), eq(true)); - verify(sink).close(); - } - - @Test - void executeReturnsFailedResultWhenInitializationThrows() { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - ReindexingJobContext jobContext = mock(ReindexingJobContext.class); - EntityRepository entityRepository = mock(EntityRepository.class); - EntityDAO entityDao = mock(EntityDAO.class); - ReindexingConfiguration config = - ReindexingConfiguration.builder().entities(Set.of(Entity.TABLE)).build(); - - when(jobContext.getJobId()).thenReturn(UUID.randomUUID()); - when(entityRepository.getDao()).thenReturn(entityDao); - when(entityDao.listCount(any(ListFilter.class))).thenReturn(0); - when(searchRepository.createBulkSink( - 100, 100, SearchClusterMetrics.DEFAULT_BULK_PAYLOAD_SIZE_BYTES)) - .thenThrow(new IllegalStateException("sink init failed")); - executor.addListener(listener); - - try (MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock.when(() -> Entity.getEntityRepository(Entity.TABLE)).thenReturn(entityRepository); - - ExecutionResult result = executor.execute(config, jobContext); - - assertEquals(ExecutionResult.Status.FAILED, result.status()); - } - - verify(listener).onJobStarted(jobContext); - verify(listener).onJobFailed(any(Stats.class), any(IllegalStateException.class)); - } - - @Test - void processEntityTypeSubmitsRegularReadersAndAdjustsBoundaryShortfall() throws Exception { - ExecutorService producerExecutor = mock(ExecutorService.class); - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - Phaser producerPhaser = new Phaser(1); - String boundaryCursor = RestUtil.encodeCursor("{\"name\":\"m\",\"id\":\"1\"}"); - - doAnswer( - invocation -> { - ((Runnable) invocation.getArgument(0)).run(); - return null; - }) - .when(producerExecutor) - .submit(any(Runnable.class)); - 
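// Editorial aside (not part of the deleted test file): the doAnswer stub above is
// the standard Mockito idiom for making an ExecutorService run submitted work on
// the caller's thread, so the producer pipeline becomes synchronous and the test
// can assert on queue contents immediately after the call:
//
//   ExecutorService inline = mock(ExecutorService.class);
//   doAnswer(inv -> { ((Runnable) inv.getArgument(0)).run(); return null; })
//       .when(inline)
//       .submit(any(Runnable.class));
//
// After this stub, any code calling inline.submit(runnable) executes the runnable
// before submit() returns.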
executor.addListener(listener); - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.USER, 45))); - setField("producerExecutor", producerExecutor); - setField("taskQueue", queue); - setField("config", ReindexingConfiguration.builder().entities(Set.of(Entity.USER)).build()); - setField("batchSize", new java.util.concurrent.atomic.AtomicReference<>(10)); - - try (MockedConstruction ignored = - mockConstruction( - PaginatedEntitiesSource.class, - (source, context) -> { - when(source.findBoundaryCursors(3, 45)).thenReturn(List.of(boundaryCursor)); - when(source.readNextKeyset(any())) - .thenReturn( - (ResultList) - new ResultList<>(List.of(mock(EntityInterface.class)), null, null, 1)); - })) { - invokePrivateMethod( - "processEntityType", - new Class[] {String.class, Phaser.class}, - Entity.USER, - producerPhaser); - } - - assertEquals(2, queue.size()); - assertTrue(producerPhaser.isTerminated()); - @SuppressWarnings("unchecked") - Map batchCounters = - (Map) getField("entityBatchCounters"); - assertEquals(0, batchCounters.get(Entity.USER).get()); - verify(listener).onEntityTypeStarted(Entity.USER, 45); - verify(listener).onEntityTypeCompleted(eq(Entity.USER), any()); - } - - @Test - void processKeysetBatchesRecordsSuccessfulReadAndPromotesEntity() throws Exception { - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - DefaultRecreateHandler handler = mock(DefaultRecreateHandler.class); - ReindexContext recreateContext = new ReindexContext(); - Phaser producerPhaser = new Phaser(1); - - recreateContext.add( - Entity.TABLE, - "table_canonical", - "table_original", - "table_staged", - Set.of("table_existing"), - "table_alias", - List.of("column_alias")); - setField("taskQueue", queue); - setField("config", ReindexingConfiguration.builder().recreateIndex(true).build()); - setField("recreateIndexHandler", handler); - setField("recreateContext", recreateContext); - @SuppressWarnings("unchecked") - Map batchCounters = - (Map) getField("entityBatchCounters"); - @SuppressWarnings("unchecked") - Map batchFailures = - (Map) getField("entityBatchFailures"); - batchCounters.put(Entity.TABLE, new AtomicInteger(1)); - batchFailures.put(Entity.TABLE, new AtomicInteger(0)); - - invokePrivateMethod( - "processKeysetBatches", - new Class[] { - String.class, - int.class, - int.class, - String.class, - SearchIndexExecutor.KeysetBatchReader.class, - Phaser.class - }, - Entity.TABLE, - 10, - 5, - null, - (SearchIndexExecutor.KeysetBatchReader) - cursor -> new ResultList<>(List.of("entity"), null, null, 1), - producerPhaser); - - assertEquals(1, queue.size()); - assertTrue(producerPhaser.isTerminated()); - assertEquals(0, batchFailures.get(Entity.TABLE).get()); - verify(handler).promoteEntityIndex(any(EntityReindexContext.class), eq(true)); - } - - @Test - void processKeysetBatchesRecordsReaderFailuresAndMarksEntityFailed() throws Exception { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - IndexingFailureRecorder failureRecorder = mock(IndexingFailureRecorder.class); - Phaser producerPhaser = new Phaser(1); - SearchIndexException exception = - new SearchIndexException( - new IndexingError().withMessage("read timeout").withFailedCount(2)); - - executor.addListener(listener); - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.TABLE, 5))); - setField("taskQueue", new LinkedBlockingQueue<>()); - setField("failureRecorder", failureRecorder); - @SuppressWarnings("unchecked") - Map batchCounters = - (Map) getField("entityBatchCounters"); - 
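// Editorial aside (not part of the deleted test file): these processKeysetBatches
// tests all hand in a Phaser with one registered party and then assert
// isTerminated(). A minimal sketch of that coordination idiom, with illustrative
// names (the executor's real structure is not shown here):
//
//   Phaser phaser = new Phaser(1);      // caller registers the reader's party up front
//   runReader(() -> {
//     try {
//       // ... read batches, enqueue indexing tasks ...
//     } finally {
//       phaser.arriveAndDeregister();   // parties drop to 0, so the phaser terminates
//     }
//   });
//   // phaser.isTerminated() then signals "reader finished", even on the failure paths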
@SuppressWarnings("unchecked") - Map batchFailures = - (Map) getField("entityBatchFailures"); - batchCounters.put(Entity.TABLE, new AtomicInteger(1)); - batchFailures.put(Entity.TABLE, new AtomicInteger(0)); - - invokePrivateMethod( - "processKeysetBatches", - new Class[] { - String.class, - int.class, - int.class, - String.class, - SearchIndexExecutor.KeysetBatchReader.class, - Phaser.class - }, - Entity.TABLE, - 5, - 5, - null, - (SearchIndexExecutor.KeysetBatchReader) - cursor -> { - throw exception; - }, - producerPhaser); - - verify(failureRecorder) - .recordReaderFailure(eq(Entity.TABLE), eq("read timeout"), any(String.class)); - verify(listener).onError(eq(Entity.TABLE), eq(exception.getIndexingError()), any(Stats.class)); - assertEquals(2, executor.getStats().get().getReaderStats().getFailedRecords()); - assertEquals(2, executor.getStats().get().getJobStats().getFailedRecords()); - assertEquals(1, batchFailures.get(Entity.TABLE).get()); - assertTrue(producerPhaser.isTerminated()); - } - - @Test - void submitReadersSingleReaderQueuesBatchesWithoutBoundaryLookup() throws Exception { - ExecutorService producerExecutor = mock(ExecutorService.class); - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - Phaser producerPhaser = new Phaser(1); - - doAnswer( - invocation -> { - ((Runnable) invocation.getArgument(0)).run(); - return null; - }) - .when(producerExecutor) - .submit(any(Runnable.class)); - setField("producerExecutor", producerExecutor); - setField("taskQueue", queue); - @SuppressWarnings("unchecked") - Map batchCounters = - (Map) getField("entityBatchCounters"); - @SuppressWarnings("unchecked") - Map batchFailures = - (Map) getField("entityBatchFailures"); - batchCounters.put(Entity.TABLE, new AtomicInteger(1)); - batchFailures.put(Entity.TABLE, new AtomicInteger(0)); - - invokePrivateMethod( - "submitReaders", - new Class[] { - String.class, - int.class, - int.class, - int.class, - Phaser.class, - java.util.function.Supplier.class, - java.util.function.BiFunction.class - }, - Entity.TABLE, - 1, - 5, - 1, - producerPhaser, - (java.util.function.Supplier) - () -> cursor -> new ResultList<>(List.of("entity"), null, null, 1), - (java.util.function.BiFunction>) - (readers, total) -> { - throw new AssertionError("Boundary lookup should not run for a single reader"); - }); - - assertEquals(1, queue.size()); - assertTrue(producerPhaser.isTerminated()); - assertEquals(0, batchFailures.get(Entity.TABLE).get()); - } - - @Test - void processBatchQueuesReadResultsAndPromotesFinalBatch() throws Exception { - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - CountDownLatch latch = new CountDownLatch(1); - DefaultRecreateHandler handler = mock(DefaultRecreateHandler.class); - ReindexContext recreateContext = new ReindexContext(); - - recreateContext.add( - Entity.USER, - "user_canonical", - "user_original", - "user_staged", - Set.of("user_existing"), - "user_alias", - List.of("team_alias")); - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.USER, 1))); - setField("taskQueue", queue); - setField("batchSize", new java.util.concurrent.atomic.AtomicReference<>(10)); - setField("config", ReindexingConfiguration.builder().recreateIndex(true).build()); - setField("recreateIndexHandler", handler); - setField("recreateContext", recreateContext); - @SuppressWarnings("unchecked") - Map batchCounters = - (Map) getField("entityBatchCounters"); - @SuppressWarnings("unchecked") - Map batchFailures = - (Map) getField("entityBatchFailures"); - batchCounters.put(Entity.USER, new 
AtomicInteger(1)); - batchFailures.put(Entity.USER, new AtomicInteger(0)); - - try (MockedConstruction ignored = - mockConstruction( - PaginatedEntitiesSource.class, - (source, context) -> - when(source.readWithCursor(RestUtil.encodeCursor("0"))) - .thenReturn( - (ResultList) new ResultList<>(List.of(mock(EntityInterface.class)))))) { - invokePrivateMethod( - "processBatch", - new Class[] {String.class, int.class, CountDownLatch.class}, - Entity.USER, - 0, - latch); - } - - assertEquals(0, latch.getCount()); - assertEquals(1, queue.size()); - verify(handler).promoteEntityIndex(any(EntityReindexContext.class), eq(true)); - } - - @Test - void handleSinkFailureRoutesProcessAndSinkStagesToRecorder() throws Exception { - IndexingFailureRecorder failureRecorder = mock(IndexingFailureRecorder.class); - setField("failureRecorder", failureRecorder); - - invokePrivateMethod( - "handleSinkFailure", - new Class[] { - String.class, - String.class, - String.class, - String.class, - IndexingFailureRecorder.FailureStage.class - }, - Entity.TABLE, - "1", - "svc.db.table", - "process boom", - IndexingFailureRecorder.FailureStage.PROCESS); - invokePrivateMethod( - "handleSinkFailure", - new Class[] { - String.class, - String.class, - String.class, - String.class, - IndexingFailureRecorder.FailureStage.class - }, - Entity.TABLE, - "2", - "svc.db.table", - "sink boom", - IndexingFailureRecorder.FailureStage.SINK); - - verify(failureRecorder).recordProcessFailure(Entity.TABLE, "1", "svc.db.table", "process boom"); - verify(failureRecorder).recordSinkFailure(Entity.TABLE, "2", "svc.db.table", "sink boom"); - } - - @Test - void isBackpressureActiveTracksQueueFillRatio() throws Exception { - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(10); - ReindexingMetrics metrics = mock(ReindexingMetrics.class); - - for (int i = 0; i < 10; i++) { - queue.add(i); - } - setField("taskQueue", queue); - - try (MockedStatic metricsMock = mockStatic(ReindexingMetrics.class)) { - metricsMock.when(ReindexingMetrics::getInstance).thenReturn(metrics); - - assertTrue((Boolean) invokePrivateMethod("isBackpressureActive", new Class[0])); - verify(metrics).updateQueueFillRatio(100); - - queue.clear(); - assertFalse((Boolean) invokePrivateMethod("isBackpressureActive", new Class[0])); - verify(metrics).updateQueueFillRatio(0); - } - } - - @Test - void calculateNumberOfThreadsHandlesExactRemaindersAndInvalidBatchSize() throws Exception { - assertEquals( - 1, - invokePrivateMethod( - "calculateNumberOfThreads", new Class[] {int.class, int.class}, 10, 0)); - assertEquals( - 2, - invokePrivateMethod( - "calculateNumberOfThreads", new Class[] {int.class, int.class}, 40, 20)); - assertEquals( - 3, - invokePrivateMethod( - "calculateNumberOfThreads", new Class[] {int.class, int.class}, 41, 20)); - } - - @Test - void runConsumerProcessesQueuedWorkUntilPoisonPill() throws Exception { - BulkSink sink = mock(BulkSink.class); - @SuppressWarnings("unchecked") - Source source = mock(Source.class); - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - CountDownLatch latch = new CountDownLatch(1); - - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.TABLE, 1))); - setField("config", ReindexingConfiguration.builder().build()); - setField("searchIndexSink", sink); - setField("taskQueue", queue); - when(source.readWithCursor(RestUtil.encodeCursor("0"))) - .thenReturn(new ResultList<>(List.of(mock(EntityInterface.class)))); - - invokePrivateMethod( - "processReadTask", - new Class[] {String.class, Source.class, int.class}, - 
Entity.TABLE, - source, - 0); - invokePrivateMethod("signalConsumersToStop", new Class[] {int.class}, 1); - invokePrivateMethod("runConsumer", new Class[] {int.class, CountDownLatch.class}, 0, latch); - - verify(sink).write(any(List.class), any(Map.class)); - assertEquals(0, latch.getCount()); - assertEquals(1, executor.getStats().get().getJobStats().getSuccessRecords()); - } - - @Test - void processEntityReindexStopsImmediatelyWhenExecutorIsStopped() throws Exception { - ExecutorService producerExecutor = mock(ExecutorService.class); - ExecutorService jobExecutor = mock(ExecutorService.class); - - setField("producerExecutor", producerExecutor); - setField("jobExecutor", jobExecutor); - ((java.util.concurrent.atomic.AtomicBoolean) getField("stopped")).set(true); - - invokePrivateMethod("processEntityReindex", new Class[] {Set.class}, Set.of(Entity.TABLE)); - - verify(producerExecutor).shutdownNow(); - verify(jobExecutor).shutdownNow(); - ((java.util.concurrent.atomic.AtomicBoolean) getField("stopped")).set(false); - } - - @Test - void cleanupExecutorsShutsDownAllPoolsWhenStillRunning() throws Exception { - ExecutorService consumerExecutor = Executors.newSingleThreadExecutor(); - ExecutorService jobExecutor = Executors.newSingleThreadExecutor(); - ExecutorService producerExecutor = Executors.newSingleThreadExecutor(); - - setField("consumerExecutor", consumerExecutor); - setField("jobExecutor", jobExecutor); - setField("producerExecutor", producerExecutor); - - invokePrivateMethod("cleanupExecutors", new Class[0]); - - assertTrue(consumerExecutor.isShutdown()); - assertTrue(jobExecutor.isShutdown()); - assertTrue(producerExecutor.isShutdown()); - } - - @Test - void removeListenerReturnsExecutorInstance() { - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - - assertSame(executor, executor.addListener(listener).removeListener(listener)); - } - - @Test - void expandEntitiesReturnsIndexedUniverseWhenAllRequested() throws Exception { - when(searchRepository.getEntityIndexMap()) - .thenReturn( - Map.of( - Entity.TABLE, mock(org.openmetadata.search.IndexMapping.class), - Entity.ENTITY_REPORT_DATA, mock(org.openmetadata.search.IndexMapping.class))); - - try (MockedStatic entityMock = mockStatic(Entity.class)) { - entityMock.when(Entity::getEntityList).thenReturn(Set.of(Entity.TABLE, Entity.USER)); - - @SuppressWarnings("unchecked") - Set expanded = - (Set) - invokePrivateMethod("expandEntities", new Class[] {Set.class}, Set.of("all")); - - assertTrue(expanded.contains(Entity.TABLE)); - assertTrue(expanded.contains(Entity.ENTITY_REPORT_DATA)); - assertFalse(expanded.contains(Entity.USER)); - } - } - - @Test - void calculateThreadConfigurationHonorsConfiguredProducerAndConsumerThreads() throws Exception { - setField( - "config", - ReindexingConfiguration.builder() - .entities(Set.of(Entity.TABLE)) - .producerThreads(6) - .consumerThreads(4) - .build()); - - Object threadConfiguration = - invokePrivateMethod("calculateThreadConfiguration", new Class[] {long.class}, 50_000L); - - assertEquals(6, invokeRecordAccessor(threadConfiguration, "numProducers")); - assertEquals(4, invokeRecordAccessor(threadConfiguration, "numConsumers")); - } - - @Test - void runConsumerContinuesPollingAndExitsWhenInterrupted() throws Exception { - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - CountDownLatch latch = new CountDownLatch(1); - setField("taskQueue", queue); - - Thread consumerThread = - new Thread( - () -> { - try { - invokePrivateMethod( - "runConsumer", new Class[] 
{int.class, CountDownLatch.class}, 7, latch); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - - consumerThread.start(); - Thread.sleep(250); - consumerThread.interrupt(); - consumerThread.join(2_000); - - assertFalse(consumerThread.isAlive()); - assertEquals(0, latch.getCount()); - } - - @Test - void processTaskRecordsReaderBatchAndHandlesTimeSeriesSinkFailuresWithoutIndexingError() - throws Exception { - BulkSink sink = mock(BulkSink.class); - JobStatsManager statsManager = mock(JobStatsManager.class); - org.openmetadata.service.apps.bundles.searchIndex.stats.EntityStatsTracker tracker = - mock(org.openmetadata.service.apps.bundles.searchIndex.stats.EntityStatsTracker.class); - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - EntityTimeSeriesInterface timeSeriesEntity = mock(EntityTimeSeriesInterface.class); - - executor.addListener(listener); - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.TEST_CASE_RESULT, 1))); - setField("config", ReindexingConfiguration.builder().build()); - setField("searchIndexSink", sink); - setField("statsManager", statsManager); - when(statsManager.getTracker(Entity.TEST_CASE_RESULT)).thenReturn(tracker); - doThrow(new SearchIndexException(new RuntimeException("sink failed"))) - .when(sink) - .write(any(List.class), any(Map.class)); - - invokeProcessTask( - newIndexingTask( - Entity.TEST_CASE_RESULT, - new ResultList<>(List.of(timeSeriesEntity), null, null, 0), - 0)); - - verify(tracker).recordReaderBatch(1, 0, 0); - verify(sink).write(any(List.class), any(Map.class)); - verify(listener) - .onError(eq(Entity.TEST_CASE_RESULT), any(IndexingError.class), any(Stats.class)); - } - - @Test - void processTaskRoutesGenericSinkExceptionsToFailureHandler() throws Exception { - BulkSink sink = mock(BulkSink.class); - ReindexingProgressListener listener = mock(ReindexingProgressListener.class); - EntityInterface entity = mock(EntityInterface.class); - - executor.addListener(listener); - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.TABLE, 1))); - setField("config", ReindexingConfiguration.builder().build()); - setField("searchIndexSink", sink); - doThrow(new IllegalStateException("generic sink failure")) - .when(sink) - .write(any(List.class), any(Map.class)); - - invokeProcessTask( - newIndexingTask(Entity.TABLE, new ResultList<>(List.of(entity), null, null, 0), 0)); - - verify(listener).onError(eq(Entity.TABLE), any(IndexingError.class), any(Stats.class)); - } - - @Test - void processEntityTypeUsesTimeSeriesSourcesWithConfiguredWindow() throws Exception { - ExecutorService producerExecutor = mock(ExecutorService.class); - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - Phaser producerPhaser = new Phaser(1); - - doAnswer( - invocation -> { - ((Runnable) invocation.getArgument(0)).run(); - return null; - }) - .when(producerExecutor) - .submit(any(Runnable.class)); - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.TEST_CASE_RESULT, 24))); - setField("producerExecutor", producerExecutor); - setField("taskQueue", queue); - setField( - "config", - ReindexingConfiguration.builder() - .entities(Set.of(Entity.TEST_CASE_RESULT)) - .timeSeriesEntityDays(Map.of(Entity.TEST_CASE_RESULT, 7)) - .build()); - setField("batchSize", new AtomicReference<>(10)); - - try (MockedConstruction ignored = - mockConstruction( - PaginatedEntityTimeSeriesSource.class, - (source, context) -> - when(source.readWithCursor(any())) - .thenReturn( - (ResultList) - new 
ResultList<>(List.of(mock(EntityTimeSeriesInterface.class)))))) { - invokePrivateMethod( - "processEntityType", - new Class[] {String.class, Phaser.class}, - Entity.TEST_CASE_RESULT, - producerPhaser); - } - - assertFalse(queue.isEmpty()); - assertTrue(producerPhaser.isTerminated()); - } - - @Test - void processEntityTypeDeregistersReaderPartiesWhenSubmissionFails() throws Exception { - ExecutorService producerExecutor = mock(ExecutorService.class); - Phaser producerPhaser = new Phaser(1); - - when(producerExecutor.submit(any(Runnable.class))) - .thenThrow(new IllegalStateException("submit failed")); - executor.getStats().set(statsWithEntityTotals(Map.of(Entity.USER, 40))); - setField("producerExecutor", producerExecutor); - setField("taskQueue", new LinkedBlockingQueue<>()); - setField("config", ReindexingConfiguration.builder().entities(Set.of(Entity.USER)).build()); - setField("batchSize", new AtomicReference<>(10)); - - invokePrivateMethod( - "processEntityType", - new Class[] {String.class, Phaser.class}, - Entity.USER, - producerPhaser); - - assertTrue(producerPhaser.isTerminated()); - } - - @Test - void processKeysetBatchesStopsWhenReaderReachesEndCursorBoundary() throws Exception { - LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - Phaser producerPhaser = new Phaser(1); - String boundaryCursor = "{\"name\":\"orders\",\"id\":\"2\"}"; - String endCursor = RestUtil.encodeCursor(boundaryCursor); - ResultList page = new ResultList<>(List.of("entity"), null, null, boundaryCursor, 1); - - setField("taskQueue", queue); - @SuppressWarnings("unchecked") - Map batchCounters = - (Map) getField("entityBatchCounters"); - @SuppressWarnings("unchecked") - Map batchFailures = - (Map) getField("entityBatchFailures"); - batchCounters.put(Entity.TABLE, new AtomicInteger(1)); - batchFailures.put(Entity.TABLE, new AtomicInteger(0)); - - invokePrivateMethod( - "processKeysetBatches", - new Class[] { - String.class, - int.class, - int.class, - String.class, - SearchIndexExecutor.KeysetBatchReader.class, - Phaser.class, - String.class - }, - Entity.TABLE, - 10, - 5, - null, - (SearchIndexExecutor.KeysetBatchReader) cursor -> page, - producerPhaser, - endCursor); - - assertEquals(1, queue.size()); - assertEquals(0, batchFailures.get(Entity.TABLE).get()); - assertTrue(producerPhaser.isTerminated()); - } - - @Test - void processKeysetBatchesMarksFailuresForUnexpectedExceptions() throws Exception { - Phaser producerPhaser = new Phaser(1); - @SuppressWarnings("unchecked") - Map batchCounters = - (Map) getField("entityBatchCounters"); - @SuppressWarnings("unchecked") - Map batchFailures = - (Map) getField("entityBatchFailures"); - batchCounters.put(Entity.TABLE, new AtomicInteger(1)); - batchFailures.put(Entity.TABLE, new AtomicInteger(0)); - setField("taskQueue", new LinkedBlockingQueue<>()); - - invokePrivateMethod( - "processKeysetBatches", - new Class[] { - String.class, - int.class, - int.class, - String.class, - SearchIndexExecutor.KeysetBatchReader.class, - Phaser.class - }, - Entity.TABLE, - 5, - 5, - null, - (SearchIndexExecutor.KeysetBatchReader) - cursor -> { - throw new IllegalStateException("unexpected"); - }, - producerPhaser); - - assertEquals(1, batchFailures.get(Entity.TABLE).get()); - assertTrue(producerPhaser.isTerminated()); - } - - /** - * Validates the full cursor decode → BoundedListFilter flow: an encoded boundary cursor - * is decoded and used to construct a filter with the correct SQL boundary condition. 
-   * This is the core mechanism that replaces the broken Java-side hasReachedEndCursor comparison.
-   */
-  @Test
-  @SuppressWarnings("unchecked")
-  void encodedBoundaryCursorProducesCorrectBoundedFilter() {
-    // The exact cursor that would be produced by getCursorAtOffset for entity "Foxtrot"
-    String boundaryCursorJson =
-        "{\"name\":\"Foxtrot\",\"id\":\"00000000-0000-0000-0000-000000000006\"}";
-    String encodedBoundary = RestUtil.encodeCursor(boundaryCursorJson);
-
-    // Decode — same logic as submitEntityReaders
-    String decoded = RestUtil.decodeCursor(encodedBoundary);
-    Map<String, String> cursorMap =
-        org.openmetadata.schema.utils.JsonUtils.readValue(decoded, Map.class);
-
-    assertEquals("Foxtrot", cursorMap.get("name"));
-    assertEquals("00000000-0000-0000-0000-000000000006", cursorMap.get("id"));
-
-    // Construct BoundedListFilter with decoded values
-    org.openmetadata.service.jdbi3.BoundedListFilter filter =
-        new org.openmetadata.service.jdbi3.BoundedListFilter(
-            org.openmetadata.schema.type.Include.ALL, cursorMap.get("name"), cursorMap.get("id"));
-
-    String condition = filter.getCondition(null);
-    assertTrue(condition.contains("name < :reindexEndName"));
-    assertTrue(condition.contains("name = :reindexEndName AND id <= :reindexEndId"));
-    assertEquals("Foxtrot", filter.getQueryParams().get("reindexEndName"));
-    assertEquals(
-        "00000000-0000-0000-0000-000000000006", filter.getQueryParams().get("reindexEndId"));
-  }
-
-  /**
-   * Verifies that a BoundedListFilter and a plain ListFilter produce different conditions,
-   * confirming the non-last reader gets a bounded query while the last reader does not.
-   */
-  @Test
-  void boundedVsUnboundedFilterProduceDifferentConditions() {
-    ListFilter unbounded = new ListFilter(org.openmetadata.schema.type.Include.ALL);
-    org.openmetadata.service.jdbi3.BoundedListFilter bounded =
-        new org.openmetadata.service.jdbi3.BoundedListFilter(
-            org.openmetadata.schema.type.Include.ALL,
-            "Foxtrot",
-            "00000000-0000-0000-0000-000000000006");
-
-    String unboundedCond = unbounded.getCondition(null);
-    String boundedCond = bounded.getCondition(null);
-
-    assertFalse(unboundedCond.contains("reindexEndName"));
-    assertTrue(boundedCond.contains("reindexEndName"));
-    assertTrue(boundedCond.startsWith(unboundedCond));
-  }
-
-  /**
-   * Validates that the old Java-side cursor comparison no longer applies to entity cursors.
-   * This is the exact scenario that caused the bug: "echo".compareTo("Foxtrot") > 0 in Java
-   * but "echo" < "Foxtrot" in MySQL case-insensitive collation.
-   */
-  @Test
-  void hasReachedEndCursorNoLongerComparesEntityCursors() throws Exception {
-    // This is the exact pair that triggered the bug:
-    // Java: "echo" > "Foxtrot" (e=101 > F=70) → old code returned TRUE (stop reader)
-    // MySQL: "echo" < "Foxtrot" (case-insensitive: e < f) → reader should continue
-    String echoCursor =
-        RestUtil.encodeCursor(
-            "{\"name\":\"echo\",\"id\":\"00000000-0000-0000-0000-000000000005\"}");
-    String foxtrotCursor =
-        RestUtil.encodeCursor(
-            "{\"name\":\"Foxtrot\",\"id\":\"00000000-0000-0000-0000-000000000006\"}");
-
-    // After fix: hasReachedEndCursor returns FALSE for entity cursors (boundary is in SQL now)
-    assertFalse(
-        (Boolean)
-            invokePrivateMethod(
-                "hasReachedEndCursor",
-                new Class[] {String.class, String.class},
-                echoCursor,
-                foxtrotCursor),
-        "Entity cursor comparison must not happen in Java — SQL boundary handles it");
-  }
-
-  /**
-   * Verifies that the old bug scenario is now impossible: mixed-case names at boundaries
-   * cannot cause missing entities because the boundary is enforced in SQL, not Java.
-   */
-  @Test
-  void mixedCaseEntityNamesAtBoundaryProduceBoundedSqlCondition() {
-    // Simulate the exact scenario: boundary entity is "Foxtrot"
-    org.openmetadata.service.jdbi3.BoundedListFilter filter =
-        new org.openmetadata.service.jdbi3.BoundedListFilter(
-            org.openmetadata.schema.type.Include.ALL,
-            "Foxtrot",
-            "00000000-0000-0000-0000-000000000006");
-
-    String condition = filter.getCondition(null);
-
-    // The SQL condition ensures the DB collation handles the comparison.
-    // On MySQL: WHERE ... AND (name < 'Foxtrot' OR (name = 'Foxtrot' AND id <= 'uuid'))
-    // The DB evaluates "echo" < "Foxtrot" as TRUE (case-insensitive), so "echo" IS included.
-    // "Foxtrot" itself is included (id <= boundary id).
-    // "golf" is excluded (name > "Foxtrot" case-insensitively).
-    assertTrue(condition.contains("name < :reindexEndName"));
-    assertTrue(condition.contains("name = :reindexEndName AND id <= :reindexEndId"));
-    assertEquals("Foxtrot", filter.getQueryParams().get("reindexEndName"));
-  }
-
-  private Stats initializeStats(Set<String> entities) {
-    Stats stats = executor.initializeTotalRecords(entities);
-    if (stats.getEntityStats() == null) {
-      stats.setEntityStats(new EntityStats());
-    }
-    return stats;
-  }
-
-  private Stats statsWithEntityTotals(Map<String, Integer> entityTotals) {
-    Stats stats = new Stats();
-    EntityStats entityStats = new EntityStats();
-    int totalRecords = 0;
-
-    for (Map.Entry<String, Integer> entry : entityTotals.entrySet()) {
-      totalRecords += entry.getValue();
-      entityStats
-          .getAdditionalProperties()
-          .put(
-              entry.getKey(),
-              new StepStats()
-                  .withTotalRecords(entry.getValue())
-                  .withSuccessRecords(0)
-                  .withFailedRecords(0));
-    }
-
-    stats.setEntityStats(entityStats);
-    stats.setJobStats(
-        new StepStats().withTotalRecords(totalRecords).withSuccessRecords(0).withFailedRecords(0));
-    stats.setReaderStats(
-        new StepStats()
-            .withTotalRecords(totalRecords)
-            .withSuccessRecords(0)
-            .withFailedRecords(0)
-            .withWarningRecords(0));
-    stats.setSinkStats(
-        new StepStats().withTotalRecords(0).withSuccessRecords(0).withFailedRecords(0));
-    stats.setProcessStats(
-        new StepStats().withTotalRecords(0).withSuccessRecords(0).withFailedRecords(0));
-    return stats;
-  }
-
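// Editorial aside (not part of the deleted test file): the ordering mismatch the
// tests above document can be reproduced with nothing but java.lang.String. A
// minimal, JDK-only sketch:
//
//   // Java's compareTo uses case-sensitive code-point order: 'e' (101) > 'F' (70)
//   boolean javaSaysEchoIsAfter = "echo".compareTo("Foxtrot") > 0;           // true
//   // Case-insensitive order, as MySQL's default *_ci collations sort names: e < f
//   boolean dbSaysEchoIsBefore = "echo".compareToIgnoreCase("Foxtrot") < 0;  // true
//
// Because the two orderings disagree, a Java-side stop check on cursor names could
// halt a reader before the database had actually reached the boundary row, which is
// why the boundary predicate now lives in SQL instead.

-  private Object invokePrivateMethod(String methodName, Class<?>[] parameterTypes, Object...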
args) - throws Exception { - Method method = SearchIndexExecutor.class.getDeclaredMethod(methodName, parameterTypes); - method.setAccessible(true); - return method.invoke(executor, args); - } - - private void setField(String fieldName, Object value) throws Exception { - Field field = SearchIndexExecutor.class.getDeclaredField(fieldName); - field.setAccessible(true); - field.set(executor, value); - } - - private Object getField(String fieldName) throws Exception { - Field field = SearchIndexExecutor.class.getDeclaredField(fieldName); - field.setAccessible(true); - return field.get(executor); - } - - private Object newIndexingTask(String entityType, ResultList entities, int offset) - throws Exception { - Class taskClass = - Class.forName( - "org.openmetadata.service.apps.bundles.searchIndex.SearchIndexExecutor$IndexingTask"); - var constructor = taskClass.getDeclaredConstructor(String.class, ResultList.class, int.class); - constructor.setAccessible(true); - return constructor.newInstance(entityType, entities, offset); - } - - private void invokeProcessTask(Object task) throws Exception { - Method method = SearchIndexExecutor.class.getDeclaredMethod("processTask", task.getClass()); - method.setAccessible(true); - method.invoke(executor, task); - } - - private Object invokeRecordAccessor(Object record, String accessor) throws Exception { - Method method = record.getClass().getDeclaredMethod(accessor); - method.setAccessible(true); - return method.invoke(record); - } - - private Object invokeTaskAccessor(Object task, String accessor) throws Exception { - Method method = task.getClass().getDeclaredMethod(accessor); - method.setAccessible(true); - return method.invoke(task); - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexFailureScenarioTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexFailureScenarioTest.java deleted file mode 100644 index 950fcde70c9c..000000000000 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexFailureScenarioTest.java +++ /dev/null @@ -1,522 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.lenient; -import static org.mockito.Mockito.mock; - -import es.co.elastic.clients.elasticsearch.ElasticsearchClient; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.openmetadata.schema.system.Stats; -import org.openmetadata.schema.system.StepStats; -import org.openmetadata.search.IndexMapping; -import org.openmetadata.service.jdbi3.CollectionDAO; -import org.openmetadata.service.search.SearchRepository; -import org.openmetadata.service.search.elasticsearch.ElasticSearchClient; - -/** - * Comprehensive tests for SearchIndex stats accuracy across all failure 
scenarios: - * 1. Request entity too large (413) from ES/OS - * 2. Entity read failures - * 3. Entity build failures - * 4. Partial bulk failures - * 5. Complete bulk request failures - * 6. Reader exceptions - * 7. Sink exceptions - */ -@ExtendWith(MockitoExtension.class) -class SearchIndexFailureScenarioTest { - - @Mock private SearchRepository searchRepository; - @Mock private ElasticSearchClient searchClient; - @Mock private ElasticsearchClient restHighLevelClient; - @Mock private IndexMapping indexMapping; - @Mock private CollectionDAO collectionDAO; - - @BeforeEach - void setUp() { - lenient().when(searchRepository.getSearchClient()).thenReturn(searchClient); - lenient().when(searchClient.getNewClient()).thenReturn(restHighLevelClient); - lenient().when(searchRepository.getClusterAlias()).thenReturn("default"); - lenient().when(indexMapping.getIndexName("default")).thenReturn("test_index"); - lenient().when(searchRepository.getIndexMapping(anyString())).thenReturn(indexMapping); - } - - @Nested - @DisplayName("Scenario 1: Request Entity Too Large (413)") - class RequestEntityTooLargeTests { - - @Test - @DisplayName("Should detect 413 error as payload too large") - void testDetect413Error() throws Exception { - ElasticSearchBulkSink.CustomBulkProcessor processor = - getCustomBulkProcessor(new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L)); - - assertTrue(invokeIsPayloadTooLargeError(processor, "Request entity too large")); - assertTrue(invokeIsPayloadTooLargeError(processor, "HTTP/1.1 413 Payload Too Large")); - assertTrue(invokeIsPayloadTooLargeError(processor, "content too long")); - assertTrue(invokeIsPayloadTooLargeError(processor, "Error code: 413")); - } - - @Test - @DisplayName("Should detect 413 error as backpressure trigger") - void test413TriggersBackpressure() throws Exception { - ElasticSearchBulkSink.CustomBulkProcessor processor = - getCustomBulkProcessor(new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L)); - - assertTrue(invokeShouldRetry(processor, 0, "Request entity too large")); - assertTrue(invokeShouldRetry(processor, 0, "413")); - } - - @Test - @DisplayName("BulkSink should identify 413 as retryable error") - void testBulkSinkRetries413() throws Exception { - ElasticSearchBulkSink sink = new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L); - ElasticSearchBulkSink.CustomBulkProcessor processor = getCustomBulkProcessor(sink); - - assertTrue(invokeShouldRetry(processor, 0, "Request entity too large")); - assertTrue(invokeShouldRetry(processor, 0, "Content too long")); - assertTrue(invokeShouldRetry(processor, 0, "413")); - assertFalse(invokeShouldRetry(processor, 5, "Request entity too large")); - } - } - - @Nested - @DisplayName("Scenario 2: Entity Read Failures") - class EntityReadFailureTests { - - @Test - @DisplayName("Reader failures should update reader stats") - void testReaderFailuresUpdateStats() { - SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository); - - Set entities = Set.of("table"); - lenient() - .when(searchRepository.getEntityIndexMap()) - .thenReturn(Map.of("table", mock(IndexMapping.class))); - - Stats stats = executor.initializeTotalRecords(entities); - executor.getStats().set(stats); - - executor.updateReaderStats(0, 10, 0); - - Stats updatedStats = executor.getStats().get(); - assertNotNull(updatedStats); - assertEquals(0, updatedStats.getReaderStats().getSuccessRecords()); - assertEquals(10, updatedStats.getReaderStats().getFailedRecords()); - } - - @Test - @DisplayName("Partial 
read failures should be tracked correctly")
-    void testPartialReadFailures() {
-      SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository);
-
-      Set<String> entities = Set.of("table");
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      executor.updateReaderStats(90, 10, 0);
-      executor.updateReaderStats(85, 15, 0);
-
-      Stats updatedStats = executor.getStats().get();
-      assertEquals(175, updatedStats.getReaderStats().getSuccessRecords());
-      assertEquals(25, updatedStats.getReaderStats().getFailedRecords());
-    }
-  }
-
-  @Nested
-  @DisplayName("Scenario 3: Entity Build Failures")
-  class EntityBuildFailureTests {
-
-    @Test
-    @DisplayName("Process failures should be tracked in totalFailed")
-    void testProcessFailuresTracked() throws Exception {
-      ElasticSearchBulkSink sink = new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L);
-
-      // Failures during entity processing (building search docs) are tracked in totalFailed
-      Field totalFailedField = ElasticSearchBulkSink.class.getDeclaredField("totalFailed");
-      totalFailedField.setAccessible(true);
-      AtomicLong totalFailed = (AtomicLong) totalFailedField.get(sink);
-      totalFailed.set(5);
-
-      Method updateStatsMethod = ElasticSearchBulkSink.class.getDeclaredMethod("updateStats");
-      updateStatsMethod.setAccessible(true);
-      updateStatsMethod.invoke(sink);
-
-      StepStats stats = sink.getStats();
-      assertEquals(5, stats.getFailedRecords());
-    }
-  }
-
-  @Nested
-  @DisplayName("Scenario 4: Partial Bulk Failures")
-  class PartialBulkFailureTests {
-
-    @Test
-    @DisplayName("Partial bulk failures should correctly split success and failure counts")
-    void testPartialBulkFailureStats() {
-      SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository);
-
-      Set<String> entities = Set.of("table");
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      StepStats batchStats = new StepStats().withSuccessRecords(8).withFailedRecords(2);
-      executor.updateStats("table", batchStats);
-
-      Stats finalStats = executor.getStats().get();
-      StepStats entityStats = finalStats.getEntityStats().getAdditionalProperties().get("table");
-
-      assertEquals(8, entityStats.getSuccessRecords());
-      assertEquals(2, entityStats.getFailedRecords());
-      assertEquals(8, finalStats.getJobStats().getSuccessRecords());
-      assertEquals(2, finalStats.getJobStats().getFailedRecords());
-    }
-  }
-
-  @Nested
-  @DisplayName("Scenario 5: Complete Bulk Request Failures")
-  class CompleteBulkFailureTests {
-
-    @Test
-    @DisplayName("Complete bulk failure should mark all records as failed")
-    void testCompleteBulkFailure() {
-      SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository);
-
-      Set<String> entities = Set.of("table");
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      StepStats batchStats = new StepStats().withSuccessRecords(0).withFailedRecords(100);
-      executor.updateStats("table", batchStats);
-
-      Stats finalStats = executor.getStats().get();
-      assertEquals(0, finalStats.getJobStats().getSuccessRecords());
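// Editorial aside (not part of the deleted test file): the accumulation semantics
// these scenarios rely on, and which the interleaved test below confirms, is plain
// addition into both the per-entity and job-level counters. For example:
//
//   executor.updateStats("table", new StepStats().withSuccessRecords(0).withFailedRecords(100));
//   executor.updateStats("table", new StepStats().withSuccessRecords(8).withFailedRecords(2));
//   // jobStats now reads success = 8, failed = 102
//
// so a complete bulk failure simply lands as a single all-failed batch:

-      assertEquals(100,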
finalStats.getJobStats().getFailedRecords()); - } - } - - @Nested - @DisplayName("Scenario 6: Stats Consistency") - class StatsConsistencyTests { - - @Test - @DisplayName("Total should equal success + failed after all operations") - void testTotalEqualsSuccessPlusFailed() { - SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository); - - Set entities = Set.of("table", "dashboard"); - lenient() - .when(searchRepository.getEntityIndexMap()) - .thenReturn( - Map.of("table", mock(IndexMapping.class), "dashboard", mock(IndexMapping.class))); - - Stats stats = executor.initializeTotalRecords(entities); - stats.getEntityStats().getAdditionalProperties().get("table").setTotalRecords(100); - stats.getEntityStats().getAdditionalProperties().get("dashboard").setTotalRecords(50); - stats.getJobStats().setTotalRecords(150); - stats.getReaderStats().setTotalRecords(150); - executor.getStats().set(stats); - - executor.updateStats("table", new StepStats().withSuccessRecords(90).withFailedRecords(10)); - executor.updateStats( - "dashboard", new StepStats().withSuccessRecords(45).withFailedRecords(5)); - executor.updateReaderStats(135, 15, 0); - - Stats finalStats = executor.getStats().get(); - - int jobSuccess = finalStats.getJobStats().getSuccessRecords(); - int jobFailed = finalStats.getJobStats().getFailedRecords(); - int jobTotal = finalStats.getJobStats().getTotalRecords(); - - assertEquals(135, jobSuccess); - assertEquals(15, jobFailed); - assertEquals(jobSuccess + jobFailed, jobTotal); - } - - @Test - @DisplayName("Entity stats sum should equal job stats") - void testEntityStatsSumEqualsJobStats() { - SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository); - - Set entities = Set.of("table", "dashboard", "pipeline"); - lenient() - .when(searchRepository.getEntityIndexMap()) - .thenReturn( - Map.of( - "table", mock(IndexMapping.class), - "dashboard", mock(IndexMapping.class), - "pipeline", mock(IndexMapping.class))); - - Stats stats = executor.initializeTotalRecords(entities); - executor.getStats().set(stats); - - executor.updateStats("table", new StepStats().withSuccessRecords(50).withFailedRecords(5)); - executor.updateStats( - "dashboard", new StepStats().withSuccessRecords(30).withFailedRecords(3)); - executor.updateStats("pipeline", new StepStats().withSuccessRecords(20).withFailedRecords(2)); - - Stats finalStats = executor.getStats().get(); - - int entitySuccessSum = 0; - int entityFailedSum = 0; - for (StepStats entityStats : finalStats.getEntityStats().getAdditionalProperties().values()) { - entitySuccessSum += entityStats.getSuccessRecords(); - entityFailedSum += entityStats.getFailedRecords(); - } - - assertEquals(entitySuccessSum, finalStats.getJobStats().getSuccessRecords()); - assertEquals(entityFailedSum, finalStats.getJobStats().getFailedRecords()); - } - } - - @Nested - @DisplayName("Scenario 7: Error Type Detection") - class ErrorTypeDetectionTests { - - @Test - @DisplayName("Should correctly identify all retryable error types") - void testAllRetryableErrorTypes() throws Exception { - ElasticSearchBulkSink sink = new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L); - - Field field = ElasticSearchBulkSink.class.getDeclaredField("bulkProcessor"); - field.setAccessible(true); - ElasticSearchBulkSink.CustomBulkProcessor processor = - (ElasticSearchBulkSink.CustomBulkProcessor) field.get(sink); - - Method method = - ElasticSearchBulkSink.CustomBulkProcessor.class.getDeclaredMethod( - "shouldRetry", int.class, 
Throwable.class); - method.setAccessible(true); - - String[] retryableErrors = { - "rejected_execution_exception", - "EsRejectedExecutionException", - "RemoteTransportException", - "ConnectException", - "timeout", - "Request entity too large", - "Content too long", - "413", - "circuit_breaking_exception", - "too_many_requests" - }; - - for (String errorMessage : retryableErrors) { - assertTrue( - (boolean) method.invoke(processor, 0, new RuntimeException(errorMessage)), - "Should retry for: " + errorMessage); - } - } - - @Test - @DisplayName("Should NOT retry non-retryable errors") - void testNonRetryableErrors() throws Exception { - ElasticSearchBulkSink sink = new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L); - - Field field = ElasticSearchBulkSink.class.getDeclaredField("bulkProcessor"); - field.setAccessible(true); - ElasticSearchBulkSink.CustomBulkProcessor processor = - (ElasticSearchBulkSink.CustomBulkProcessor) field.get(sink); - - Method method = - ElasticSearchBulkSink.CustomBulkProcessor.class.getDeclaredMethod( - "shouldRetry", int.class, Throwable.class); - method.setAccessible(true); - - String[] nonRetryableErrors = { - "index_not_found_exception", - "mapper_parsing_exception", - "document_parsing_exception", - "invalid_type_name_exception" - }; - - for (String errorMessage : nonRetryableErrors) { - assertFalse( - (boolean) method.invoke(processor, 0, new RuntimeException(errorMessage)), - "Should NOT retry for: " + errorMessage); - } - } - - @Test - @DisplayName("Should correctly identify backpressure errors") - void testBackpressureErrorDetection() throws Exception { - ElasticSearchBulkSink.CustomBulkProcessor processor = - getCustomBulkProcessor(new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L)); - - String[] backpressureErrors = { - "rejected_execution_exception", - "circuit_breaking_exception", - "too_many_requests", - "Request entity too large", - "Content too long", - "413" - }; - - for (String errorMessage : backpressureErrors) { - assertTrue( - invokeShouldRetry(processor, 0, errorMessage), - "Should be backpressure for: " + errorMessage); - } - } - } - - @Nested - @DisplayName("Scenario 8: Multi-Batch Stats Accumulation") - class MultiBatchStatsAccumulationTests { - - @Test - @DisplayName("Stats should accumulate correctly across multiple batches") - void testMultiBatchAccumulation() { - SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository); - - Set entities = Set.of("table"); - lenient() - .when(searchRepository.getEntityIndexMap()) - .thenReturn(Map.of("table", mock(IndexMapping.class))); - - Stats stats = executor.initializeTotalRecords(entities); - executor.getStats().set(stats); - - for (int i = 0; i < 10; i++) { - executor.updateStats("table", new StepStats().withSuccessRecords(9).withFailedRecords(1)); - executor.updateReaderStats(10, 0, 0); - executor.updateSinkTotalSubmitted(10); - } - - Stats finalStats = executor.getStats().get(); - - assertEquals(90, finalStats.getJobStats().getSuccessRecords()); - assertEquals(10, finalStats.getJobStats().getFailedRecords()); - assertEquals(100, finalStats.getReaderStats().getSuccessRecords()); - assertEquals(0, finalStats.getReaderStats().getFailedRecords()); - assertEquals(100, finalStats.getSinkStats().getTotalRecords()); - } - - @Test - @DisplayName("Interleaved success and failure batches should accumulate correctly") - void testInterleavedSuccessAndFailure() { - SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository); - - Set 
-
-  @Test
-  @DisplayName("Interleaved success and failure batches should accumulate correctly")
-  void testInterleavedSuccessAndFailure() {
-    SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository);
-
-    Set<String> entities = Set.of("table");
-    lenient()
-        .when(searchRepository.getEntityIndexMap())
-        .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-    Stats stats = executor.initializeTotalRecords(entities);
-    executor.getStats().set(stats);
-
-    executor.updateStats("table", new StepStats().withSuccessRecords(100).withFailedRecords(0));
-    executor.updateStats("table", new StepStats().withSuccessRecords(0).withFailedRecords(50));
-    executor.updateStats("table", new StepStats().withSuccessRecords(75).withFailedRecords(25));
-
-    Stats finalStats = executor.getStats().get();
-
-    assertEquals(175, finalStats.getJobStats().getSuccessRecords());
-    assertEquals(75, finalStats.getJobStats().getFailedRecords());
-  }
-}
-
-@Nested
-@DisplayName("Scenario 9: Concurrent Stats Updates")
-class ConcurrentStatsUpdateTests {
-
-  @Test
-  @DisplayName("Concurrent updates should not lose data")
-  void testConcurrentUpdates() throws Exception {
-    SearchIndexExecutor executor = new SearchIndexExecutor(collectionDAO, searchRepository);
-
-    Set<String> entities = Set.of("table");
-    lenient()
-        .when(searchRepository.getEntityIndexMap())
-        .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-    Stats stats = executor.initializeTotalRecords(entities);
-    executor.getStats().set(stats);
-
-    int threadCount = 10;
-    int updatesPerThread = 100;
-    Thread[] threads = new Thread[threadCount];
-
-    for (int i = 0; i < threadCount; i++) {
-      threads[i] =
-          new Thread(
-              () -> {
-                for (int j = 0; j < updatesPerThread; j++) {
-                  executor.updateStats(
-                      "table", new StepStats().withSuccessRecords(1).withFailedRecords(0));
-                }
-              });
-    }
-
-    for (Thread thread : threads) {
-      thread.start();
-    }
-
-    for (Thread thread : threads) {
-      thread.join();
-    }
-
-    Stats finalStats = executor.getStats().get();
-    int expectedTotal = threadCount * updatesPerThread;
-
-    assertEquals(expectedTotal, finalStats.getJobStats().getSuccessRecords());
-  }
-}
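testConcurrentUpdates only passes if updateStats is free of lost updates under contention. A self-contained sketch of one way to get that property — an atomic counter, which is assumed here and not necessarily the executor's actual mechanism:

import java.util.concurrent.atomic.AtomicLong;

// Hedged sketch: an AtomicLong makes concurrent updateStats-style increments
// lose-free without explicit locking.
final class ConcurrentCounter {
  private final AtomicLong success = new AtomicLong();

  void record(long delta) {
    success.addAndGet(delta); // atomic read-modify-write
  }

  public static void main(String[] args) throws InterruptedException {
    ConcurrentCounter counter = new ConcurrentCounter();
    Thread[] threads = new Thread[10];
    for (int i = 0; i < threads.length; i++) {
      threads[i] = new Thread(() -> {
        for (int j = 0; j < 100; j++) counter.record(1);
      });
      threads[i].start();
    }
    for (Thread t : threads) t.join();
    // 10 threads x 100 increments = 1000, the same invariant the test asserts
    System.out.println(counter.success.get());
  }
}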
-
-  private ElasticSearchBulkSink.CustomBulkProcessor getCustomBulkProcessor(
-      ElasticSearchBulkSink sink) throws Exception {
-    Field field = ElasticSearchBulkSink.class.getDeclaredField("bulkProcessor");
-    field.setAccessible(true);
-    return (ElasticSearchBulkSink.CustomBulkProcessor) field.get(sink);
-  }
-
-  private boolean invokeShouldRetry(
-      ElasticSearchBulkSink.CustomBulkProcessor processor, int attemptNumber, String errorMessage)
-      throws Exception {
-    Method method =
-        ElasticSearchBulkSink.CustomBulkProcessor.class.getDeclaredMethod(
-            "shouldRetry", int.class, Throwable.class);
-    method.setAccessible(true);
-    Throwable error =
-        errorMessage == null ? new RuntimeException() : new RuntimeException(errorMessage);
-    return (boolean) method.invoke(processor, attemptNumber, error);
-  }
-
-  private boolean invokeIsPayloadTooLargeError(
-      ElasticSearchBulkSink.CustomBulkProcessor processor, String errorMessage) throws Exception {
-    Method method =
-        ElasticSearchBulkSink.CustomBulkProcessor.class.getDeclaredMethod(
-            "isPayloadTooLargeError", Throwable.class);
-    method.setAccessible(true);
-    Throwable error =
-        errorMessage == null ? new RuntimeException() : new RuntimeException(errorMessage);
-    return (boolean) method.invoke(processor, error);
-  }
-}
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexStatsTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexStatsTest.java
deleted file mode 100644
index 6331326f6ed3..000000000000
--- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexStatsTest.java
+++ /dev/null
@@ -1,444 +0,0 @@
-package org.openmetadata.service.apps.bundles.searchIndex;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.mockito.ArgumentMatchers.anyString;
-import static org.mockito.Mockito.lenient;
-import static org.mockito.Mockito.mock;
-
-import es.co.elastic.clients.elasticsearch.ElasticsearchClient;
-import java.lang.reflect.Method;
-import java.util.Map;
-import java.util.Set;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.DisplayName;
-import org.junit.jupiter.api.Nested;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.mockito.Mock;
-import org.mockito.junit.jupiter.MockitoExtension;
-import org.openmetadata.schema.system.Stats;
-import org.openmetadata.schema.system.StepStats;
-import org.openmetadata.search.IndexMapping;
-import org.openmetadata.service.jdbi3.CollectionDAO;
-import org.openmetadata.service.search.SearchRepository;
-import org.openmetadata.service.search.elasticsearch.ElasticSearchClient;
-
-@ExtendWith(MockitoExtension.class)
-class SearchIndexStatsTest {
-
-  @Mock private SearchRepository searchRepository;
-  @Mock private ElasticSearchClient searchClient;
-  @Mock private ElasticsearchClient restHighLevelClient;
-  @Mock private IndexMapping indexMapping;
-  @Mock private CollectionDAO collectionDAO;
-
-  @BeforeEach
-  void setUp() {
-    lenient().when(searchRepository.getSearchClient()).thenReturn(searchClient);
-    lenient().when(searchClient.getNewClient()).thenReturn(restHighLevelClient);
-    lenient().when(searchRepository.getClusterAlias()).thenReturn("default");
-    lenient().when(indexMapping.getIndexName("default")).thenReturn("test_index");
-    lenient().when(searchRepository.getIndexMapping(anyString())).thenReturn(indexMapping);
-  }
-
-  @Nested
-  @DisplayName("BulkSink Stats Tests")
-  class BulkSinkStatsTests {
-
-    private ElasticSearchBulkSink elasticSearchBulkSink;
-
-    @BeforeEach
-    void setUp() {
-      elasticSearchBulkSink = new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L);
-    }
-
-    @Test
-    @DisplayName("Initial stats should be zero")
-    void testInitialStatsAreZero() {
-      StepStats stats = elasticSearchBulkSink.getStats();
-      assertNotNull(stats);
-      assertEquals(0, stats.getTotalRecords());
-      assertEquals(0, stats.getSuccessRecords());
-      assertEquals(0, stats.getFailedRecords());
-    }
-  }
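The retry-logic and backpressure tests in this deleted suite pin down which error strings count as retryable, but the production classifier itself is not shown in this diff. A hedged sketch of the substring-based classification those tests imply (illustrative only, not CustomBulkProcessor's verbatim code):

import java.util.Set;

// Illustrative only: classify a failure by scanning its message for known
// transient markers, matching the behavior the shouldRetry tests assert.
final class RetryClassifier {
  private static final Set<String> RETRYABLE_MARKERS =
      Set.of(
          "rejected_execution_exception",
          "esrejectedexecutionexception",
          "remotetransportexception",
          "connectexception",
          "circuit_breaking_exception",
          "too_many_requests",
          "timeout",
          "request entity too large",
          "content too long",
          "413");

  static boolean shouldRetry(Throwable error) {
    String message = error.getMessage();
    if (message == null) {
      return true; // unknown failures are treated as transient, per the tests
    }
    String lower = message.toLowerCase();
    return RETRYABLE_MARKERS.stream().anyMatch(lower::contains);
  }
}

Parse and mapping errors such as index_not_found_exception contain none of these markers and fall through to false, which is what the non-retryable assertions expect.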
-
-  @Nested
-  @DisplayName("Retry Logic Tests")
-  class RetryLogicTests {
-
-    @Test
-    @DisplayName("Should identify 'Request entity too large' as retryable error")
-    void testRequestEntityTooLargeIsRetryable() throws Exception {
-      ElasticSearchBulkSink sink = new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L);
-
-      ElasticSearchBulkSink.CustomBulkProcessor processor = getCustomBulkProcessor(sink);
-
-      assertTrue(invokeIsPayloadTooLargeError(processor, "Request entity too large"));
-      assertTrue(invokeIsPayloadTooLargeError(processor, "Content too long"));
-      assertTrue(invokeIsPayloadTooLargeError(processor, "HTTP 413 error"));
-    }
-  }
-
-  @Nested
-  @DisplayName("SearchIndexExecutor Stats Tests")
-  class ExecutorStatsTests {
-
-    private SearchIndexExecutor executor;
-
-    @BeforeEach
-    void setUp() {
-      executor = new SearchIndexExecutor(collectionDAO, searchRepository);
-    }
-
-    @Test
-    @DisplayName("Stats initialization should set all values correctly")
-    void testStatsInitialization() {
-      Set<String> entities = Set.of("table", "dashboard");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(
-              Map.of("table", mock(IndexMapping.class), "dashboard", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-
-      assertNotNull(stats);
-      assertNotNull(stats.getJobStats());
-      assertNotNull(stats.getReaderStats());
-      assertNotNull(stats.getSinkStats());
-      assertNotNull(stats.getEntityStats());
-
-      assertEquals(0, stats.getJobStats().getSuccessRecords());
-      assertEquals(0, stats.getJobStats().getFailedRecords());
-      assertEquals(0, stats.getReaderStats().getSuccessRecords());
-      assertEquals(0, stats.getReaderStats().getFailedRecords());
-      assertEquals(0, stats.getSinkStats().getSuccessRecords());
-      assertEquals(0, stats.getSinkStats().getFailedRecords());
-    }
-
-    @Test
-    @DisplayName("updateStats should correctly accumulate values")
-    void testUpdateStatsAccumulation() {
-      Set<String> entities = Set.of("table");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      StepStats batchStats = new StepStats().withSuccessRecords(5).withFailedRecords(2);
-      executor.updateStats("table", batchStats);
-
-      Stats updatedStats = executor.getStats().get();
-      assertNotNull(updatedStats);
-
-      StepStats entityStats = updatedStats.getEntityStats().getAdditionalProperties().get("table");
-      assertNotNull(entityStats);
-      assertEquals(5, entityStats.getSuccessRecords());
-      assertEquals(2, entityStats.getFailedRecords());
-
-      assertEquals(5, updatedStats.getJobStats().getSuccessRecords());
-      assertEquals(2, updatedStats.getJobStats().getFailedRecords());
-    }
-
-    @Test
-    @DisplayName("updateReaderStats should correctly track reader operations")
-    void testUpdateReaderStats() {
-      Set<String> entities = Set.of("table");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      executor.updateReaderStats(10, 2, 0);
-
-      Stats updatedStats = executor.getStats().get();
-      assertNotNull(updatedStats);
-      assertEquals(10, updatedStats.getReaderStats().getSuccessRecords());
-      assertEquals(2, updatedStats.getReaderStats().getFailedRecords());
-
-      executor.updateReaderStats(5, 1, 0);
-
-      updatedStats = executor.getStats().get();
-      assertEquals(15, updatedStats.getReaderStats().getSuccessRecords());
-      assertEquals(3, updatedStats.getReaderStats().getFailedRecords());
-    }
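The payload-too-large assertions above check substring detection of HTTP 413-style failures. A hedged sketch of that detection plus the common mitigation of shrinking the batch (assumed pattern, not necessarily the sink's exact code):

// Hedged sketch: detect a 413-style failure and halve the batch, a common
// mitigation for payload-too-large bulk errors.
final class PayloadGuard {
  static boolean isPayloadTooLarge(Throwable error) {
    String message = error.getMessage();
    if (message == null) {
      return false; // matches the null-message assertion in the tests
    }
    String lower = message.toLowerCase();
    return lower.contains("request entity too large")
        || lower.contains("content too long")
        || lower.contains("413");
  }

  static int nextBatchSize(int currentBatchSize, Throwable error) {
    // shrink on 413s, but never below a single document
    return isPayloadTooLarge(error) ? Math.max(1, currentBatchSize / 2) : currentBatchSize;
  }
}

Note the substring check is deliberately loose: "error code: 413" matches "413", exactly as testIsPayloadTooLargeError below expects.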
-
-    @Test
-    @DisplayName("updateSinkTotalSubmitted should correctly track submitted records")
-    void testUpdateSinkTotalSubmitted() {
-      Set<String> entities = Set.of("table");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      executor.updateSinkTotalSubmitted(10);
-
-      Stats updatedStats = executor.getStats().get();
-      assertNotNull(updatedStats);
-      assertEquals(10, updatedStats.getSinkStats().getTotalRecords());
-
-      executor.updateSinkTotalSubmitted(5);
-
-      updatedStats = executor.getStats().get();
-      assertEquals(15, updatedStats.getSinkStats().getTotalRecords());
-    }
-  }
-
-  @Nested
-  @DisplayName("Backpressure Detection Tests")
-  class BackpressureDetectionTests {
-
-    @Test
-    @DisplayName("Should detect payload-too-large errors as retryable backpressure")
-    void testPayloadTooLargeDetectedAsBackpressure() throws Exception {
-      ElasticSearchBulkSink.CustomBulkProcessor processor =
-          getCustomBulkProcessor(new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L));
-
-      assertTrue(invokeShouldRetry(processor, 0, "Request entity too large"));
-      assertTrue(invokeShouldRetry(processor, 0, "Content too long for bulk request"));
-      assertTrue(invokeShouldRetry(processor, 0, "HTTP 413: Payload too large"));
-    }
-
-    @Test
-    @DisplayName("Should detect rejected_execution_exception as backpressure error")
-    void testRejectedExecutionDetectedAsBackpressure() throws Exception {
-      ElasticSearchBulkSink.CustomBulkProcessor processor =
-          getCustomBulkProcessor(new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L));
-
-      assertTrue(invokeShouldRetry(processor, 0, "rejected_execution_exception"));
-      assertTrue(invokeShouldRetry(processor, 0, "circuit_breaking_exception"));
-      assertTrue(invokeShouldRetry(processor, 0, "too_many_requests"));
-    }
-
-    @Test
-    @DisplayName(
-        "Should detect only known backpressure errors while treating null messages as retryable")
-    void testNormalErrorsNotBackpressure() throws Exception {
-      ElasticSearchBulkSink.CustomBulkProcessor processor =
-          getCustomBulkProcessor(new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L));
-
-      assertFalse(invokeShouldRetry(processor, 0, "Index not found"));
-      assertFalse(invokeShouldRetry(processor, 0, "Document parsing exception"));
-      assertFalse(invokeShouldRetry(processor, 0, "Mapping error"));
-      assertTrue(invokeShouldRetry(processor, 0, null));
-    }
-
-    @Test
-    @DisplayName("Should identify payload too large error correctly")
-    void testIsPayloadTooLargeError() throws Exception {
-      ElasticSearchBulkSink.CustomBulkProcessor processor =
-          getCustomBulkProcessor(new ElasticSearchBulkSink(searchRepository, 10, 2, 1000000L));
-
-      assertTrue(invokeIsPayloadTooLargeError(processor, "Request entity too large"));
-      assertTrue(invokeIsPayloadTooLargeError(processor, "Content too long"));
-      assertTrue(invokeIsPayloadTooLargeError(processor, "error code: 413"));
-
-      assertFalse(invokeIsPayloadTooLargeError(processor, "rejected_execution_exception"));
-      assertFalse(invokeIsPayloadTooLargeError(processor, "timeout"));
-      assertFalse(invokeIsPayloadTooLargeError(processor, null));
-    }
-  }
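Once a failure is classified as retryable backpressure, the usual response is to wait before resending rather than hammering the cluster. A generic exponential-backoff sketch, reusing the RetryClassifier sketched earlier (assumed pattern, not the sink's verbatim loop):

// Hedged sketch: retry with exponential backoff for retryable failures;
// the (attempt, Throwable) shape mirrors shouldRetry(int, Throwable).
final class BackoffRetry {
  static <T> T withRetries(java.util.concurrent.Callable<T> call, int maxAttempts)
      throws Exception {
    for (int attempt = 0; ; attempt++) {
      try {
        return call.call();
      } catch (Exception e) {
        if (attempt >= maxAttempts - 1 || !RetryClassifier.shouldRetry(e)) {
          throw e; // out of attempts, or a non-retryable failure
        }
        long sleepMillis = Math.min(30_000L, (1L << attempt) * 500L); // 0.5s, 1s, 2s, ...
        Thread.sleep(sleepMillis);
      }
    }
  }
}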
-
-  @Nested
-  @DisplayName("Stats Consistency Tests")
-  class StatsConsistencyTests {
-
-    private SearchIndexExecutor executor;
-
-    @BeforeEach
-    void setUp() {
-      executor = new SearchIndexExecutor(collectionDAO, searchRepository);
-    }
-
-    @Test
-    @DisplayName("Job stats should match sum of entity stats")
-    void testJobStatsMatchEntityStats() {
-      Set<String> entities = Set.of("table", "dashboard", "pipeline");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(
-              Map.of(
-                  "table", mock(IndexMapping.class),
-                  "dashboard", mock(IndexMapping.class),
-                  "pipeline", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      executor.updateStats("table", new StepStats().withSuccessRecords(10).withFailedRecords(2));
-      executor.updateStats("dashboard", new StepStats().withSuccessRecords(5).withFailedRecords(1));
-      executor.updateStats("pipeline", new StepStats().withSuccessRecords(8).withFailedRecords(3));
-
-      Stats finalStats = executor.getStats().get();
-
-      int expectedSuccess = 10 + 5 + 8;
-      int expectedFailed = 2 + 1 + 3;
-
-      assertEquals(expectedSuccess, finalStats.getJobStats().getSuccessRecords());
-      assertEquals(expectedFailed, finalStats.getJobStats().getFailedRecords());
-    }
-
-    @Test
-    @DisplayName("Multiple updates to same entity should accumulate correctly")
-    void testMultipleUpdatesToSameEntity() {
-      Set<String> entities = Set.of("table");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      executor.updateStats("table", new StepStats().withSuccessRecords(10).withFailedRecords(2));
-      executor.updateStats("table", new StepStats().withSuccessRecords(5).withFailedRecords(1));
-      executor.updateStats("table", new StepStats().withSuccessRecords(3).withFailedRecords(0));
-
-      Stats finalStats = executor.getStats().get();
-      StepStats tableStats = finalStats.getEntityStats().getAdditionalProperties().get("table");
-
-      assertEquals(18, tableStats.getSuccessRecords());
-      assertEquals(3, tableStats.getFailedRecords());
-
-      assertEquals(18, finalStats.getJobStats().getSuccessRecords());
-      assertEquals(3, finalStats.getJobStats().getFailedRecords());
-    }
-
-    @Test
-    @DisplayName("Stats should handle null stats object gracefully")
-    void testNullStatsHandling() {
-      executor.updateStats("table", new StepStats().withSuccessRecords(10).withFailedRecords(2));
-      executor.updateReaderStats(5, 1, 0);
-      executor.updateSinkTotalSubmitted(10);
-    }
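The next tests pin a total-adjustment invariant: totalRecords may be bumped up to success + failed when observed work exceeds the initial estimate, but is never reduced below a higher pre-set total. As a small sketch of the assumed semantics:

// Hedged sketch of the invariant the following tests assert:
// a running total may grow to cover observed work, but never shrinks.
final class TotalAdjuster {
  static int adjustTotal(int currentTotal, int success, int failed) {
    return Math.max(currentTotal, success + failed);
  }

  public static void main(String[] args) {
    System.out.println(adjustTotal(0, 105, 3));  // 108: bumped to success + failed
    System.out.println(adjustTotal(200, 50, 2)); // 200: already higher, unchanged
  }
}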
-
-    @Test
-    @DisplayName("Entity total should be adjusted when success + failed exceeds initial total")
-    void testEntityTotalAdjustedWhenExceeded() {
-      Set<String> entities = Set.of("table");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      // Initial total is 0 (mocked). Simulate batches that exceed it.
-      executor.updateStats("table", new StepStats().withSuccessRecords(50).withFailedRecords(2));
-      executor.updateStats("table", new StepStats().withSuccessRecords(55).withFailedRecords(1));
-
-      Stats finalStats = executor.getStats().get();
-      StepStats tableStats = finalStats.getEntityStats().getAdditionalProperties().get("table");
-
-      assertEquals(105, tableStats.getSuccessRecords());
-      assertEquals(3, tableStats.getFailedRecords());
-      // Total should have been bumped to success + failed
-      assertEquals(108, tableStats.getTotalRecords());
-
-      // Job total should also reflect the adjusted entity total
-      assertEquals(108, finalStats.getJobStats().getTotalRecords());
-      assertEquals(105, finalStats.getJobStats().getSuccessRecords());
-      assertEquals(3, finalStats.getJobStats().getFailedRecords());
-    }
-
-    @Test
-    @DisplayName("Entity total should not decrease when already higher than success + failed")
-    void testEntityTotalNotDecreasedWhenAlreadyHigher() {
-      Set<String> entities = Set.of("table");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(Map.of("table", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      // Manually set a higher initial total to simulate real DB count
-      stats.getEntityStats().getAdditionalProperties().get("table").setTotalRecords(200);
-      stats.getJobStats().setTotalRecords(200);
-      stats.getReaderStats().setTotalRecords(200);
-
-      executor.updateStats("table", new StepStats().withSuccessRecords(50).withFailedRecords(2));
-
-      Stats finalStats = executor.getStats().get();
-      StepStats tableStats = finalStats.getEntityStats().getAdditionalProperties().get("table");
-
-      assertEquals(50, tableStats.getSuccessRecords());
-      assertEquals(2, tableStats.getFailedRecords());
-      // Total should remain 200 since 52 < 200
-      assertEquals(200, tableStats.getTotalRecords());
-    }
-
-    @Test
-    @DisplayName("Reader total should be adjusted when job total exceeds it")
-    void testReaderTotalAdjustedFromJobTotal() {
-      Set<String> entities = Set.of("table", "dashboard");
-
-      lenient()
-          .when(searchRepository.getEntityIndexMap())
-          .thenReturn(
-              Map.of("table", mock(IndexMapping.class), "dashboard", mock(IndexMapping.class)));
-
-      Stats stats = executor.initializeTotalRecords(entities);
-      executor.getStats().set(stats);
-
-      // Simulate processing that exceeds initial totals
-      executor.updateStats("table", new StepStats().withSuccessRecords(60).withFailedRecords(5));
-      executor.updateStats(
-          "dashboard", new StepStats().withSuccessRecords(30).withFailedRecords(2));
-
-      Stats finalStats = executor.getStats().get();
-
-      // Reader total should have been bumped to match the adjusted job total
-      int expectedTotal = 65 + 32; // table (60+5) + dashboard (30+2)
-      assertEquals(expectedTotal, finalStats.getReaderStats().getTotalRecords());
-      assertEquals(expectedTotal, finalStats.getJobStats().getTotalRecords());
-    }
-  }
-
-  private ElasticSearchBulkSink.CustomBulkProcessor getCustomBulkProcessor(
-      ElasticSearchBulkSink sink) throws Exception {
-    java.lang.reflect.Field field = ElasticSearchBulkSink.class.getDeclaredField("bulkProcessor");
-    field.setAccessible(true);
-    return (ElasticSearchBulkSink.CustomBulkProcessor) field.get(sink);
-  }
-
-  private boolean invokeShouldRetry(
-      ElasticSearchBulkSink.CustomBulkProcessor processor, int attemptNumber, String errorMessage)
-      throws Exception {
-    Method method =
-        ElasticSearchBulkSink.CustomBulkProcessor.class.getDeclaredMethod(
-            "shouldRetry", int.class, Throwable.class);
-    method.setAccessible(true);
-    Throwable error =
-        errorMessage == null ? new RuntimeException() : new RuntimeException(errorMessage);
-    return (boolean) method.invoke(processor, attemptNumber, error);
-  }
-
-  private boolean invokeIsPayloadTooLargeError(
-      ElasticSearchBulkSink.CustomBulkProcessor processor, String errorMessage) throws Exception {
-    Method method =
-        ElasticSearchBulkSink.CustomBulkProcessor.class.getDeclaredMethod(
-            "isPayloadTooLargeError", Throwable.class);
-    method.setAccessible(true);
-    Throwable error =
-        errorMessage == null ? new RuntimeException() : new RuntimeException(errorMessage);
-    return (boolean) method.invoke(processor, error);
-  }
-}
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SingleServerIndexingStrategyTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SingleServerIndexingStrategyTest.java
deleted file mode 100644
index eefbdf241250..000000000000
--- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/SingleServerIndexingStrategyTest.java
+++ /dev/null
@@ -1,75 +0,0 @@
-package org.openmetadata.service.apps.bundles.searchIndex;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertSame;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
-
-import java.util.Optional;
-import java.util.concurrent.atomic.AtomicReference;
-import org.junit.jupiter.api.Test;
-import org.mockito.MockedConstruction;
-import org.mockito.Mockito;
-import org.openmetadata.schema.system.Stats;
-import org.openmetadata.service.jdbi3.CollectionDAO;
-import org.openmetadata.service.search.SearchRepository;
-
-class SingleServerIndexingStrategyTest {
-
-  @Test
-  void delegatesExecutorOperations() {
-    CollectionDAO collectionDAO = mock(CollectionDAO.class);
-    SearchRepository searchRepository = mock(SearchRepository.class);
-    ReindexingProgressListener listener = mock(ReindexingProgressListener.class);
-    ReindexingJobContext context = mock(ReindexingJobContext.class);
-    ReindexingConfiguration config =
-        ReindexingConfiguration.builder().entities(java.util.Set.of("table")).build();
-    ExecutionResult result =
-        new ExecutionResult(ExecutionResult.Status.COMPLETED, 10, 9, 1, 100, 200, new Stats());
-    Stats stats = new Stats();
-
-    try (MockedConstruction<SearchIndexExecutor> mocked =
-        Mockito.mockConstruction(
-            SearchIndexExecutor.class,
-            (executor, mockContext) -> {
-              when(executor.addListener(listener)).thenReturn(executor);
-              when(executor.execute(config, context)).thenReturn(result);
-              when(executor.getStats()).thenReturn(new AtomicReference<>(stats));
-              when(executor.isStopped()).thenReturn(true);
-            })) {
-      SingleServerIndexingStrategy strategy =
-          new SingleServerIndexingStrategy(collectionDAO, searchRepository);
-
-      strategy.addListener(listener);
-      assertSame(result, strategy.execute(config, context));
-      assertEquals(Optional.of(stats), strategy.getStats());
-      strategy.stop();
-      assertTrue(strategy.isStopped());
-
-      SearchIndexExecutor executor = mocked.constructed().get(0);
-      verify(executor).addListener(listener);
-      verify(executor).execute(config, context);
-      verify(executor).getStats();
-      verify(executor).stop();
-      verify(executor).isStopped();
-    }
-  }
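These strategy tests lean on Mockito's mockConstruction to intercept objects that the class under test constructs internally, where a plain @Mock cannot reach. A self-contained sketch of the pattern with illustrative types (requires the mockito-inline engine):

import static org.mockito.Mockito.mockConstruction;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import org.mockito.MockedConstruction;

// Illustrative types: Engine is constructed inside Car, so only
// construction mocking can replace it.
class Engine {
  int horsepower() { return 100; }
}

class Car {
  private final Engine engine = new Engine();
  int power() { return engine.horsepower(); }
}

class CarConstructionDemo {
  static void run() {
    // every Engine constructed inside this scope is replaced by a stubbed mock
    try (MockedConstruction<Engine> mocked =
        mockConstruction(
            Engine.class, (mock, context) -> when(mock.horsepower()).thenReturn(250))) {
      Car car = new Car();
      assert car.power() == 250;
      verify(mocked.constructed().get(0)).horsepower();
    }
  }
}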
-
-  @Test
-  void getStatsHandlesMissingExecutorStats() {
-    try (MockedConstruction<SearchIndexExecutor> mocked =
-        Mockito.mockConstruction(
-            SearchIndexExecutor.class,
-            (executor, mockContext) ->
-                when(executor.getStats()).thenReturn(new AtomicReference<>()))) {
-      SingleServerIndexingStrategy strategy =
-          new SingleServerIndexingStrategy(mock(CollectionDAO.class), mock(SearchRepository.class));
-
-      assertEquals(Optional.empty(), strategy.getStats());
-      assertFalse(strategy.isStopped());
-    }
-  }
-}
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContextTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContextTest.java
index f4fe343c73b8..72291680b35c 100644
--- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContextTest.java
+++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobContextTest.java
@@ -1,7 +1,6 @@
 package org.openmetadata.service.apps.bundles.searchIndex.distributed;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.Map;
 import java.util.UUID;
@@ -23,7 +22,6 @@ void distributedContextExposesJobMetadataAndCustomSource() {
         "DistributedSearchIndex-" + jobId.toString().substring(0, 8), context.getJobName());
     assertEquals(200L, context.getStartTime());
     assertEquals(jobId, context.getAppId());
-    assertTrue(context.isDistributed());
     assertEquals("REDIS", context.getSource());
     assertEquals(job, context.getJob());
     assertEquals(Map.of("participants", 3), context.getDistributedMetadata());
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactoryTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactoryTest.java
index bed5c3bb13e8..1cdb4e022427 100644
--- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactoryTest.java
+++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobNotifierFactoryTest.java
@@ -6,7 +6,6 @@
 import java.lang.reflect.Constructor;
 import org.junit.jupiter.api.Test;
-import org.openmetadata.service.cache.CacheConfig;
 import org.openmetadata.service.jdbi3.CollectionDAO;
 
 class DistributedJobNotifierFactoryTest {
@@ -14,29 +13,11 @@ class DistributedJobNotifierFactoryTest {
   private final CollectionDAO collectionDAO = mock(CollectionDAO.class);
 
   @Test
-  void createUsesRedisNotifierWhenRedisConfigIsComplete() {
-    CacheConfig cacheConfig = new CacheConfig();
-    cacheConfig.provider = CacheConfig.Provider.redis;
-    cacheConfig.redis.url = "redis://cache:6379";
-
+  void createUsesPollingNotifier() {
     DistributedJobNotifier notifier =
-        DistributedJobNotifierFactory.create(cacheConfig, collectionDAO, "server-1");
-
-    assertInstanceOf(RedisJobNotifier.class, notifier);
-  }
-
-  @Test
-  void createFallsBackToPollingWhenRedisConfigIsMissingOrInvalid() {
-    CacheConfig missingUrlConfig = new CacheConfig();
-    missingUrlConfig.provider = CacheConfig.Provider.redis;
-
-    DistributedJobNotifier missingUrlNotifier =
-        DistributedJobNotifierFactory.create(missingUrlConfig, collectionDAO, "server-1");
-    DistributedJobNotifier nullConfigNotifier =
-        DistributedJobNotifierFactory.create(null, collectionDAO, "server-1");
+        DistributedJobNotifierFactory.create(collectionDAO, "server-1");
-
-    assertInstanceOf(PollingJobNotifier.class, missingUrlNotifier);
-    assertInstanceOf(PollingJobNotifier.class, nullConfigNotifier);
+    assertInstanceOf(PollingJobNotifier.class, notifier);
   }
 
   @Test
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipantTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipantTest.java
index 6237567558e4..0bc79ab2abde 100644
--- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipantTest.java
+++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedJobParticipantTest.java
@@ -64,7 +64,6 @@
 import org.openmetadata.service.Entity;
 import org.openmetadata.service.apps.bundles.searchIndex.BulkSink;
 import org.openmetadata.service.apps.bundles.searchIndex.IndexingFailureRecorder;
-import org.openmetadata.service.cache.CacheConfig;
 import org.openmetadata.service.jdbi3.AppRepository;
 import org.openmetadata.service.jdbi3.CollectionDAO;
 import org.openmetadata.service.search.SearchClusterMetrics;
@@ -146,9 +145,7 @@ void tearDown() throws Exception {
 
   @Test
   void testStartAndStop() {
-    participant =
-        new DistributedJobParticipant(
-            collectionDAO, searchRepository, "test-server-1", (CacheConfig) null);
+    participant = new DistributedJobParticipant(collectionDAO, searchRepository, "test-server-1");
 
     // Initially not participating
     assertFalse(participant.isParticipating());
@@ -166,9 +163,7 @@ void testStartAndStop() {
 
   @Test
   void testMultipleStartCallsAreIdempotent() {
-    participant =
-        new DistributedJobParticipant(
-            collectionDAO, searchRepository, "test-server-1", (CacheConfig) null);
+    participant = new DistributedJobParticipant(collectionDAO, searchRepository, "test-server-1");
 
     participant.start();
     participant.start(); // Second call should be no-op
@@ -182,9 +177,7 @@ void testMultipleStartCallsAreIdempotent() {
 
   @Test
   void testMultipleStopCallsAreIdempotent() {
-    participant =
-        new DistributedJobParticipant(
-            collectionDAO, searchRepository, "test-server-1", (CacheConfig) null);
+    participant = new DistributedJobParticipant(collectionDAO, searchRepository, "test-server-1");
 
     participant.start();
     participant.stop();
@@ -207,9 +200,7 @@ void testDoesNotJoinWhenNoRunningJobs() throws Exception {
             DistributedSearchIndexCoordinator.class,
             (mock, context) -> when(mock.getRecentJobs(any(), anyInt())).thenReturn(List.of()))) {
-      participant =
-          new DistributedJobParticipant(
-              collectionDAO, searchRepository, "test-server-1", (CacheConfig) null);
+      participant = new DistributedJobParticipant(collectionDAO, searchRepository, "test-server-1");
       participant.start();
 
       // Wait a bit for the scheduler to run at least once
@@ -237,6 +228,7 @@ void testJoinsActiveJobWithPendingPartitions() {
             .id(jobId)
             .status(IndexJobStatus.RUNNING)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .totalRecords(100)
             .build();
 
@@ -245,6 +237,7 @@
             .id(jobId)
             .status(IndexJobStatus.COMPLETED)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .totalRecords(100)
             .processedRecords(100)
             .successRecords(100)
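The stagedIndexMapping now threaded through these job builders is the contract between coordinator and participants: each entity type maps to the staged index that writers must target, e.g. "table" -> "table_staged". A hedged sketch of how a participant might resolve and guard on it (names are illustrative, not the production class):

import java.util.Map;
import java.util.Optional;

// Hedged sketch: resolve the staged index per entity type from the job's
// stagedIndexMapping and refuse to write when the mapping is absent.
final class StagedIndexResolver {
  private final Map<String, String> stagedIndexMapping; // e.g. "table" -> "table_staged"

  StagedIndexResolver(Map<String, String> stagedIndexMapping) {
    this.stagedIndexMapping = stagedIndexMapping == null ? Map.of() : stagedIndexMapping;
  }

  Optional<String> targetIndex(String entityType) {
    return Optional.ofNullable(stagedIndexMapping.get(entityType));
  }

  boolean canProcess(String entityType) {
    // mirrors the new guard: no staged index, no partition processing
    return targetIndex(entityType).isPresent();
  }
}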
@@ -329,6 +322,7 @@ void testDoesNotRejoinSameRunningJob() {
             .id(jobId)
             .status(IndexJobStatus.RUNNING)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .totalRecords(100)
             .build();
 
@@ -401,6 +395,7 @@ void testClearsJobIdWhenJobCompletes() {
             .id(jobId)
             .status(IndexJobStatus.RUNNING)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .totalRecords(100)
             .build();
 
@@ -409,6 +404,7 @@
             .id(jobId)
             .status(IndexJobStatus.COMPLETED)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .totalRecords(100)
             .processedRecords(100)
             .successRecords(100)
@@ -498,6 +494,7 @@ void testAttemptsToClaimPartitions() {
             .id(jobId)
             .status(IndexJobStatus.RUNNING)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .totalRecords(100)
             .build();
 
@@ -506,6 +503,7 @@
             .id(jobId)
             .status(IndexJobStatus.COMPLETED)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .totalRecords(100)
             .processedRecords(100)
             .successRecords(100)
@@ -668,6 +666,7 @@ void testJoinAndProcessJobTracksPollingNotifierParticipation() throws Exception
             .id(jobId)
             .status(IndexJobStatus.RUNNING)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .build();
 
     SearchIndexPartition pendingPartition =
@@ -847,7 +846,6 @@ void testRecoveredParticipationRestoresRunRecordAndFinalizesStats() throws Excep
     config.setBatchSize(50);
     config.setMaxConcurrentRequests(8);
     config.setPayLoadSize(4096L);
-    config.setRecreateIndex(true);
 
     SearchIndexJob runningJob =
         SearchIndexJob.builder()
@@ -882,7 +880,7 @@ void testRecoveredParticipationRestoresRunRecordAndFinalizesStats() throws Excep
     CollectionDAO.AppExtensionTimeSeries appExtensionDao =
         mock(CollectionDAO.AppExtensionTimeSeries.class);
     AtomicReference callbackRef = new AtomicReference<>();
-    AtomicReference recreateContextRef = new AtomicReference<>();
+    AtomicReference stagedIndexContextRef = new AtomicReference<>();
     SuccessContext successContext = new SuccessContext().withAdditionalProperty("recovered", "yes");
 
     when(appRepository.getDao()).thenReturn(appDao);
@@ -928,7 +926,7 @@ void testRecoveredParticipationRestoresRunRecordAndFinalizesStats() throws Excep
         mockConstruction(
             PartitionWorker.class,
             (mock, context) -> {
-              recreateContextRef.set(context.arguments().get(3));
+              stagedIndexContextRef.set(context.arguments().get(3));
               when(mock.processPartition(partition))
                   .thenReturn(new PartitionWorker.PartitionResult(4, 1, false, 2, 3));
             });
@@ -947,7 +945,7 @@ void testRecoveredParticipationRestoresRunRecordAndFinalizesStats() throws Excep
         "processJobPartitions", new Class[] {SearchIndexJob.class}, runningJob);
 
     assertNotNull(callbackRef.get());
-    assertNotNull(recreateContextRef.get());
+    assertNotNull(stagedIndexContextRef.get());
     callbackRef
         .get()
         .onFailure(
@@ -1005,6 +1003,38 @@
     }
   }
 
+  @Test
+  void testProcessJobPartitionsSkipsJobWithoutStagedIndexMapping() throws Exception {
+    UUID jobId = UUID.randomUUID();
+    EventPublisherJob config = new EventPublisherJob();
+    config.setEntities(Set.of("table"));
+
+    SearchIndexJob runningJob =
+        SearchIndexJob.builder()
+            .id(jobId)
+            .status(IndexJobStatus.RUNNING)
+            .jobConfiguration(config)
+            .build();
+
+    participant =
+        new DistributedJobParticipant(
+            collectionDAO, searchRepository, "test-server-1", testNotifier);
+    setParticipantRunning(true);
+
+    try (MockedConstruction<IndexingFailureRecorder> failureConstruction =
+            mockConstruction(IndexingFailureRecorder.class);
+        MockedConstruction<PartitionWorker> workerConstruction =
+            mockConstruction(PartitionWorker.class)) {
+
+      invokeParticipantMethod(
+          "processJobPartitions", new Class[] {SearchIndexJob.class}, runningJob);
+
+      verify(searchRepository, never()).createBulkSink(anyInt(), anyInt(), anyLong());
+      assertTrue(failureConstruction.constructed().isEmpty());
+      assertTrue(workerConstruction.constructed().isEmpty());
+    }
+  }
+
   @Test
   void testProcessJobPartitionsUsesDefaultBulkSinkSettingsAndHandlesInterruptedWait()
       throws Exception {
@@ -1017,6 +1047,7 @@
             .id(jobId)
             .status(IndexJobStatus.RUNNING)
             .jobConfiguration(config)
+            .stagedIndexMapping(Map.of("table", "table_staged"))
             .build();
 
     SearchIndexPartition pendingPartition =
         SearchIndexPartition.builder()
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutorTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutorTest.java
index e2252ab850fd..e3526f2da4fa 100644
--- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutorTest.java
+++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/DistributedSearchIndexExecutorTest.java
@@ -372,7 +372,7 @@ void getFreshStatsAndUpdateStagedIndexMappingUseCurrentJob() throws Exception {
   @Test
   void initializeEntityTrackerCountsPartitionsAndWiresPromotionCallback() throws Exception {
     UUID jobId = UUID.randomUUID();
-    ReindexContext recreateContext = mock(ReindexContext.class);
+    ReindexContext stagedIndexContext = mock(ReindexContext.class);
     SearchRepository searchRepository = mock(SearchRepository.class);
     RecreateIndexHandler recreateHandler = mock(RecreateIndexHandler.class);
 
@@ -382,50 +382,48 @@
             partition(jobId, "table", PartitionStatus.PENDING),
             partition(jobId, "table", PartitionStatus.COMPLETED),
             partition(jobId, "dashboard", PartitionStatus.FAILED)));
-    when(recreateContext.getEntities()).thenReturn(Set.of("table", "dashboard"));
+    when(stagedIndexContext.getEntities()).thenReturn(Set.of("table", "dashboard"));
 
     setField("entityTracker", new EntityCompletionTracker(jobId));
-    setField("recreateContext", recreateContext);
+    setField("stagedIndexContext", stagedIndexContext);
 
     try (MockedStatic<Entity> entityMock = mockStatic(Entity.class)) {
      entityMock.when(Entity::getSearchRepository).thenReturn(searchRepository);
      when(searchRepository.createReindexHandler()).thenReturn(recreateHandler);
-      invokePrivate(
-          "initializeEntityTracker", new Class[] {UUID.class, boolean.class}, jobId, true);
+      invokePrivate("initializeEntityTracker", new Class[] {UUID.class}, jobId);
     }
 
     EntityCompletionTracker tracker = executor.getEntityTracker();
     assertNotNull(tracker);
     assertEquals(2, tracker.getStatus("table").totalPartitions());
     assertEquals(1, tracker.getStatus("dashboard").totalPartitions());
-    assertSame(recreateHandler, getField("recreateIndexHandler"));
+    assertSame(recreateHandler, getField("indexPromotionHandler"));
   }
 
   @Test
   void initializeEntityTrackerCallbackPromotesEntityWhenTrackingCompletes() throws Exception {
     UUID jobId = UUID.randomUUID();
-    ReindexContext recreateContext = mock(ReindexContext.class);
+    ReindexContext stagedIndexContext = mock(ReindexContext.class);
     DefaultRecreateHandler recreateHandler = mock(DefaultRecreateHandler.class);
     SearchRepository searchRepository = mock(SearchRepository.class);
 
     when(coordinator.getPartitions(jobId, null))
         .thenReturn(List.of(partition(jobId, "table", PartitionStatus.PENDING)));
-    when(recreateContext.getEntities()).thenReturn(Set.of("table"));
-    when(recreateContext.getStagedIndex("table")).thenReturn(Optional.of("staged_table"));
-    when(recreateContext.getCanonicalIndex("table")).thenReturn(Optional.of("table_search"));
-    when(recreateContext.getOriginalIndex("table")).thenReturn(Optional.of("table_current"));
-    when(recreateContext.getCanonicalAlias("table")).thenReturn(Optional.of("table_alias"));
-    when(recreateContext.getExistingAliases("table")).thenReturn(Set.of("table_existing"));
-    when(recreateContext.getParentAliases("table")).thenReturn(List.of("table_parent"));
+    when(stagedIndexContext.getEntities()).thenReturn(Set.of("table"));
+    when(stagedIndexContext.getStagedIndex("table")).thenReturn(Optional.of("staged_table"));
+    when(stagedIndexContext.getCanonicalIndex("table")).thenReturn(Optional.of("table_search"));
+    when(stagedIndexContext.getOriginalIndex("table")).thenReturn(Optional.of("table_current"));
+    when(stagedIndexContext.getCanonicalAlias("table")).thenReturn(Optional.of("table_alias"));
+    when(stagedIndexContext.getExistingAliases("table")).thenReturn(Set.of("table_existing"));
+    when(stagedIndexContext.getParentAliases("table")).thenReturn(List.of("table_parent"));
 
     setField("entityTracker", new EntityCompletionTracker(jobId));
-    setField("recreateContext", recreateContext);
+    setField("stagedIndexContext", stagedIndexContext);
 
     try (MockedStatic<Entity> entityMock = mockStatic(Entity.class)) {
      entityMock.when(Entity::getSearchRepository).thenReturn(searchRepository);
      when(searchRepository.createReindexHandler()).thenReturn(recreateHandler);
-      invokePrivate(
-          "initializeEntityTracker", new Class[] {UUID.class, boolean.class}, jobId, true);
+      invokePrivate("initializeEntityTracker", new Class[] {UUID.class}, jobId);
     }
 
     executor.getEntityTracker().recordPartitionComplete("table", false);
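These tracker tests exercise the core coordination idea: count completed partitions per entity and fire a promotion callback exactly once, when the last partition of that entity finishes. A hedged sketch of that mechanism, with assumed types standing in for EntityCompletionTracker's real internals:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;

// Hedged sketch: per-entity countdown that triggers a promotion callback
// once every partition of that entity has completed.
final class CompletionTracker {
  private final Map<String, AtomicInteger> remaining = new ConcurrentHashMap<>();
  private final Consumer<String> onEntityComplete; // e.g. entity -> promote staged index

  CompletionTracker(Map<String, Integer> partitionsPerEntity, Consumer<String> onEntityComplete) {
    partitionsPerEntity.forEach(
        (entity, count) -> remaining.put(entity, new AtomicInteger(count)));
    this.onEntityComplete = onEntityComplete;
  }

  void recordPartitionComplete(String entityType) {
    AtomicInteger left = remaining.get(entityType);
    if (left != null && left.decrementAndGet() == 0) {
      onEntityComplete.accept(entityType); // last partition done: promote
    }
  }
}

With "table" counted at two partitions and "dashboard" at one, as in the test above, the callback fires only after the second table partition completes.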
@@ -435,18 +433,18 @@
 
   @Test
   void promoteEntityIndexUsesDefaultAndGenericHandlers() throws Exception {
-    ReindexContext recreateContext = mock(ReindexContext.class);
+    ReindexContext stagedIndexContext = mock(ReindexContext.class);
     DefaultRecreateHandler defaultHandler = mock(DefaultRecreateHandler.class);
     RecreateIndexHandler genericHandler = mock(RecreateIndexHandler.class);
 
-    when(recreateContext.getStagedIndex("table")).thenReturn(Optional.of("staged_table"));
-    when(recreateContext.getCanonicalIndex("table")).thenReturn(Optional.of("table_search"));
-    when(recreateContext.getOriginalIndex("table")).thenReturn(Optional.of("table_current"));
-    when(recreateContext.getCanonicalAlias("table")).thenReturn(Optional.of("table_alias"));
-    when(recreateContext.getExistingAliases("table")).thenReturn(Set.of("table_existing"));
-    when(recreateContext.getParentAliases("table")).thenReturn(List.of("table_parent"));
+    when(stagedIndexContext.getStagedIndex("table")).thenReturn(Optional.of("staged_table"));
+    when(stagedIndexContext.getCanonicalIndex("table")).thenReturn(Optional.of("table_search"));
+    when(stagedIndexContext.getOriginalIndex("table")).thenReturn(Optional.of("table_current"));
+    when(stagedIndexContext.getCanonicalAlias("table")).thenReturn(Optional.of("table_alias"));
+    when(stagedIndexContext.getExistingAliases("table")).thenReturn(Set.of("table_existing"));
+    when(stagedIndexContext.getParentAliases("table")).thenReturn(List.of("table_parent"));
 
-    setField("recreateContext", recreateContext);
-    setField("recreateIndexHandler", defaultHandler);
+    setField("stagedIndexContext", stagedIndexContext);
+    setField("indexPromotionHandler", defaultHandler);
 
     invokePrivate(
         "promoteEntityIndex", new Class[] {String.class, boolean.class}, "table", false);
@@ -458,12 +456,12 @@
     assertEquals("staged_table", contextCaptor.getValue().getStagedIndex());
     assertTrue(contextCaptor.getValue().getParentAliases().contains("table_parent"));
 
-    setField("recreateIndexHandler", genericHandler);
+    setField("indexPromotionHandler", genericHandler);
     invokePrivate(
         "promoteEntityIndex", new Class[] {String.class, boolean.class}, "table", true);
     verify(genericHandler).finalizeReindex(any(EntityReindexContext.class), eq(true));
 
-    when(recreateContext.getStagedIndex("topic")).thenReturn(Optional.empty());
+    when(stagedIndexContext.getStagedIndex("topic")).thenReturn(Optional.empty());
     invokePrivate(
         "promoteEntityIndex", new Class[] {String.class, boolean.class}, "topic", true);
     verifyNoMoreInteractions(genericHandler);
@@ -474,20 +472,20 @@ void promoteEntityIndexReturnsWithoutContextAndSwallowsHandlerFailures() throws
     invokePrivate(
         "promoteEntityIndex", new Class[] {String.class, boolean.class}, "table", true);
 
-    ReindexContext recreateContext = mock(ReindexContext.class);
+    ReindexContext stagedIndexContext = mock(ReindexContext.class);
     DefaultRecreateHandler defaultHandler = mock(DefaultRecreateHandler.class);
-    when(recreateContext.getStagedIndex("table")).thenReturn(Optional.of("staged_table"));
-    when(recreateContext.getCanonicalIndex("table")).thenReturn(Optional.of("table_search"));
-    when(recreateContext.getOriginalIndex("table")).thenReturn(Optional.of("table_current"));
-    when(recreateContext.getCanonicalAlias("table")).thenReturn(Optional.of("table_alias"));
-    when(recreateContext.getExistingAliases("table")).thenReturn(Set.of());
-    when(recreateContext.getParentAliases("table")).thenReturn(List.of());
+    when(stagedIndexContext.getStagedIndex("table")).thenReturn(Optional.of("staged_table"));
+    when(stagedIndexContext.getCanonicalIndex("table")).thenReturn(Optional.of("table_search"));
+    when(stagedIndexContext.getOriginalIndex("table")).thenReturn(Optional.of("table_current"));
+    when(stagedIndexContext.getCanonicalAlias("table")).thenReturn(Optional.of("table_alias"));
+    when(stagedIndexContext.getExistingAliases("table")).thenReturn(Set.of());
+    when(stagedIndexContext.getParentAliases("table")).thenReturn(List.of());
     doThrow(new IllegalStateException("promotion failed"))
         .when(defaultHandler)
         .promoteEntityIndex(any(EntityReindexContext.class), eq(true));
 
-    setField("recreateContext", recreateContext);
-    setField("recreateIndexHandler", defaultHandler);
+    setField("stagedIndexContext", stagedIndexContext);
+    setField("indexPromotionHandler", defaultHandler);
 
     invokePrivate(
         "promoteEntityIndex", new Class[] {String.class, boolean.class}, "table", true);
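The promoteEntityIndex tests above revolve around the staged-index-with-alias-promotion flow this PR adopts: build into a staged index, atomically repoint the serving alias, then drop the old generation. A hedged sketch of that flow under an assumed SearchOps interface (standing in for whatever client the real handler uses):

import java.util.Set;

// Hedged sketch of "staged index with alias promotion". SearchOps is a
// stand-in abstraction, not the production handler's API.
interface SearchOps {
  void atomicallyMoveAlias(String alias, String fromIndex, String toIndex);
  void addAliases(String index, Set<String> aliases);
  void deleteIndex(String index);
}

final class AliasPromoter {
  private final SearchOps ops;

  AliasPromoter(SearchOps ops) {
    this.ops = ops;
  }

  void promote(String alias, String originalIndex, String stagedIndex, Set<String> extraAliases) {
    // one atomic swap means readers never observe an empty or half-built index
    ops.atomicallyMoveAlias(alias, originalIndex, stagedIndex);
    ops.addAliases(stagedIndex, extraAliases); // re-attach parent/extra aliases
    ops.deleteIndex(originalIndex); // old generation is no longer served
  }
}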
@@ -573,8 +571,7 @@ void executeDoesNotNotifyPeersWhenStartedJobIsNotRunning() throws Exception {
             () ->
                 executor.execute(
                     bulkSink,
-                    null,
-                    false,
+                    stagedContext("table"),
                     ReindexingConfiguration.builder().entities(Set.of("table")).build()));
 
     assertTrue(exception.getMessage().contains(IndexJobStatus.FAILED.name()));
@@ -619,8 +616,7 @@ void executeDoesNotRebroadcastStartWhenJoiningRunningJob() throws Exception {
     DistributedSearchIndexExecutor.ExecutionResult result =
         executor.execute(
             bulkSink,
-            null,
-            false,
+            stagedContext("table"),
             ReindexingConfiguration.builder()
                 .entities(Set.of("table"))
                 .consumerThreads(1)
@@ -640,7 +636,7 @@ void executeRequiresCurrentJobBeforeRunning() {
             IllegalStateException.class,
             () ->
                 executor.execute(
-                    mock(BulkSink.class), null, false, ReindexingConfiguration.builder().build()));
+                    mock(BulkSink.class), null, ReindexingConfiguration.builder().build()));
 
     assertTrue(exception.getMessage().contains("No job to execute"));
   }
@@ -705,8 +701,7 @@ void executeRunsMinimalHappyPathAndCleansUpResources() throws Exception {
     DistributedSearchIndexExecutor.ExecutionResult result =
         executor.execute(
             bulkSink,
-            null,
-            false,
+            stagedContext("table"),
             ReindexingConfiguration.builder()
                 .entities(Set.of("table"))
                 .consumerThreads(1)
@@ -754,7 +749,9 @@ void executeRecordsFailuresFromCleanupWithoutAbortingResult() throws Exception {
         runningJob.withStatus(IndexJobStatus.FAILED).withFailedRecords(2).withCompletedAt(400L);
     BulkSink bulkSink = mock(BulkSink.class);
     ReindexingProgressListener listener = mock(ReindexingProgressListener.class);
-    ReindexContext recreateContext = mock(ReindexContext.class);
+    ReindexContext stagedIndexContext = mock(ReindexContext.class);
+    SearchRepository searchRepository = mock(SearchRepository.class);
+    RecreateIndexHandler indexPromotionHandler = mock(RecreateIndexHandler.class);
     ReindexingMetrics metrics = mock(ReindexingMetrics.class);
     Timer.Sample timerSample = mock(Timer.Sample.class);
     AtomicReference callbackRef = new AtomicReference<>();
@@ -799,16 +796,18 @@
                 IndexingFailureRecorder.class,
                 (mock, context) ->
                     doThrow(new IllegalStateException("close failed")).when(mock).close());
+        MockedStatic<Entity> entityMock = mockStatic(Entity.class);
         MockedStatic<ReindexingMetrics> metricsMock = mockStatic(ReindexingMetrics.class)) {
+      entityMock.when(Entity::getSearchRepository).thenReturn(searchRepository);
+      when(searchRepository.createReindexHandler()).thenReturn(indexPromotionHandler);
       metricsMock.when(ReindexingMetrics::getInstance).thenReturn(metrics);
       when(metrics.startJobTimer()).thenReturn(timerSample);
 
       DistributedSearchIndexExecutor.ExecutionResult result =
           executor.execute(
               bulkSink,
-              recreateContext,
-              false,
+              stagedIndexContext,
               ReindexingConfiguration.builder()
                   .entities(Set.of("table"))
                   .consumerThreads(1)
@@ -886,8 +885,7 @@ void executeHandlesInterruptedAwaitAndStoppedMetricsCleanup() throws Exception {
     DistributedSearchIndexExecutor.ExecutionResult result =
         executor.execute(
             bulkSink,
-            null,
-            false,
+            stagedContext("table"),
            ReindexingConfiguration.builder()
                 .entities(Set.of("table"))
                 .consumerThreads(1)
@@ -956,8 +954,7 @@ void executeForceCompletesStoppingJobsDuringCleanupAndRecordsStoppedMetrics() th
     DistributedSearchIndexExecutor.ExecutionResult result =
         executor.execute(
             bulkSink,
-            null,
-            false,
+            stagedContext("table"),
             ReindexingConfiguration.builder()
                 .entities(Set.of("table"))
                 .consumerThreads(1)
@@ -1001,7 +998,6 @@ void runWorkerLoopAggregatesPartitionResults() throws Exception {
               BulkSink.class,
               int.class,
               ReindexContext.class,
-              boolean.class,
              AtomicLong.class,
               AtomicLong.class,
               ReindexingConfiguration.class
@@ -1009,8 +1005,7 @@
             0,
             bulkSink,
             100,
-            null,
-            false,
+            stagedContext("table"),
             totalSuccess,
             totalFailed,
             ReindexingConfiguration.builder().build());
@@ -1057,7 +1052,6 @@ void runWorkerLoopRetriesClaimingAndBreaksOnInterruptedSleep() throws Exception
               BulkSink.class,
               int.class,
               ReindexContext.class,
-              boolean.class,
               AtomicLong.class,
               AtomicLong.class,
               ReindexingConfiguration.class
@@ -1065,8 +1059,7 @@
             2,
             mock(BulkSink.class),
             100,
-            null,
-            false,
+            stagedContext("table"),
             new AtomicLong(),
             new AtomicLong(),
             ReindexingConfiguration.builder().build());
@@ -1111,7 +1104,6 @@ void runWorkerLoopSwallowsPartitionProcessingErrorsAndCleansUpState() throws Exc
               BulkSink.class,
               int.class,
               ReindexContext.class,
-              boolean.class,
               AtomicLong.class,
               AtomicLong.class,
               ReindexingConfiguration.class
@@ -1119,8 +1111,7 @@
             1,
             mock(BulkSink.class),
             100,
-            null,
-            false,
+            stagedContext("table"),
             new AtomicLong(),
             new AtomicLong(),
             ReindexingConfiguration.builder().build());
@@ -1270,6 +1261,19 @@ private SearchIndexPartition partition(UUID jobId, String entityType, PartitionS
         .build();
   }
 
+  private ReindexContext stagedContext(String entityType) {
+    ReindexContext context = new ReindexContext();
+    context.add(
+        entityType,
+        entityType + "_index",
+        entityType + "_original",
+        entityType + "_staged",
+        Set.of(),
+        entityType,
+        List.of());
+    return context;
+  }
+
   private Object invokePrivate(String methodName, Class[] parameterTypes, Object... args)
       throws Exception {
     Method method =
diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorkerTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorkerTest.java
index d554f33fc4d6..537dfe8b0ee1 100644
--- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorkerTest.java
+++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PartitionWorkerTest.java
@@ -64,6 +64,7 @@
 import org.openmetadata.service.apps.bundles.searchIndex.BulkSink;
 import org.openmetadata.service.apps.bundles.searchIndex.IndexingFailureRecorder;
 import org.openmetadata.service.apps.bundles.searchIndex.ReindexingConfiguration;
+import org.openmetadata.service.apps.bundles.searchIndex.SearchIndexEntityTypes;
 import org.openmetadata.service.apps.bundles.searchIndex.stats.StageCounter;
 import org.openmetadata.service.apps.bundles.searchIndex.stats.StageStatsTracker;
 import org.openmetadata.service.exception.SearchIndexException;
@@ -83,7 +84,7 @@ class PartitionWorkerTest {
   @Mock private CollectionDAO collectionDAO;
   @Mock private CollectionDAO.SearchIndexServerStatsDAO searchIndexServerStatsDAO;
   @Mock private BulkSink bulkSink;
-  @Mock private ReindexContext recreateContext;
+  @Mock private ReindexContext stagedIndexContext;
   @Mock private ReindexingConfiguration reindexingConfiguration;
 
   private PartitionWorker worker;
@@ -93,7 +94,10 @@
   @BeforeEach
   void setUp() {
+    when(stagedIndexContext.getStagedIndex(any()))
+        .thenAnswer(
+            invocation -> Optional.of(invocation.getArgument(0, String.class) + "_staging"));
-    worker = new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, recreateContext, false);
+    worker = new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext);
   }
 
   @Test
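The new setUp() above uses answer-based stubbing: the mock derives its return value from the argument instead of returning one fixed value, so every entity type resolves to "<type>_staging". A self-contained sketch of the pattern with an illustrative interface:

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Optional;

// Hedged sketch of argument-derived stubbing with thenAnswer; IndexLookup is
// an illustrative stand-in for ReindexContext.
interface IndexLookup {
  Optional<String> getStagedIndex(String entityType);
}

class AnswerStubDemo {
  public static void main(String[] args) {
    IndexLookup lookup = mock(IndexLookup.class);
    when(lookup.getStagedIndex(any()))
        .thenAnswer(
            invocation -> Optional.of(invocation.getArgument(0, String.class) + "_staging"));

    System.out.println(lookup.getStagedIndex("table"));     // Optional[table_staging]
    System.out.println(lookup.getStagedIndex("dashboard")); // Optional[dashboard_staging]
  }
}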
@@ -144,14 +148,14 @@ void testPartitionResult_WasStopped() {
 
   @Test
   void testWorkerWithDifferentConfigurations() {
     PartitionWorker workerWithRecreate =
-        new PartitionWorker(coordinator, bulkSink, 200, recreateContext, true);
+        new PartitionWorker(coordinator, bulkSink, 200, stagedIndexContext);
 
     assertFalse(workerWithRecreate.isStopped());
 
-    PartitionWorker workerWithoutContext =
-        new PartitionWorker(coordinator, bulkSink, 50, null, false);
+    PartitionWorker workerWithSmallBatch =
+        new PartitionWorker(coordinator, bulkSink, 50, stagedIndexContext);
 
-    assertFalse(workerWithoutContext.isStopped());
+    assertFalse(workerWithSmallBatch.isStopped());
   }
 
   @Test
@@ -397,17 +401,17 @@ void initializeKeysetCursorHitsPrecomputedCacheAndSkipsOffsetFallback() throws E
   }
 
   @Test
-  void createContextDataIncludesRecreateContextTargetIndexAndStatsTracker() throws Exception {
-    PartitionWorker recreateWorker =
-        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, recreateContext, true);
+  void createContextDataIncludesStagedContextTargetIndexAndStatsTracker() throws Exception {
+    PartitionWorker stagedWorker =
+        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext);
     StageStatsTracker statsTracker = mock(StageStatsTracker.class);
-    when(recreateContext.getStagedIndex("table")).thenReturn(Optional.of("table_staging"));
+    when(stagedIndexContext.getStagedIndex("table")).thenReturn(Optional.of("table_staging"));
 
     @SuppressWarnings("unchecked")
     Map<String, Object> contextData =
         (Map<String, Object>)
             invokePrivate(
-                recreateWorker,
+                stagedWorker,
                 "createContextData",
                 new Class[] {String.class, StageStatsTracker.class},
                 "table",
@@ -416,16 +420,37 @@
     assertEquals("table", contextData.get("entityType"));
     assertEquals(Boolean.TRUE, contextData.get("recreateIndex"));
     assertEquals(statsTracker, contextData.get(BulkSink.STATS_TRACKER_CONTEXT_KEY));
-    assertEquals(recreateContext, contextData.get("recreateContext"));
+    assertEquals(stagedIndexContext, contextData.get("recreateContext"));
     assertEquals("table_staging", contextData.get("targetIndex"));
   }
 
+  @Test
+  void createContextDataNormalizesLegacyEntityAliasesBeforeStagedIndexLookup() throws Exception {
+    when(stagedIndexContext.getStagedIndex(Entity.QUERY_COST_RECORD))
+        .thenReturn(Optional.of("query_cost_record_staging"));
+
+    @SuppressWarnings("unchecked")
+    Map<String, Object> contextData =
+        (Map<String, Object>)
+            invokePrivate(
+                worker,
+                "createContextData",
+                new Class[] {String.class, StageStatsTracker.class},
+                SearchIndexEntityTypes.QUERY_COST_RESULT,
+                null);
+
+    assertEquals(Entity.QUERY_COST_RECORD, contextData.get("entityType"));
+    assertEquals("query_cost_record_staging", contextData.get("targetIndex"));
+    verify(stagedIndexContext).getStagedIndex(Entity.QUERY_COST_RECORD);
+    verify(stagedIndexContext, never()).getStagedIndex(SearchIndexEntityTypes.QUERY_COST_RESULT);
+  }
+
   @Test
   void processBatchWritesEntitiesAndRecordsReaderFailures() throws Exception {
     IndexingFailureRecorder failureRecorder = mock(IndexingFailureRecorder.class);
     StageStatsTracker statsTracker = mock(StageStatsTracker.class);
     PartitionWorker batchWorker =
-        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, null, false, failureRecorder);
+        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext, failureRecorder);
 
     EntityInterface entityOne = mock(EntityInterface.class);
     EntityInterface entityTwo = mock(EntityInterface.class);
@@ -464,7 +489,7 @@
     verify(bulkSink).write(entitiesCaptor.capture(), contextCaptor.capture());
     assertEquals(List.of(entityOne, entityTwo), entitiesCaptor.getValue());
     assertEquals("table", contextCaptor.getValue().get("entityType"));
-    assertEquals(Boolean.FALSE, contextCaptor.getValue().get("recreateIndex"));
+    assertEquals(Boolean.TRUE, contextCaptor.getValue().get("recreateIndex"));
     assertEquals(statsTracker, contextCaptor.getValue().get(BulkSink.STATS_TRACKER_CONTEXT_KEY));
   }
 
@@ -473,7 +498,7 @@ void processBatchExtractsIdFromEntityInterfaceForReaderFailure() throws Exceptio
     IndexingFailureRecorder failureRecorder = mock(IndexingFailureRecorder.class);
     StageStatsTracker statsTracker = mock(StageStatsTracker.class);
     PartitionWorker batchWorker =
-        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, null, false, failureRecorder);
+        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext, failureRecorder);
 
     UUID errorEntityId = UUID.randomUUID();
     EntityInterface failingEntity = mock(EntityInterface.class);
@@ -504,7 +529,7 @@ void processBatchSkipsReaderFailureWhenEntityInterfaceHasNullId() throws Excepti
     IndexingFailureRecorder failureRecorder = mock(IndexingFailureRecorder.class);
     StageStatsTracker statsTracker = mock(StageStatsTracker.class);
     PartitionWorker batchWorker =
-        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, null, false, failureRecorder);
+        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext, failureRecorder);
 
     EntityInterface failingEntity = mock(EntityInterface.class);
     when(failingEntity.getId()).thenReturn(null);
@@ -531,7 +556,7 @@ void processBatchSkipsReaderFailureWhenEntityInterfaceHasNullId() throws Excepti
 
   @Test
   void processBatchWrapsSinkFailuresAsSearchIndexException() throws Exception {
     PartitionWorker batchWorker =
-        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, null, false);
+        new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext);
 
     ResultList<EntityInterface> resultList = new ResultList<>();
     resultList.setData(List.of(mock(EntityInterface.class)));
@@ -617,7 +642,7 @@ void readEntitiesKeysetPassesSelectiveFieldsNotWildcard() throws Exception {
 
   void readEntitiesKeysetUsesTimeSeriesSourceWithConfiguredWindow() throws Exception {
     PartitionWorker timeSeriesWorker =
         new PartitionWorker(
-            coordinator, bulkSink, BATCH_SIZE, null, false, null, reindexingConfiguration);
+            coordinator, bulkSink, BATCH_SIZE, stagedIndexContext, null, reindexingConfiguration);
     when(reindexingConfiguration.getTimeSeriesStartTs(Entity.QUERY_COST_RECORD)).thenReturn(100L);
 
     ResultList<EntityTimeSeriesInterface> resultList = new ResultList<>();
@@ -650,6 +675,43 @@
     assertNotNull(constructorArgs.get().get(4));
   }
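The next added test asserts legacy-alias normalization: older configs may carry the alias spelled as SearchIndexEntityTypes.QUERY_COST_RESULT while the canonical type is Entity.QUERY_COST_RECORD, and the worker must normalize before any staged-index lookup. A hedged sketch of that mapping, assuming those constants resolve to "queryCostResult" and "queryCostRecord" respectively (names are illustrative):

import java.util.Map;

// Hedged sketch: normalize legacy entity-type aliases to canonical names
// before resolving staged indexes, so both spellings keep working.
final class EntityTypeNormalizer {
  private static final Map<String, String> LEGACY_ALIASES =
      Map.of("queryCostResult", "queryCostRecord");

  static String normalize(String entityType) {
    return LEGACY_ALIASES.getOrDefault(entityType, entityType);
  }
}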
+ new Class[] {String.class, String.class, int.class}, + SearchIndexEntityTypes.QUERY_COST_RESULT, + "cursor", + 3)); + } + + assertEquals(Entity.QUERY_COST_RECORD, constructorArgs.get().get(0)); + assertEquals(3, constructorArgs.get().get(1)); + assertEquals(List.of(), constructorArgs.get().get(2)); + assertEquals(100L, constructorArgs.get().get(3)); + assertNotNull(constructorArgs.get().get(4)); + } + @Test void writeToSinkUsesTimeSeriesEntitiesForTimeSeriesTypes() throws Exception { ResultList resultList = new ResultList<>(); @@ -691,7 +753,7 @@ void waitForSinkOperationsReconcilesStalePendingWorkAndFlushesStats() throws Exc @Test void processPartitionKeepsProgressStatusProcessingAndCompletesSuccessfully() { PartitionWorker partitionWorker = - new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, null, false); + new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext); SearchIndexPartition partition = buildPartition("table", 0, 2); ResultList resultList = new ResultList<>(); @@ -733,7 +795,7 @@ void processPartitionKeepsProgressStatusProcessingAndCompletesSuccessfully() { @Test void processPartitionTracksReaderFailuresAndCompletesWithFailedCounts() { PartitionWorker partitionWorker = - new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, null, false); + new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext); SearchIndexPartition partition = buildPartition("table", 0, 2); SearchIndexException readerFailure = @@ -773,7 +835,7 @@ void processPartitionTracksReaderFailuresAndCompletesWithFailedCounts() { @Test void processPartitionStopsAfterReadWhenStopRequestedMidLoop() { PartitionWorker partitionWorker = - new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, null, false); + new PartitionWorker(coordinator, bulkSink, BATCH_SIZE, stagedIndexContext); SearchIndexPartition partition = buildPartition("table", 0, 2); ResultList resultList = new ResultList<>(); @@ -817,7 +879,7 @@ void processPartitionStopsAfterReadWhenStopRequestedMidLoop() { void processPartitionRecordsSinkFailuresAndStopsWhenCursorCannotBeRebuilt() throws Exception { IndexingFailureRecorder failureRecorder = mock(IndexingFailureRecorder.class); PartitionWorker partitionWorker = - new PartitionWorker(coordinator, bulkSink, 2, null, false, failureRecorder); + new PartitionWorker(coordinator, bulkSink, 2, stagedIndexContext, failureRecorder); SearchIndexPartition partition = buildPartition("table", 0, 4); ResultList resultList = new ResultList<>(); @@ -867,7 +929,8 @@ void processPartitionRecordsSinkFailuresAndStopsWhenCursorCannotBeRebuilt() thro @Test void processPartitionAdjustsSuccessCountsForProcessFailures() { - PartitionWorker partitionWorker = new PartitionWorker(coordinator, bulkSink, 2, null, false); + PartitionWorker partitionWorker = + new PartitionWorker(coordinator, bulkSink, 2, stagedIndexContext); SearchIndexPartition partition = buildPartition("table", 0, 2); ResultList resultList = new ResultList<>(); @@ -911,7 +974,8 @@ void processPartitionAdjustsSuccessCountsForProcessFailures() { @Test void processPartitionFailsPartitionWhenCompletionThrows() { - PartitionWorker partitionWorker = new PartitionWorker(coordinator, bulkSink, 2, null, false); + PartitionWorker partitionWorker = + new PartitionWorker(coordinator, bulkSink, 2, stagedIndexContext); SearchIndexPartition partition = buildPartition("table", 0, 1); ResultList resultList = new ResultList<>(); diff --git 
a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifierTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifierTest.java index 030438f487fa..97405b7e6985 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifierTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/PollingJobNotifierTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; @@ -67,13 +68,14 @@ void pollForJobsDiscoversNewJobsAndClearsCompletedOnes() throws Exception { AtomicReference<UUID> callbackJob = new AtomicReference<>(); notifier.onJobStarted(callbackJob::set); getRunningFlag(notifier).set(true); + setFastIdleUntil(notifier, System.currentTimeMillis() + 60_000L); invokePoll(notifier); assertEquals(jobId, callbackJob.get()); assertTrue(getKnownJobs(notifier).contains(jobId)); - setLastPollTime(notifier, System.currentTimeMillis() - 31_000L); + setLastPollTime(notifier, System.currentTimeMillis() - 5_000L); invokePoll(notifier); assertTrue(getKnownJobs(notifier).isEmpty()); @@ -100,6 +102,29 @@ void activePollingIntervalAndExceptionHandlingBehaveAsExpected() throws Exceptio assertTrue(getKnownJobs(notifier).isEmpty()); } + @Test + void idlePollingBacksOffAfterFastWindowAndResumesAfterJobActivity() throws Exception { + CollectionDAO collectionDAO = mock(CollectionDAO.class); + CollectionDAO.SearchIndexJobDAO jobDAO = mock(CollectionDAO.SearchIndexJobDAO.class); + when(collectionDAO.searchIndexJobDAO()).thenReturn(jobDAO); + when(jobDAO.getRunningJobIds()).thenReturn(List.of()); + + PollingJobNotifier notifier = new PollingJobNotifier(collectionDAO, "server-backoff"); + getRunningFlag(notifier).set(true); + + setFastIdleUntil(notifier, System.currentTimeMillis() - 1L); + setLastPollTime(notifier, System.currentTimeMillis() - 10_000L); + invokePoll(notifier); + + verifyNoInteractions(jobDAO); + + notifier.notifyJobCompleted(UUID.randomUUID()); + setLastPollTime(notifier, System.currentTimeMillis() - 5_000L); + invokePoll(notifier); + + verify(jobDAO).getRunningJobIds(); + } + @Test void stopHandlesPreconfiguredSchedulerAndInterruptedTermination() throws Exception { PollingJobNotifier notifier = @@ -137,6 +162,12 @@ private void setLastPollTime(PollingJobNotifier notifier, long value) throws Exc field.setLong(notifier, value); } + private void setFastIdleUntil(PollingJobNotifier notifier, long value) throws Exception { + Field field = notifier.getClass().getDeclaredField("fastIdleUntil"); + field.setAccessible(true); + field.setLong(notifier, value); + } + private Object getField(Object target, String name) throws Exception { Field field = target.getClass().getDeclaredField(name); field.setAccessible(true); diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/RedisJobNotifierTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/RedisJobNotifierTest.java deleted file mode 100644 index 5eeab373a73f..000000000000 ---
a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/distributed/RedisJobNotifierTest.java +++ /dev/null @@ -1,300 +0,0 @@ -package org.openmetadata.service.apps.bundles.searchIndex.distributed; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import io.lettuce.core.RedisClient; -import io.lettuce.core.RedisURI; -import io.lettuce.core.api.StatefulRedisConnection; -import io.lettuce.core.api.sync.RedisCommands; -import io.lettuce.core.pubsub.RedisPubSubAdapter; -import io.lettuce.core.pubsub.RedisPubSubListener; -import io.lettuce.core.pubsub.StatefulRedisPubSubConnection; -import io.lettuce.core.pubsub.api.sync.RedisPubSubCommands; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.time.Duration; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import org.junit.jupiter.api.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.MockedStatic; -import org.mockito.Mockito; -import org.openmetadata.service.cache.CacheConfig; - -class RedisJobNotifierTest { - - @Test - void startInitializesRedisConnectionsAndStopCleansThemUp() { - CacheConfig config = cacheConfig("redis://cache:6380"); - RedisJobNotifier notifier = new RedisJobNotifier(config, "server-123"); - RedisClient redisClient = mock(RedisClient.class); - StatefulRedisPubSubConnection subConnection = - mock(StatefulRedisPubSubConnection.class); - StatefulRedisConnection pubConnection = mock(StatefulRedisConnection.class); - RedisPubSubCommands pubSubCommands = mock(RedisPubSubCommands.class); - RedisCommands redisCommands = mock(RedisCommands.class); - when(redisClient.connectPubSub()).thenReturn(subConnection); - when(redisClient.connect()).thenReturn(pubConnection); - when(subConnection.sync()).thenReturn(pubSubCommands); - when(pubConnection.sync()).thenReturn(redisCommands); - - try (MockedStatic redisClientStatic = Mockito.mockStatic(RedisClient.class)) { - redisClientStatic.when(() -> RedisClient.create(any(RedisURI.class))).thenReturn(redisClient); - - notifier.start(); - notifier.start(); - - assertTrue(notifier.isRunning()); - verify(subConnection) - .addListener(org.mockito.ArgumentMatchers.>any()); - verify(pubSubCommands).subscribe("om:distributed-jobs:start", "om:distributed-jobs:complete"); - - notifier.stop(); - - assertFalse(notifier.isRunning()); - verify(pubSubCommands) - .unsubscribe("om:distributed-jobs:start", "om:distributed-jobs:complete"); - verify(subConnection).close(); - verify(pubConnection).close(); - verify(redisClient).shutdown(); - } - } - - @Test - void startRegistersListenerThatHandlesRemoteMessages() { - CacheConfig config = cacheConfig("redis://cache:6380"); - RedisJobNotifier notifier = new RedisJobNotifier(config, "server-123"); - RedisClient redisClient = mock(RedisClient.class); - StatefulRedisPubSubConnection subConnection = - mock(StatefulRedisPubSubConnection.class); - StatefulRedisConnection pubConnection = 
mock(StatefulRedisConnection.class); - RedisPubSubCommands pubSubCommands = mock(RedisPubSubCommands.class); - when(redisClient.connectPubSub()).thenReturn(subConnection); - when(redisClient.connect()).thenReturn(pubConnection); - when(subConnection.sync()).thenReturn(pubSubCommands); - - try (MockedStatic redisClientStatic = Mockito.mockStatic(RedisClient.class)) { - redisClientStatic.when(() -> RedisClient.create(any(RedisURI.class))).thenReturn(redisClient); - - notifier.start(); - AtomicReference callbackJob = new AtomicReference<>(); - notifier.onJobStarted(callbackJob::set); - - @SuppressWarnings("unchecked") - ArgumentCaptor> listenerCaptor = - ArgumentCaptor.forClass(RedisPubSubAdapter.class); - verify(subConnection).addListener(listenerCaptor.capture()); - - UUID jobId = UUID.randomUUID(); - listenerCaptor.getValue().message("om:distributed-jobs:start", jobId + "|SEARCH_INDEX|other"); - - assertEquals(jobId, callbackJob.get()); - } - } - - @Test - void startFailureResetsRunningState() { - RedisJobNotifier notifier = - new RedisJobNotifier(cacheConfig("redis://cache:6379"), "server-123"); - - try (MockedStatic redisClientStatic = Mockito.mockStatic(RedisClient.class)) { - redisClientStatic - .when(() -> RedisClient.create(any(RedisURI.class))) - .thenThrow(new IllegalStateException("redis down")); - - assertThrows(RuntimeException.class, notifier::start); - assertFalse(notifier.isRunning()); - } - } - - @Test - void stopReturnsWhenNotifierWasNeverStarted() { - RedisJobNotifier notifier = - new RedisJobNotifier(cacheConfig("redis://cache:6379"), "server-123"); - - notifier.stop(); - - assertFalse(notifier.isRunning()); - } - - @Test - void stopSwallowsShutdownExceptions() throws Exception { - RedisJobNotifier notifier = - new RedisJobNotifier(cacheConfig("redis://cache:6379"), "server-123"); - StatefulRedisPubSubConnection subConnection = - mock(StatefulRedisPubSubConnection.class); - RedisPubSubCommands pubSubCommands = mock(RedisPubSubCommands.class); - when(subConnection.sync()).thenReturn(pubSubCommands); - Mockito.doThrow(new IllegalStateException("unsubscribe failed")) - .when(pubSubCommands) - .unsubscribe("om:distributed-jobs:start", "om:distributed-jobs:complete"); - getRunningFlag(notifier).set(true); - setField(notifier, "subConnection", subConnection); - setField(notifier, "pubConnection", mock(StatefulRedisConnection.class)); - setField(notifier, "redisClient", mock(RedisClient.class)); - - notifier.stop(); - - assertFalse(notifier.isRunning()); - } - - @Test - void notifyMethodsAndInboundMessagesRespectSourceServer() throws Exception { - RedisJobNotifier notifier = new RedisJobNotifier(cacheConfig("cache:6379"), "server-123"); - StatefulRedisConnection pubConnection = mock(StatefulRedisConnection.class); - RedisCommands redisCommands = mock(RedisCommands.class); - when(pubConnection.sync()).thenReturn(redisCommands); - when(redisCommands.publish(any(), any())).thenReturn(2L); - getRunningFlag(notifier).set(true); - setField(notifier, "pubConnection", pubConnection); - - UUID jobId = UUID.randomUUID(); - notifier.notifyJobStarted(jobId, "SEARCH_INDEX"); - notifier.notifyJobCompleted(jobId); - - verify(redisCommands).publish("om:distributed-jobs:start", jobId + "|SEARCH_INDEX|server-123"); - verify(redisCommands).publish("om:distributed-jobs:complete", jobId + "|COMPLETED|server-123"); - - AtomicReference callbackJob = new AtomicReference<>(); - notifier.onJobStarted(callbackJob::set); - invokeHandleMessage( - notifier, "om:distributed-jobs:start", jobId + 
"|SEARCH_INDEX|other-server"); - assertEquals(jobId, callbackJob.get()); - - callbackJob.set(null); - invokeHandleMessage(notifier, "om:distributed-jobs:start", jobId + "|SEARCH_INDEX|server-123"); - assertNull(callbackJob.get()); - - invokeHandleMessage(notifier, "om:distributed-jobs:start", "invalid"); - invokeHandleMessage(notifier, "om:distributed-jobs:start", "not-a-uuid|SEARCH_INDEX|other"); - invokeHandleMessage( - notifier, "om:distributed-jobs:complete", jobId + "|COMPLETED|other-server"); - } - - @Test - void notifyMethodsSwallowPublishFailures() throws Exception { - RedisJobNotifier notifier = new RedisJobNotifier(cacheConfig("cache:6379"), "server-123"); - StatefulRedisConnection pubConnection = mock(StatefulRedisConnection.class); - RedisCommands redisCommands = mock(RedisCommands.class); - when(pubConnection.sync()).thenReturn(redisCommands); - when(redisCommands.publish(eq("om:distributed-jobs:start"), any())) - .thenThrow(new IllegalStateException("publish failed")); - when(redisCommands.publish(eq("om:distributed-jobs:complete"), any())) - .thenThrow(new IllegalStateException("publish failed")); - getRunningFlag(notifier).set(true); - setField(notifier, "pubConnection", pubConnection); - - notifier.notifyJobStarted(UUID.randomUUID(), "SEARCH_INDEX"); - notifier.notifyJobCompleted(UUID.randomUUID()); - } - - @Test - void notifyMethodsSkipWhenNotRunningOrWithoutPublisher() { - RedisJobNotifier notifier = new RedisJobNotifier(cacheConfig("cache:6379"), "server-123"); - StatefulRedisConnection pubConnection = mock(StatefulRedisConnection.class); - - notifier.notifyJobStarted(UUID.randomUUID(), "SEARCH_INDEX"); - notifier.notifyJobCompleted(UUID.randomUUID()); - verify(pubConnection, never()).sync(); - } - - @Test - void buildRedisUriSupportsUrlVariantsAndAuthentication() throws Exception { - CacheConfig config = cacheConfig("redis://cache.example.com:6380"); - config.redis.authType = CacheConfig.AuthType.PASSWORD; - config.redis.username = "user"; - config.redis.passwordRef = "secret"; - config.redis.useSSL = true; - config.redis.database = 4; - config.redis.connectTimeoutMs = 1234; - - RedisJobNotifier notifier = new RedisJobNotifier(config, "server-123"); - RedisURI uri = (RedisURI) invokePrivate(notifier, "buildRedisURI"); - - assertEquals("cache.example.com", uri.getHost()); - assertEquals(6380, uri.getPort()); - assertTrue(uri.isSsl()); - assertEquals(4, uri.getDatabase()); - assertEquals(Duration.ofMillis(1234), uri.getTimeout()); - assertEquals("user", uri.getUsername()); - - CacheConfig hostOnlyConfig = cacheConfig("redis-host"); - RedisURI hostOnlyUri = - (RedisURI) - invokePrivate(new RedisJobNotifier(hostOnlyConfig, "server-123"), "buildRedisURI"); - assertEquals("redis-host", hostOnlyUri.getHost()); - assertEquals(6379, hostOnlyUri.getPort()); - - CacheConfig hostPortConfig = cacheConfig("cache.example.com:6381"); - RedisURI hostPortUri = - (RedisURI) - invokePrivate(new RedisJobNotifier(hostPortConfig, "server-123"), "buildRedisURI"); - assertEquals("cache.example.com", hostPortUri.getHost()); - assertEquals(6381, hostPortUri.getPort()); - - CacheConfig passwordOnlyConfig = cacheConfig("cache.example.com:6382"); - passwordOnlyConfig.redis.authType = CacheConfig.AuthType.PASSWORD; - passwordOnlyConfig.redis.passwordRef = "secret"; - RedisURI passwordOnlyUri = - (RedisURI) - invokePrivate(new RedisJobNotifier(passwordOnlyConfig, "server-123"), "buildRedisURI"); - assertEquals("cache.example.com", passwordOnlyUri.getHost()); - assertEquals(6382, 
passwordOnlyUri.getPort()); - } - - @Test - void exposedTypeMatchesRedisImplementation() { - RedisJobNotifier notifier = new RedisJobNotifier(cacheConfig("cache:6379"), "server-123"); - - assertEquals("redis-pubsub", notifier.getType()); - } - - private void invokeHandleMessage(RedisJobNotifier notifier, String channel, String message) - throws Exception { - Method method = - notifier.getClass().getDeclaredMethod("handleMessage", String.class, String.class); - method.setAccessible(true); - method.invoke(notifier, channel, message); - } - - private Object invokePrivate(RedisJobNotifier notifier, String methodName) throws Exception { - Method method = notifier.getClass().getDeclaredMethod(methodName); - method.setAccessible(true); - return method.invoke(notifier); - } - - private AtomicBoolean getRunningFlag(RedisJobNotifier notifier) throws Exception { - return (AtomicBoolean) getField(notifier, "running"); - } - - private Object getField(Object target, String name) throws Exception { - Field field = target.getClass().getDeclaredField(name); - field.setAccessible(true); - return field.get(target); - } - - private void setField(Object target, String name, Object value) throws Exception { - Field field = target.getClass().getDeclaredField(name); - field.setAccessible(true); - field.set(target, value); - } - - private CacheConfig cacheConfig(String url) { - CacheConfig cacheConfig = new CacheConfig(); - cacheConfig.redis.url = url; - cacheConfig.redis.authType = CacheConfig.AuthType.NONE; - cacheConfig.redis.connectTimeoutMs = 2_000; - return cacheConfig; - } -} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListenerTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListenerTest.java index fb3bd4fb9bc5..1622b348cade 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListenerTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/LoggingProgressListenerTest.java @@ -36,8 +36,6 @@ void onJobConfiguredInitializesLoggerAndTracksSettings() throws Exception { .maxConcurrentRequests(8) .payloadSize(2L * 1024 * 1024) .autoTune(true) - .recreateIndex(true) - .useDistributedIndexing(true) .build(); listener.onJobConfigured(context, config); @@ -53,8 +51,7 @@ void onJobConfiguredInitializesLoggerAndTracksSettings() throws Exception { assertEquals("8", details.get("Max Concurrent Requests")); assertEquals("2.0 MB", details.get("Payload Size")); assertEquals("Enabled", details.get("Auto-tune")); - assertEquals("Yes", details.get("Recreate Index")); - assertEquals("Yes", details.get("Distributed Mode")); + assertEquals("Staged indexes with alias promotion", details.get("Indexing Mode")); } @Test diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/QuartzProgressListenerTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/QuartzProgressListenerTest.java index 5653b9364723..c4a4e7f9ef05 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/QuartzProgressListenerTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/QuartzProgressListenerTest.java @@ -274,8 +274,6 @@ private ReindexingConfiguration configuration() { 
.queueSize(100) .maxConcurrentRequests(5) .payloadSize(4_096) - .recreateIndex(true) - .useDistributedIndexing(true) .build(); } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListenerTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListenerTest.java index 63f8512cd10c..2050ec63eb5f 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListenerTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/bundles/searchIndex/listeners/SlackProgressListenerTest.java @@ -37,7 +37,6 @@ void jobConfiguredFormatsAllEntitiesAndPublishesConfiguration() throws Exception .maxConcurrentRequests(8) .payloadSize(5L * 1024 * 1024) .autoTune(true) - .recreateIndex(false) .build(); listener.onJobConfigured(mock(ReindexingJobContext.class), config); @@ -53,7 +52,7 @@ void jobConfiguredFormatsAllEntitiesAndPublishesConfiguration() throws Exception assertEquals("2", details.get("Producer threads")); assertEquals("500", details.get("Queue size")); assertEquals("1", details.get("Total entities")); - assertEquals("No", details.get("Recreating indices")); + assertEquals("Staged indexes with alias promotion", details.get("Indexing mode")); assertEquals("5 MB", details.get("Payload size")); assertEquals("8", details.get("Concurrent requests")); } @@ -74,7 +73,6 @@ void delegatesProgressCompletionAndErrorNotifications() throws Exception { .queueSize(200) .maxConcurrentRequests(3) .payloadSize(2L * 1024 * 1024) - .recreateIndex(true) .build(); Stats stats = new Stats() diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/apps/logging/AppRunLogAppenderTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/apps/logging/AppRunLogAppenderTest.java index 5a720aafdacf..5dc7d37e5e16 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/apps/logging/AppRunLogAppenderTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/apps/logging/AppRunLogAppenderTest.java @@ -192,7 +192,7 @@ void concurrentWritesFromMultipleThreadsAreSafe() throws InterruptedException { @Test void formatLineProducesJsonMatchingDropwizardLayout() { LoggingEvent event = createEvent("reindex started", Map.of()); - event.setLoggerName("org.openmetadata.service.apps.bundles.searchIndex.SearchIndexExecutor"); + event.setLoggerName("org.openmetadata.service.apps.bundles.searchIndex.ReindexingOrchestrator"); event.setTimeStamp(1774260643332L); String line = AppRunLogAppender.formatLine(event); assertTrue(line.startsWith("{\"timestamp\":1774260643332,"), "should start with timestamp"); @@ -200,7 +200,7 @@ void formatLineProducesJsonMatchingDropwizardLayout() { assertTrue(line.contains("\"thread\":\"test-thread\""), "should contain thread"); assertTrue( line.contains( - "\"logger\":\"org.openmetadata.service.apps.bundles.searchIndex.SearchIndexExecutor\""), + "\"logger\":\"org.openmetadata.service.apps.bundles.searchIndex.ReindexingOrchestrator\""), "should contain full logger name"); assertTrue(line.contains("\"message\":\"reindex started\""), "should contain message"); assertTrue(line.endsWith("}"), "should be valid JSON object"); diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/cache/EntityCacheBypassTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/cache/EntityCacheBypassTest.java index 
76c43b1d07dc..0918211b63c4 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/cache/EntityCacheBypassTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/cache/EntityCacheBypassTest.java @@ -24,8 +24,7 @@ /** * Behaviour tests for the reindex cache-bypass thread-local. Pins the contract used by - * {@code PartitionWorker.processPartition}, {@code EntityReader.readKeysetBatches}, and - * {@code SearchIndexExecutor} to opt their reader threads out of the entity cache. + * {@code PartitionWorker.processPartition} to opt reader threads out of the entity cache. */ class EntityCacheBypassTest { diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/searchIndexingAppConfig.json b/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/searchIndexingAppConfig.json index a509d925042e..c37fbd5e5e02 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/searchIndexingAppConfig.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/applications/configuration/internal/searchIndexingAppConfig.json @@ -28,11 +28,6 @@ "default": ["all"], "uniqueItems": true }, - "recreateIndex": { - "description": "This schema publisher run modes.", - "type": "boolean", - "default": true - }, "batchSize": { "description": "Maximum number of events sent in a batch (Default 100).", "type": "integer", @@ -87,7 +82,7 @@ "default": 100 }, "searchIndexMappingLanguage": { - "description": "Recreate Indexes with updated Language", + "description": "Search index mapping language.", "$ref": "../../../../configuration/elasticSearchConfiguration.json#/definitions/searchIndexMappingLanguage" }, "autoTune": { @@ -96,12 +91,6 @@ "type": "boolean", "default": false }, - "useDistributedIndexing": { - "title": "Use Distributed Indexing", - "description": "Enable distributed indexing to scale reindexing across multiple servers with fault tolerance and parallel processing", - "type": "boolean", - "default": true - }, "partitionSize": { "title": "Partition Size", "description": "Number of entities per partition for distributed indexing. Smaller values create more partitions for better distribution across servers. Range: 1000-50000.", diff --git a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Pages/SearchIndexApplication.spec.ts b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Pages/SearchIndexApplication.spec.ts index 0f9a72674b62..c7ffae150eab 100644 --- a/openmetadata-ui/src/main/resources/ui/playwright/e2e/Pages/SearchIndexApplication.spec.ts +++ b/openmetadata-ui/src/main/resources/ui/playwright/e2e/Pages/SearchIndexApplication.spec.ts @@ -24,6 +24,14 @@ import { settingClick } from '../../utils/sidebar'; // use the admin user to login test.use({ storageState: 'playwright/.auth/admin.json' }); +const SEARCH_INDEX_APP_NAME = 'SearchIndexingApplication'; +const SUCCESSFUL_RUN_STATUS = /success|completed|activeError/i; + +interface AppRunRecordResponse { + startTime?: number; + status?: string; +} + /** * Installs the Search Indexing Application from the marketplace. 
* Shared by the "Install application" step and the self-healing guard @@ -161,6 +169,91 @@ const verifyLastExecutionRun = async (page: Page, response: Response) => { } }; +const getLatestRunStartTime = async (page: Page) => { + const { apiContext } = await getApiContext(page); + const response = await apiContext.get( + `/api/v1/apps/name/${SEARCH_INDEX_APP_NAME}/runs/latest` + ); + const run = await getAppRunRecord(response); + + return run?.startTime; +}; + +const getAppRunRecord = async (response: Response) => { + if (!response.ok() || response.status() === 204) { + return undefined; + } + + const body = await response.text(); + + return body ? (JSON.parse(body) as AppRunRecordResponse) : undefined; +}; + +const waitForNewSuccessfulRun = async ( + page: Page, + previousRunStartTime?: number +) => { + const { apiContext } = await getApiContext(page); + let completedRunStartTime: number | undefined; + + await expect + .poll( + async () => { + const response = await apiContext.get( + `/api/v1/apps/name/${SEARCH_INDEX_APP_NAME}/runs/latest` + ); + const run = await getAppRunRecord(response); + + if (run?.startTime === undefined) { + return undefined; + } + + if ( + previousRunStartTime !== undefined && + run.startTime <= previousRunStartTime + ) { + return undefined; + } + + if (run.status && SUCCESSFUL_RUN_STATUS.test(run.status)) { + completedRunStartTime = run.startTime; + } + + return run.status; + }, + { + message: 'Wait for a new successful SearchIndexingApplication run', + intervals: [5_000, 10_000, 15_000, 30_000], + timeout: 300_000, + } + ) + .toEqual(expect.stringMatching(SUCCESSFUL_RUN_STATUS)); + + expect(completedRunStartTime).toBeDefined(); + + return completedRunStartTime; +}; + +const rerunSearchIndexApplicationForTable = async ( + page: Page, + previousRunStartTime?: number +) => { + const { apiContext } = await getApiContext(page); + const response = await apiContext.post( + `/api/v1/apps/trigger/${SEARCH_INDEX_APP_NAME}`, + { + data: { + batchSize: 100, + entities: ['table'], + }, + } + ); + + expect(response.status()).toBeLessThan(300); + + return waitForNewSuccessfulRun(page, previousRunStartTime); +}; + test.describe('Search Index Application', PLAYWRIGHT_BASIC_TEST_TAG_OBJ, () => { test('Search Index Application', async ({ page }) => { test.slow(); @@ -350,13 +443,14 @@ test.describe('Search Index Application', PLAYWRIGHT_BASIC_TEST_TAG_OBJ, () => { }); if (process.env.PLAYWRIGHT_IS_OSS) { - await test.step('Run application', async () => { + await test.step('Run application and rerun with table-only config', async () => { test.slow(true); // Test time shouldn't exceed while re-fetching the history API. 
await page.click( '[data-testid="search-indexing-application-card"] [data-testid="config-btn"]' ); + const previousRunStartTime = await getLatestRunStartTime(page); const triggerPipelineResponse = page.waitForResponse( '/api/v1/apps/trigger/SearchIndexingApplication' ); @@ -375,6 +469,12 @@ test.describe('Search Index Application', PLAYWRIGHT_BASIC_TEST_TAG_OBJ, () => { expect(statusResponse.status()).toBe(200); await verifyLastExecutionRun(page, statusResponse); + const firstRunStartTime = await waitForNewSuccessfulRun( + page, + previousRunStartTime + ); + + await rerunSearchIndexApplicationForTable(page, firstRunStartTime); }); } }); diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Applications/SearchIndexingApplication.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Applications/SearchIndexingApplication.md index 7f30ac08872f..e56d1d4058e0 100644 --- a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Applications/SearchIndexingApplication.md +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Applications/SearchIndexingApplication.md @@ -70,15 +70,10 @@ $$section $$ -$$section -### Recreate Indexes $(id="recreateIndex") - -$$ - $$section ### Search Index Language $(id="searchIndexMappingLanguage") -Recreate Indexes with updated Language +Search index mapping language. $$ @@ -89,13 +84,6 @@ Enable automatic performance tuning based on cluster capabilities and database e $$ -$$section -### Use Distributed Indexing $(id="useDistributedIndexing") - -Enable distributed indexing to scale reindexing across multiple servers with fault tolerance and parallel processing - -$$ - $$section ### Partition Size $(id="partitionSize") diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts index a3afef6e8105..2c7e5920a0cc 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/services/ingestionPipelines/createIngestionPipeline.ts @@ -1160,11 +1160,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -1178,11 +1174,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. */ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but @@ -2126,6 +2117,8 @@ export interface Resource { * Recreate Indexes with updated Language * * This schema defines the language options available for search index mappings. + * + * Search index mapping language. 
*/ export enum SearchIndexMappingLanguage { En = "EN", diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/app.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/app.ts index 29d0dcfb3817..78e8a35f7f9b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/app.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/app.ts @@ -363,11 +363,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -381,11 +377,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. */ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but @@ -1386,7 +1377,7 @@ export interface Resource { } /** - * Recreate Indexes with updated Language + * Search index mapping language. * * This schema defines the language options available for search index mappings. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/configuration/internal/searchIndexingAppConfig.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/configuration/internal/searchIndexingAppConfig.ts index e5f516972197..e46f4826796c 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/configuration/internal/searchIndexingAppConfig.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/configuration/internal/searchIndexingAppConfig.ts @@ -85,11 +85,7 @@ export interface SearchIndexingAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -107,11 +103,6 @@ export interface SearchIndexingAppConfig { * Application Type */ type?: SearchIndexingType; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; } /** @@ -178,7 +169,7 @@ export interface IndexSettings { } /** - * Recreate Indexes with updated Language + * Search index mapping language. * * This schema defines the language options available for search index mappings. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/appMarketPlaceDefinition.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/appMarketPlaceDefinition.ts index 87e91b083025..b1cc76646fa3 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/appMarketPlaceDefinition.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/appMarketPlaceDefinition.ts @@ -349,11 +349,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. 
- */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -367,11 +363,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. */ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but @@ -1368,7 +1359,7 @@ export interface Resource { } /** - * Recreate Indexes with updated Language + * Search index mapping language. * * This schema defines the language options available for search index mappings. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.ts index 018325d1cd64..7e5cba847528 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.ts @@ -307,11 +307,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -325,11 +321,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. */ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but @@ -1326,7 +1317,7 @@ export interface Resource { } /** - * Recreate Indexes with updated Language + * Search index mapping language. * * This schema defines the language options available for search index mappings. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts index c1d38ef63da9..38b0a6ec3450 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/ingestionPipelines/ingestionPipeline.ts @@ -1751,11 +1751,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -1769,11 +1765,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. 
*/ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but @@ -2721,6 +2712,8 @@ export interface Resource { * Recreate Indexes with updated Language * * This schema defines the language options available for search index mappings. + * + * Search index mapping language. */ export enum SearchIndexMappingLanguage { En = "EN", diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/application.ts b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/application.ts index 0377e9135236..727a6b663dbe 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/application.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/application.ts @@ -210,11 +210,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -228,11 +224,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. */ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but @@ -1223,7 +1214,7 @@ export interface Resource { } /** - * Recreate Indexes with updated Language + * Search index mapping language. * * This schema defines the language options available for search index mappings. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/applicationPipeline.ts b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/applicationPipeline.ts index c1fd0574997a..d7a273dbbacc 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/applicationPipeline.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/applicationPipeline.ts @@ -190,11 +190,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -208,11 +204,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. */ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but @@ -1203,7 +1194,7 @@ export interface Resource { } /** - * Recreate Indexes with updated Language + * Search index mapping language. * * This schema defines the language options available for search index mappings. 
*/ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts index d3f53c775e54..ad60a913b6a3 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/metadataIngestion/workflow.ts @@ -4765,6 +4765,8 @@ export enum ConfigScheme { * Recreate Indexes with updated Language * * This schema defines the language options available for search index mappings. + * + * Search index mapping language. */ export enum SearchIndexMappingLanguage { En = "EN", @@ -6037,11 +6039,7 @@ export interface CollateAIAppConfig { */ queueSize?: number; /** - * This schema publisher run modes. - */ - recreateIndex?: boolean; - /** - * Recreate Indexes with updated Language + * Search index mapping language. */ searchIndexMappingLanguage?: SearchIndexMappingLanguage; /** @@ -6055,11 +6053,6 @@ export interface CollateAIAppConfig { * Set to a positive value like 15 to limit to recent data only. */ timeSeriesMaxDays?: number; - /** - * Enable distributed indexing to scale reindexing across multiple servers with fault - * tolerance and parallel processing - */ - useDistributedIndexing?: boolean; /** * In multi-instance deployments, claim each entity type via Redis SETNX so only one * instance warms it. Disable to let every instance warm independently (idempotent but diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/ApplicationSchemas/SearchIndexingApplication.json b/openmetadata-ui/src/main/resources/ui/src/utils/ApplicationSchemas/SearchIndexingApplication.json index e5e68b9099e3..2c8b94682b8b 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/ApplicationSchemas/SearchIndexingApplication.json +++ b/openmetadata-ui/src/main/resources/ui/src/utils/ApplicationSchemas/SearchIndexingApplication.json @@ -133,13 +133,8 @@ "uiFieldType": "treeSelect", "uniqueItems": true }, - "recreateIndex": { - "title": "Recreate Indexes", - "type": "boolean", - "default": true - }, "searchIndexMappingLanguage": { - "description": "Recreate Indexes with updated Language", + "description": "Search index mapping language.", "title": "Search Index Language", "javaType": "org.openmetadata.schema.type.IndexMappingLanguage", "type": "string", @@ -152,12 +147,6 @@ "type": "boolean", "default": false }, - "useDistributedIndexing": { - "title": "Use Distributed Indexing", - "description": "Enable distributed indexing to scale reindexing across multiple servers with fault tolerance and parallel processing", - "type": "boolean", - "default": true - }, "partitionSize": { "title": "Partition Size", "description": "Number of entities per partition for distributed indexing. Smaller values create more partitions for better distribution across servers. Range: 1000-50000.",
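
Notes for reviewers: a few standalone sketches of the behaviors this diff pins, in the order they appear above.

The new PartitionWorkerTest case readEntitiesKeysetNormalizesLegacyTimeSeriesAliases pins that a read for SearchIndexEntityTypes.QUERY_COST_RESULT must construct the PaginatedEntityTimeSeriesSource with Entity.QUERY_COST_RECORD. A minimal sketch of such a normalization step; the class name and constructor-supplied table are invented for illustration, and the real lookup lives wherever the worker resolves the alias:

import java.util.Map;

// Sketch only, not the repository implementation.
final class TimeSeriesAliasNormalizer {
  private final Map<String, String> legacyToCanonical;

  TimeSeriesAliasNormalizer(Map<String, String> legacyToCanonical) {
    this.legacyToCanonical = Map.copyOf(legacyToCanonical);
  }

  // Returns the canonical entity type for a possibly-legacy alias, so the
  // paginated time-series source is always built against the real entity.
  String normalize(String entityType) {
    return legacyToCanonical.getOrDefault(entityType, entityType);
  }
}

Seeded with Map.of(SearchIndexEntityTypes.QUERY_COST_RESULT, Entity.QUERY_COST_RECORD), normalize(...) yields exactly the constructor argument the test asserts.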
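
The PollingJobNotifierTest changes pin a two-speed polling contract: polls run at a fast interval while fastIdleUntil lies in the future, back off to a slower interval once the fast window lapses with no activity, and job activity (notifyJobCompleted) re-arms the fast window. A sketch of that gate; only the field names fastIdleUntil and lastPollTime come from the tests, and the interval constants are assumptions chosen to satisfy the test's 5s/10s offsets:

// Sketch of the two-speed poll gate pinned by
// idlePollingBacksOffAfterFastWindowAndResumesAfterJobActivity.
final class PollGate {
  private static final long FAST_INTERVAL_MS = 3_000L;  // assumed
  private static final long IDLE_INTERVAL_MS = 30_000L; // assumed
  private static final long FAST_WINDOW_MS = 60_000L;   // assumed

  private volatile long lastPollTime;
  private volatile long fastIdleUntil;

  // Any job activity re-arms the fast window so discovery stays snappy.
  void onJobActivity(long now) {
    fastIdleUntil = now + FAST_WINDOW_MS;
  }

  // Decides whether this tick should hit the database at all.
  boolean shouldPoll(long now) {
    long interval = now < fastIdleUntil ? FAST_INTERVAL_MS : IDLE_INTERVAL_MS;
    if (now - lastPollTime < interval) {
      return false; // idle: skip the DAO entirely
    }
    lastPollTime = now;
    return true;
  }
}

Against the new test: with fastIdleUntil in the past and lastPollTime 10s ago, the idle interval wins and the DAO is never touched; after notifyJobCompleted re-arms the window, a 5s-old lastPollTime clears the fast interval and getRunningJobIds() fires.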
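
The deleted RedisJobNotifierTest documented the pub/sub wire format on the om:distributed-jobs:start and om:distributed-jobs:complete channels: jobId|TYPE|sourceServer, with malformed payloads dropped and self-originated messages ignored so a node never reacts to its own notifications. For the record, a standalone sketch of that parsing contract exactly as the removed tests pinned it:

import java.util.Optional;
import java.util.UUID;

// Sketch of the "jobId|TYPE|sourceServer" payload the deleted suite covered.
record JobMessage(UUID jobId, String jobType, String sourceServer) {

  static Optional<JobMessage> parse(String payload, String selfServerId) {
    String[] parts = payload.split("\\|");
    if (parts.length != 3) {
      return Optional.empty(); // e.g. "invalid"
    }
    final UUID jobId;
    try {
      jobId = UUID.fromString(parts[0]);
    } catch (IllegalArgumentException e) {
      return Optional.empty(); // e.g. "not-a-uuid|SEARCH_INDEX|other"
    }
    if (parts[2].equals(selfServerId)) {
      return Optional.empty(); // ignore our own published messages
    }
    return Optional.of(new JobMessage(jobId, parts[1], parts[2]));
  }
}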
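
The progress listeners now report the indexing mode as "Staged indexes with alias promotion": writes land in a fresh staged index and the serving alias is repointed once indexing succeeds, which is why the recreateIndex toggle disappears everywhere in this diff. For readers unfamiliar with the pattern, a sketch of the promotion step against Elasticsearch's stock _aliases endpoint; host, index, and alias names are hypothetical, and OpenMetadata performs this through its own search client rather than raw HTTP:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

// Sketch only: the alias-promotion step of a staged reindex.
public final class AliasPromotionSketch {
  public static void main(String[] args) throws Exception {
    String body =
        """
        {
          "actions": [
            { "remove": { "index": "table_search_index_v1", "alias": "table_search_index" } },
            { "add":    { "index": "table_search_index_v2", "alias": "table_search_index" } }
          ]
        }
        """;

    HttpRequest request =
        HttpRequest.newBuilder(URI.create("http://localhost:9200/_aliases"))
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build();

    // Elasticsearch applies both actions atomically, so readers never
    // observe a window where the alias resolves to no index at all.
    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    System.out.println(response.statusCode() + " " + response.body());
  }
}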
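
The Playwright helper rerunSearchIndexApplicationForTable boils down to a single POST against the apps trigger endpoint with a table-only config. The same call as a standalone Java sketch; the endpoint and payload mirror the helper, while the base URL and bearer token are placeholders for whatever deployment you target:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

// Sketch of the table-only rerun trigger used by the e2e test.
public final class TriggerTableReindexSketch {
  public static void main(String[] args) throws Exception {
    String baseUrl = "http://localhost:8585"; // placeholder
    String token = System.getenv("OM_JWT");   // placeholder

    HttpRequest request =
        HttpRequest.newBuilder(
                URI.create(baseUrl + "/api/v1/apps/trigger/SearchIndexingApplication"))
            .header("Content-Type", "application/json")
            .header("Authorization", "Bearer " + token)
            .POST(HttpRequest.BodyPublishers.ofString(
                "{\"batchSize\":100,\"entities\":[\"table\"]}"))
            .build();

    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    // The Playwright helper asserts a 2xx here before polling runs/latest.
    System.out.println(response.statusCode());
  }
}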
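
Finally, waitForNewSuccessfulRun guards against accepting a stale record: a run only counts if its startTime is strictly later than the run observed before triggering. The same guard as a plain Java loop, without the UI's expect.poll machinery; the tolerated status substrings mirror the Playwright helper's SUCCESSFUL_RUN_STATUS regex, and the fixed 5s sleep stands in for its escalating intervals:

import java.time.Duration;
import java.util.function.Supplier;

// Sketch of the "new successful run" guard from the e2e helper.
final class NewRunAwaiter {
  record Run(Long startTime, String status) {}

  static Run awaitNewSuccessfulRun(
      Supplier<Run> latestRun, Long previousStartTime, Duration timeout)
      throws InterruptedException {
    long deadline = System.nanoTime() + timeout.toNanos();
    while (System.nanoTime() < deadline) {
      Run run = latestRun.get();
      // Reject runs that started at or before the pre-trigger run.
      boolean isNew =
          run != null
              && run.startTime() != null
              && (previousStartTime == null || run.startTime() > previousStartTime);
      if (isNew) {
        String status = run.status() == null ? "" : run.status().toLowerCase();
        if (status.contains("success")
            || status.contains("completed")
            || status.contains("activeerror")) {
          return run;
        }
      }
      Thread.sleep(5_000L);
    }
    throw new AssertionError("No new successful run within " + timeout);
  }
}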