diff --git a/fdbserver/core/ServerKnobs.cpp b/fdbserver/core/ServerKnobs.cpp index 1180d9ec047..facf1fa2e62 100644 --- a/fdbserver/core/ServerKnobs.cpp +++ b/fdbserver/core/ServerKnobs.cpp @@ -1150,6 +1150,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( AUDIT_RESTORE_BATCH_KEY_LIMIT, 100000 ); // 100K keys per batch (was hardcoded 10K) init( AUDIT_PROGRESS_PERSIST_BYTES_INTERVAL, 100000000 ); // 100MB - only persist progress after this many bytes init( ENABLE_AUDIT_VERBOSE_TRACE, false ); + // Disabled in simulation: audit_storage locationmetadata already runs at controlled times in sim, + // and periodic triggering interferes with DD quiescence checks and the test harness audit checks. + init( AUDIT_LOCATION_METADATA_INTERVAL, 3600.0 ); if ( isSimulated ) AUDIT_LOCATION_METADATA_INTERVAL = 0; init( LOGGING_STORAGE_COMMIT_WHEN_IO_TIMEOUT, true ); init( LOGGING_RECENT_STORAGE_COMMIT_SIZE, 20 ); init( LOGGING_COMPLETE_STORAGE_COMMIT_PROBABILITY, 0.001 ); diff --git a/fdbserver/core/include/fdbserver/core/Knobs.h b/fdbserver/core/include/fdbserver/core/Knobs.h index 7a6f3641af2..8aae8a2beb2 100644 --- a/fdbserver/core/include/fdbserver/core/Knobs.h +++ b/fdbserver/core/include/fdbserver/core/Knobs.h @@ -1087,6 +1087,7 @@ class SWIFT_CXX_IMMORTAL_SINGLETON_TYPE ServerKnobs : public KnobsImpl launchAudit(Reference self, AuditType auditType, KeyValueStoreType auditStorageEngineType); Future auditStorage(Reference self, TriggerAuditRequest req); +Future periodicAuditLocationMetadata(Reference self); void loadAndDispatchAudit(Reference self, std::shared_ptr audit); Future dispatchAuditStorageServerShard(Reference self, std::shared_ptr audit); Future scheduleAuditStorageShardOnServer(Reference self, @@ -2969,6 +2970,8 @@ Future dataDistribution(Reference self, .detail("InitialMode", self->initData->bulkDumpMode); actors.push_back(bulkDumpCore(self, self->initialized.getFuture())); + 
actors.push_back(periodicAuditLocationMetadata(self)); // joins the actors awaited by waitForAll below
+
     co_await waitForAll(actors);
     ASSERT_WE_THINK(false);
     co_return;
@@ -3994,6 +3997,30 @@ Future cancelAuditStorage(Reference self, TriggerAuditReq
     }
 }
 
+Future periodicAuditLocationMetadata(Reference self) { // Loop that launches a ValidateLocationMetadata audit over allKeys once per AUDIT_LOCATION_METADATA_INTERVAL.
+    if (SERVER_KNOBS->AUDIT_LOCATION_METADATA_INTERVAL <= 0) { // non-positive interval disables the feature (ServerKnobs forces 0 when isSimulated)
+        co_return;
+    }
+    co_await self->auditStorageInitialized.getFuture(); // schedule nothing until auditStorageInitialized is set
+    TraceEvent("PeriodicAuditLocationMetadataEnabled", self->ddId)
+        .detail("IntervalSeconds", SERVER_KNOBS->AUDIT_LOCATION_METADATA_INTERVAL);
+    while (true) {
+        co_await delay(SERVER_KNOBS->AUDIT_LOCATION_METADATA_INTERVAL); // delay comes first, so the first audit starts one interval after initialization; knob is re-read each pass
+        try {
+            co_await self->auditStorageLocationMetadataLaunchingLock.take(TaskPriority::DefaultYield); // NOTE(review): presumably serializes with other location-metadata audit launchers - confirm
+            FlowLock::Releaser holder(self->auditStorageLocationMetadataLaunchingLock); // NOTE(review): presumably releases the lock when leaving the try scope - confirm
+            UID auditID = // ID returned by launchAudit; only used for the trace event below
+                co_await launchAudit(self, allKeys, AuditType::ValidateLocationMetadata, KeyValueStoreType::END); // NOTE(review): END presumably means "no specific storage engine" - confirm
+            TraceEvent("PeriodicAuditLocationMetadataLaunched", self->ddId).detail("AuditID", auditID);
+        } catch (Error& e) {
+            if (e.code() == error_code_actor_cancelled) { // cancellation is propagated, never swallowed
+                throw;
+            }
+            TraceEvent(SevWarn, "PeriodicAuditLocationMetadataError", self->ddId).error(e); // any other error is logged and the loop retries after the next interval
+        }
+    }
+}
+
 // Handling audit requests
 // For each request, launch audit storage and reply to CC with following three replies:
 // (1) auditID: reply auditID when the audit is successfully launch