Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fdbclient/ServerKnobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( SERVER_READY_QUORUM_TIMEOUT, 15.0 ); if( randomize && BUGGIFY ) SERVER_READY_QUORUM_TIMEOUT = 1.0;
init( REMOVE_RETRY_DELAY, 1.0 );
init( MOVE_KEYS_KRM_LIMIT, 2000 ); if( randomize && BUGGIFY ) MOVE_KEYS_KRM_LIMIT = 2;
init( FINISH_MOVE_KEYS_MAX_RETRIES, 50 ); // ~4 min total with exponential backoff (0.2s to 5s cap)
init( MOVE_KEYS_KRM_LIMIT_BYTES, 1e5 ); if( randomize && BUGGIFY ) MOVE_KEYS_KRM_LIMIT_BYTES = 5e4; //This must be sufficiently larger than CLIENT_KNOBS->KEY_SIZE_LIMIT (fdbclient/Knobs.h) to ensure that at least two entries will be returned from an attempt to read a key range map
init( MOVE_SHARD_KRM_ROW_LIMIT, 20000 );
init( MOVE_SHARD_KRM_BYTE_LIMIT, 1e6 );
Expand Down
2 changes: 2 additions & 0 deletions fdbclient/include/fdbclient/ServerKnobs.h
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,8 @@ class SWIFT_CXX_IMMORTAL_SINGLETON_TYPE ServerKnobs : public KnobsImpl<ServerKno
double SERVER_READY_QUORUM_TIMEOUT;
double REMOVE_RETRY_DELAY;
int MOVE_KEYS_KRM_LIMIT;
int FINISH_MOVE_KEYS_MAX_RETRIES; // Max retries in finishMoveKeys before returning move to queue; set very high to
// disable
int MOVE_KEYS_KRM_LIMIT_BYTES; // This must be sufficiently larger than CLIENT_KNOBS->KEY_SIZE_LIMIT
// (fdbclient/Knobs.h) to ensure that at least two entries will be returned from an
// attempt to read a key range map
Expand Down
3 changes: 2 additions & 1 deletion fdbserver/DDRelocationQueue.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2260,7 +2260,8 @@ ACTOR Future<Void> dataDistributionRelocator(DDQueue* self,

//TraceEvent("RelocateShardFinished", distributorId).detail("RelocateId", relocateShardInterval.pairID);

if (error.code() != error_code_move_to_removed_server) {
if (error.code() != error_code_move_to_removed_server &&
error.code() != error_code_finish_move_keys_too_many_retries) {
if (!error.code()) {
try {
wait(healthyDestinations
Expand Down
3 changes: 2 additions & 1 deletion fdbserver/DataDistribution.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ std::set<int> const& normalDDQueueErrors() {
static std::set<int> s{ error_code_movekeys_conflict,
error_code_broken_promise,
error_code_data_move_cancelled,
error_code_data_move_dest_team_not_found };
error_code_data_move_dest_team_not_found,
error_code_finish_move_keys_too_many_retries };
return s;
}

Expand Down
68 changes: 68 additions & 0 deletions fdbserver/MoveKeys.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ static inline void dprint(fmt::format_string<T...> fmt, T&&... args) {
}

namespace {

// Exponential backoff with jitter for transaction_too_old retries in finishMoveKeys.
// Base formula: 0.1 * 2^retries, capped at 5.0s, then jittered to [0.75x, 1.25x].
// Called with retries >= 1 (retries is incremented before the call), so effective
// start is ~0.2s. Jitter prevents FlowLock slots from retrying in lockstep.
double finishMoveKeysBackoff(int retries) {
double base = std::min(0.1 * (1 << std::min(retries, 6)), 5.0);
return std::min(base * (0.75 + 0.5 * deterministicRandom()->random01()), 5.0); // jitter: [0.75x, 1.25x], capped
}

struct Shard {
Shard() = default;
Shard(KeyRangeRef range, const UID& id) : range(range), id(id) {}
Expand Down Expand Up @@ -1648,10 +1658,18 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
}

wait(waitForAll(actors));

// Inject transaction_too_old before commit to exercise the
// retry limit and finish_move_keys_too_many_retries path.
if (BUGGIFY_WITH_PROB(0.01)) {
CODE_PROBE(true, "finishMoveKeys injecting transaction_too_old before commit");
throw transaction_too_old();
}
wait(tr.commit());
txnCommitted->increment(1);

begin = endKey;
retries = 0;
break;
}
// This leads to a count of transactions starting that exceeds the sum of
Expand All @@ -1664,6 +1682,27 @@ ACTOR static Future<Void> finishMoveKeys(Database occ,
state Error err = error;
wait(tr.onError(error));
retries++;
// tr.onError delays are short for transaction_too_old. With 15
// FlowLock slots all retrying, this creates a retry storm. Add
// additional exponential backoff capped at 5s.
if (err.code() == error_code_transaction_too_old) {
double backoff = finishMoveKeysBackoff(retries);
CODE_PROBE(true, "finishMoveKeys transaction_too_old backoff");
TraceEvent("FinishMoveKeysBackoff", relocationIntervalId)
.suppressFor(1.0)
.detail("Retries", retries)
.detail("BackoffSeconds", backoff);
if (retries > SERVER_KNOBS->FINISH_MOVE_KEYS_MAX_RETRIES) {
CODE_PROBE(true, "finishMoveKeys giving up after max retries");
TraceEvent(SevWarnAlways, "RelocateShard_FinishMoveKeysGivingUp", relocationIntervalId)
.error(err)
.detail("KeyBegin", keys.begin)
.detail("KeyEnd", keys.end)
.detail("Retries", retries);
throw finish_move_keys_too_many_retries();
}
wait(delay(backoff));
}
if (retries % 10 == 0) {
TraceEvent(retries == 20 ? SevWarnAlways : SevWarn,
"RelocateShard_FinishMoveKeysRetrying",
Expand Down Expand Up @@ -3709,3 +3748,32 @@ ACTOR Future<Void> prepareBlobRestore(Database occ,
}
}
}

// Unit test for the finishMoveKeys backoff formula. A simulation workload can't reliably
// trigger transaction_too_old in finishMoveKeys because:
// 1. finishMoveKeys only runs when DD schedules data moves, which depends on cluster
// configuration and shard placement — not guaranteed in all simulation configs
// 2. Even with injection, the injection point must fire before check() runs, but
// finishMoveKeys may not be called if no moves are scheduled
// So we test the backoff arithmetic directly.
TEST_CASE("/fdbserver/MoveKeys/finishMoveKeysBackoff") {
// Verify exponential backoff with jitter: base = 0.1 * 2^retries capped at 5.0s,
// then jittered to [0.75x, 1.25x]. In production retries >= 1 at call site.
for (int i = 0; i < 100; i++) {
double v0 = finishMoveKeysBackoff(0);
ASSERT(v0 >= 0.1 * 0.75 && v0 <= 0.1 * 1.25);
double v1 = finishMoveKeysBackoff(1);
ASSERT(v1 >= 0.2 * 0.75 && v1 <= 0.2 * 1.25);
double v3 = finishMoveKeysBackoff(3);
ASSERT(v3 >= 0.8 * 0.75 && v3 <= 0.8 * 1.25);
double v6 = finishMoveKeysBackoff(6);
ASSERT(v6 >= 5.0 * 0.75 && v6 <= 5.0); // capped at 5.0
double v100 = finishMoveKeysBackoff(100);
ASSERT(v100 >= 5.0 * 0.75 && v100 <= 5.0); // still capped
}

// Verify the retry limit knob exists and is positive
ASSERT(SERVER_KNOBS->FINISH_MOVE_KEYS_MAX_RETRIES > 0);

return Void();
}
1 change: 1 addition & 0 deletions flow/include/flow/error_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ ERROR( bulkload_fileset_invalid_filepath, 1245, "Bulkload fileset provides inval
ERROR( bulkload_manifest_decode_error, 1246, "Bulkload manifest string is failed to decode" )
ERROR( range_lock_reject, 1247, "Range lock is rejected" )
ERROR( range_unlock_reject, 1248, "Range unlock is rejected" )
ERROR( finish_move_keys_too_many_retries, 1249, "finishMoveKeys exceeded retry limit" )

// 15xx Platform errors
ERROR( platform_error, 1500, "Platform error" )
Expand Down