diff --git a/fdbcli/StatusCommand.cpp b/fdbcli/StatusCommand.cpp index 5558fb705ec..ff53f12e487 100644 --- a/fdbcli/StatusCommand.cpp +++ b/fdbcli/StatusCommand.cpp @@ -102,6 +102,23 @@ int getNumofNonExcludedMachines(StatusObjectReader statusObjCluster) { return numOfNonExcludedMachines; } +bool logEpochsMayBeLosingData(StatusObjectReader statusObjCluster) { + if (!statusObjCluster.has("logs")) { + return true; + } + + bool sawLogEpoch = false; + for (StatusObjectReader logEpoch : statusObjCluster.last().get_array()) { + sawLogEpoch = true; + bool possiblyLosingData = true; + if (!logEpoch.get("possibly_losing_data", possiblyLosingData) || possiblyLosingData) { + return true; + } + } + + return !sawLogEpoch; +} + std::string getDateInfoString(StatusObjectReader statusObj, std::string key) { time_t curTime; if (!statusObj.has(key)) { @@ -689,16 +706,22 @@ void printStatus(StatusObjectReader statusObj, if (dataLoss == -1) { ASSERT_WE_THINK(availLoss == -1); - outputString += format( - "\n\n Warning: the database may have data loss and availability loss. Please restart " - "following tlog interfaces, otherwise storage servers may never be able to catch " - "up.\n"); - StatusObjectReader logs; + const bool possiblyLosingData = logEpochsMayBeLosingData(statusObjCluster); + if (possiblyLosingData) { + outputString += format( + "\n\n Warning: the database may have data loss and availability loss. Please " + "restart following tlog interfaces, otherwise storage servers may never be able " + "to catch up.\n"); + } else { + outputString += format( + "\n\n Warning: the database may have availability loss. The current log state " + "does not indicate data loss.\n"); + } if (statusObjCluster.has("logs")) { for (StatusObjectReader logEpoch : statusObjCluster.last().get_array()) { - bool possiblyLosingData; - if (logEpoch.get("possibly_losing_data", possiblyLosingData) && - !possiblyLosingData) { + bool logEpochPossiblyLosingData; + if (logEpoch.get("possibly_losing_data", logEpochPossiblyLosingData) && + !logEpochPossiblyLosingData) { continue; } // Current epoch doesn't have an end version. diff --git a/fdbcli/fdbcli.cpp b/fdbcli/fdbcli.cpp index f0dd3140397..829db33af1c 100644 --- a/fdbcli/fdbcli.cpp +++ b/fdbcli/fdbcli.cpp @@ -865,6 +865,8 @@ void LogCommand(std::string line, UID randomID, std::string errMsg) { } struct CLIOptions { + static constexpr int DEFERRED_EXIT_CODE = -2; + std::string program_name; int exit_code = -1; @@ -877,6 +879,7 @@ struct CLIOptions { std::string logGroup; int exit_timeout = 0; Optional exec; + Optional statusJsonFile; bool initialStatusCheck = true; bool cliHints = true; bool cliHistory = true; @@ -907,12 +910,15 @@ struct CLIOptions { while (args.Next()) { int ec = processArg(args); + if (ec == DEFERRED_EXIT_CODE) { + return; + } if (ec != -1) { exit_code = ec; return; } } - if (exit_timeout && !exec.present()) { + if (exit_timeout && !exec.present() && !statusJsonFile.present()) { fprintf(stderr, "ERROR: --timeout may only be specified with --exec\n"); exit_code = FDB_EXIT_ERROR; return; @@ -1018,7 +1024,8 @@ struct CLIOptions { printProgramUsage(program_name.c_str()); return 0; case OPT_STATUS_FROM_JSON: - return printStatusFromJSON(args.OptionArg()); + statusJsonFile = args.OptionArg(); + return DEFERRED_EXIT_CODE; case OPT_TRACE_FORMAT: if (!validateTraceFormat(args.OptionArg())) { fprintf(stderr, "WARNING: Unrecognized trace format `%s'\n", args.OptionArg()); @@ -2136,6 +2143,21 @@ int main(int argc, char** argv) { return 1; } + if (opt.statusJsonFile.present()) { + try { + API->selectApiVersion(opt.apiVersion); + if (opt.useFutureProtocolVersion) { + API->useFutureProtocolVersion(); + } + API->setupNetwork(); + opt.setupKnobs(); + return printStatusFromJSON(opt.statusJsonFile.get()); + } catch (Error& e) { + fprintf(stderr, "ERROR: %s (%d)\n", e.what(), e.code()); + return 1; + } + } + if (opt.debugTLS) { // Backdoor into NativeAPI's tlsConfig, which is where the above network option settings ended up. extern TLSConfig tlsConfig; diff --git a/fdbcli/tests/fdbcli_tests.py b/fdbcli/tests/fdbcli_tests.py index aece349fffc..17ed8ac9ddb 100755 --- a/fdbcli/tests/fdbcli_tests.py +++ b/fdbcli/tests/fdbcli_tests.py @@ -471,6 +471,64 @@ def get_value_from_status_json(retry, *args): return result +def status_json_file_region_failover_message(): + status_json = { + "client": { + "cluster_file": {"path": "fdb.cluster", "up_to_date": True}, + "coordinators": {"coordinators": [], "quorum_reachable": True}, + "database_status": {"available": True, "healthy": False}, + "messages": [], + "timestamp": 1417807090, + }, + "cluster": { + "configuration": { + "redundancy_mode": "three_data_hall", + "storage_engine": "ssd-2", + "coordinators_count": 3, + "excluded_servers": [], + }, + "data": {"state": {"name": "healthy", "healthy": True}}, + "fault_tolerance": { + "max_zone_failures_without_losing_availability": -1, + "max_zone_failures_without_losing_data": -1, + }, + "logs": [ + { + "epoch": 1, + "current": True, + "begin_version": 1, + "possibly_losing_data": False, + "log_interfaces": [ + { + "id": "aaaaaaaaaaaaaaaa", + "healthy": False, + "address": "1.1.1.1:4500", + } + ], + } + ], + "machines": {}, + "processes": {}, + }, + } + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json") as status_file: + json.dump(status_json, status_file) + status_file.flush() + result = subprocess.run( + [command_template[0], "--status-from-json", status_file.name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=fdbcli_env, + ) + + stdout = result.stdout.decode("utf-8") + stderr = result.stderr.decode("utf-8") + assert result.returncode == 0, stderr + assert "Warning: the database may have availability loss." in stdout + assert "may have data loss" not in stdout + + @enable_logging() def consistencycheck(logger): consistency_check_on_output = "ConsistencyCheck is on" @@ -948,6 +1006,7 @@ def tls_address_suffix(): versionepoch() integer_options() tls_address_suffix() + status_json_file_region_failover_message() # TODO: fix the issue when running through the external client # quota() idempotency_ids()