diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c5c19088a3b..087fff22614 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -42,6 +42,8 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 fetch-depth: 0 # git-stub-vcs needs full history - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8 + with: + cache-bin: false if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version diff --git a/Cargo.lock b/Cargo.lock index 70a728f0dc7..80ffbe9f1b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -623,7 +623,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=bc489ddf0f38f75e0c194b86cf6f0de377f68845#bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source = "git+https://github.com/oxidecomputer/propolis?rev=58ab73bde89ade637b0ca8118682ee9575da6c2a#58ab73bde89ade637b0ca8118682ee9575da6c2a" dependencies = [ "bhyve_api_sys", "libc", @@ -633,7 +633,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=bc489ddf0f38f75e0c194b86cf6f0de377f68845#bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source = "git+https://github.com/oxidecomputer/propolis?rev=58ab73bde89ade637b0ca8118682ee9575da6c2a#58ab73bde89ade637b0ca8118682ee9575da6c2a" dependencies = [ "libc", "strum 0.26.3", @@ -848,7 +848,7 @@ dependencies = [ name = "bootstrap-agent-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "omicron-common", "omicron-uuid-kinds", @@ -886,7 +886,7 @@ name = "bootstrap-agent-lockstep-api" version = "0.1.0" dependencies = [ "bootstrap-agent-lockstep-types", - "dropshot 0.17.0", + "dropshot", "omicron-uuid-kinds", "omicron-workspace-hack", ] @@ -1064,15 +1064,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "cargo-platform" -version = "0.1.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea" -dependencies = [ - "serde", -] - [[package]] name = "cargo-platform" version = "0.2.0" @@ -1108,20 +1099,6 @@ dependencies = [ "url", ] -[[package]] -name = "cargo_metadata" -version = "0.19.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd5eb614ed4c27c5d706420e4320fbe3216ab31fa1c33cd8246ac36dae4479ba" -dependencies = [ - "camino", - "cargo-platform 0.1.9", - "semver 1.0.28", - "serde", - "serde_json", - "thiserror 2.0.18", -] - [[package]] name = "cargo_metadata" version = "0.21.0" @@ -1270,7 +1247,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "dropshot 0.17.0", + "dropshot", "futures", "libc", "omicron-common", @@ -1451,7 +1428,7 @@ dependencies = [ "clap", "clickhouse-admin-server-client", "clickhouse-admin-types", - "dropshot 0.17.0", + "dropshot", "futures", "omicron-common", "omicron-workspace-hack", @@ -1473,7 +1450,7 @@ name = "clickhouse-admin-api" version = "0.1.0" dependencies = [ "clickhouse-admin-types-versions", - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "omicron-common", "omicron-uuid-kinds", @@ -1535,7 +1512,7 @@ dependencies = [ "camino", "clickhouse-admin-types", "clickward", - "dropshot 0.17.0", + "dropshot", "omicron-workspace-hack", ] @@ -1635,7 +1612,7 @@ name = "cockroach-admin-api" version = "0.1.0" dependencies = [ "cockroach-admin-types-versions", - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "http", "omicron-common", @@ -2020,7 +1997,7 @@ name = "crdb-seed" version = "0.1.0" dependencies = [ "anyhow", - "dropshot 0.17.0", + "dropshot", "omicron-test-utils", "omicron-workspace-hack", "slog", @@ -2150,14 +2127,14 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = 
"git+https://github.com/oxidecomputer/crucible?rev=7103cd3a3d7b0112d2949dd135db06fef0c156bb#7103cd3a3d7b0112d2949dd135db06fef0c156bb" +source = "git+https://github.com/oxidecomputer/crucible?rev=bd9a0e2abe6b6b89aec8c85f4ee57474144ed150#bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" dependencies = [ "anyhow", "chrono", "crucible-workspace-hack", "percent-encoding", - "progenitor 0.10.0", - "reqwest 0.12.28", + "progenitor 0.14.0", + "reqwest 0.13.2", "schemars 0.8.22", "serde", "serde_json", @@ -2166,7 +2143,7 @@ dependencies = [ [[package]] name = "crucible-client-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=ae1da83e66c648574827298f4bc444632bf4d047#ae1da83e66c648574827298f4bc444632bf4d047" +source = "git+https://github.com/oxidecomputer/crucible?rev=bd9a0e2abe6b6b89aec8c85f4ee57474144ed150#bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" dependencies = [ "base64 0.22.1", "crucible-workspace-hack", @@ -2179,13 +2156,13 @@ dependencies = [ [[package]] name = "crucible-common" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=7103cd3a3d7b0112d2949dd135db06fef0c156bb#7103cd3a3d7b0112d2949dd135db06fef0c156bb" +source = "git+https://github.com/oxidecomputer/crucible?rev=bd9a0e2abe6b6b89aec8c85f4ee57474144ed150#bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" dependencies = [ "anyhow", "atty", "crucible-workspace-hack", - "dropshot 0.16.7", - "nix 0.29.0", + "dropshot", + "nix 0.31.2", "rustls-pemfile 1.0.4", "schemars 0.8.22", "serde", @@ -2199,7 +2176,7 @@ dependencies = [ "thiserror 2.0.18", "tokio", "tokio-rustls 0.24.1", - "toml 0.8.23", + "toml 1.0.6+spec-1.1.0", "twox-hash", "uuid", "vergen", @@ -2209,14 +2186,14 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=7103cd3a3d7b0112d2949dd135db06fef0c156bb#7103cd3a3d7b0112d2949dd135db06fef0c156bb" +source = 
"git+https://github.com/oxidecomputer/crucible?rev=bd9a0e2abe6b6b89aec8c85f4ee57474144ed150#bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" dependencies = [ "anyhow", "chrono", "crucible-workspace-hack", "percent-encoding", - "progenitor 0.10.0", - "reqwest 0.12.28", + "progenitor 0.14.0", + "reqwest 0.13.2", "schemars 0.8.22", "serde", "serde_json", @@ -2226,7 +2203,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=7103cd3a3d7b0112d2949dd135db06fef0c156bb#7103cd3a3d7b0112d2949dd135db06fef0c156bb" +source = "git+https://github.com/oxidecomputer/crucible?rev=bd9a0e2abe6b6b89aec8c85f4ee57474144ed150#bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" dependencies = [ "crucible-workspace-hack", "libc", @@ -2957,7 +2934,7 @@ dependencies = [ "clap", "dns-server-api", "dns-service-client", - "dropshot 0.17.0", + "dropshot", "git-stub-vcs", "hickory-client", "hickory-proto 0.25.2", @@ -2989,7 +2966,7 @@ name = "dns-server-api" version = "0.1.0" dependencies = [ "chrono", - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "internal-dns-types-versions", "omicron-workspace-hack", @@ -3024,7 +3001,7 @@ dependencies = [ "clap", "dns-server", "dns-service-client", - "dropshot 0.17.0", + "dropshot", "expectorate", "internal-dns-types", "omicron-test-utils", @@ -3167,57 +3144,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "dropshot" -version = "0.16.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69fd85c8dfc67252d02f260595f6b62b5abceb1b88b4b9722369d27936e5fa4" -dependencies = [ - "async-stream", - "async-trait", - "base64 0.22.1", - "bytes", - "camino", - "chrono", - "debug-ignore", - "dropshot_endpoint 0.16.7", - "form_urlencoded", - "futures", - "hostname 0.4.2", - "http", - "http-body-util", - "hyper", - "hyper-util", - "indexmap 2.14.0", - "multer", - "openapiv3", - "paste", - "percent-encoding", - "rustls 0.22.4", - "rustls-pemfile 2.2.0", - 
"schemars 0.8.22", - "scopeguard", - "semver 1.0.28", - "serde", - "serde_json", - "serde_path_to_error", - "serde_urlencoded", - "sha1", - "slog", - "slog-async", - "slog-bunyan", - "slog-json", - "slog-term", - "thiserror 2.0.18", - "tokio", - "tokio-rustls 0.25.0", - "toml 0.9.12+spec-1.1.0", - "usdt 0.6.0", - "uuid", - "version_check", - "waitgroup", -] - [[package]] name = "dropshot" version = "0.17.0" @@ -3232,7 +3158,7 @@ dependencies = [ "camino", "chrono", "debug-ignore", - "dropshot_endpoint 0.17.0", + "dropshot_endpoint", "form_urlencoded", "futures", "hostname 0.4.2", @@ -3283,7 +3209,7 @@ dependencies = [ "clap", "debug-ignore", "drift", - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "fs-err 3.3.0", "git-stub", @@ -3317,21 +3243,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "dropshot_endpoint" -version = "0.16.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67d106478e4a4782556981d028a667f41c4845cdaa6e2d3a9f58c5d15e725401" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "semver 1.0.28", - "serde", - "serde_tokenstream", - "syn 2.0.117", -] - [[package]] name = "dropshot_endpoint" version = "0.17.0" @@ -3613,7 +3524,7 @@ dependencies = [ name = "ereport-types" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "omicron-uuid-kinds", "omicron-workspace-hack", "schemars 0.8.22", @@ -4029,7 +3940,7 @@ dependencies = [ name = "gateway-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "ereport-types", "gateway-types-versions", @@ -4164,7 +4075,7 @@ name = "gateway-test-utils" version = "0.1.0" dependencies = [ "camino", - "dropshot 0.17.0", + "dropshot", "gateway-client", "gateway-messages", "gateway-types", @@ -4192,7 +4103,7 @@ name = "gateway-types-versions" version = "0.1.0" dependencies = [ "daft", - "dropshot 0.17.0", + "dropshot", "gateway-messages", "hex", "omicron-uuid-kinds", @@ -5308,7 +5219,7 @@ 
dependencies = [ "chrono", "crucible-smf", "debug-ignore", - "dropshot 0.17.0", + "dropshot", "futures", "http", "iddqd", @@ -5537,7 +5448,7 @@ name = "installinator-api" version = "0.1.0" dependencies = [ "anyhow", - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "hyper", "installinator-common-versions", @@ -5616,7 +5527,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "dropshot 0.17.0", + "dropshot", "hickory-resolver 0.25.2", "internal-dns-resolver", "internal-dns-types", @@ -5636,7 +5547,7 @@ dependencies = [ "dns-server", "dns-server-api", "dns-service-client", - "dropshot 0.17.0", + "dropshot", "expectorate", "futures", "hickory-proto 0.25.2", @@ -6755,7 +6666,7 @@ dependencies = [ "base64 0.22.1", "chrono", "cookie", - "dropshot 0.17.0", + "dropshot", "futures", "headers", "http", @@ -6822,7 +6733,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", - "dropshot 0.17.0", + "dropshot", "expectorate", "ipnet", "libc", @@ -6974,7 +6885,7 @@ dependencies = [ "db-macros", "diesel", "diesel-dtrace", - "dropshot 0.17.0", + "dropshot", "ereport-types", "expectorate", "futures", @@ -7091,7 +7002,7 @@ dependencies = [ "api_identity", "base64 0.22.1", "chrono", - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "http", "hyper", @@ -7142,7 +7053,7 @@ dependencies = [ name = "nexus-internal-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "http", "nexus-types", @@ -7209,7 +7120,7 @@ dependencies = [ name = "nexus-lockstep-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "http", "nexus-types", "nexus-types-versions", @@ -7296,7 +7207,7 @@ dependencies = [ "assert_matches", "camino", "chrono", - "dropshot 0.17.0", + "dropshot", "futures", "gateway-client", "gateway-messages", @@ -7477,7 +7388,7 @@ dependencies = [ "cockroach-admin-types", "daft", "debug-ignore", - "dropshot 0.17.0", + "dropshot", "expectorate", "gateway-client", "gateway-types", 
@@ -7663,7 +7574,7 @@ dependencies = [ "crucible-agent-client", "dns-service-client", "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=187aee7de2e50f907099ea06c04aac96c3455665)", - "dropshot 0.17.0", + "dropshot", "futures", "gateway-messages", "gateway-test-utils", @@ -7736,7 +7647,7 @@ dependencies = [ "daft", "derive-where", "derive_more 0.99.20", - "dropshot 0.17.0", + "dropshot", "either", "ereport-types", "expectorate", @@ -7804,7 +7715,7 @@ dependencies = [ "base64 0.22.1", "chrono", "daft", - "dropshot 0.17.0", + "dropshot", "http", "mg-admin-client", "omicron-common", @@ -7864,18 +7775,6 @@ dependencies = [ "libc", ] -[[package]] -name = "nix" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" -dependencies = [ - "bitflags 2.11.0", - "cfg-if", - "cfg_aliases 0.2.1", - "libc", -] - [[package]] name = "nix" version = "0.31.2" @@ -7921,7 +7820,7 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" name = "ntp-admin-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "http", "ntp-admin-types-versions", @@ -8221,7 +8120,7 @@ dependencies = [ "clickhouse-admin-test-utils", "clickhouse-admin-types", "clickward", - "dropshot 0.17.0", + "dropshot", "expectorate", "flume", "http", @@ -8265,7 +8164,7 @@ dependencies = [ "cockroach-admin-types", "cockroach-admin-types-versions", "csv", - "dropshot 0.17.0", + "dropshot", "expectorate", "http", "illumos-utils", @@ -8330,7 +8229,7 @@ dependencies = [ "camino-tempfile", "chrono", "daft", - "dropshot 0.17.0", + "dropshot", "expectorate", "futures", "hex", @@ -8406,7 +8305,7 @@ dependencies = [ "anyhow", "camino", "clap", - "dropshot 0.17.0", + "dropshot", "expectorate", "futures", "gateway-client", @@ -8481,7 +8380,7 @@ dependencies = [ "camino", "chrono", "clap", - "dropshot 0.17.0", + "dropshot", 
"ereport-types", "expectorate", "futures", @@ -8533,7 +8432,7 @@ dependencies = [ "async-trait", "camino", "camino-tempfile", - "dropshot 0.17.0", + "dropshot", "omicron-workspace-hack", "serde", "serde_json", @@ -8550,7 +8449,7 @@ dependencies = [ "anyhow", "assert_matches", "dns-service-client", - "dropshot 0.17.0", + "dropshot", "futures", "internal-dns-resolver", "internal-dns-types", @@ -8632,7 +8531,7 @@ dependencies = [ "dns-server", "dns-service-client", "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=187aee7de2e50f907099ea06c04aac96c3455665)", - "dropshot 0.17.0", + "dropshot", "ereport-types", "expectorate", "fatfs", @@ -8734,7 +8633,6 @@ dependencies = [ "rdb-types", "ref-cast", "regex", - "reqwest 0.12.28", "reqwest 0.13.2", "ring", "rustls 0.22.4", @@ -8793,7 +8691,7 @@ dependencies = [ "camino", "chrono", "clap", - "dropshot 0.17.0", + "dropshot", "expectorate", "http", "nexus-test-utils", @@ -8847,7 +8745,7 @@ dependencies = [ "csv", "daft", "diesel", - "dropshot 0.17.0", + "dropshot", "dyn-clone", "ereport-types", "expectorate", @@ -8997,7 +8895,7 @@ dependencies = [ "anyhow", "camino", "clap", - "dropshot 0.17.0", + "dropshot", "internal-dns-resolver", "internal-dns-types", "nexus-db-model", @@ -9076,7 +8974,7 @@ dependencies = [ "bytes", "camino", "clap", - "dropshot 0.17.0", + "dropshot", "futures", "libc", "omicron-common", @@ -9123,11 +9021,12 @@ dependencies = [ "clap", "clickhouse-admin-types", "crucible-agent-client", + "crucible-client-types", "derive_more 0.99.20", "dice-verifier 0.3.0-pre0 (git+https://github.com/oxidecomputer/dice-util?branch=main)", "display-error-chain", "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=cc8e02a0800034c431c8cf96b889ea638da3d194)", - "dropshot 0.17.0", + "dropshot", "expectorate", "flate2", "flume", @@ -9179,7 +9078,6 @@ dependencies = [ "regress 0.10.5", "repo-depot-api", "repo-depot-client", - "reqwest 0.12.28", "reqwest 0.13.2", "schemars 0.8.22", "secrecy 
0.10.3", @@ -9245,7 +9143,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "dropshot 0.17.0", + "dropshot", "expectorate", "filetime", "futures", @@ -9442,7 +9340,6 @@ dependencies = [ "tokio-util", "toml 0.7.8", "toml_datetime 0.6.11", - "toml_datetime 0.7.5+spec-1.1.0", "toml_edit 0.19.15", "toml_edit 0.22.27", "toml_parser", @@ -9454,7 +9351,7 @@ dependencies = [ "usdt-impl 0.6.0", "uuid", "vergen", - "vergen-lib", + "vergen-lib 9.1.0", "winnow 0.7.14", "x509-cert", "zerocopy 0.8.40", @@ -9779,7 +9676,7 @@ dependencies = [ name = "oximeter-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "omicron-common", "omicron-workspace-hack", @@ -9811,7 +9708,7 @@ dependencies = [ "camino", "chrono", "clap", - "dropshot 0.17.0", + "dropshot", "expectorate", "futures", "httpmock", @@ -9874,7 +9771,7 @@ dependencies = [ "crossterm 0.29.0", "debug-ignore", "display-error-chain", - "dropshot 0.17.0", + "dropshot", "expectorate", "futures", "gethostname", @@ -9929,7 +9826,7 @@ version = "0.1.0" dependencies = [ "cfg-if", "chrono", - "dropshot 0.17.0", + "dropshot", "futures", "http", "hyper", @@ -9977,7 +9874,7 @@ dependencies = [ "anyhow", "chrono", "clap", - "dropshot 0.17.0", + "dropshot", "either", "internal-dns-resolver", "internal-dns-types", @@ -10003,7 +9900,7 @@ dependencies = [ name = "oximeter-producer-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "omicron-workspace-hack", "oximeter-types-versions 0.1.0", ] @@ -11073,17 +10970,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "progenitor" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced2eadb9776a201d0585b4b072fd44d7d2104e0f3452d967b5a78966f4855cf" -dependencies = [ - "progenitor-client 0.10.0", - "progenitor-impl 0.10.0", - "progenitor-macro 0.10.0", -] - [[package]] name = "progenitor" version = "0.11.2" @@ -11189,28 +11075,6 @@ dependencies = [ "tokio", 
] -[[package]] -name = "progenitor-impl" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b17e5363daa50bf1cccfade6b0fb970d2278758fd5cfa9ab69f25028e4b1afa3" -dependencies = [ - "heck 0.5.0", - "http", - "indexmap 2.14.0", - "openapiv3", - "proc-macro2", - "quote", - "regex", - "schemars 0.8.22", - "serde", - "serde_json", - "syn 2.0.117", - "thiserror 2.0.18", - "typify 0.4.3", - "unicode-ident", -] - [[package]] name = "progenitor-impl" version = "0.11.2" @@ -11277,24 +11141,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "progenitor-macro" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4972aec926d1e06d6abc11ab3f063d2f7063be3dd46fd2839442c14d8e48f3ed" -dependencies = [ - "openapiv3", - "proc-macro2", - "progenitor-impl 0.10.0", - "quote", - "schemars 0.8.22", - "serde", - "serde_json", - "serde_tokenstream", - "serde_yaml", - "syn 2.0.117", -] - [[package]] name = "progenitor-macro" version = "0.11.2" @@ -11352,7 +11198,7 @@ dependencies = [ [[package]] name = "propolis-api-types-versions" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=bc489ddf0f38f75e0c194b86cf6f0de377f68845#bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source = "git+https://github.com/oxidecomputer/propolis?rev=58ab73bde89ade637b0ca8118682ee9575da6c2a#58ab73bde89ade637b0ca8118682ee9575da6c2a" dependencies = [ "crucible-client-types", "propolis_types", @@ -11365,14 +11211,14 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=bc489ddf0f38f75e0c194b86cf6f0de377f68845#bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source = "git+https://github.com/oxidecomputer/propolis?rev=58ab73bde89ade637b0ca8118682ee9575da6c2a#58ab73bde89ade637b0ca8118682ee9575da6c2a" dependencies = [ "async-trait", "base64 0.21.7", "crucible-client-types", "futures", - "progenitor 0.13.0", - 
"progenitor-client 0.13.0", + "progenitor 0.14.0", + "progenitor-client 0.14.0", "propolis-api-types-versions", "rand 0.9.2", "reqwest 0.13.2", @@ -11389,16 +11235,16 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=bc489ddf0f38f75e0c194b86cf6f0de377f68845#bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source = "git+https://github.com/oxidecomputer/propolis?rev=58ab73bde89ade637b0ca8118682ee9575da6c2a#58ab73bde89ade637b0ca8118682ee9575da6c2a" dependencies = [ "anyhow", "atty", "base64 0.21.7", "clap", - "dropshot 0.17.0", + "dropshot", "futures", "hyper", - "progenitor 0.13.0", + "progenitor 0.14.0", "propolis-api-types-versions", "propolis_api_types", "propolis_types", @@ -11422,7 +11268,7 @@ dependencies = [ [[package]] name = "propolis_api_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=bc489ddf0f38f75e0c194b86cf6f0de377f68845#bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source = "git+https://github.com/oxidecomputer/propolis?rev=58ab73bde89ade637b0ca8118682ee9575da6c2a#58ab73bde89ade637b0ca8118682ee9575da6c2a" dependencies = [ "crucible-client-types", "propolis-api-types-versions", @@ -11431,7 +11277,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=bc489ddf0f38f75e0c194b86cf6f0de377f68845#bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source = "git+https://github.com/oxidecomputer/propolis?rev=58ab73bde89ade637b0ca8118682ee9575da6c2a#58ab73bde89ade637b0ca8118682ee9575da6c2a" dependencies = [ "schemars 0.8.22", "serde", @@ -11737,7 +11583,7 @@ name = "range-requests" version = "0.1.0" dependencies = [ "bytes", - "dropshot 0.17.0", + "dropshot", "futures", "http", "http-body", @@ -11865,7 +11711,7 @@ dependencies = [ "colored 2.2.0", "daft", "datatest-stable", - "dropshot 0.17.0", + "dropshot", "expectorate", "gateway-types", "humantime", @@ -11912,7 +11758,7 
@@ dependencies = [ "anyhow", "chrono", "clap", - "dropshot 0.17.0", + "dropshot", "futures", "gateway-client", "gateway-types", @@ -12098,7 +11944,7 @@ checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" name = "repo-depot-api" version = "0.1.0" dependencies = [ - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "omicron-workspace-hack", "schemars 0.8.22", @@ -12176,6 +12022,7 @@ dependencies = [ "bytes", "cookie", "cookie_store", + "encoding_rs", "futures-channel", "futures-core", "futures-util", @@ -12188,6 +12035,7 @@ dependencies = [ "hyper-util", "js-sys", "log", + "mime", "percent-encoding", "pin-project-lite", "quinn", @@ -12859,7 +12707,7 @@ source = "git+https://github.com/oxidecomputer/scim2-rs?rev=018ae6f7bd752cd9b212 dependencies = [ "anyhow", "chrono", - "dropshot 0.17.0", + "dropshot", "http", "iddqd", "schemars 0.8.22", @@ -13451,7 +13299,7 @@ version = "0.1.0" dependencies = [ "camino", "chrono", - "dropshot 0.17.0", + "dropshot", "dropshot-api-manager-types", "http", "iddqd", @@ -13513,7 +13361,7 @@ dependencies = [ "chrono", "debug-ignore", "derive_more 0.99.20", - "dropshot 0.17.0", + "dropshot", "either", "expectorate", "futures", @@ -13564,7 +13412,7 @@ dependencies = [ "async-trait", "chrono", "derive_more 0.99.20", - "dropshot 0.17.0", + "dropshot", "futures", "illumos-utils", "omicron-common", @@ -13663,7 +13511,7 @@ version = "0.1.0" dependencies = [ "camino", "camino-tempfile-ext", - "dropshot 0.17.0", + "dropshot", "expectorate", "iddqd", "illumos-utils", @@ -14139,7 +13987,7 @@ dependencies = [ "anyhow", "async-trait", "clap", - "dropshot 0.17.0", + "dropshot", "futures", "gateway-ereport-messages", "gateway-messages", @@ -15275,21 +15123,6 @@ dependencies = [ "toml_edit 0.22.27", ] -[[package]] -name = "toml" -version = "0.9.12+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" -dependencies 
= [ - "indexmap 2.14.0", - "serde_core", - "serde_spanned 1.0.4", - "toml_datetime 0.7.5+spec-1.1.0", - "toml_parser", - "toml_writer", - "winnow 0.7.14", -] - [[package]] name = "toml" version = "1.0.6+spec-1.1.0" @@ -15648,7 +15481,7 @@ dependencies = [ "anyhow", "dns-server", "dns-service-client", - "dropshot 0.17.0", + "dropshot", "hickory-proto 0.25.2", "hickory-resolver 0.25.2", "internal-dns-types", @@ -15672,7 +15505,7 @@ dependencies = [ "daft", "debug-ignore", "derive_more 0.99.20", - "dropshot 0.17.0", + "dropshot", "futures", "gfss", "hex", @@ -15721,7 +15554,7 @@ dependencies = [ "ciborium", "daft", "derive_more 0.99.20", - "dropshot 0.17.0", + "dropshot", "gfss", "hex", "hkdf", @@ -15757,7 +15590,7 @@ dependencies = [ "bootstore", "camino", "daft", - "dropshot 0.17.0", + "dropshot", "gfss", "iddqd", "omicron-uuid-kinds", @@ -16232,7 +16065,7 @@ dependencies = [ "clap", "debug-ignore", "display-error-chain", - "dropshot 0.17.0", + "dropshot", "flate2", "fs-err 3.3.0", "futures", @@ -16482,24 +16315,24 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vergen" -version = "9.0.6" +version = "9.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b2bf58be11fc9414104c6d3a2e464163db5ef74b12296bda593cac37b6e4777" +checksum = "b849a1f6d8639e8de261e81ee0fc881e3e3620db1af9f2e0da015d4382ceaf75" dependencies = [ "anyhow", - "cargo_metadata 0.19.2", + "cargo_metadata 0.23.1", "derive_builder", "regex", "rustc_version 0.4.1", "rustversion", - "vergen-lib", + "vergen-lib 9.1.0", ] [[package]] name = "vergen-git2" -version = "1.0.7" +version = "9.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f6ee511ec45098eabade8a0750e76eec671e7fb2d9360c563911336bea9cac1" +checksum = "d51ab55ddf1188c8d679f349775362b0fa9e90bd7a4ac69838b2a087623f0d57" dependencies = [ "anyhow", "derive_builder", @@ -16507,7 +16340,7 @@ dependencies = [ "rustversion", "time", "vergen", 
- "vergen-lib", + "vergen-lib 9.1.0", ] [[package]] @@ -16521,7 +16354,7 @@ dependencies = [ "rustversion", "time", "vergen", - "vergen-lib", + "vergen-lib 0.1.6", ] [[package]] @@ -16535,6 +16368,17 @@ dependencies = [ "rustversion", ] +[[package]] +name = "vergen-lib" +version = "9.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34a29ba7e9c59e62f229ae1932fb1b8fb8a6fdcc99215a641913f5f5a59a569" +dependencies = [ + "anyhow", + "derive_builder", + "rustversion", +] + [[package]] name = "version_check" version = "0.9.5" @@ -16900,7 +16744,7 @@ version = "0.1.0" dependencies = [ "anyhow", "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=187aee7de2e50f907099ea06c04aac96c3455665)", - "dropshot 0.17.0", + "dropshot", "gateway-client", "gateway-types", "maplit", @@ -16965,7 +16809,7 @@ dependencies = [ "debug-ignore", "display-error-chain", "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=187aee7de2e50f907099ea06c04aac96c3455665)", - "dropshot 0.17.0", + "dropshot", "either", "expectorate", "flate2", @@ -17044,7 +16888,7 @@ name = "wicketd-api" version = "0.1.0" dependencies = [ "bootstrap-agent-lockstep-client", - "dropshot 0.17.0", + "dropshot", "gateway-client", "omicron-common", "omicron-passwords", @@ -17961,7 +17805,7 @@ dependencies = [ "anyhow", "camino", "clap", - "dropshot 0.17.0", + "dropshot", "illumos-utils", "omicron-common", "omicron-workspace-hack", diff --git a/Cargo.toml b/Cargo.toml index 7780221d91f..4a70101cc11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -473,10 +473,11 @@ crossterm = { version = "0.29.0", features = ["event-stream"] } # NOTE: if you change the pinned revision of the `crucible` dependencies, you # must also update the references in package-manifest.toml to match the new # revision. 
-crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "7103cd3a3d7b0112d2949dd135db06fef0c156bb" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "7103cd3a3d7b0112d2949dd135db06fef0c156bb" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "7103cd3a3d7b0112d2949dd135db06fef0c156bb" } -crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "7103cd3a3d7b0112d2949dd135db06fef0c156bb" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" } +crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" } +crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" } # NOTE: See above! csv = "1.3.1" curve25519-dalek = "4" @@ -721,11 +722,11 @@ progenitor-client010 = { package = "progenitor-client", version = "0.10.0" } # NOTE: if you change the pinned revision of the `bhyve_api` and propolis # dependencies, you must also update the references in package-manifest.toml to # match the new revision. 
-bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" } -propolis-api-types-versions = { git = "https://github.com/oxidecomputer/propolis", rev = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" } -propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "58ab73bde89ade637b0ca8118682ee9575da6c2a" } +propolis-api-types-versions = { git = "https://github.com/oxidecomputer/propolis", rev = "58ab73bde89ade637b0ca8118682ee9575da6c2a" } +propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "58ab73bde89ade637b0ca8118682ee9575da6c2a" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "58ab73bde89ade637b0ca8118682ee9575da6c2a" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "58ab73bde89ade637b0ca8118682ee9575da6c2a" } # NOTE: see above! 
proptest = "1.7.0" qorb = "0.4.1" @@ -751,7 +752,6 @@ regress = "0.10.4" repo-depot-api = { path = "sled-agent/repo-depot-api" } repo-depot-client = { path = "clients/repo-depot-client" } reqwest = { version = "0.13", default-features = false } -reqwest012 = { package = "reqwest", version = "0.12", default-features = false } ring = "0.17.14" rpassword = "7.4.0" rstest = "0.25.0" @@ -1074,11 +1074,16 @@ opt-level = 3 # crucible-agent-client = { path = "../crucible/agent-client" } # crucible-pantry-client = { path = "../crucible/pantry-client" } # crucible-smf = { path = "../crucible/smf" } +# crucible-common = { path = "../crucible/common" } +# crucible-client-types = { path = "../crucible/crucible-client-types" } # [patch."https://github.com/oxidecomputer/diesel-dtrace"] # diesel-dtrace = { path = "../diesel-dtrace" } # [patch."https://github.com/oxidecomputer/propolis"] +# bhyve_api = { path = "../propolis/crates/bhyve-api" } +# propolis-api-types-versions = { path = "../propolis/crates/propolis-api-types-versions" } +# propolis_api_types = { path = "../propolis/crates/propolis-api-types" } # propolis-client = { path = "../propolis/lib/propolis-client" } # propolis-mock-server = { path = "../propolis/bin/mock-server" } diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 27de5c27917..53c7dce9f45 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -58,6 +58,7 @@ nexus-reconfigurator-preparation.workspace = true nexus-saga-recovery.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-nexus.workspace = true omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true owo-colors.workspace = true @@ -101,7 +102,6 @@ expectorate.workspace = true http.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true -omicron-nexus.workspace = true omicron-test-utils.workspace = true subprocess.workspace = true diff --git 
a/dev-tools/omdb/src/bin/omdb/crucible_pantry.rs b/dev-tools/omdb/src/bin/omdb/crucible_pantry.rs index 3751d934488..94837631261 100644 --- a/dev-tools/omdb/src/bin/omdb/crucible_pantry.rs +++ b/dev-tools/omdb/src/bin/omdb/crucible_pantry.rs @@ -10,6 +10,7 @@ use clap::Args; use clap::Subcommand; use crucible_pantry_client::Client; use crucible_pantry_client::types::VolumeStatus; +use omicron_nexus::app::crucible::VolumeHealth; use tabled::Tabled; use uuid::Uuid; @@ -101,12 +102,29 @@ async fn cmd_volume_info( args: &VolumeArgs, ) -> Result<(), anyhow::Error> { let volume = args.uuid.to_string(); - let VolumeStatus { active, num_job_handles, seen_active } = - *client.volume_status(&volume).await.context("listing volumes")?; + + let VolumeStatus { active, num_job_handles, seen_active, info } = client + .volume_status(&volume) + .await + .context("listing volumes")? + .into_inner(); println!(" active: {}", active); println!(" num_job_handles: {}", num_job_handles); println!(" seen_active: {}", seen_active); + + use omicron_nexus::app::crucible::crucible_pantry_client_volume_health; + + match crucible_pantry_client_volume_health(&info) { + VolumeHealth::Healthy => { + println!(" volume health: healthy"); + } + + VolumeHealth::Degraded { reason } => { + println!(" volume health: degraded ({reason})"); + } + } + Ok(()) } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 0a24e5eb3e2..5f908eabdb4 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -99,7 +99,6 @@ ref-cast.workspace = true rdb-types.workspace = true regex.workspace = true reqwest = { workspace = true, features = ["http2", "json"] } -reqwest012 = { workspace = true } ring.workspace = true samael.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs index 0037143e66d..1c0e952b164 100644 --- a/nexus/src/app/crucible.rs +++ b/nexus/src/app/crucible.rs @@ -3,6 +3,13 @@ // file, You can obtain one at 
https://mozilla.org/MPL/2.0/. //! Functions common to interacting with Crucible agents +//! +//! A note: there are multiple places in this file that have two layers of +//! retries. This is because the majority of the requests to the Crucible agent +//! are requests for something to happen in the background, and it's the +//! client's responsibility to poll for a state change. One example of this is +//! for creating regions: the inner loop retries the POST until it succeeds, and +//! the outer loop checks the state returned. use super::*; @@ -19,11 +26,15 @@ use futures::StreamExt; use nexus_db_queries::db; use nexus_db_queries::db::identity::Asset; use omicron_common::api::external::Error; +use omicron_common::backoff::backon_retry_policy_internal_service; use omicron_common::backoff::{self, BackoffError}; -use omicron_common::progenitor_operation_retry::ProgenitorOperationRetry; -use omicron_common::progenitor_operation_retry::ProgenitorOperationRetryError; use omicron_uuid_kinds::DatasetUuid; +use progenitor_extras::retry::GoneCheckResult; +use progenitor_extras::retry::IndefiniteRetryOperationWhileError; +use progenitor_extras::retry::IndefiniteRetryOperationWhileErrorKind; +use progenitor_extras::retry::retry_operation_while_indefinitely; use slog::Logger; +use std::collections::VecDeque; // Arbitrary limit on concurrency, for operations issued on multiple regions // within a disk at the same time. 
@@ -41,21 +52,23 @@ enum WaitError { Permanent(#[from] Error), } -/// Convert an error returned from the ProgenitorOperationRetry loops in this -/// file into an external Error +/// Convert an error returned from a retry loop into an external Error fn into_external_error( - e: ProgenitorOperationRetryError, + e: IndefiniteRetryOperationWhileError< + crucible_agent_client::types::Error, + Error, + >, ) -> Error { - match e { - ProgenitorOperationRetryError::Gone => Error::Gone, + match e.kind { + IndefiniteRetryOperationWhileErrorKind::Gone => Error::Gone, - ProgenitorOperationRetryError::GoneCheckError(e) => { + IndefiniteRetryOperationWhileErrorKind::GoneCheckError(e) => { Error::internal_error(&format!( "insufficient permission for crucible_agent_gone_check: {e}" )) } - ProgenitorOperationRetryError::ProgenitorError(e) => match e { + IndefiniteRetryOperationWhileErrorKind::OperationError(e) => match e { crucible_agent_client::Error::ErrorResponse(rv) => { if rv.status().is_client_error() { Error::invalid_request(&rv.message) @@ -64,7 +77,7 @@ fn into_external_error( } } - _ => Error::internal_error(&format!("unexpected failure: {e}",)), + _ => Error::internal_error(&format!("unexpected failure: {e}")), }, } } @@ -74,27 +87,28 @@ impl super::Nexus { &self, dataset: &db::model::CrucibleDataset, ) -> CrucibleAgentClient { - // Use reqwest012_client because the rev-pinned crucible-agent-client - // is still on reqwest 0.12. CrucibleAgentClient::new_with_client( &format!("http://{}", dataset.address()), - self.reqwest012_client.clone(), + self.reqwest_client.clone(), ) } /// Return if the Crucible agent is expected to be there and answer Nexus: - /// true means it's gone, and the caller should bail out of the - /// ProgenitorOperationRetry loop. + /// if it's [`GoneCheckResult::Gone`], the caller should bail out of the + /// retry loop. 
async fn crucible_agent_gone_check( &self, dataset_id: DatasetUuid, - ) -> Result { + ) -> Result { let on_in_service_physical_disk = self .datastore() .crucible_dataset_physical_disk_in_service(dataset_id) .await?; - Ok(!on_in_service_physical_disk) + Ok(match on_in_service_physical_disk { + true => GoneCheckResult::StillAvailable, + false => GoneCheckResult::Gone, + }) } /// Return a region's associated address @@ -156,6 +170,7 @@ impl super::Nexus { source: Option, ) -> Result { let client = self.crucible_agent_client_for_dataset(dataset); + let region_id = region.id(); let dataset_id = dataset.id(); let Ok(extent_count) = u32::try_from(region.extent_count()) else { @@ -179,21 +194,35 @@ impl super::Nexus { }; let create_region = || async { - let region = match ProgenitorOperationRetry::new( - || async { client.region_create(®ion_request).await }, - || async { self.crucible_agent_gone_check(dataset_id).await }, + let create_region_operation = + || async { client.region_create(®ion_request).await }; + + let gone_check = + || async { self.crucible_agent_gone_check(dataset_id).await }; + + let region = match retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + create_region_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to create region {region_id}, retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); + }, ) - .run(log) .await { - Ok(v) => Ok(v), + Ok(v) => Ok(v.into_inner()), Err(e) => { error!( log, "region_create saw {:?}", e; - "region_id" => %region.id(), + "region_id" => %region_id, "dataset_id" => %dataset_id, ); @@ -228,8 +257,8 @@ impl super::Nexus { log, "Region requested, not yet created. 
Retrying in {:?}", delay; - "dataset" => %dataset.id(), - "region" => %region.id(), + "dataset" => %dataset_id, + "region" => %region_id, ); }; @@ -251,8 +280,6 @@ impl super::Nexus { WaitError::Permanent(e) => e, })?; - let returned_region = returned_region.into_inner(); - // Record the returned port self.datastore() .region_set_port(region.id(), returned_region.port_number) @@ -361,21 +388,35 @@ impl super::Nexus { // transitions from Requested to Created let create_running_snapshot = || async { - let running_snapshot = match ProgenitorOperationRetry::new( - || async { - client - .region_run_snapshot( - &RegionId(region_id.to_string()), - &snapshot_id.to_string(), - ) - .await + let run_snapshot_operation = || async { + client + .region_run_snapshot( + &RegionId(region_id.to_string()), + &snapshot_id.to_string(), + ) + .await + }; + + let gone_check = + || async { self.crucible_agent_gone_check(dataset_id).await }; + + let running_snapshot = match retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + run_snapshot_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to run region {region_id} snapshot \ + {snapshot_id}, retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); }, - || async { self.crucible_agent_gone_check(dataset_id).await }, ) - .run(log) .await { - Ok(v) => Ok(v), + Ok(v) => Ok(v.into_inner()), Err(e) => { error!( @@ -443,8 +484,6 @@ impl super::Nexus { WaitError::Permanent(e) => e, })?; - let running_snapshot = running_snapshot.into_inner(); - Ok((region, snapshot, running_snapshot)) } @@ -459,13 +498,26 @@ impl super::Nexus { let client = self.crucible_agent_client_for_dataset(dataset); let dataset_id = dataset.id(); - let result = ProgenitorOperationRetry::new( - || async { - client.region_get(&RegionId(region_id.to_string())).await + let region_get_operation = || async { + client.region_get(&RegionId(region_id.to_string())).await + }; + + let gone_check = + || 
async { self.crucible_agent_gone_check(dataset_id).await }; + + let result = retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + region_get_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to get region {region_id}, retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); }, - || async { self.crucible_agent_gone_check(dataset_id).await }, ) - .run(log) .await; match result { @@ -504,18 +556,32 @@ impl super::Nexus { let client = self.crucible_agent_client_for_dataset(dataset); let dataset_id = dataset.id(); - let result = ProgenitorOperationRetry::new( - || async { - client - .region_get_snapshot( - &RegionId(region_id.to_string()), - &snapshot_id.to_string(), - ) - .await + let snapshot_get_operation = || async { + client + .region_get_snapshot( + &RegionId(region_id.to_string()), + &snapshot_id.to_string(), + ) + .await + }; + + let gone_check = + || async { self.crucible_agent_gone_check(dataset_id).await }; + + let result = retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + snapshot_get_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to get region {region_id} snapshot {snapshot_id}, \ + retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); }, - || async { self.crucible_agent_gone_check(dataset_id).await }, ) - .run(log) .await; match result { @@ -552,15 +618,27 @@ impl super::Nexus { let client = self.crucible_agent_client_for_dataset(dataset); let dataset_id = dataset.id(); - let result = ProgenitorOperationRetry::new( - || async { - client - .region_get_snapshots(&RegionId(region_id.to_string())) - .await + let region_get_snapshots_operation = || async { + client.region_get_snapshots(&RegionId(region_id.to_string())).await + }; + + let gone_check = + || async { self.crucible_agent_gone_check(dataset_id).await }; + + let result = retry_operation_while_indefinitely( + 
backon_retry_policy_internal_service(), + region_get_snapshots_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to get snapshots for region {region_id}, \ + retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); }, - || async { self.crucible_agent_gone_check(dataset_id).await }, ) - .run(log) .await; match result { @@ -592,13 +670,26 @@ impl super::Nexus { let client = self.crucible_agent_client_for_dataset(dataset); let dataset_id = dataset.id(); - let result = ProgenitorOperationRetry::new( - || async { - client.region_delete(&RegionId(region_id.to_string())).await + let region_delete_operation = || async { + client.region_delete(&RegionId(region_id.to_string())).await + }; + + let gone_check = + || async { self.crucible_agent_gone_check(dataset_id).await }; + + let result = retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + region_delete_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to delete region {region_id}, retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); }, - || async { self.crucible_agent_gone_check(dataset_id).await }, ) - .run(log) .await; match result { @@ -635,18 +726,32 @@ impl super::Nexus { let client = self.crucible_agent_client_for_dataset(dataset); let dataset_id = dataset.id(); - let result = ProgenitorOperationRetry::new( - || async { - client - .region_delete_running_snapshot( - &RegionId(region_id.to_string()), - &snapshot_id.to_string(), - ) - .await + let delete_running_snapshot_operation = || async { + client + .region_delete_running_snapshot( + &RegionId(region_id.to_string()), + &snapshot_id.to_string(), + ) + .await + }; + + let gone_check = + || async { self.crucible_agent_gone_check(dataset_id).await }; + + let result = retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + delete_running_snapshot_operation, + gone_check, + |notification| { + 
slog::warn!( + log, + "failed to delete region {region_id} running snapshot \ + {snapshot_id}, retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); }, - || async { self.crucible_agent_gone_check(dataset_id).await }, ) - .run(log) .await; match result { @@ -684,18 +789,32 @@ impl super::Nexus { let client = self.crucible_agent_client_for_dataset(dataset); let dataset_id = dataset.id(); - let result = ProgenitorOperationRetry::new( - || async { - client - .region_delete_snapshot( - &RegionId(region_id.to_string()), - &snapshot_id.to_string(), - ) - .await + let delete_snapshot_operation = || async { + client + .region_delete_snapshot( + &RegionId(region_id.to_string()), + &snapshot_id.to_string(), + ) + .await + }; + + let gone_check = + || async { self.crucible_agent_gone_check(dataset_id).await }; + + let result = retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + delete_snapshot_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to delete region {region_id} snapshot \ + {snapshot_id}, retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); }, - || async { self.crucible_agent_gone_check(dataset_id).await }, ) - .run(log) .await; match result { @@ -1362,3 +1481,234 @@ impl super::Nexus { .collect::, _>>() } } + +pub enum VolumeHealth { + Healthy, + + Degraded { reason: VolumeDegradedReason }, +} + +pub enum VolumeDegradedReason { + /// The Volume is only partially active + UpstairsNotActive, + + /// Not all three downstairs are present for one or more region sets. + ReducedRedundancy, + + /// For one or more region sets, three downstairs are present but one or + /// more is degraded. 
+ DownstairsDegraded, +} + +impl std::fmt::Display for VolumeDegradedReason { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + VolumeDegradedReason::UpstairsNotActive => { + write!(f, "volume is only partially active") + } + + VolumeDegradedReason::ReducedRedundancy => { + write!( + f, + "part of the volume is operating at reduced redundancy", + ) + } + + VolumeDegradedReason::DownstairsDegraded => { + write!(f, "one or more downstairs is degraded") + } + } + } +} + +// Crucible can return a `VolumeInfo` that describes the state of the entire +// Volume in a tree structure. Both Propolis (from `propolis_client::types`) and +// the Crucible Pantry (from `crucible_pantry_client::types`) export this type +// from their import of the `crucible-client-types` crate, meaning two versions +// could exist that Nexus could read. Do the simplest thing: write two versions +// of the function that reads each type returns a `VolumeHealth`. These +// functions currently are the same, but in the future may temporarily look +// different if Propolis and the Crucible Pantry import different +// `crucible-client-types` versions. These types should eventually be derived +// from the same `crucible-client-types` version though as that is equivalent to +// both imports being up to date. + +/// Given a [`propolis_client::types::VolumeInfo`], return if the Volume should +/// be considered healthy by Nexus. 
+pub fn propolis_client_volume_health( + info: &propolis_client::types::VolumeInfo, +) -> VolumeHealth { + use propolis_client::types::DownstairsInfoStatus; + use propolis_client::types::UpstairsInfoStatus; + use propolis_client::types::VolumeInfo; + + let mut parts: VecDeque<&VolumeInfo> = VecDeque::new(); + parts.push_back(info); + + while let Some(part) = parts.pop_front() { + match part { + VolumeInfo::Volume { sub_volumes, read_only_parent } => { + for sub_volume in sub_volumes { + parts.push_back(sub_volume); + } + + if let Some(read_only_parent) = read_only_parent { + parts.push_back(read_only_parent); + } + } + + VolumeInfo::Upstairs { + state, + block_size: _, + upstairs_id: _, + session_id: _, + generation: _, + read_only: _, + encrypted: _, + reconcile_in_progress, + live_repair_in_progress, + targets, + } => { + if *reconcile_in_progress || *live_repair_in_progress { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::DownstairsDegraded, + }; + } + + match state { + UpstairsInfoStatus::Initializing + | UpstairsInfoStatus::GoActive + | UpstairsInfoStatus::Deactivating + | UpstairsInfoStatus::Disabled => { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::UpstairsNotActive, + }; + } + + UpstairsInfoStatus::Active => { + // ok! + } + } + + for target in targets { + match target.state { + DownstairsInfoStatus::Connecting { .. } => { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::ReducedRedundancy, + }; + } + + DownstairsInfoStatus::Active => { + // ok! + } + + DownstairsInfoStatus::LiveRepair => { + return VolumeHealth::Degraded { + reason: + VolumeDegradedReason::DownstairsDegraded, + }; + } + + DownstairsInfoStatus::Stopping => { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::ReducedRedundancy, + }; + } + } + } + } + } + } + + VolumeHealth::Healthy +} + +/// Given a [`crucible_pantry_client::types::VolumeInfo`], return if the Volume +/// should be considered healthy by Nexus. 
+pub fn crucible_pantry_client_volume_health( + info: &crucible_pantry_client::types::VolumeInfo, +) -> VolumeHealth { + use crucible_pantry_client::types::DownstairsInfoStatus; + use crucible_pantry_client::types::UpstairsInfoStatus; + use crucible_pantry_client::types::VolumeInfo; + + let mut parts: VecDeque<&VolumeInfo> = VecDeque::new(); + parts.push_back(info); + + while let Some(part) = parts.pop_front() { + match part { + VolumeInfo::Volume { sub_volumes, read_only_parent } => { + for sub_volume in sub_volumes { + parts.push_back(sub_volume); + } + + if let Some(read_only_parent) = read_only_parent { + parts.push_back(read_only_parent); + } + } + + VolumeInfo::Upstairs { + state, + block_size: _, + upstairs_id: _, + session_id: _, + generation: _, + read_only: _, + encrypted: _, + reconcile_in_progress, + live_repair_in_progress, + targets, + } => { + if *reconcile_in_progress || *live_repair_in_progress { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::DownstairsDegraded, + }; + } + + match state { + UpstairsInfoStatus::Initializing + | UpstairsInfoStatus::GoActive + | UpstairsInfoStatus::Deactivating + | UpstairsInfoStatus::Disabled => { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::UpstairsNotActive, + }; + } + + UpstairsInfoStatus::Active => { + // ok! + } + } + + for target in targets { + match target.state { + DownstairsInfoStatus::Connecting { .. } => { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::ReducedRedundancy, + }; + } + + DownstairsInfoStatus::Active => { + // ok! 
+ } + + DownstairsInfoStatus::LiveRepair => { + return VolumeHealth::Degraded { + reason: + VolumeDegradedReason::DownstairsDegraded, + }; + } + + DownstairsInfoStatus::Stopping => { + return VolumeHealth::Degraded { + reason: VolumeDegradedReason::ReducedRedundancy, + }; + } + } + } + } + } + } + + VolumeHealth::Healthy +} diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 32ce2894b71..7fe564a6b5b 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -564,11 +564,9 @@ impl super::Nexus { // that user's program can act accordingly. In a way, the user's // program is an externally driven saga instead. - // Use reqwest012_client because the rev-pinned - // crucible-pantry-client is still on reqwest 0.12. let client = crucible_pantry_client::Client::new_with_client( &format!("http://{}", endpoint), - self.reqwest012_client.clone(), + self.reqwest_client.clone(), ); let request = crucible_pantry_client::types::BulkWriteRequest { offset: param.offset, diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 6778ea72598..00f8dc27544 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -66,7 +66,7 @@ pub(crate) mod background; mod bfd; mod bgp; mod certificate; -mod crucible; +pub mod crucible; mod deployment; mod device_auth; mod disk; @@ -224,18 +224,8 @@ pub struct Nexus { /// /// (This does not need to be in an `Arc` because `reqwest::Client` uses /// `Arc` internally.) - /// - /// Currently unused because all `new_with_client` call sites use - /// `reqwest012_client` for cross-repo dependencies that are still on - /// reqwest 0.12. This field will be used again once rev pins are updated. - #[allow(dead_code)] reqwest_client: reqwest::Client, - /// `reqwest012::Client` for cross-repo dependencies where the rev-pinned - /// dependency is still on reqwest 0.12. Remove once all rev pins are - /// updated. - reqwest012_client: reqwest012::Client, - /// Client to the timeseries database. 
timeseries_client: oximeter_db::Client, @@ -437,14 +427,6 @@ impl Nexus { .build() .map_err(|e| InlineErrorChain::new(&e).to_string())?; - // reqwest 0.12 client for cross-repo dependencies still on reqwest - // 0.12. Remove once all rev pins are updated. - let reqwest012_client = reqwest012::ClientBuilder::new() - .connect_timeout(std::time::Duration::from_secs(15)) - .timeout(std::time::Duration::from_secs(15)) - .build() - .map_err(|e| InlineErrorChain::new(&e).to_string())?; - // Client to the ClickHouse database. let timeseries_client = match &config.pkg.timeseries_db.address { None => { @@ -549,7 +531,6 @@ impl Nexus { producer_server: std::sync::Mutex::new(None), populate_status, reqwest_client, - reqwest012_client, timeseries_client, webhook_delivery_client, tunables: config.pkg.tunables.clone(), diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index b160c68e4c9..a7775f7f755 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -17,8 +17,9 @@ use nexus_db_queries::db; use nexus_db_queries::db::datastore::CrucibleDisk; use nexus_types::saga::saga_action_failed; use omicron_common::api::external::Error; -use omicron_common::progenitor_operation_retry::ProgenitorOperationRetry; -use omicron_common::progenitor_operation_retry::ProgenitorOperationRetryError; +use omicron_common::backoff::backon_retry_policy_internal_service; +use progenitor_extras::retry::IndefiniteRetryOperationWhileError; +use progenitor_extras::retry::retry_operation_while_indefinitely; use slog::Logger; use slog_error_chain::InlineErrorChain; use std::net::SocketAddrV6; @@ -46,12 +47,12 @@ pub(crate) async fn get_pantry_address( // gone, and we detect "gone" by seeing whether the pantry address we've chosen // is still present when we resolve all the crucible pantry records in DNS. 
// -// This function never returns an error because it's expected to be used with -// `ProgenitorOperationRetry`, which treats an error in the "gone check" as a -// fatal error. We don't want to amplify failures: if something is wrong with -// DNS, we can't go back and choose another pantry anyway, so we'll just keep -// retrying until DNS comes back. All that to say: a failure to resolve DNS is -// treated as "the pantry is not gone". +// This function never returns an error because it's expected to be used within +// a `retry_operation_while_indefinitely` loop, which treats an error in the +// "gone check" as a fatal error. We don't want to amplify failures: if +// something is wrong with DNS, we can't go back and choose another pantry +// anyway, so we'll just keep retrying until DNS comes back. All that to say: a +// failure to resolve DNS is treated as "the pantry is not gone". pub(crate) async fn is_pantry_gone( nexus: &Nexus, pantry_address: SocketAddrV6, @@ -136,20 +137,43 @@ pub(crate) async fn call_pantry_attach_for_volume( let attach_operation = || async { client.attach(&attach_id.to_string(), &attach_request).await }; - let gone_check = - || async { Ok(is_pantry_gone(nexus, pantry_address, log).await) }; - ProgenitorOperationRetry::new(attach_operation, gone_check) - .run(log) - .await - .map_err(|e| { + let gone_check = || async { + let result = match is_pantry_gone(nexus, pantry_address, log).await { + true => GoneCheckResult::Gone, + false => GoneCheckResult::StillAvailable, + }; + + Ok(result) + }; + + retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + attach_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to attach {attach_id} to pantry {pantry_address}, \ + retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); + }, + ) + .await + .map(|_response| ()) + .map_err( + |e: IndefiniteRetryOperationWhileError< + crucible_pantry_client::types::Error, + Error, + >| 
{ saga_action_failed(Error::internal_error(&format!( "pantry attach failed: {}", InlineErrorChain::new(&e) ))) - })?; - - Ok(()) + }, + ) } pub(crate) async fn call_pantry_detach( @@ -157,7 +181,7 @@ pub(crate) async fn call_pantry_detach( log: &slog::Logger, attach_id: Uuid, pantry_address: SocketAddrV6, -) -> Result<(), ProgenitorOperationRetryError> { +) -> Result<(), IndefiniteRetryOperationWhileError> { let endpoint = format!("http://{}", pantry_address); info!(log, "sending detach for {attach_id} to endpoint {endpoint}"); @@ -166,13 +190,32 @@ pub(crate) async fn call_pantry_detach( let detach_operation = || async { client.detach(&attach_id.to_string()).await }; - let gone_check = - || async { Ok(is_pantry_gone(nexus, pantry_address, log).await) }; - ProgenitorOperationRetry::new(detach_operation, gone_check) - .run(log) - .await - .map(|_response| ()) + let gone_check = || async { + let result = match is_pantry_gone(nexus, pantry_address, log).await { + true => GoneCheckResult::Gone, + false => GoneCheckResult::StillAvailable, + }; + + Ok(result) + }; + + retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + detach_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to detach {attach_id} from pantry {pantry_address}, \ + retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); + }, + ) + .await + .map(|_response| ()) } pub(crate) fn find_only_new_region( diff --git a/nexus/src/app/sagas/finalize_disk.rs b/nexus/src/app/sagas/finalize_disk.rs index 91ddb15e8f3..b409fa5fff0 100644 --- a/nexus/src/app/sagas/finalize_disk.rs +++ b/nexus/src/app/sagas/finalize_disk.rs @@ -20,7 +20,6 @@ use nexus_types::saga::saga_action_failed; use omicron_common::api::external; use omicron_common::api::external::Error; use omicron_common::api::external::Name; -use omicron_common::progenitor_operation_retry::ProgenitorOperationRetryError; use serde::Deserialize; use serde::Serialize; use 
slog_error_chain::InlineErrorChain; @@ -316,13 +315,16 @@ async fn sfd_call_pantry_detach_for_disk( ) .await { - // If the detach succeeds, then proceed with finalization. If the detach - // fails because the associated pantry is gone, then we have to be able - // to proceed with finalization in order to be able to eventually delete - // the disk. The associated pantry may have been expunged at any time - // during the import and this part of the code doesn't know the state of - // the disk, but we can't fail and leave the disk un-deleteable. - Ok(()) | Err(ProgenitorOperationRetryError::Gone) => Ok(()), + // If the detach succeeds, then proceed with finalization. + Ok(()) => Ok(()), + + // If the detach fails because the associated pantry is gone, then we + // have to be able to proceed with finalization in order to be able to + // eventually delete the disk. The associated pantry may have been + // expunged at any time during the import and this part of the code + // doesn't know the state of the disk, but we can't fail and leave the + // disk un-deleteable. 
+ Err(e) if e.is_gone() => Ok(()), Err(e) => Err(saga_action_failed(Error::internal_error(&format!( "pantry detach failed: {}", diff --git a/nexus/src/app/sagas/region_replacement_drive.rs b/nexus/src/app/sagas/region_replacement_drive.rs index b55a225d756..dcf22540d72 100644 --- a/nexus/src/app/sagas/region_replacement_drive.rs +++ b/nexus/src/app/sagas/region_replacement_drive.rs @@ -140,6 +140,8 @@ use super::{ ACTION_GENERATE_ID, ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, }; +use crate::app::crucible::VolumeHealth; +use crate::app::crucible::propolis_client_volume_health; use crate::app::db::datastore::CrucibleDisk; use crate::app::db::datastore::InstanceAndActiveVmm; use crate::app::sagas::common_storage::get_pantry_address; @@ -774,7 +776,8 @@ async fn check_from_previous_pantry_step( error!( log, - "pantry returned an error checking on volume: {e}"; + "pantry returned an error checking on volume: \ + {e}"; "region replacement id" => %request_id, "last replacement drive time" => ?step_time, "last replacement drive step" => "pantry", @@ -1592,10 +1595,10 @@ async fn execute_propolis_drive_action( ))), })?; - // If the Volume is active, then reconciliation finished + // If the Volume is healthy, then reconciliation finished // successfully. // - // There's a few reasons it may not be active yet: + // There's a few reasons it may not be healthy yet: // // - Propolis could be shutting down, and tearing down the Upstairs // in the process (which deactivates the Volume) @@ -1604,10 +1607,25 @@ async fn execute_propolis_drive_action( // // - reconciliation could have failed // - // If it's not active, wait until the next invocation of this saga + // If it's not healthy, wait until the next invocation of this saga // to decide what to do next. 
- result.into_inner().active + let health = + propolis_client_volume_health(&result.into_inner().volume_info); + + match health { + VolumeHealth::Healthy => { + // If "healthy" is seen after we have replaced a downstairs, + // then we're done waiting - the replacement is done + true + } + + VolumeHealth::Degraded { reason } => { + info!(log, "volume is not healthy: {reason}"); + + false + } + } } ReplaceResult::Missing => { diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 1debdf8f33d..86ddf534c65 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -106,12 +106,12 @@ use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_types::external_api::{disk, snapshot}; use nexus_types::saga::saga_action_failed; use omicron_common::api::external::Error; -use omicron_common::progenitor_operation_retry::ProgenitorOperationRetry; -use omicron_common::progenitor_operation_retry::ProgenitorOperationRetryError; use omicron_common::{ api::external, backoff::backon_retry_policy_internal_service, }; use omicron_uuid_kinds::{GenericUuid, PropolisUuid, VolumeUuid}; +use progenitor_extras::retry::GoneCheckResult; +use progenitor_extras::retry::IndefiniteRetryOperationWhileError; use progenitor_extras::retry::retry_operation_while_indefinitely; use rand::{RngCore, SeedableRng, rngs::StdRng}; use serde::Deserialize; @@ -869,14 +869,13 @@ async fn ssc_send_snapshot_request_to_sled_agent( .await }; - let notify_log = log.clone(); retry_operation_while_indefinitely( backon_retry_policy_internal_service(), snapshot_operation, gone_check, |notification| { slog::warn!( - notify_log, + log, "failed to issue VMM disk snapshot request, retrying in {:?}", notification.delay; InlineErrorChain::new(¬ification.error), @@ -1180,8 +1179,11 @@ async fn ssc_call_pantry_attach_for_disk_undo( ) .await { + Ok(()) => (), + // We can treat the pantry being permanently gone as success. 
- Ok(()) | Err(ProgenitorOperationRetryError::Gone) => (), + Err(err) if err.is_gone() => (), + Err(err) => { return Err(anyhow!( "failed to detach disk {} from pantry at {}: {}", @@ -1235,15 +1237,42 @@ async fn ssc_call_pantry_snapshot_for_disk( ) .await }; - let gone_check = - || async { Ok(is_pantry_gone(nexus, pantry_address, log).await) }; - ProgenitorOperationRetry::new(snapshot_operation, gone_check) - .run(log) - .await - .map_err(|e| { - saga_action_failed(Error::internal_error(&e.to_string())) - })?; + let gone_check = || async { + let result = match is_pantry_gone(nexus, pantry_address, log).await { + true => GoneCheckResult::Gone, + false => GoneCheckResult::StillAvailable, + }; + + Ok(result) + }; + + retry_operation_while_indefinitely( + backon_retry_policy_internal_service(), + snapshot_operation, + gone_check, + |notification| { + slog::warn!( + log, + "failed to issue pantry disk snapshot request, \ + retrying in {:?}", + notification.delay; + InlineErrorChain::new(¬ification.error), + ); + }, + ) + .await + .map_err( + |e: IndefiniteRetryOperationWhileError< + crucible_pantry_client::types::Error, + Error, + >| { + saga_action_failed(Error::internal_error(&format!( + "pantry snapshot request failed: {}", + InlineErrorChain::new(&e) + ))) + }, + )?; Ok(()) } diff --git a/package-manifest.toml b/package-manifest.toml index 59c82a79971..a806e70e9c0 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -624,10 +624,10 @@ only_for_targets.image = "standard" # 3. 
Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "7103cd3a3d7b0112d2949dd135db06fef0c156bb" +source.commit = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "8e245572e4b8d1c018884268a6afdf7f79efc22e61b4ed5b5526957bf61ccdcd" +source.sha256 = "918a3db80758e93b2f01c8bd78a358065f4636c4bda387e12948830d1028909a" output.type = "zone" output.intermediate_only = true @@ -636,10 +636,10 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "7103cd3a3d7b0112d2949dd135db06fef0c156bb" +source.commit = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "7998ddb0bda4c97e3d5fec7c8079bbdfb27ef06dba69ab2867278dc2cd7544f4" +source.sha256 = "3567a37b4136b03fd08032ec88886223c8b33c227a7cb9e1ebdbe3e0198d6b98" output.type = "zone" output.intermediate_only = true @@ -653,10 +653,10 @@ service_name = "crucible_utils" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "7103cd3a3d7b0112d2949dd135db06fef0c156bb" +source.commit = "bd9a0e2abe6b6b89aec8c85f4ee57474144ed150" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-utils.sha256.txt -source.sha256 = "cc661b84fd258467ec1961e8c9879f76d2d07903fb9161012afa75c37490e24f" +source.sha256 = "e1b87fa2a3b916ed67b31de02238691ddbf59def31336c1f73e182fd067a5c79" output.type = "tarball" # Refer to @@ -667,10 +667,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" 
-source.commit = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" +source.commit = "58ab73bde89ade637b0ca8118682ee9575da6c2a" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image/<commit>/propolis-server.sha256.txt -source.sha256 = "f4c742c766a7260f20d6ef3c0d68c362e7ba768037836bfa09abdf1c3c72e605" +source.sha256 = "7344b58d0243a293d0eceb4e323c7f4efdce3e5effb549c593478662de24add5" output.type = "zone" [package.mg-ddm-gz] diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index ddffa9ab618..e31ae2caba0 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -26,8 +26,6 @@ cfg-if.workspace = true chrono.workspace = true clap.workspace = true clickhouse-admin-types.workspace = true -# Only used by the simulated sled agent. -crucible-agent-client.workspace = true derive_more.workspace = true dice-verifier = { workspace = true, features = ["ipcc", "mock"] } display-error-chain.workspace = true @@ -80,7 +78,6 @@ range-requests.workspace = true repo-depot-api.workspace = true repo-depot-client.workspace = true reqwest = { workspace = true, features = ["rustls", "stream"] } -reqwest012 = { workspace = true } schemars = { workspace = true, features = ["chrono", "uuid1"] } serde.workspace = true serde_json = { workspace = true, features = ["raw_value"] } @@ -132,6 +129,10 @@ zeroize.workspace = true zip.workspace = true zone.workspace = true +# Only used by the simulated sled agent. 
+crucible-agent-client.workspace = true +crucible-client-types.workspace = true + [target.'cfg(target_os = "illumos")'.dependencies] opte-ioctl.workspace = true diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index 231593c74af..947bfaccc86 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -89,6 +89,8 @@ pub struct VolumeStatus { /// How many job handles are there for this Volume? 
pub num_job_handles: usize, + + pub info: crucible_client_types::VolumeInfo, } /// Get a current Volume's status diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 7e2bfacfe5b..07c37570b4e 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -52,6 +52,7 @@ use slog::Logger; use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; +use std::collections::VecDeque; use std::net::{IpAddr, SocketAddr}; use std::str::FromStr; use std::sync::Arc; @@ -2018,6 +2019,137 @@ pub struct Pantry { inner: Mutex, } +/// Based on the argument VolumeConstructionRequest, return simulated VolumeInfo +/// in the same shape that describes a fully active Volume. +fn generate_new_volume_info( + value: &VolumeConstructionRequest, +) -> crucible_client_types::VolumeInfo { + let mut traverse: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + let mut flattened: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + let mut output: VecDeque<crucible_client_types::VolumeInfo> = + VecDeque::new(); + + traverse.push_back(value); + + while let Some(part) = traverse.pop_back() { + flattened.push_back(part); + + match part { + VolumeConstructionRequest::Volume { + id: _, + block_size: _, + sub_volumes, + read_only_parent, + } => { + for sub_volume in sub_volumes { + traverse.push_back(sub_volume); + } + + if let Some(read_only_parent) = read_only_parent { + traverse.push_back(read_only_parent); + } + } + + VolumeConstructionRequest::Url { .. } + | VolumeConstructionRequest::File { .. } + | VolumeConstructionRequest::Region { .. 
} => {} + } + } + + while let Some(part) = flattened.pop_back() { + match part { + VolumeConstructionRequest::Volume { + id: _, + block_size: _, + sub_volumes, + read_only_parent, + } => { + let mut subs = Vec::with_capacity(sub_volumes.len()); + let mut rop = None; + + if read_only_parent.is_some() { + rop = Some(Box::new(output.pop_back().unwrap())); + } + + for _ in 0..sub_volumes.len() { + subs.push(output.pop_back().unwrap()); + } + + subs.reverse(); + + let info = crucible_client_types::VolumeInfo::Volume { + sub_volumes: subs, + read_only_parent: rop, + }; + + output.push_back(info); + } + + VolumeConstructionRequest::Region { + block_size, + blocks_per_extent: _, + extent_count: _, + opts: + crucible_client_types::CrucibleOpts { + id, + target, + lossy: _, + flush_timeout: _, + key, + cert_pem: _, + key_pem: _, + root_cert_pem: _, + control: _, + read_only, + }, + generation, + } => { + let info = crucible_client_types::VolumeInfo::Upstairs { + state: crucible_client_types::UpstairsInfoStatus::Active, + block_size: Some(*block_size), + upstairs_id: *id, + session_id: Uuid::new_v4(), + generation: *generation, + read_only: *read_only, + encrypted: key.is_some(), + reconcile_in_progress: false, + live_repair_in_progress: false, + targets: target.iter().map(|target| + crucible_client_types::DownstairsInfo { + // TODO update this when region id is part of the + // construction request + region_id: Some(Uuid::new_v4()), + + target_addr: Some(*target), + + repair_addr: Some({ + // TODO update this when repair address is part + // of the construction request + let mut target: SocketAddr = *target; + target.set_port(target.port() + 4000); + target + }), + + state: + crucible_client_types::DownstairsInfoStatus::Active, + } + ).collect(), + }; + + output.push_back(info); + } + + VolumeConstructionRequest::Url { .. } + | VolumeConstructionRequest::File { .. 
} => { + panic!("should not see variant {part:?}"); + } + } + } + + assert_eq!(output.len(), 1); + output.pop_front().unwrap() +} + impl Pantry { pub fn new(simulated_upstairs: Arc) -> Self { Self { @@ -2059,6 +2191,8 @@ impl Pantry { ) -> Result<()> { let mut inner = self.inner.lock().unwrap(); + let info = generate_new_volume_info(&volume_construction_request); + inner.volumes.insert( volume_id, PantryVolume { @@ -2067,6 +2201,7 @@ impl Pantry { active: true, seen_active: true, num_job_handles: 0, + info, }, activate_job: None, }, @@ -2089,6 +2224,8 @@ impl Pantry { let auto_activate_volumes = inner.auto_activate_volumes; + let info = generate_new_volume_info(&volume_construction_request); + inner.volumes.insert( volume_id, PantryVolume { @@ -2097,6 +2234,7 @@ impl Pantry { active: auto_activate_volumes, seen_active: auto_activate_volumes, num_job_handles: 1, + info, }, activate_job: Some(activate_job_id.clone()), }, diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a2e3bbe507f..5f797b97a36 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -84,7 +84,7 @@ log = { version = "0.4.29", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.8.0" } newtype-uuid = { version = "1.3.2", features = ["proptest1"] } -nix = { version = "0.31.2", features = ["fs", "net", "signal"] } +nix = { version = "0.31.2", features = ["feature", "fs", "net", "signal", "uio"] } num-bigint-dig = { version = "0.8.6", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } @@ -111,7 +111,7 @@ rand_chacha-468e82937335b1c9 = { package = "rand_chacha", version = "0.3.1", def regex = { version = "1.12.3" } regex-automata = { version = "0.4.14", default-features = false, features 
= ["dfa", "hybrid", "meta", "nfa", "perf", "std", "unicode"] } regex-syntax = { version = "0.8.10" } -reqwest-594e8ee84c453af0 = { package = "reqwest", version = "0.13.2", default-features = false, features = ["blocking", "cookies", "http2", "json", "query", "rustls", "stream"] } +reqwest-594e8ee84c453af0 = { package = "reqwest", version = "0.13.2", features = ["blocking", "cookies", "json", "query", "stream"] } reqwest-5ef9efb8ec2df382 = { package = "reqwest", version = "0.12.28", features = ["blocking", "json", "rustls-tls", "stream"] } rsa = { version = "0.9.10", features = ["serde", "sha2"] } rustc-hash = { version = "2.1.1" } @@ -146,7 +146,6 @@ tokio-rustls = { version = "0.26.4", default-features = false, features = ["logg tokio-stream = { version = "0.1.18", features = ["net", "sync"] } tokio-util = { version = "0.7.18", features = ["codec", "io-util", "rt", "time"] } toml = { version = "0.7.8" } -toml_datetime-ca01ad9e24f5d932 = { package = "toml_datetime", version = "0.7.5", features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.27", features = ["serde"] } toml_parser = { version = "1.0.9" } tough = { version = "0.22.0", default-features = false, features = ["http"] } @@ -232,7 +231,7 @@ log = { version = "0.4.29", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.8.0" } newtype-uuid = { version = "1.3.2", features = ["proptest1"] } -nix = { version = "0.31.2", features = ["fs", "net", "signal"] } +nix = { version = "0.31.2", features = ["feature", "fs", "net", "signal", "uio"] } num-bigint-dig = { version = "0.8.6", default-features = false, features = ["i128", "prime", "serde", "u64_digit", "zeroize"] } num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.45", default-features = false, features = ["i128"] } @@ -259,7 +258,7 @@ rand_chacha-468e82937335b1c9 = { package = 
"rand_chacha", version = "0.3.1", def regex = { version = "1.12.3" } regex-automata = { version = "0.4.14", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "std", "unicode"] } regex-syntax = { version = "0.8.10" } -reqwest-594e8ee84c453af0 = { package = "reqwest", version = "0.13.2", default-features = false, features = ["blocking", "cookies", "http2", "json", "query", "rustls", "stream"] } +reqwest-594e8ee84c453af0 = { package = "reqwest", version = "0.13.2", features = ["blocking", "cookies", "json", "query", "stream"] } reqwest-5ef9efb8ec2df382 = { package = "reqwest", version = "0.12.28", features = ["blocking", "json", "rustls-tls", "stream"] } rsa = { version = "0.9.10", features = ["serde", "sha2"] } rustc-hash = { version = "2.1.1" } @@ -297,7 +296,6 @@ tokio-rustls = { version = "0.26.4", default-features = false, features = ["logg tokio-stream = { version = "0.1.18", features = ["net", "sync"] } tokio-util = { version = "0.7.18", features = ["codec", "io-util", "rt", "time"] } toml = { version = "0.7.8" } -toml_datetime-ca01ad9e24f5d932 = { package = "toml_datetime", version = "0.7.5", features = ["serde"] } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.27", features = ["serde"] } toml_parser = { version = "1.0.9" } tough = { version = "0.22.0", default-features = false, features = ["http"] } @@ -307,8 +305,8 @@ usdt = { version = "0.6.0" } usdt-impl-3b31131e45eafb45 = { package = "usdt-impl", version = "0.6.0", default-features = false, features = ["des"] } usdt-impl-d8f496e17d97b5cb = { package = "usdt-impl", version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.23.0", features = ["serde", "v4"] } -vergen = { version = "9.0.6", features = ["cargo", "rustc"] } -vergen-lib = { version = "0.1.6", features = ["cargo", "git", "rustc"] } +vergen = { version = "9.1.0", features = ["cargo", "rustc"] } +vergen-lib = { version = "9.1.0", features = ["cargo", "git", 
"rustc"] } winnow = { version = "0.7.14" } x509-cert = { version = "0.2.5" } zerocopy = { version = "0.8.40", default-features = false, features = ["derive", "simd"] } @@ -406,7 +404,7 @@ object = { version = "0.37.3", default-features = false, features = ["read", "st rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.44", features = ["fs", "stdio", "system", "termios"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.1.3", features = ["fs", "stdio", "termios"] } tokio-rustls = { version = "0.26.4", default-features = false, features = ["aws-lc-rs"] } -toml_datetime-3b31131e45eafb45 = { package = "toml_datetime", version = "0.6.11", default-features = false, features = ["serde"] } +toml_datetime = { version = "0.6.11", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] @@ -423,7 +421,7 @@ object = { version = "0.37.3", default-features = false, features = ["read", "st rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.44", features = ["fs", "stdio", "system", "termios"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.1.3", features = ["fs", "stdio", "termios"] } tokio-rustls = { version = "0.26.4", default-features = false, features = ["aws-lc-rs"] } -toml_datetime-3b31131e45eafb45 = { package = "toml_datetime", version = "0.6.11", default-features = false, features = ["serde"] } +toml_datetime = { version = "0.6.11", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } ### END HAKARI SECTION