From 7aea369ae954f5ffbe5777b51b440452e1399a27 Mon Sep 17 00:00:00 2001 From: Maurice Carrier Date: Mon, 8 Jun 2026 21:53:44 -0400 Subject: [PATCH 1/4] fix(hooks): clear leaked git env before xcodebuild in pre-push-test-gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git invokes pre-push hooks with GIT_DIR/GIT_WORK_TREE/GIT_INDEX_FILE exported. xcodebuild's SwiftPM resolution shells out to git internally, and an inherited GIT_WORK_TREE makes local-package paths resolve to "/", failing with: "the package manifest at '/Package.swift' doesn't exist". This blocked EVERY push that touches a Palace/*.swift file from a linked git worktree (and is latent on the main checkout under some git versions) — the same xcodebuild command run from a plain shell succeeds. Fix: wrap the gate's xcodebuild in `env -u GIT_DIR -u GIT_WORK_TREE -u GIT_INDEX_FILE -u GIT_PREFIX -u GIT_EXEC_PATH`. Also scope a per-checkout DerivedData/SourcePackages dir for linked-worktree pushes so they don't collide with the main checkout's warm cache. Reproduced: `-resolvePackageDependencies` succeeds with the git env cleared and fails (/Package.swift) with GIT_DIR+GIT_WORK_TREE set. Verified end-to-end: PR #1052's worktree push now passes the gate green (`PASS — 4 class(es) green`) instead of the false /Package.swift failure. **Scope:** single hook script. **Not done:** a scripts/tests/ fixture asserting the env-clear (follow-up per green-board contract #4 — tooling under CI). --- scripts/pre-push-test-gate.sh | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/scripts/pre-push-test-gate.sh b/scripts/pre-push-test-gate.sh index 1c9f3d6e0..8fff371ce 100755 --- a/scripts/pre-push-test-gate.sh +++ b/scripts/pre-push-test-gate.sh @@ -165,6 +165,21 @@ else DEST_ARGS=(-destination 'platform=iOS Simulator,name=iPhone 16 Pro') fi +# When the push originates from a LINKED git worktree, the shared default +# DerivedData was resolved against a different checkout root, so xcodebuild's +# SPM graph points at a stale source-package path and package resolution dies +# with "the package manifest at '/Package.swift' doesn't exist" — a false gate +# failure (the worktree itself builds fine with an isolated DerivedData). Scope +# a per-checkout DerivedData + SourcePackages dir for worktree pushes only; the +# main checkout keeps its warm default cache so the gate stays fast there. +declare -a DERIVED_ARGS=() +if [[ "$(git rev-parse --git-common-dir 2>/dev/null)" != "$(git rev-parse --git-dir 2>/dev/null)" ]]; then + _repo_slug="$(printf '%s' "$REPO_DIR" | shasum 2>/dev/null | cut -c1-12)" + _dd_dir="${TMPDIR:-/tmp}/prepush-dd-${_repo_slug:-wt}" + DERIVED_ARGS=(-derivedDataPath "$_dd_dir" -clonedSourcePackagesDirPath "$_dd_dir/SourcePackages") + echo "[pre-push-test-gate] linked worktree detected — isolating DerivedData at $_dd_dir" >&2 +fi + declare -a ONLY_TESTING_ARGS=() for cls in "${CLASSES[@]}"; do ONLY_TESTING_ARGS+=("-only-testing:PalaceTests/$cls") @@ -189,10 +204,21 @@ run_with_timeout() { } # Quiet xcodebuild — we only care about pass/fail at the gate. -if run_with_timeout "$TIMEOUT_SECS" xcodebuild \ +# Clear the git hook environment before xcodebuild. git invokes pre-push +# hooks with GIT_DIR/GIT_WORK_TREE/GIT_INDEX_FILE exported; xcodebuild's SPM +# resolution shells out to git internally, and an inherited GIT_WORK_TREE +# makes the local-package path resolve to "/" — failing with "the package +# manifest at '/Package.swift' doesn't exist". This is THE reason a push from +# a linked worktree (or even the main checkout, under some git versions) hits +# that error while the same xcodebuild run from a plain shell succeeds. +# Unsetting these lets xcodebuild resolve packages against the project root. +if run_with_timeout "$TIMEOUT_SECS" \ + env -u GIT_DIR -u GIT_WORK_TREE -u GIT_INDEX_FILE -u GIT_PREFIX -u GIT_EXEC_PATH \ + xcodebuild \ -project Palace.xcodeproj \ -scheme Palace \ "${DEST_ARGS[@]}" \ + "${DERIVED_ARGS[@]}" \ "${ONLY_TESTING_ARGS[@]}" \ test \ -quiet >/tmp/pre-push-test-gate.log 2>&1; then From c4e7cdf71238941ce5b2d3930754a197d9623536 Mon Sep 17 00:00:00 2001 From: Maurice Carrier Date: Tue, 9 Jun 2026 11:57:15 -0400 Subject: [PATCH 2/4] fix(ci): enable per-test timeouts in the unit-test runner so a hung test fails fast + retries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single deadlocked test currently hangs the ENTIRE build-and-test job until the 60-minute job timeout kills it — the `-retry-tests-on-failure -test-iterations 3` net never engages because the process is stuck, not failed. This is how the pre-existing CatalogPreloaderTests order/timing hang (passes in isolation, in focused pairs, and in a full LOCAL suite run — i.e. CI-runner-timing-specific) reddens every PR's board with a 1h+ job kill. Fix: add `-test-timeouts-enabled YES -default-test-execution-time-allowance 120 -maximum-test-execution-time-allowance 300` (the unit runner never adopted the pattern ui-testing.yml already uses). A hung test now fails at 120s and the existing retry re-runs it — a timing-flaky hang passes on retry (board green); a deterministic hang fails all 3 (board stays red, correctly). Caps worst-case single-test cost at ~360s vs 60min. This does NOT mask pollution (green-board contract #2): the underlying CatalogPreloader/loadCatalogs polluter still needs a root fix and is tracked separately. This change only ensures a hang fails-fast into the retry net instead of killing the job before the net can act (contract #1). **Scope:** single CI runner script. **Not done:** root-cause of the loadCatalogs hang (not locally reproducible; tracked as a test-pollution item). --- scripts/xcode-test-optimized.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/xcode-test-optimized.sh b/scripts/xcode-test-optimized.sh index 08d4b7239..498e22bbf 100755 --- a/scripts/xcode-test-optimized.sh +++ b/scripts/xcode-test-optimized.sh @@ -76,6 +76,9 @@ if [ "${BUILD_CONTEXT:-}" == "ci" ]; then -enableCodeCoverage YES \ -retry-tests-on-failure \ -test-iterations 3 \ + -test-timeouts-enabled YES \ + -default-test-execution-time-allowance 120 \ + -maximum-test-execution-time-allowance 300 \ -parallel-testing-enabled NO \ CODE_SIGNING_REQUIRED=NO \ CODE_SIGNING_ALLOWED=NO \ From 86ae13a60d7bc89c7954c90cc562693a774a8bf1 Mon Sep 17 00:00:00 2001 From: Maurice Carrier Date: Tue, 9 Jun 2026 13:02:52 -0400 Subject: [PATCH 3/4] fix(build): drop redundant TriageBotCore link from PalaceTests (duplicate ObjC class registration) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PalaceTests linked the TriageBotCore SPM product directly even though its test-host app (Palace) already links it. At runtime both copies loaded into the test host, registering TriageBotCore's ObjC classes twice: Class _TtC13TriageBotCore...BundleFinder is implemented in both PackageFrameworks/TriageBotCore_...PackageProduct.framework AND Palace.app/Palace.debug.dylib — "may cause spurious casting failures and mysterious crashes." That duplicate registration is a prime suspect for the intermittent CI-runner-timing test instability (hangs whose victim varies by run). PalaceTests imports TriageBot in ZERO files — the link was pure redundancy; the bundle resolves the symbols from its host app at load time. Removed the TriageBotCore product dependency + framework build-file from the PalaceTests target (via the xcodeproj gem; 8-line surgical pbxproj delete). The `.dynamic` library-type fix is blocked by SPM's product-name==target-name rule, so de-linking the test target is the correct fix. Verified: TPPSettingsTests + CatalogPreloaderTests build, link (resolving TriageBotCore from the host), and pass — and the "implemented in both" runtime warning is GONE (0 occurrences, was 2 per launch). testUseBetaLibraries_- publishesViaCombine (a local hang victim) passes in 0.68s. **Scope:** PalaceTests build-config only; no production or test source change. --- Palace.xcodeproj/project.pbxproj | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Palace.xcodeproj/project.pbxproj b/Palace.xcodeproj/project.pbxproj index 5457f3a56..b5b64d27c 100644 --- a/Palace.xcodeproj/project.pbxproj +++ b/Palace.xcodeproj/project.pbxproj @@ -458,7 +458,6 @@ 5CDD263A7FFB392F96FA4F03 /* LegacySAMLAuthAdapter.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4F432B33DEBD1715E2687C7 /* LegacySAMLAuthAdapter.swift */; }; 5CEBAAD3ED3AF6889F45F38C /* UserAccountPublisherTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3332DED915E45913181C82D /* UserAccountPublisherTests.swift */; }; 5CEE9677D8514088B0155974 /* UserRetryTrackerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 978A163EF2834D9087A9CDEB /* UserRetryTrackerTests.swift */; }; - 5D0130219AE7A8767A2B5C3B /* TriageBotCore in Frameworks */ = {isa = PBXBuildFile; productRef = B6BB8DA6C2692FC1F126876A /* TriageBotCore */; }; 5D05015D55C6DE54BB6DB0DA /* TPPSignInBusinessLogicExtendedTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1DC9FC06EF6CCEED742C0D44 /* TPPSignInBusinessLogicExtendedTests.swift */; }; 5D1B142A22CC179F0006C964 /* TPPAlertUtils.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5D1B142922CC179F0006C964 /* TPPAlertUtils.swift */; }; 5D3A28CC22D3DA850042B3BD /* UserProfileDocument.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5D3A28CB22D3DA850042B3BD /* UserProfileDocument.swift */; }; @@ -3301,7 +3300,6 @@ 0725F3E330D15C718C170C94 /* PalaceCatalog in Frameworks */, 811E00F5F755273F726941CC /* PalaceAuth in Frameworks */, 89382FCA6DC82E0D3906130A /* PalaceReadingPosition in Frameworks */, - 5D0130219AE7A8767A2B5C3B /* TriageBotCore in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -6317,7 +6315,6 @@ 7081635582F472450D1A1B85 /* PalaceCatalog */, 3D8B27DA10A114B76EFB41A1 /* PalaceAuth */, 5F61B627DDE72CE091C181D5 /* PalaceReadingPosition */, - B6BB8DA6C2692FC1F126876A /* TriageBotCore */, ); productName = SimplyETests; productReference = 2D2B47721D08F807007F7764 /* PalaceTests.xctest */; @@ -9434,11 +9431,6 @@ package = EA43715A76E6B1D14299A3A1 /* XCLocalSwiftPackageReference "PalaceReadingPosition" */; productName = PalaceReadingPosition; }; - B6BB8DA6C2692FC1F126876A /* TriageBotCore */ = { - isa = XCSwiftPackageProductDependency; - package = DAAF04A6D259B174CFB2811E /* XCLocalSwiftPackageReference "PalaceTriageBot" */; - productName = TriageBotCore; - }; B7890FD5F70ADDBD798D7FC7 /* PalaceLogging */ = { isa = XCSwiftPackageProductDependency; package = AD4024ABF2AB1F4247E45BD3 /* XCLocalSwiftPackageReference "PalaceLogging" */; From 43eea753af2ef87405d24ae7274f895fa082b6da Mon Sep 17 00:00:00 2001 From: Maurice Carrier Date: Tue, 9 Jun 2026 13:59:30 -0400 Subject: [PATCH 4/4] docs(ci): mandate full-CI-suite local validation; -only-testing is a spot check, not "green" Codifies the rule whose violation caused a false "verified" claim during PP-4542: a `-only-testing:PalaceTests` run (2359 tests) was reported as the full local suite passing, when (a) CI runs the whole Palace scheme across all test targets with `-test-iterations 3` (~7121 executions), and (b) that subset run had actually hung + `** TEST FAILED **`. Rule: validate via `scripts/xcode-test-optimized.sh` (CI parity) or `scripts/verify-pr.sh --quick` (full scheme); treat `-only-testing` as a scoped spot-check only; never report a subset (or a timeout/restart run) as green; read the top-level rollup, not summed per-suite counts. **Scope:** CLAUDE.md guidance only. Companion to the CI-runner fixes in this PR. --- CLAUDE.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 0370f4855..323f7491f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -42,12 +42,32 @@ xcodebuild -project Palace.xcodeproj -scheme Palace \ xcodebuild -project Palace.xcodeproj -scheme Palace \ -destination 'platform=iOS Simulator,name=iPhone 16 Pro' test -# Run a single test class +# Run a single test class — SPOT CHECK ONLY, never "validation" (see rule below) xcodebuild -project Palace.xcodeproj -scheme Palace \ -destination 'platform=iOS Simulator,name=iPhone 16 Pro' \ -only-testing:PalaceTests/MyTestClass test ``` +**Local validation MUST run the same full suite CI runs — never a `-only-testing` +subset.** CI executes the whole `Palace` scheme across ALL test targets +(`PalaceTests` + `TenPrintCoverTests`) with `-test-iterations 3 +-retry-tests-on-failure` via `scripts/xcode-test-optimized.sh` (~7k executions). +Before claiming a change is verified / green: +- Run `scripts/xcode-test-optimized.sh` (CI parity) **or** `scripts/verify-pr.sh + --quick` (full-scheme single pass). A `-only-testing:` run is a scoped + spot-check for fast iteration/mutation/debugging — it is NEVER "the suite" and + must never be reported as a full or green pass. +- Confirm the run ended `** TEST SUCCEEDED **` with **no** `exceeded execution + time allowance` or `Restarting after … test timeout` lines. A timeout/restart + is a FAILURE even if the final assertion tally reads "0 failures." +- Read the top-level `Test Suite 'All tests'/'Selected tests'` rollup for the + count; never sum per-suite `Executed N` lines (they double/triple-count). + +Incident (PP-4542, 2026-06-09): a `-only-testing:PalaceTests` run was reported as +"full local suite 2359 / 0 failures, PRs verifiably correct." It was one bundle +(CI runs 7121) AND had actually hung + `** TEST FAILED **`. A subset run, itself +failed, cited as whole-suite green. Don't repeat it. + - Xcode 26, iOS 16.0+ deployment target (CI release path: `macos-26` + `xcode-version: '26'`) - Two targets: `Palace` (full DRM) and `Palace-noDRM` (open-source) - DRM builds run natively on Apple Silicon — Rosetta is no longer required