diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile
new file mode 100644
index 0000000000..5035c63143
--- /dev/null
+++ b/ci/Jenkinsfile
@@ -0,0 +1,1157 @@
+// FINN build pipeline.
+//
+// The shard matrix, timing helpers, and the STAGES/BOARDS tables live in
+// the ci/finn_ci package. Validate calls `python3 -m finn_ci validate-config`
+// once and the returned shard_plan is the single source of truth for the
+// branch/stash matrix, so this file carries no row-expansion logic. Shared
+// Groovy utilities live in ci/common.groovy. This file is
+// internally segmented by '// =====' banners and is meant to be read
+// top-down: configuration first, pipeline next, per-stage helpers grouped after,
+// helpers at the bottom.
+
+import groovy.transform.Field
+
+
+// ============================================================================
+// Module-level imports and shared-helper cache
+// ============================================================================
+// common.groovy is loaded once on the first active node and reused from
+// every subsequent stage. Several wrappers below run from no-agent
+// orchestration code, so the loaded module must remain accessible.
+// @Field is load-bearing here: a plain top-level `def` is a local of the
+// script's run() method, so loadCommonOnce() and the other methods in this
+// file could not see it. @Field promotes it to a script-level field that
+// every method shares, which keeps the load() call to one per build.
+
+@Field
+def _common = null  // populated lazily by loadCommonOnce()
+
+// All finn_ci CLI calls go through this one invocation so the package path is
+// defined once. Runs from the workspace root with ci/ on PYTHONPATH, so the
+// finn_ci package under ci/ imports by bare name.
+@Field
+String CI_TOOL = 'PYTHONPATH=ci python3 -m finn_ci'
+
+private def loadCommonOnce() {
+  if (_common != null) { return _common }  // already cached by an earlier call
+  return (_common = load('ci/common.groovy'))
+}
+
+
+// ============================================================================
+// Configuration fields
+// ============================================================================
+// Populated once during Validate from the validate-config JSON payload, then
+// read by every later stage. Do not edit these in the middle of the pipeline.
+
+// The per-shard branch list (each entry: stage, stash, marker, workers,
+// numShards, shardId, coverage[, coverageFile][, zipArtifacts]).
+// Computed in Python so this file has no IntRange/closure-serialisation
+// loops to keep CPS-safe.
+@Field
+List<Map> SHARD_PLAN = []
+
+// Active-row display names (pre-filter) for the STAGE_FILTER miss message.
+@Field
+List<String> SHARD_CANDIDATES = []
+
+// Flattened per-(row, board) build-to-HW handoff list for the active rows.
+@Field
+List<Map> ZIP_ARTIFACTS = []
+
+@Field
+List<String> ENABLED_PARAMS = []  // payload.enabled_params, set by loadStageConfig()
+
+@Field
+String FINN_CI_TIMINGS_MASTER = ''  // master timings path, set by prepareTimingState(); '' when unavailable
+
+@Field
+String FINN_CI_TIMINGS_SNAPSHOT = ''  // per-build snapshot path, set by prepareTimingState(); '' when unavailable
+
+@Field
+String FINN_CI_JOB_KEY = ''  // payload.job_key, set by loadStageConfig()
+
+// Minimum free space (GB) below which a shard refuses to start; 120 when unset.
+// Read per shard in runShard, so a non-integer or negative value (a
+// meaningless threshold) fails that shard loudly. Lazy helper, not a @Field
+// initialiser: Jenkins CPS cannot invoke a user-defined method at script load.
+int finnCiMinFreeGB() {
+  String raw = (env.FINN_CI_MIN_FREE_GB ?: '').trim()
+  if (!raw) { return 120 }
+  if (raw.isInteger() && raw.toInteger() >= 0) { return raw.toInteger() }
+  error "FINN_CI_MIN_FREE_GB='${raw}' is not a non-negative integer (units, whitespace, etc. are not accepted)"
+}
+
+// Agent label every non-board stage runs on. Static-final so declarative
+// `agent { label "${FINN_BUILD_LABEL}" }` blocks can read it at pipeline-
+// construction time. A relabel is a one-line edit here. Cannot be
+// env-overridden: `env` is not bound during @Field initialisation under
+// Jenkins CPS.
+@Field
+static final String FINN_BUILD_LABEL = 'finn-build'  // label used by the agent/node blocks in this file
+
+// Optional setup-local.sh stage label and cache root. Both are helpers rather
+// than @Field values so env is resolved inside the running pipeline.
+String finnLocalBuildLabel() { return env.FINN_LOCAL_BUILD_LABEL ?: FINN_BUILD_LABEL }
+String finnCiLocalCacheRoot() {
+  String tmpBase = env.WORKSPACE_TMP ?: '/tmp'   // parent used when no override is set
+  return env.FINN_CI_LOCAL_CACHE_ROOT ?: "${tmpBase}/finn-ci-cache"
+}
+
+
+// ============================================================================
+// NFS layout
+// ============================================================================
+// FINN_CI_NFS_ROOT is the primary CI env var an operator sets; every shared
+// subtree below derives from it (optional overrides aside). The README
+// "Storage and retention" section documents the full subtree layout and
+// retention policy. All resolvers below return '' in local fallback mode,
+// which callers must handle.
+
+String finnCiNfsRoot() { loadCommonOnce().finnCiNfsRoot() }
+String finnAgentCachesDir(String node) { loadCommonOnce().finnAgentCachesDir(node) }
+String finnDockerImagesRoot() { loadCommonOnce().finnDockerImagesRoot() }
+String finnDockerImagesDir(String jobKey) { loadCommonOnce().finnDockerImagesDir(jobKey) }
+String finnArtifactsRoot() { loadCommonOnce().finnArtifactsRoot() }
+String finnCiStateRoot() { loadCommonOnce().finnCiStateRoot() }
+String finnCiStateDir(String jobKey) { loadCommonOnce().finnCiStateDir(jobKey) }
+
+
+// ============================================================================
+// Pipeline declaration
+// ============================================================================
+// Each stage delegates to a single helper to keep this block scannable.
+
+pipeline {
+  agent none  // no global agent: each stage (or helper, via node()) allocates its own
+  options {
+    timeout(time: 72, unit: 'HOURS')  // ceiling for the whole pipeline run
+  }
+  parameters {
+    // STAGES choices mirror finn_ci.config.jenkins_stage_choices(). Drift
+    // between this list and the Python source is caught by
+    // test_jenkinsfile_stage_choices_match_python_source.
+    choice(name: 'STAGES',
+           choices: ['sanity', 'full', 'fpgadataflow', 'end2end'],
+           description: 'Which CI rows to run. ' +
+                        'sanity (default) = sanity rows only, ~per-PR quick check. ' +
+                        'full = every CI row, ~nightly. ' +
+                        'fpgadataflow / end2end = just that one family, ' +
+                        'useful for debugging a single slice without STAGE_FILTER.')
+    booleanParam(name: 'local_setup',  defaultValue: false,
+                 description: 'Run the non-Docker Vivado setup test on the finn-local-build host. ' +
+                              'Slow and orthogonal to STAGES. Leave off unless you are working ' +
+                              'on setup-local.sh itself.')
+    string(name: 'STAGE_FILTER', defaultValue: '',
+           description: 'Substring filter for debugging a single shard. ' +
+                        'Leave empty for a normal build.')
+  }
+  stages {
+    stage('Validate') {
+      agent { label "${FINN_BUILD_LABEL}" }
+      steps {
+        script {
+          wipeStaleWorkspace()
+          announceCiMode()
+          loadStageConfig()
+          prepareTimingState()
+          validateShards()
+          reapOrphanedLsfJobs()
+          rotateBuildTrees()
+          writeBuildInfo()
+        }
+      }
+    }
+    stage('Build Docker Image') {
+      agent { label "${FINN_BUILD_LABEL}" }
+      environment { FINN_DOCKER_PREBUILT = '0' }
+      steps { script { wipeStaleWorkspace(); withAgentNfsEnv(exposeSharedImage: false) { buildAndPublishDockerImage() } } }
+    }
+    stage('Run Tests') {
+      steps { script { runParallelOrFail(buildParallelStageMap()) } }  // branches call node() themselves
+    }
+    stage('Check Stage Results') {
+      agent { label "${FINN_BUILD_LABEL}" }
+      steps { script { withAgentNfsEnv { aggregateReports() } } }
+    }
+  }
+  // post { always } runs on graceful completion AND graceful abort. It is
+  // the primary defence against this build's LSF jobs surviving past the
+  // build that submitted them. Hard kills (such as the controller crashing)
+  // are caught instead by reapOrphanedLsfJobs() in the next build's Validate.
+  post {
+    always {
+      node("${FINN_BUILD_LABEL}") {
+        script { reapThisBuildLsfJobs() }
+      }
+    }
+  }
+}
+
+
+// ============================================================================
+// Validate-stage helpers
+// ============================================================================
+
+void announceCiMode() {
+  String nfsRoot = finnCiNfsRoot()
+  if (!nfsRoot) {
+    echo """[finn-ci] FINN_CI_NFS_ROOT is unset. Running in local fallback mode.
+[finn-ci]   - no shared Docker images (each agent builds locally)
+[finn-ci]   - no build-to-HW artifact handoff (HW pipeline cannot test this build)
+[finn-ci]   - no persistent timing master (cold-start round-robin each build)
+[finn-ci]   - no per-agent NFS caches (network downloads run fresh)"""
+    return
+  }
+  echo "[finn-ci] FINN_CI_NFS_ROOT=${nfsRoot} (full operation)"
+}
+
+// Fetches the whole Validate-time config bundle with one finn_ci subprocess
+// and caches it in the @Field tables. validate_config() runs inside that
+// subcommand, so an orphan zipArtifact board fails Validate loudly instead of
+// slipping into the per-shard runs. shard_plan drives the branch/stash matrix.
+void loadStageConfig() {
+  String stagesChoice = paramString('STAGES') ?: 'sanity'
+  String command = [
+    "${CI_TOOL} validate-config",
+    "--choice ${shellQuote(stagesChoice)}",
+    "--job-name ${shellQuote(env.JOB_NAME ?: 'job')}",
+    "--stage-filter ${shellQuote(resolveStageFilter())}",
+  ].join(' ')
+  Map payload = readJSON(text: sh(returnStdout: true, script: command).trim()) as Map
+  ENABLED_PARAMS   = (payload.enabled_params as List).collect { it as String }
+  FINN_CI_JOB_KEY  = payload.job_key as String
+  Map shardPlan = payload.shard_plan as Map
+  SHARD_PLAN       = (shardPlan.shards as List).collect { it as Map }
+  SHARD_CANDIDATES = (shardPlan.candidates as List).collect { it as String }
+  ZIP_ARTIFACTS    = (shardPlan.zipArtifacts as List).collect { it as Map }
+}
+
+void validateShards() {
+  String filter = resolveStageFilter()
+  // An empty plan with no filter means loadStageConfig() never ran (every
+  // STAGES choice maps to at least one row, enforced by validate_config).
+  if (SHARD_PLAN.isEmpty() && !filter) {
+    error "shard plan is empty. loadStageConfig() must run before validateShards()"
+  }
+  // STAGE_FILTER mismatch is fatal so a zero-shard "green" build cannot mask
+  // a typo. The error lists the active stage names to guide the operator.
+  if (filter && SHARD_PLAN.isEmpty()) {
+    if (!SHARD_CANDIDATES.isEmpty()) {
+      error "STAGE_FILTER='${filter}' matched no shards. Active stages under " +
+            "STAGES=${paramString('STAGES')} are: " + SHARD_CANDIDATES.join(', ')
+    }
+    error "STAGE_FILTER='${filter}' matched 0 rows because no rows are active under STAGES=${paramString('STAGES')}"
+  }
+  int activeCount = SHARD_PLAN.size()
+  // local_setup adds one parallel branch on finnLocalBuildLabel(). It counts
+  // towards the budget only when that label is the FINN_BUILD_LABEL pool.
+  if (paramBool('local_setup') && finnLocalBuildLabel() == FINN_BUILD_LABEL) {
+    activeCount++
+  }
+  int available = 0
+  boolean queried = false  // stays false when the executor query below throws
+  try {
+    def lbl = Jenkins.instance.getLabelAtom(FINN_BUILD_LABEL)
+    if (lbl) { available = lbl.getTotalExecutors() }
+    queried = true
+  } catch (org.jenkinsci.plugins.scriptsecurity.sandbox.RejectedAccessException e) {
+    // Informational check: print the approval hint without failing the build.
+    echo "validateShards: executor query rejected by Script Security (${e.message}). " +
+         "Approve Jenkins.instance.getLabelAtom in Manage Jenkins to restore the budget check."
+  } catch (Exception e) {
+    echo "validateShards: executor query failed (${e.class.name}: ${e.message}), skipping budget check"
+  }
+  if (available > 0) {
+    echo "validateShards: ${activeCount} shard(s) active, ${FINN_BUILD_LABEL} executors available: ${available}"
+    if (activeCount > available) {
+      unstable "shard plan requests ${activeCount} parallel shards but only ${available} ${FINN_BUILD_LABEL} executors are available, stages will queue and wall-clock will extend"
+    }
+  } else {
+    String why = queried ? "${FINN_BUILD_LABEL} label has 0 online executors" : "executor count unavailable"
+    echo "validateShards: ${activeCount} shard(s) active, ${why}, skipping budget check"
+  }
+}
+
+void prepareTimingState() {
+  FINN_CI_TIMINGS_SNAPSHOT = ''
+  FINN_CI_TIMINGS_MASTER = ''
+  String timingDir = finnCiStateDir(finnCiJobKey())
+  if (!timingDir) {
+    echo "prepareTimingState: local fallback mode, sharding will cold-start to round-robin"
+    return
+  }
+  String masterPath = "${timingDir}/ci_timings_master.json"
+  String snapshotPath = "${timingDir}/build_${env.BUILD_NUMBER}_timings_input.json"
+  // The fields are published only after `prepare` succeeded; on failure they stay ''.
+  catchError(message: 'prepareTimingState failed, sharding will cold-start to round-robin',
+             buildResult: null, stageResult: null) {
+    sh "mkdir -p ${shellQuote(timingDir)}"
+    sh "${CI_TOOL} prepare --master ${shellQuote(masterPath)} --snapshot ${shellQuote(snapshotPath)}"
+    FINN_CI_TIMINGS_SNAPSHOT = snapshotPath
+    FINN_CI_TIMINGS_MASTER = masterPath
+    echo "prepareTimingState: using ${snapshotPath}"
+  }
+}
+
+// Per-build traceability file. Never parsed by code; `cat` it for the
+// human-readable summary of what produced a given build directory. In local
+// fallback mode (no artifact tree) it is written to the workspace and
+// archived so the trace is not lost to git clean.
+void writeBuildInfo() {
+  String imageDir = buildDockerImageDir()
+  String buildRoot = artifactBuildRoot()
+  String generatedAt = sh(script: 'date -u +%FT%TZ', returnStdout: true).trim()
+  // key=value lines; the trailing '' entry yields a final newline after join.
+  def fields = [
+    "job_name=${env.JOB_NAME ?: ''}",
+    "job_key=${finnCiJobKey()}",
+    "build_number=${env.BUILD_NUMBER ?: ''}",
+    "git_commit=${env.GIT_COMMIT ?: ''}",
+    "git_branch=${env.BRANCH_NAME ?: (env.GIT_BRANCH ?: '')}",
+    "enabled_params=${ENABLED_PARAMS.join(',')}",
+    "stage_filter=${resolveStageFilter()}",
+    "timings_snapshot=${FINN_CI_TIMINGS_SNAPSHOT ?: ''}",
+    "docker_image_dir=${imageDir}",
+    "validate_node=${env.NODE_NAME ?: ''}",
+    "generated_at=${generatedAt}",
+    '',
+  ]
+  String body = fields.join('\n')
+  if (!buildRoot) {
+    // Local fallback: keep the trace as a build artifact instead.
+    writeFile file: 'BUILD_INFO.txt', text: body
+    catchError(buildResult: null, stageResult: null,
+               message: 'archive BUILD_INFO.txt (fallback) failed, non-fatal') {
+      archiveArtifacts artifacts: 'BUILD_INFO.txt', allowEmptyArchive: true
+    }
+    return
+  }
+  sh "mkdir -p ${shellQuote(buildRoot)}"
+  writeFile file: "${buildRoot}/BUILD_INFO.txt", text: body
+}
+
+// Prunes one shared tree under FINN_CI_NFS_ROOT; failures are non-fatal.
+// Per-shard scratch is workspace-local and rotated by wipeStaleWorkspace, so
+// there is no prune subcommand for it. An empty root means nothing to prune.
+void rotateBuildTree(String kind, String root) {
+  catchError(message: "prune --kind ${kind} failed, non-fatal",
+             buildResult: null, stageResult: null) {
+    if (root) {
+      // retain_n / max_age_days come from finn_ci.retention.RETENTION[kind],
+      // so the pipeline never restates the retention window.
+      sh "${CI_TOOL} prune --kind ${kind} ${shellQuote(root)} ${shellQuote(finnCiJobKey())} ${shellQuote(env.BUILD_NUMBER ?: '')}"
+    }
+  }
+}
+
+void rotateBuildTrees() {
+  // [kind, shared root] pairs, pruned in this order by rotateBuildTree.
+  def trees = [['image', finnDockerImagesRoot()], ['artifact', finnArtifactsRoot()], ['snapshot', finnCiStateRoot()]]
+  for (def tree : trees) { rotateBuildTree(tree[0] as String, tree[1] as String) }
+}
+
+
+// ============================================================================
+// Build-Docker-Image stage helpers
+// ============================================================================
+
+void buildAndPublishDockerImage() {
+  // -a -f reclaims the whole local image cache. The FINN builder agent is
+  // assumed single-tenant so this will not evict another job's images.
+  sh 'docker system prune -a -f'
+  sh "./run-docker.sh echo 'Docker image build complete'"
+  String publishDir = buildDockerImageDir()
+  if (!publishDir) {
+    echo "buildAndPublishDockerImage: local fallback mode, skipping image publish"
+    return
+  }
+  publishSharedDockerImage(publishDir)
+}
+
+// Build-scoped image directory: <job image dir>/<BUILD_NUMBER>.
+// Returns '' when the job image dir is empty (local fallback mode).
+String buildDockerImageDir() {
+  String jobImagesDir = finnDockerImagesDir(finnCiJobKey())
+  return jobImagesDir ? "${jobImagesDir}/${env.BUILD_NUMBER}" : ''
+}
+
+// Publishes the freshly built image so other agents can docker-load it
+// instead of rebuilding. The tag comes from `./run-docker.sh print-tag`. The
+// build-scoped imageDir keeps cross-build readers away from in-progress
+// writes; publish_docker_image.sh handles same-build-retry serialisation.
+void publishSharedDockerImage(String imageDir) {
+  String dockerTag = sh(script: './run-docker.sh print-tag', returnStdout: true).trim()
+  boolean singleLine = dockerTag && !dockerTag.contains('\n')
+  if (!singleLine) {
+    error "run-docker.sh print-tag must emit exactly one Docker tag line. Got: ${dockerTag}"
+  }
+  sh "mkdir -p ${shellQuote(imageDir)}"
+  sh "ci/scripts/publish_docker_image.sh ${shellQuote(imageDir)} ${shellQuote(dockerTag)} ${shellQuote(env.BUILD_NUMBER ?: '')}"
+}
+
+
+// ============================================================================
+// Run-Tests stage helpers
+// ============================================================================
+
+Map<String, Closure> buildParallelStageMap() {
+  Map<String, Closure> stages = [:]
+  // .each hands every closure its own `shard` binding (a C-style for would
+  // share one mutable var across all closures). List.each also avoids an
+  // IntRange non-Serializable CPS issue.
+  SHARD_PLAN.each { shard ->
+    stages[shard.stage as String] = { runShard(shard) }
+  }
+  if (paramBool('local_setup')) {
+    stages.put('Local Setup (non-Docker) with Vivado', { runLocalSetupTest() })
+  }
+  return stages
+}
+
+void runParallelOrFail(Map<String, Closure> branches) {
+  if (branches) {
+    parallel(branches)
+    return
+  }
+  error "No CI stages selected. STAGES=${paramString('STAGES')}, ENABLED_PARAMS=${ENABLED_PARAMS}, STAGE_FILTER='${resolveStageFilter()}'"
+}
+
+String resolveStageFilter() { paramString('STAGE_FILTER') }  // single accessor for the STAGE_FILTER parameter
+
+void runShard(Map entry) {  // one parallel branch: node -> NFS env -> stage -> shard body -> stash
+  String stageName = entry.stage as String
+  String stashName = entry.stash as String
+  node(FINN_BUILD_LABEL) {
+    withAgentNfsEnv {
+      stage(stageName) {
+        checkout scm
+        wipeStaleWorkspace()
+        String perBuildRoot = finnCiPerBuildRoot()
+        String buildDir = "${perBuildRoot}/${stashName}"
+        // startMarker is touched just before runShardBody and consumed by
+        // archive_failure_logs.sh to scope the LSF staging-log scan to
+        // files mtime'd after this shard began. Without it, the archiver
+        // would walk the entire shared staging dir.
+        String startMarker = "${perBuildRoot}/${stashName}.start"
+        String stageMap = "[stage-map] build=${env.BUILD_NUMBER} stage='${stageName}' stash=${stashName} shard=${(entry.shardId as int) + 1}/${entry.numShards} node=${env.NODE_NAME} workspace=${env.WORKSPACE} build_dir=${buildDir}"
+        echo stageMap
+        writeFile file: "${stashName}.stagemap", text: "${stageMap}\n"  // one-line record per shard
+        withEnv(["FINN_HOST_BUILD_DIR=${buildDir}"]) {
+          sh "mkdir -p ${shellQuote(perBuildRoot)}"
+          sh "touch ${shellQuote(startMarker)}"
+          checkAgentFreeSpace(finnCiMinFreeGB())  // runs outside the catchError below
+          sh "mkdir -p ${shellQuote(stashName)}"
+          cleanPreviousBuildFiles(buildDir)
+          try {
+            catchError(stageResult: 'FAILURE') {  // a failing body marks the stage FAILURE
+              runShardBody(entry, buildDir, startMarker)
+            }
+          } finally {
+            safeStashReport(stashName)  // always attempted, pass or fail
+          }
+        }
+      }
+    }
+  }
+}
+
+// Runs one shard: pytest, optional board-zip collection, then the LSF summary.
+// On failure it prints and archives triage logs, then rethrows to the caller.
+void runShardBody(Map entry, String buildDir, String startMarker) {
+  String stash = entry.stash as String
+  try {
+    Map pytestOpts = [
+        marker: entry.marker,
+        stage: entry.stage,
+        stash: stash,
+        workers: entry.workers as int,
+        numShards: entry.numShards as int,
+        shardId: entry.shardId as int,
+    ]
+    if (entry.coverage) {
+      // Coverage rows also carry the per-shard data file name.
+      pytestOpts.putAll([extraArgs: "--cov=finn", coverageFile: entry.coverageFile])
+    }
+    runPytest(pytestOpts)
+    Map handoff = entry.zipArtifacts as Map
+    if (handoff) {
+      String hwTestType = handoff.hwTestType as String
+      for (board in (handoff.boards ?: [])) {
+        findCopyZip(hwTestType, board, buildDir, stash)
+      }
+    }
+    printLsfSummary(buildDir, stash)
+  } catch (Exception failure) {
+    printLsfSummary(buildDir, stash)
+    printPytestFailures(stash, 50, 20)
+    printFailureTails(buildDir, stash, 150)
+    archiveFailureLogs(buildDir, stash, startMarker)
+    throw failure
+  }
+}
+
+// Non-Docker setup test. setup-local.sh expects apt + python3.10 on the agent.
+// Override FINN_LOCAL_BUILD_LABEL when the main build pool lacks those.
+void runLocalSetupTest() {
+  node(finnLocalBuildLabel()) {
+    withAgentNfsEnv {
+      stage('Local Setup (non-Docker) with Vivado') {
+        checkout scm
+        wipeStaleWorkspace()
+        String perBuildRoot = finnCiPerBuildRoot()
+        String buildDir = "${perBuildRoot}/local_setup"
+        echo "[stage-map] build=${env.BUILD_NUMBER} stage='Local Setup (non-Docker) with Vivado' stash=local_setup shard=1/1 node=${env.NODE_NAME} workspace=${env.WORKSPACE} build_dir=${buildDir}"
+        // Cache key tracks the dep inputs so changes force a fresh download.
+        String pipCacheKey = sh(
+          returnStdout: true,
+          script: 'sha256sum requirements.txt setup.py setup.cfg pyproject.toml 2>/dev/null | sha256sum | cut -c1-16'
+        ).trim()
+        if (!pipCacheKey) { pipCacheKey = "build-${env.BUILD_NUMBER}" }  // fallback when the hash pipeline printed nothing
+        String cacheRoot   = finnCiLocalCacheRoot()
+        String pipCacheRoot = "${cacheRoot}/pip"
+        String pipCacheDir  = "${pipCacheRoot}/${pipCacheKey}"
+        String xdgCacheDir  = "${cacheRoot}/xdg"
+        withEnv([
+          "FINN_HOST_BUILD_DIR=${buildDir}",
+          "PIP_CACHE_DIR=${pipCacheDir}",
+          "XDG_CACHE_HOME=${xdgCacheDir}",
+        ]) {
+          // prune-pip-cache keeps pipCacheDir and deletes cache-key dirs older
+          // than 14 days. It tolerates concurrent deletion, so two builds
+          // GC'ing at once needs no lock (flock is local-only on NFS anyway).
+          sh """
+mkdir -p ${shellQuote(perBuildRoot)} ${shellQuote(pipCacheDir)} ${shellQuote(xdgCacheDir)}
+${CI_TOOL} prune-pip-cache \\
+  ${shellQuote(pipCacheRoot)} ${shellQuote(pipCacheDir)} 14 || true
+"""
+          cleanPreviousBuildFiles(buildDir)
+          catchError(stageResult: 'FAILURE') {  // setup or quicktest failure marks this stage FAILURE
+            sh './setup-local.sh'
+            sh '''#!/bin/bash
+set -eo pipefail
+source scripts/finn-env.sh
+./scripts/quicktest-local.sh vivado
+'''
+          }
+        }
+      }
+    }
+  }
+}
+
+// FINN_CI_MARKER passes the marker via env to dodge shell injection. The
+// marker shape is validated against MARKER_SAFE_PATTERN in Python's
+// validate_config (run during Validate), so the echoed value here is
+// guaranteed to be the 'a or b or c' form and safe to surround with
+// double-quotes in the pytest command.
+//
+// FINN_CI_TIMINGS_FILE is optional. When no snapshot is available the
+// pytest plugin falls back to deterministic round-robin sharding.
+void runPytest(Map opts) {  // opts keys read here: marker, stage, stash, workers, numShards, shardId, extraArgs, coverageFile
+  def parts = ["python -m pytest -m \"\$FINN_CI_MARKER\""]
+  // Skip -n entirely for single-worker rows: xdist still spawns a worker
+  // process for -n 1, which adds startup overhead and obscures tracebacks.
+  int workers = (opts.workers ?: 1) as int
+  if (workers > 1) {
+    // workers>1 always uses loadgroup so xdist_group chains (checkpoint-linked
+    // tests) stay on one worker within the shard.
+    parts << "-n ${workers} --dist loadgroup"
+  }
+  int numShards = (opts.numShards ?: 0) as int
+  if (numShards > 0) {
+    parts << "--num-shards ${numShards} --shard-id ${opts.shardId}"
+  }
+  parts << "--junitxml=${opts.stash}.xml --html=${opts.stash}.html --self-contained-html"
+  if (opts.extraArgs) { parts << opts.extraArgs }
+  def cmd = parts.join(' ')
+  echo "runPytest[${opts.stash}]: FINN_CI_MARKER='${opts.marker}' cmd=${cmd}"
+  String timingsFile = ''  // stays '' when no snapshot is usable on this node
+  if (FINN_CI_TIMINGS_SNAPSHOT && fileExists(FINN_CI_TIMINGS_SNAPSHOT)) {
+    timingsFile = "${opts.stash}.timings_input.json"
+    sh "cp ${shellQuote(FINN_CI_TIMINGS_SNAPSHOT)} ${shellQuote(timingsFile)}"  // workspace copy under a per-stash name
+  } else if (FINN_CI_TIMINGS_SNAPSHOT) {
+    echo "runPytest[${opts.stash}]: ${FINN_CI_TIMINGS_SNAPSHOT} not visible on ${env.NODE_NAME}, falling back to round-robin sharding"
+  }
+  def envLines = [
+      "export FINN_CI_MARKER=${shellQuote(opts.marker as String)}",
+      "export FINN_CI_STAGE=${shellQuote(opts.stage as String)}",
+      "export FINN_CI_STASH=${shellQuote(opts.stash as String)}",
+      "export FINN_CI_JOB_NAME=${shellQuote(env.JOB_NAME ?: '')}",
+      "export FINN_CI_BUILD_NUMBER=${shellQuote(env.BUILD_NUMBER ?: '')}",
+      "export FINN_CI_TIMINGS_FILE=${shellQuote(timingsFile)}",
+  ]
+  // per-shard COVERAGE_FILE keeps each shard's raw .coverage data unique so
+  // aggregateReports can `coverage combine` them later without clashes.
+  if (opts.coverageFile) {
+    envLines << "export COVERAGE_FILE=${shellQuote(opts.coverageFile as String)}"
+  }
+  // run-tests.sh sits at workspace root. Each parallel branch gets its own
+  // @N workspace from Jenkins so two shards on the same agent never race.
+  // set -e so a non-zero export line fails fast rather than silently
+  // running pytest with the wrong env.
+  writeFile file: 'run-tests.sh', text:
+      "#!/bin/bash\nset -eo pipefail\n${envLines.join('\n')}\n${cmd}\n"
+  // invoke via bash so we do not depend on the workspace file having the
+  // execute bit set (writeFile creates 0644 by default).
+  runDockerCommand('./run-docker.sh bash run-tests.sh')
+}
+
+
+// ============================================================================
+// Check-Stage-Results stage helpers
+// ============================================================================
+
+// Collects every shard's stash into reports/ and publishes the results.
+// junit first (workspace-local, never flakes). html merge and artifact
+// archive after, because agent-to-controller transfer can flake on large globs.
+void aggregateReports() {
+  // cleanPreviousBuildFiles wipes then recreates 'reports', so no mkdir first.
+  cleanPreviousBuildFiles('reports')
+  dir('reports') {
+    expectedStashes().each { name -> unstashIfPresent(name) }
+  }
+  junit allowEmptyResults: true, testResults: 'reports/*.xml'
+  mergeShardMaps()
+  echoShardTimings()
+  combineCoverageReports()
+  aggregatePublishedBoardZips()
+  assertZipArtifactsEmitted()
+  updateTimingState()
+  publishStageMap()
+  catchError(buildResult: null, stageResult: 'UNSTABLE',
+             message: 'pytest_html_merger failed, per-stage HTMLs still archived') {
+    runDockerCommand('./run-docker.sh pytest_html_merger -i reports/ -o reports/test_report_final.html')
+  }
+  catchError(buildResult: null, stageResult: 'UNSTABLE',
+             message: 'archiveArtifacts (xml) failed') {
+    archiveArtifacts artifacts: 'reports/*.xml', allowEmptyArchive: true
+  }
+  catchError(buildResult: null, stageResult: 'UNSTABLE',
+             message: 'archiveArtifacts (html) failed') {
+    archiveArtifacts artifacts: 'reports/*.html', allowEmptyArchive: true
+  }
+  catchError(buildResult: null, stageResult: 'UNSTABLE',
+             message: 'archiveArtifacts (timings, shard maps, empty-shard markers) failed') {
+    archiveArtifacts artifacts: 'reports/*.timings.json,reports/*shardmap.*,reports/shard_map.*,reports/ci_timings_master.json,reports/*.empty-shard',
+                     allowEmptyArchive: true
+  }
+}
+
+// Per-shard pytest runs with COVERAGE_FILE=${stash}.coverage, so each shard
+// stashes its own raw .coverage data file. Combine them here into a single
+// .coverage and render one HTML report. Skips silently when no row opted in.
+void combineCoverageReports() {
+  catchError(message: 'combineCoverageReports failed, non-fatal',
+             buildResult: null, stageResult: 'UNSTABLE') {
+    if (!fileExists('reports')) { return }
+    String firstShardFile = sh(script: "ls reports/*.coverage 2>/dev/null | head -1 || true",
+                               returnStdout: true).trim()
+    if (!firstShardFile) {
+      echo "combineCoverageReports: no per-shard .coverage files, skipping"
+      return
+    }
+    sh '''#!/bin/bash
+set -eo pipefail
+rm -rf coverage_data coverage_combined
+mkdir -p coverage_data
+n=0
+for f in reports/*.coverage; do
+  cp "$f" "coverage_data/.coverage.shard${n}"
+  n=$((n+1))
+done
+    '''
+    // pytest-cov ships its own coverage. Merge inside the same docker image
+    // so Python/library versions match the per-shard runs.
+    String mergeCmd = "cd coverage_data && coverage combine && coverage html -d ../coverage_combined"
+    runDockerCommand("./run-docker.sh bash -c '" + mergeCmd + "'")
+    archiveArtifacts artifacts: 'coverage_combined/**/*', allowEmptyArchive: true
+  }
+}
+
+// Sole writer of currentBuild.description on the build pipeline.
+void publishStageMap() {
+  catchError(message: 'publishStageMap failed, non-fatal',
+             buildResult: null, stageResult: null) {
+    if (!fileExists('reports')) { return }
+    sh "cat reports/*.stagemap 2>/dev/null | sort > reports/stage_map.txt || true"
+    if (!fileExists('reports/stage_map.txt')) { return }
+    archiveArtifacts artifacts: 'reports/stage_map.txt', allowEmptyArchive: true
+    String merged = readFile('reports/stage_map.txt').trim()
+    if (merged) {
+      currentBuild.description = merged
+    }
+  }
+}
+
+void echoShardTimings() {
+  catchError(message: 'echoShardTimings failed, non-fatal',
+             buildResult: null, stageResult: null) {
+    // Nothing to summarise when no reports directory exists.
+    if (fileExists('reports')) { sh "${CI_TOOL} summarize reports/" }
+  }
+}
+
+void mergeShardMaps() {
+  catchError(message: 'mergeShardMaps failed, non-fatal',
+             buildResult: null, stageResult: null) {
+    // Nothing to merge when no reports directory exists.
+    if (fileExists('reports')) { sh "${CI_TOOL} merge-maps reports/" }
+  }
+}
+
+// Marks the build UNSTABLE when an active row declared zipArtifacts but its
+// READY zip was not produced. In local fallback mode it is UNSTABLE too if
+// any row would have published (handoff cannot be staged without NFS).
+void assertZipArtifactsEmitted() {
+  catchError(message: 'assertZipArtifactsEmitted failed, non-fatal',
+             buildResult: null, stageResult: null) {
+    // ZIP_ARTIFACTS ignores STAGE_FILTER (it mirrors the unfiltered active
+    // rows), so a filtered debug run that ran one shard would otherwise flag
+    // every unrun row's missing zip. Skip the completeness check for filtered
+    // runs, matching shouldUpdatePersistentTimingState().
+    if (resolveStageFilter()) {
+      echo "assertZipArtifactsEmitted: STAGE_FILTER set, skipping board-zip completeness check (partial run)"
+      return
+    }
+    if (!finnArtifactsRoot()) {
+      if (ZIP_ARTIFACTS.isEmpty()) {
+        echo "assertZipArtifactsEmitted: local fallback mode (no shared artifact tree), skipping"
+        return
+      }
+      unstable "FINN_CI_NFS_ROOT unset: build-to-HW handoff not staged this build (rows that would publish: " +
+               activeArtifactRowSummary() + "). Set FINN_CI_NFS_ROOT to enable handoff."
+      return
+    }
+    def absent = []
+    ZIP_ARTIFACTS.each { zip ->
+      String readyMarker = "${boardZipPath(zip.hwTestType as String, zip.board as String)}.READY"
+      if (!fileExists(readyMarker)) {
+        absent << "${zip.stage} -> ${zip.board} (no .READY at ${readyMarker})"
+      }
+    }
+    if (absent) {
+      echo "assertZipArtifactsEmitted: ${absent.size()} board zip(s) not produced this build:\n  - " + absent.join('\n  - ')
+      unstable "assertZipArtifactsEmitted: ${absent.size()} board zip(s) not produced this build"
+    }
+  }
+}
+
+// Runs `finn_ci update` over reports/ and writes reports/ci_timings_master.json.
+// --master is passed only when FINN_CI_TIMINGS_MASTER is set, and
+// --update-master only when shouldUpdatePersistentTimingState() allows it.
+// A failure marks the stage UNSTABLE (stageResult) rather than failing it.
+void updateTimingState() {
+  catchError(buildResult: null, stageResult: 'UNSTABLE',
+             message: 'updateTimingState failed. Persistent master may be stale') {
+    if (!fileExists('reports')) { return }
+    String masterArg = ''
+    if (FINN_CI_TIMINGS_MASTER) {
+      masterArg = "--master ${shellQuote(FINN_CI_TIMINGS_MASTER)}"
+    }
+    String updateArg = ''
+    if (shouldUpdatePersistentTimingState()) {
+      updateArg = '--update-master'
+    }
+    String jobArg = shellQuote(env.JOB_NAME ?: '')
+    String buildArg = shellQuote(env.BUILD_NUMBER ?: '')
+    sh "${CI_TOOL} update --reports reports ${masterArg} " +
+       "--out reports/ci_timings_master.json " +
+       "--job ${jobArg} --build ${buildArg} " +
+       "${updateArg}"
+  }
+}
+
+// Decides whether this build may refresh the persistent timing master.
+// Returns false for ABORTED / NOT_BUILT runs (their timings are partial) and
+// for ad-hoc stage-filtered debug runs. Returns true otherwise, including
+// for builds with test failures (most builds fail some test).
+boolean shouldUpdatePersistentTimingState() {
+  String outcome = currentBuild.currentResult ?: 'SUCCESS'
+  if (outcome == 'ABORTED' || outcome == 'NOT_BUILT') {
+    return false
+  }
+  return !resolveStageFilter()
+}
+
+// Stash name of every entry in SHARD_PLAN, in plan order.
+List<String> expectedStashes() {
+  List<String> names = []
+  SHARD_PLAN.each { shard -> names << (shard.stash as String) }
+  return names
+}
+
+// Publishes one canonical board zip per (testType, board) across all active
+// rows. Pairs are de-duplicated through a LinkedHashSet, so each pair is
+// published once and in first-seen order. Failures are non-fatal.
+void aggregatePublishedBoardZips() {
+  catchError(buildResult: null, stageResult: null,
+             message: 'aggregatePublishedBoardZips failed, non-fatal') {
+    def seen = new LinkedHashSet<String>()
+    ZIP_ARTIFACTS.each { entry ->
+      seen << "${entry.hwTestType}/${entry.board}"
+    }
+    (seen as List).each { key ->
+      // Cut at the first '/' only: everything before it is the test type,
+      // everything after it is the board.
+      String joined = key as String
+      int cut = joined.indexOf('/')
+      publishBoardZip(joined.substring(0, cut), joined.substring(cut + 1))
+    }
+  }
+}
+
+// Zips one board's staged deployments and publishes the result.
+// Rename-then-touch: the zip is copied to <dest>.tmp, renamed onto <dest>,
+// synced, and only then is the .READY sibling touched, so HW never observes
+// a half-written zip. Skips (with a log line) in local fallback mode or when
+// nothing was staged for this (testType, board).
+void publishBoardZip(String testType, String board) {
+  String label = "publishBoardZip(${testType}/${board})"
+  String buildRoot = artifactBuildRoot()
+  if (!buildRoot) {
+    echo "${label}: local fallback mode, skipping"
+    return
+  }
+  String staged = "${buildRoot}/deployments/${testType}/${board}"
+  if (!fileExists(staged)) {
+    echo "${label}: no staged deployments under ${staged}"
+    return
+  }
+
+  String zipDest = boardZipPath(testType, board)
+  int cut = zipDest.lastIndexOf('/')
+  String zipDestDir = (cut < 0) ? '.' : zipDest.substring(0, cut)
+  String workDir = "artifact_zips/${testType}/${board}"
+  String payloadDir = workDir + '/' + board
+
+  // Start from a clean work dir and make sure the destination dir exists.
+  sh "rm -rf ${shellQuote(workDir)} && mkdir -p ${shellQuote(payloadDir)} ${shellQuote(zipDestDir)}"
+  sh "ci/scripts/publish_board_zip_stage.sh " +
+     "${shellQuote(staged)} ${shellQuote(payloadDir)} " +
+     "${shellQuote(board)} ${shellQuote(testType)}"
+  // NOTE(review): .NO_DEPLOYMENTS is presumably dropped by the staging script
+  // when it found nothing to package. Confirm against the script.
+  if (fileExists("${payloadDir}/.NO_DEPLOYMENTS")) { return }
+
+  String zipName   = shellQuote("${board}.zip")
+  String finalZip  = shellQuote(zipDest)
+  String tmpZip    = shellQuote(zipDest + '.tmp')
+  String readyFlag = shellQuote(zipDest + '.READY')
+  dir(workDir) {
+    sh "zip -r ${zipName} ${shellQuote(board + '/')}"
+    sh "cp ${zipName} ${tmpZip}"
+    sh "mv -f ${tmpZip} ${finalZip}"
+    sh "sync ${finalZip}"
+    sh "touch ${readyFlag}"
+  }
+}
+
+// Each shard stages its deployment tree under
+// deployments/<testType>/<board>/<copyDir>/ in the per-build artifact root by
+// delegating to ci/scripts/find_copy_zip.sh. One board zip per
+// (testType, board) is published from those staged trees later.
+// Fails the step when testType is empty. Returns silently when the artifact
+// tree is unavailable (local fallback mode).
+void findCopyZip(String testType, String board, String findDir, String copyDir) {
+  if (!testType) {
+    error "findCopyZip(${board}): row has zipArtifacts.boards but no zipArtifacts.hwTestType"
+  }
+  String buildRoot = artifactBuildRoot()
+  if (buildRoot) {
+    String target = "${buildRoot}/deployments/${testType}/${board}/${copyDir}"
+    sh "ci/scripts/find_copy_zip.sh " +
+       "${shellQuote(testType)} ${shellQuote(board)} " +
+       "${shellQuote(findDir)} ${shellQuote(target)}"
+  }
+}
+
+
+// ============================================================================
+// Cross-cutting helpers
+// ============================================================================
+
+// Per-shard scratch lives under WORKSPACE_TMP (a sibling of the git
+// workspace) so git clean has no deep NFS scratch tree to walk. We still
+// reap the prior build's scratch here to reclaim space. A stuck NFS handle
+// surfaces as an actionable failure with a forensic dump after a bounded
+// retry.
+//
+// Steps performed by the embedded shell script:
+//   1. rm -rf the scratch root, retrying up to 10 times with a 1 s pause
+//      while the directory still exists.
+//   2. If it survives, dump the surviving entries, .nfs* files, lsof output
+//      and bjobs output to stderr, then exit 1.
+//   3. Run `git clean -ffdx` (its own exit status is ignored) and exit 1 if
+//      `git status --porcelain` still reports dirty paths.
+void wipeStaleWorkspace() {
+  String scratchRoot = finnCiScratchRoot()
+  sh """
+    scratch=${shellQuote(scratchRoot)}
+    if [ -d "\$scratch" ]; then
+      attempt=0
+      while [ -d "\$scratch" ] && [ "\$attempt" -lt 10 ]; do
+        rm -rf "\$scratch" 2>/dev/null || true
+        attempt=\$((attempt + 1))
+        [ -d "\$scratch" ] && sleep 1
+      done
+      if [ -d "\$scratch" ]; then
+        echo "wipeStaleWorkspace: \$scratch still present after 10 attempts" >&2
+        echo "--- surviving entries (top 50) ---" >&2
+        find "\$scratch" -maxdepth 4 2>&1 | head -50 >&2
+        echo "--- .nfsXXXX silly-renames (top 20) ---" >&2
+        find "\$scratch" -name '.nfs*' 2>&1 | head -20 >&2
+        echo "--- local open fds (lsof +D, top 30) ---" >&2
+        lsof +D "\$scratch" 2>&1 | head -30 >&2 || true
+        echo "--- LSF jobs (mine, best-effort) ---" >&2
+        bjobs -a 2>&1 | head -20 >&2 || true
+        exit 1
+      fi
+    fi
+    # -ff (double force) so git clean descends into nested git repos:
+    # FINN-generated cpp_driver_*/.git trees that a single -f silently skips
+    # would otherwise leave tmp/ dirty and trip the gate below on every run.
+    git clean -ffdx || true
+    dirty=\$(git status --porcelain)
+    if [ -n "\$dirty" ]; then
+      n=\$(echo "\$dirty" | wc -l)
+      echo "wipeStaleWorkspace: \${n} dirty path(s) after cleanup (showing first 20):"
+      echo "\$dirty" | head -20
+      exit 1
+    fi
+  """
+}
+
+// ============================================================================
+// LSF orphan cleanup (irrelevant if LSF is disabled)
+// ============================================================================
+// Orphaned bsub jobs from an aborted build keep open fds on the scratch tree
+// and block the next build's wipeStaleWorkspace. bsub wrappers must tag every
+// job name `finn_ci_<jobScope>_<TOOL>_<JOB_TAG>` with jobScope matching
+// lsfScope(). Two helpers reap in complementary places: reapThisBuildLsfJobs
+// (post always, graceful completion or abort) and reapOrphanedLsfJobs
+// (Validate, hard kill of a prior build whose post-block never ran). The
+// name parse lives in finn_ci.lsf.parse_lsf_jobs. The "is that build still
+// running" check stays here because it needs the Jenkins API, so a concurrent
+// older-numbered build is never killed. Both gate on FINN_LSF_NFS_STAGING.
+
+// True when FINN_LSF_NFS_STAGING is set to something other than whitespace.
+boolean lsfEnabled() {
+  String staging = env.FINN_LSF_NFS_STAGING
+  return staging != null && !staging.trim().isEmpty()
+}
+
+// bkills every LSF job whose name matches this build's scope
+// (finn_ci_<lsfScope()>_*). No-op when LSF is disabled. Best-effort: a
+// missing bkill binary or a bkill failure is swallowed by the shell line.
+void reapThisBuildLsfJobs() {
+  if (lsfEnabled()) {
+    catchError(buildResult: null, stageResult: null,
+               message: 'LSF self-reap failed, non-fatal') {
+      String jobGlob = shellQuote("finn_ci_${lsfScope()}_*")
+      sh "command -v bkill >/dev/null 2>&1 && bkill -J ${jobGlob} 2>/dev/null || true"
+    }
+  }
+}
+
+// Concurrent-safe orphan reap. The job-name -> {build: [jobid]} parse is in
+// finn_ci.lsf.parse_lsf_jobs. Here we only bkill jobs we can positively tie
+// to a build of this job that is no longer running. Defaults to LEAVE ALONE
+// on any uncertainty so an API hiccup never kills a healthy concurrent build,
+// and a build we cannot resolve on this job (its jobs may belong to a
+// different job whose sanitised name collides on the job-level finn_ci_
+// prefix, which omits the build number) is never reaped.
+// No-op when LSF is disabled or JOB_NAME is unset. Any failure is non-fatal.
+void reapOrphanedLsfJobs() {
+  if (!lsfEnabled()) { return }
+  catchError(buildResult: null, stageResult: null,
+             message: 'LSF orphan reap failed, non-fatal') {
+    String jobName = env.JOB_NAME ?: ''
+    if (!jobName) { return }
+    // job-level scope: sanitised JOB_NAME without the build number, so the
+    // bjobs query below matches every build of this job, not just this one.
+    String jobNameScope = jobName.replaceAll(/[^A-Za-z0-9_]/, '_')
+    String prefix = "finn_ci_${jobNameScope}_"
+
+    // bjobs output pipes straight into the Python parser, so a bjobs format
+    // change is a one-file, tested fix instead of a Groovy regex tweak.
+    String parsed = sh(returnStdout: true, script:
+      "command -v bjobs >/dev/null 2>&1 && " +
+      "bjobs -a -J ${shellQuote(prefix + '*')} -o 'jobid job_name' -noheader 2>/dev/null | " +
+      "${CI_TOOL} lsf-parse-jobs --prefix ${shellQuote(prefix)} || echo '{}'"
+    ).trim()
+    Map jobidsByBuild = readJSON(text: (parsed ?: '{}')) as Map
+    if (jobidsByBuild.isEmpty()) { return }
+
+    // Named thisBuild (not currentBuild) so the Jenkins `currentBuild`
+    // global is not shadowed inside this method.
+    int thisBuild = (env.BUILD_NUMBER && env.BUILD_NUMBER.isInteger())
+        ? env.BUILD_NUMBER.toInteger() : -1
+    def toKill = []
+    jobidsByBuild.each { buildStr, jobids ->
+      if (!(buildStr as String).isInteger()) { return }
+      int build = (buildStr as String).toInteger()
+      if (build == thisBuild) {
+        echo "reapOrphanedLsfJobs: build ${build} is this build, leaving ${jobids.size()} job(s)"
+        return
+      }
+      // An unresolved run means either this job never had that build (so the
+      // jobs belong to a different job that collides on the prefix) or the
+      // build record has rotated out. Either way ownership is unprovable, so
+      // leave the jobs alone. Only a build resolvable on this job and not
+      // building is safe to reap.
+      String state = lsfOwnerBuildState(jobName, build)
+      if (state == 'unresolved') {
+        echo "reapOrphanedLsfJobs: build ${build} unresolved on ${jobName}, leaving ${jobids.size()} job(s)"
+      } else if (state == 'running') {
+        echo "reapOrphanedLsfJobs: build ${build} still running, leaving ${jobids.size()} job(s)"
+      } else {
+        echo "reapOrphanedLsfJobs: build ${build} not running, reaping ${jobids.size()} job(s)"
+        toKill.addAll(jobids)
+      }
+    }
+    if (toKill.isEmpty()) { return }
+    // The ids come from parsed external output, so quote each one before it
+    // reaches the shell, like every other interpolated argument in this file.
+    def quotedIds = []
+    toKill.each { id -> quotedIds << shellQuote(id as String) }
+    sh "bkill ${quotedIds.join(' ')} 2>/dev/null || true"
+  }
+}
+
+// Resolves build number `build` of job `jobName` through the Jenkins API and
+// returns 'unresolved' (job or build not found), 'running' (isBuilding()) or
+// 'finished'. @NonCPS keeps the Job/Run objects out of CPS-transformed
+// locals, so they are never live across a pipeline step where the program
+// state may be serialised. Must not call pipeline steps.
+@NonCPS
+private String lsfOwnerBuildState(String jobName, int build) {
+  def run = Jenkins.instance.getItemByFullName(jobName)?.getBuildByNumber(build)
+  if (run == null) { return 'unresolved' }
+  return run.isBuilding() ? 'running' : 'finished'
+}
+
+// Pre-create the per-agent NFS cache subtree (xrt, finn_cache,
+// vivado_ip_cache) with a single mkdir -p. Split out from
+// agentCachesDockerExtra so the env-map builder stays free of side effects
+// and can be called from any context without an sh side effect.
+// No-op when finnAgentCachesDir(node) yields nothing.
+void prepareAgentCaches(String node) {
+  String cacheRoot = finnAgentCachesDir(node)
+  if (cacheRoot) {
+    def quotedDirs = []
+    ['xrt', 'finn_cache', 'vivado_ip_cache'].each { leaf ->
+      quotedDirs << shellQuote(cacheRoot + '/' + leaf)
+    }
+    sh "mkdir -p " + quotedDirs.join(' ')
+  }
+}
+
+// Pure compute: returns [envList: ..., dockerExtra: ...] implied by the
+// per-agent NFS caches plus the unconditional hostname pin.
+//   dockerExtra: the current FINN_DOCKER_EXTRA with `--hostname <node>`
+//                appended, plus a vivado_ip_cache bind mount when a caches
+//                dir exists.
+//   envList:     FINN_XRT_PATH and FINN_DOCKER_CACHE_DIR entries when a
+//                caches dir exists, otherwise empty.
+// Hostname pin is required because bsub's gethostbyname() on the default
+// finn_dev_<user> fails with "Error 0".
+Map agentCachesDockerExtra() {
+  String agent = env.NODE_NAME ?: 'localhost'
+  String cacheRoot = finnAgentCachesDir(agent)
+  String extra = "${env.FINN_DOCKER_EXTRA ?: ''} --hostname ${agent}"
+  List<String> vars = []
+  if (cacheRoot) {
+    extra = extra + " -v ${cacheRoot}/vivado_ip_cache:/tmp/vivado_ip_cache"
+    vars << "FINN_XRT_PATH=${cacheRoot}/xrt"
+    vars << "FINN_DOCKER_CACHE_DIR=${cacheRoot}/finn_cache"
+  }
+  return [envList: vars, dockerExtra: extra]
+}
+
+// Convenience overload: runs body with the default options.
+void withAgentNfsEnv(Closure body) { withAgentNfsEnv([:], body) }
+
+// Must run after node binding so ${NODE_NAME} resolves before
+// FINN_DOCKER_EXTRA is interpolated. Composes prepareAgentCaches +
+// agentCachesDockerExtra with the shared-image-dir or local-build toggle.
+// The Build-Docker-Image stage passes exposeSharedImage:false because imageDir
+// is not published there yet, so exposing it would make run-docker.sh log a
+// spurious "no usable shared image" fallback. That stage relies on its own
+// FINN_DOCKER_PREBUILT=0 to build locally.
+//
+// opts: optional settings. Only `exposeSharedImage` (default true) is read.
+//       The map is never modified, and null is treated as empty.
+// body: closure executed inside withEnv with the computed variables.
+void withAgentNfsEnv(Map opts, Closure body) {
+  String node = env.NODE_NAME ?: 'localhost'
+  prepareAgentCaches(node)
+  Map agentEnv = agentCachesDockerExtra()
+  List<String> envList = agentEnv.envList as List<String>
+  envList << "FINN_DOCKER_EXTRA=${agentEnv.dockerExtra}"
+  String imageDir = buildDockerImageDir()
+  // Read-only lookup. Groovy's Map.get(key, default) inserts the default
+  // into the caller's map when the key is absent, which mutates caller state
+  // and throws on an immutable map.
+  boolean exposeSharedImage = (opts != null && opts.containsKey('exposeSharedImage'))
+      ? (opts.exposeSharedImage as boolean) : true
+  if (imageDir && exposeSharedImage) {
+    envList << "FINN_DOCKER_SHARED_IMAGE_DIR=${imageDir}"
+  } else if (!imageDir) {
+    // No shared image dir at all: tell run-docker.sh to build locally.
+    envList << "FINN_DOCKER_PREBUILT=0"
+  }
+  withEnv(envList) { body() }
+}
+
+// Returns the sanitised job key. The sanitisation itself lives in
+// finn_ci.config so build and HW pipelines cannot diverge. The value is
+// populated eagerly by loadStageConfig() during Validate. Calling this
+// before then fails the step with an explicit error.
+String finnCiJobKey() {
+  if (FINN_CI_JOB_KEY) {
+    return FINN_CI_JOB_KEY
+  }
+  error "finnCiJobKey: FINN_CI_JOB_KEY is unset. loadStageConfig() must run before any helper that reads it."
+}
+
+// Sanitise JOB_NAME + BUILD_NUMBER into the scope token an LSF bsub wrapper
+// embeds in every job name (`finn_ci_<jobScope>_<TOOL>_<JOB_TAG>`, jobScope
+// being this token). Every character outside [A-Za-z0-9_] becomes '_', and
+// a missing JOB_NAME or BUILD_NUMBER is replaced by 'adhoc'. Uses the raw
+// JOB_NAME rather than the validate-config jobKey so the wrapper can build
+// it from env alone, without the full config bundle.
+String lsfScope() {
+  String raw = "${env.JOB_NAME ?: 'adhoc'}_${env.BUILD_NUMBER ?: 'adhoc'}"
+  return raw.replaceAll(/[^A-Za-z0-9_]/, '_')
+}
+
+// Per-build directory in the shared artifact tree:
+// <finnArtifactsRoot()>/ci_runs/<jobKey>/<BUILD_NUMBER>. Returns '' when no
+// artifacts root is available (local fallback mode).
+String artifactBuildRoot() {
+  String sharedRoot = finnArtifactsRoot()
+  return sharedRoot ? "${sharedRoot}/ci_runs/${finnCiJobKey()}/${env.BUILD_NUMBER}" : ''
+}
+
+// Canonical build-to-HW handoff path:
+// <artifactBuildRoot()>/zips/<testType>/<board>.zip. The build pipeline
+// writes the zip plus a sibling READY here, HW resolves per (testType, board)
+// to the newest READY zip. Errors in local fallback so a caller cannot
+// accidentally fileExists() in the agent workspace. Callers MUST gate on
+// finnArtifactsRoot().
+String boardZipPath(String testType, String board) {
+  String buildRoot = artifactBuildRoot()
+  if (buildRoot) {
+    return "${buildRoot}/zips/${testType}/${board}.zip"
+  }
+  error "boardZipPath(${testType}/${board}): artifact tree unavailable. " +
+        "Callers must gate on finnArtifactsRoot() before invoking."
+}
+
+// Collapse the flattened ZIP_ARTIFACTS back into a per-row summary so each
+// active row appears once even when it publishes for multiple boards.
+// Output shape: "<stage> -> <board>,<board>; <stage> -> <board>", with rows
+// in first-seen order.
+String activeArtifactRowSummary() {
+  def boardsByRow = [:] as LinkedHashMap
+  ZIP_ARTIFACTS.each { entry ->
+    mapAppend(boardsByRow, entry.stage as String, entry.board as String)
+  }
+  def rendered = []
+  boardsByRow.each { rowName, rowBoards ->
+    rendered << (rowName + ' -> ' + rowBoards.join(','))
+  }
+  return rendered.join('; ')
+}
+
+// Per-shard scratch root: <WORKSPACE_TMP>/finn_ci_runs. WORKSPACE_TMP is a
+// sibling of the git workspace, so a deep NFS scratch tree never races git
+// clean. Falls back to <workspace>/tmp only when WORKSPACE_TMP is unset (in
+// which case the git clean race that wipeStaleWorkspace guards against can
+// recur). Fails the step when WORKSPACE itself is unset.
+String finnCiScratchRoot() {
+  if (!env.WORKSPACE) { error "WORKSPACE is unset" }
+  String parent = env.WORKSPACE_TMP
+  if (!parent) {
+    parent = "${env.WORKSPACE}/tmp"
+  }
+  return parent + '/finn_ci_runs'
+}
+
+// Scratch directory of this build: <finnCiScratchRoot()>/<BUILD_NUMBER>.
+// Fails the step when BUILD_NUMBER is unset.
+String finnCiPerBuildRoot() {
+  String buildNo = env.BUILD_NUMBER
+  if (!buildNo) { error "BUILD_NUMBER is unset" }
+  return finnCiScratchRoot() + '/' + buildNo
+}
+
+// Fail fast on low scratch space so pytest does not hit ENOSPC mid-run.
+// Probes FINN_HOST_BUILD_DIR (default /scratch) with df and fails the step
+// when fewer than minFreeGB gigabytes are free. minFreeGB <= 0 disables the
+// check. When the df output cannot be parsed, the precheck is skipped with a
+// log line instead of failing the shard.
+void checkAgentFreeSpace(int minFreeGB) {
+  if (minFreeGB <= 0) { return }
+  String target = env.FINN_HOST_BUILD_DIR ?: '/scratch'
+  // Climb to the nearest existing ancestor so df works on a path that has
+  // not yet been mkdir'd. -P forces single-line records so a long device
+  // name cannot wrap and push the data onto a third line, which would make
+  // the NR==2 field the device row instead of the free-space figure.
+  // target goes through shellQuote (like every other path in this file)
+  // rather than hand-written single quotes, so a quote character in
+  // FINN_HOST_BUILD_DIR cannot break out of the assignment.
+  def raw = sh(returnStdout: true, script: """
+d=${shellQuote(target)}
+while [ ! -d "\$d" ] && [ "\$d" != '/' ]; do d=\$(dirname "\$d"); done
+df -P -BG "\$d" | awk 'NR==2 {print \$4}' | tr -d 'G'
+""").trim()
+  int freeGB = raw.isInteger() ? raw.toInteger() : -1
+  // Handle the unparseable case first so the log never reports "-1 GB free".
+  if (freeGB < 0) {
+    echo "checkAgentFreeSpace[${env.NODE_NAME}]: could not parse df output (${raw.take(80)}), skipping precheck"
+    return
+  }
+  echo "checkAgentFreeSpace[${env.NODE_NAME}]: ${freeGB} GB free (probed ${target}, threshold ${minFreeGB} GB)"
+  if (freeGB < minFreeGB) {
+    error "agent ${env.NODE_NAME} has only ${freeGB} GB free on ${target} (< ${minFreeGB} GB threshold), aborting shard to avoid mid-run disk exhaustion."
+  }
+}
+
+// Best-effort: runs ci/scripts/lsf_summary.sh with the build dir, the node
+// name (empty when NODE_NAME is unset) and the stash name. A failure is
+// non-fatal.
+void printLsfSummary(String buildDir, String stashName) {
+  catchError(buildResult: null, stageResult: null,
+             message: "printLsfSummary(${stashName}) failed, non-fatal") {
+    String scriptArgs = [
+      shellQuote(buildDir),
+      shellQuote(env.NODE_NAME ?: ''),
+      shellQuote(stashName),
+    ].join(' ')
+    sh "ci/scripts/lsf_summary.sh " + scriptArgs
+  }
+}
+
+// Best-effort: prints failures from <stashName>.xml through
+// `finn_ci print-failures`, passing linesPerFailure and maxFailures through
+// unchanged. Logs a note and returns when the JUnit XML is missing. A
+// failure is non-fatal.
+void printPytestFailures(String stashName, int linesPerFailure, int maxFailures) {
+  catchError(buildResult: null, stageResult: null,
+             message: "printPytestFailures(${stashName}) failed, non-fatal") {
+    String junitXml = "${stashName}.xml"
+    if (fileExists(junitXml)) {
+      sh "${CI_TOOL} print-failures " +
+         "${shellQuote(junitXml)} ${shellQuote(stashName)} ${linesPerFailure} ${maxFailures}"
+    } else {
+      echo "[pytest-failures ${stashName}] no JUnit XML at ${junitXml}"
+    }
+  }
+}
+
+// Best-effort: runs ci/scripts/failure_tails.sh with the build dir, the
+// stash name and the tail length. A failure is non-fatal.
+void printFailureTails(String buildDir, String stashName, int tailLines) {
+  catchError(buildResult: null, stageResult: null,
+             message: "printFailureTails(${stashName}) failed, non-fatal") {
+    String quotedDir = shellQuote(buildDir)
+    String quotedStash = shellQuote(stashName)
+    sh "ci/scripts/failure_tails.sh ${quotedDir} ${quotedStash} ${tailLines}"
+  }
+}
+
+// Best-effort: has ci/scripts/archive_failure_logs.sh write
+// failure_logs/<stashName>.tar.gz (passed as an absolute path under
+// WORKSPACE) and archives it. An absent tarball is tolerated
+// (allowEmptyArchive).
+// FINN_LSF_NFS_STAGING (used inside the helper for the LSF staging tails)
+// is read from the environment because the staging dir lives outside the
+// per-build dir.
+void archiveFailureLogs(String buildDir, String stashName, String startMarker) {
+  catchError(buildResult: null, stageResult: null,
+             message: "archiveFailureLogs(${stashName}) failed, non-fatal") {
+    String relTarball = "failure_logs/${stashName}.tar.gz"
+    String scriptArgs = [
+      shellQuote(buildDir),
+      shellQuote("${env.WORKSPACE}/${relTarball}"),
+      shellQuote(startMarker),
+    ].join(' ')
+    sh "ci/scripts/archive_failure_logs.sh " + scriptArgs
+    archiveArtifacts artifacts: relTarball, allowEmptyArchive: true
+  }
+}
+
+// Thin wrappers over ci/common.groovy. Each one forwards its arguments
+// unchanged to the module returned by loadCommonOnce(), so callers in this
+// file can use bare names. Note that safeStashReport forwards to
+// safeStashShardReport (the names differ). All others forward to the
+// same-named function.
+void safeStashReport(String stashName)        { loadCommonOnce().safeStashShardReport(stashName) }
+void unstashIfPresent(String stashName)       { loadCommonOnce().unstashIfPresent(stashName) }
+void cleanPreviousBuildFiles(String buildDir) { loadCommonOnce().cleanPreviousBuildFiles(buildDir) }
+void runDockerCommand(String command)         { loadCommonOnce().runDockerCommand(command) }
+boolean paramBool(String name)                { return loadCommonOnce().paramBool(name) }
+String  paramString(String name)              { return loadCommonOnce().paramString(name) }
+String  shellQuote(String s)                  { return loadCommonOnce().shellQuote(s) }
+List    mapAppend(Map m, Object k, Object v)  { return loadCommonOnce().mapAppend(m, k, v) }
diff --git a/docker/jenkins/Jenkinsfile_Brevitas b/ci/Jenkinsfile_Brevitas
similarity index 100%
rename from docker/jenkins/Jenkinsfile_Brevitas
rename to ci/Jenkinsfile_Brevitas
diff --git a/docker/jenkins/Jenkinsfile_CI b/ci/Jenkinsfile_CI
similarity index 100%
rename from docker/jenkins/Jenkinsfile_CI
rename to ci/Jenkinsfile_CI
diff --git a/ci/README.md b/ci/README.md
new file mode 100644
index 0000000000..dd2fe3701e
--- /dev/null
+++ b/ci/README.md
@@ -0,0 +1,268 @@
+# FINN Jenkins CI guide
+
+## How the pipeline works
+
+The [Jenkinsfile](./Jenkinsfile) is a declarative pipeline of four stages, delegating most logic to the `finn_ci` Python package.
+
+1. **Validate**: computes the sharding plan once, prepares a timing snapshot from historical records, checks executor budget, and prunes the shared trees.
+2. **Build Docker Image**: builds the FINN image with `run-docker.sh` and publishes it to NFS (if `FINN_CI_NFS_ROOT` is set) so the test shards load it instead of rebuilding.
+3. **Run Tests**: fans out one parallel branch per shard. Each branch runs `python -m pytest -m <marker> --num-shards N --shard-id i` inside the container and stashes results/artifacts.
+4. **Check Stage Results**: unstashes every shard's reports, aggregates one board zip per `(hwTestType, board)`, and refreshes the persistent timing master file.
+
+Terms used throughout:
+
+- **row** is one entry in the `STAGES` list in [finn_ci/config.py](./finn_ci/config.py). Here is where you can configure the sharding/worker policy.
+- **stage** means either the four Jenkins pipeline stages above or a row's parallel branch name (e.g. "fpgadataflow (1/2)").
+- **shard** is one slice of a row's tests, selected with `--num-shards` and `--shard-id`, running as its own parallel branch on its own agent workspace.
+- **stash** is the per-shard report bundle, containing a shard's JUnitXML, HTML, timings, and shard-map sidecar files.
+- **group** refers to a `@pytest.mark.xdist_group`. The test-to-shard assignment logic always keeps these groups together.
+- **LSF** is IBM's "Load Sharing Facility", the compute-farm infrastructure FINN can use to offload heavy EDA work during CI runs. It is completely optional.
+
+---
+
+## For contributors and test authors
+
+For external contributors who would like to write or edit tests in FINN:
+
+### Run the tests locally
+
+You do not need Jenkins to run the same tests locally. From a checkout:
+
+```bash
+./run-docker.sh python -m pytest -m sanity_bnn
+```
+
+substituting any marker from the `STAGES` table in [finn_ci/config.py](./finn_ci/config.py). The sharding flags are optional and change nothing when omitted. If running tests in parallel locally with `-n <N>` (i.e. multiple workers), add `--dist loadgroup` too, so the checkpoint-linked tests stay on one worker.
+
+### Add a new test
+
+Decorate it with the existing markers. For example, `@pytest.mark.fpgadataflow`. The next CI run picks it up automatically. If the test reuses a checkpoint another test produces (loaded with `load_test_checkpoint_or_skip`), put both under the same `@pytest.mark.xdist_group(...)` so the sharder keeps them on one worker.
+
+### Clean up scratch in a test
+
+Use FINN's `make_build_dir()` and tear it down with the `robust_rmtree()` helper.
+
+### Add a new BNN parameter value
+
+Edit the `_BNN_WBITS`, `_BNN_ABITS`, and `_BNN_TOPOLOGY` constants in `tests/end2end/test_end2end_bnn_pynq.py`. Nothing else is needed.
+
+---
+
+## For privileged Jenkins users
+
+### Trigger a build
+
+The job DSL targets "dev" by default. Currently, targeting a different branch means editing the DSL and running a seed job.
+
+> See [Known limitations](#known-limitations) for the plan to remove this manual step.
+
+To start a build, click *Build with Parameters* and select the stage you would like to run.
+
+
+| `STAGES` value     | Rows that run            | Use when                                      | Needs `FINN_CI_NFS_ROOT`?                                      |
+| ------------------ | ------------------------ | --------------------------------------------- | -------------------------------------------------------------- |
+| `sanity` (default) | Sanity rows only         | Per-PR quick check                            | Recommended (publishes `bnn_build_sanity` zips for HW handoff) |
+| `full`             | Every CI row             | Nightly / pre-merge full matrix               | Yes (otherwise no handoff and no timing master update)         |
+| `fpgadataflow`     | fpgadataflow row(s) only | Only build-side debug, no HW handoff produced | No                                                             |
+| `end2end`          | end2end + BNN rows only  | Debugging just the end2end family             | Recommended (BNN rows publish `bnn_build_full` zips)           |
+
+
+The above table is unit tested for drift against the actual stage tables. Bitstream artifact handoff is skipped if an NFS root directory is not set. Note that "sanity" is the only stage that will be available directly after running a seed job.
+
+`local_setup` is another stage that can be added for non-Docker tests. Set `FINN_LOCAL_BUILD_LABEL` in the DSL to bind the stage to an agent that has the requisite dependencies set up.
+
+### Debug one stage
+
+Trigger a build with the matching `STAGES` value and use `STAGE_FILTER` in the GUI to match via substring to the shard's display name, for example, `STAGE_FILTER=BNN U250`.
+
+### Pin a test to a specific shard
+
+`@pytest.mark.shard(N)` pins a test and any `xdist_group` siblings to shard N.
+
+### Find which stage and shard runs a given test
+
+For an archived Jenkins build, open `reports/shard_map.txt` and grep for the nodeid or any useful substring. The row format is in the Reference section.
+
+---
+
+## For maintainers
+
+### Infrastructure configuration
+
+`FINN_CI_NFS_ROOT` (shared storage directory) is the only CI-pipeline-specific env var a Jenkins operator sets, and it is optional. Everything else derives from it. Wire it into the job DSL as a global env variable. When unset, the pipeline still runs but with the following features degraded:
+
+- No shared Docker image cache (each agent rebuilds locally)
+- No build-to-HW artifact handoff (HW pipeline won't be able to use this build)
+- No persistent timing master file (sharding falls back to round-robin)
+
+Optional CI-related overrides are listed below, with sensible defaults:
+
+
+| Env var                    | Defaults to                            | What it changes                                                                                                             |
+| -------------------------- | -------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- |
+| `FINN_LOCAL_BUILD_LABEL`   | `finn-build`                           | Agent label for the optional non-Docker `setup-local.sh` stage, requires a host with dependencies in place.                 |
+| `FINN_CI_LOCAL_CACHE_ROOT` | `${WORKSPACE_TMP:-/tmp}/finn-ci-cache` | Pip + XDG cache root for the same non-Docker stage.                                                                         |
+| `FINN_CI_MIN_FREE_GB`      | `120`                                  | Minimum free space (GB) on the agent scratch volume below which a shard refuses to start.                                   |
+| `FINN_LSF_NFS_STAGING`     | unset                                  | Staging area for LSF jobs, setting this variable enables a range of LSF functionality. Only needed if using an LSF cluster. |
+
+### Test configuration
+
+Every parallel stage is defined by one row of `STAGES` in [finn_ci/config.py](./finn_ci/config.py). The Jenkinsfile loads the entire config bundle during `Validate`, and the pytest plugin responsible for distributing the shards at the executor level ([finn_ci/plugin.py](./finn_ci/plugin.py)) consumes the same configuration.
+
+A `STAGES` row `marker` is restricted to an "a or b or c" pattern because it is interpolated into a shell `-m` argument, so `and`/`not` are rejected. This only constrains `STAGES` rows. Ad-hoc runs such as `pytest -m "fpgadataflow and not slow"` are unaffected and can still be sharded locally.
+
+If a stage is completing slowly, it may be possible to speed it up by increasing the shard or worker count.
+
+### Adding a new CI param
+
+`finn_ci.config.STAGES` rows carry a `param` field that maps onto the `STAGES` Jenkins choice. For instance, "sanity" in Jenkins maps to the rows 'Sanity - Build Hardware' and 'Sanity - Unit Tests'. To add a new family, for instance "quantization":
+
+1. Add `STAGES` rows with `"param": "quantization"`.
+
+   ```
+   {
+       "param": "quantization",
+       "stage": "Quantization - Brevitas",
+       "marker": "quant_brevitas",
+       "shards": 2,
+       "workers": 8,
+       "coverage": True,
+   }
+   ```
+
+2. Run `PYTHONPATH=ci python3 -m finn_ci stage-choices-json` and copy the generated list into the Jenkinsfile's `choice` block. There is a util test that catches drift, should it occur.
+3. Add a row to the `STAGES` table in this README (any drift in this table is also tested).
+
+After those three edits, a user picking `STAGES=quantization` in Jenkins gets
+
+```
+choice quantization -> rows ['Quantization - Brevitas']
+```
+
+### Adding a new BNN board
+
+1. Add the marker `bnn_<board>` to `setup.cfg` under `[tool:pytest]`.
+2. In [finn_ci/config.py](./finn_ci/config.py), add a `BOARDS` entry, plus a `STAGES` row that references the board in its `zipArtifacts.boards`. `tests/end2end/test_end2end_bnn_pynq.py` reads `BOARDS[board]["bnnMarker"]`, so the board's scenarios are parametrised automatically.
+3. Nothing else is needed. `validate_config()` sanity-checks each `STAGES`/`BOARDS` row.
+
+### Running tools on LSF (optional)
+
+Each shard runs safely as a parallel branch on whatever `finn-build` executor picks it up, so adding capacity is as simple as adding more machines and agents under that label. For this reason, integration with an LSF cluster is not required to run FINN's CI.
+
+However, the intended long-term operational model for FINN CI is a single FINN build machine running several shards at once, delegating any heavy tasks to a compute farm. A tool interception hook has been provided for this reason at `finn.util.basic.resolve_xilinx_tool()`. The agent still drives the FINN flow and pytest, but each `vivado` / `v++` / `vitis_hls` / `xelab` invocation is wrapped with a deployment-specific shim that can delegate heavy subprocesses. The interception hook is generic and can be adapted for a variety of HPC models.
+
+If using IBM's LSF, the pipeline cooperates with such a wrapper through one env var, `FINN_LSF_NFS_STAGING`. When it is set:
+
+- The Jenkinsfile reaps orphaned `bsub` jobs left by an aborted build, both on this build's completion and at the next build's Validate. The site's wrapper must tag every job name `finn_ci_<jobScope>_<TOOL>_<JOB_TAG>` so the reaper can find them and `bkill` only the jobs whose submitting build is no longer running.
+- `archive_failure_logs.sh` tails the LSF staging-dir logs into a failed shard's bundle, so a farm-side tool failure is visible in Jenkins without opening the cluster.
+
+When `FINN_LSF_NFS_STAGING` is unset (the default) both behaviours are skipped.
+
+### Sharding and timing state
+
+Shard balancing is managed dynamically and automatically using historical data. A cold start will fall through to round-robin shard assignment. A persistent master timing file is refreshed by any build that ran fully (not aborted or interrupted), including runs with build failures.
+
+The timing master schema is `{"schema_version": 1, "updated_at": ..., "last_update": {...}, "groups": {<name>: {"samples": [last MAX_SAMPLES observations]}}}`. Each qualifying build appends one observation per observed xdist_group/test and trims the window to the five most recent samples. The weight used by the shard assignment logic is the **max** inside the window. For instance, if a group took the following amounts of time in the previous five runs:
+
+```
+25 min, 30 min, 18 min, 40 min, 22 min
+```
+
+the weight assigned to that group would be *40 minutes*.
+
+This guards conservatively against under-provisioning while bounding how long an outlier will affect timings. A corrupt or unreadable master timing file is logged and treated as empty, so the build degrades to deterministic round-robin sharding and the next build repopulates the master from its own observations. The master is disposable, so a once-off problem self-heals without manual cleanup.
+
+To inspect timing state, open `reports/ci_timings_master.json` from any archived build.
+
+### Build-to-HW zip handoff
+
+The build pipeline stages board deployment directories per shard, then "Check Stage Results" aggregates those staged deployments into one board bitstream zip plus a `.READY` marker in the per-build directory.
+
+```
+${FINN_CI_NFS_ROOT}/artifacts/ci_runs/<jobKey>/<BUILD>/
+      zips/<hwTestType>/<board>.zip
+      zips/<hwTestType>/<board>.zip.READY
+      BUILD_INFO.txt
+      deployments/<hwTestType>/<board>/<stash>/<board>/<model>/
+```
+
+The `.READY` marker is the build-to-HW handshake. It is touched only after the aggregated zip has been renamed into place. `FINN_CI_NFS_ROOT` is required for any build run that expects bitstream inputs (for example, Jenkinsfile_HW).
+
+> Note: Jenkinsfile_HW hasn't been migrated yet. See [Known limitations](#known-limitations).
+
+A `STAGES` row that produces these zips declares a `zipArtifacts` nested key:
+
+```python
+"zipArtifacts": {"hwTestType": "bnn_build_full", "boards": ["U250"]}
+```
+
+`hwTestType` (today `bnn_build_sanity` or `bnn_build_full`) selects which HW pipeline category the zip feeds. `boards` lists the board zips the row produces. The nested shape means the two keys are either both present or both absent.
+
+`BUILD_INFO.txt` is simply a human-readable provenance record of the build.
+
+### Storage and retention
+
+Set `FINN_CI_NFS_ROOT` once on the Jenkins controller (in the job DSL) and the build pipeline derives every shared subtree from it. There are no other CI storage env vars to set. Layout under `FINN_CI_NFS_ROOT`:
+
+```
+agent_caches/<NODE>/{xrt,finn_cache,vivado_ip_cache}   per-agent caches
+docker_images/<jobKey>/<BUILD>/                        shared docker image
+artifacts/ci_runs/<jobKey>/<BUILD>/                    build-to-HW handoff + BUILD_INFO
+_ci_state/<jobKey>/                                    timing master + snapshots
+```
+
+Per-shard scratch lives at `${WORKSPACE_TMP}/finn_ci_runs/<BUILD>/<stash>` (falling back to `${WORKSPACE}/tmp/finn_ci_runs/...` only when `WORKSPACE_TMP` is unset). The workspace itself is per-agent and configured on the DSL side: NFS-mounted via `remote_fs` on the lab build hosts, local SSD elsewhere.
+
+The "Validate" stage rotates the image, artifact, and timing-snapshot trees via the single `rotateBuildTrees()` helper in [Jenkinsfile](./Jenkinsfile). Each rotation keeps the newest N numeric entries and the current build, and deletes older entries whose mtime is more than M days old. All three rotations skip without error when their parent directory does not exist, and the Python side tolerates concurrent prune races.
+
+---
+
+## Known limitations
+
+- **Targeting a non-default branch needs a DSL edit.** The job DSL targets "dev" by default, so testing a different branch (for example a PR branch) currently means editing the DSL and running a seed job. The intended fix is to target a PR branch without hand-editing the DSL.
+- **Jenkinsfile_HW is not migrated yet.** It will continue working with any existing artifacts in the legacy `ARTIFACT_DIR`, but won't work with the new aggregated board zips until it is migrated. The intention is that the HW test runs with the newest valid board zip available, and is marked UNSTABLE if the newest zip wasn't created by the last run (i.e. there was some error in the build stage that is unrelated to HW tests).
+
+---
+
+## Reference
+
+### Artifacts
+
+- `reports/*.xml`, `reports/*.html` from pytest and `pytest_html_merger`.
+- `reports/<stash>.timings.json` per shard.
+- `reports/<stash>.shardmap.txt` and `reports/<stash>.shardmap.json` per shard.
+- `reports/shard_map.txt` and `reports/shard_map.json` merged across all shards.
+- `reports/ci_timings_master.json` archived timing preview from this build. Its `last_update` field records the observed group count and whether the shared master was updated.
+- `reports/<stash>.empty-shard` per shard that collected zero items. Useful for distinguishing "shard had no work" from "shard crashed".
+- `coverage_combined/` one merged HTML report across all rows with `coverage: true`. Per-shard pytest runs write raw `.coverage` data files (one per shard, named via `COVERAGE_FILE=<stash>.coverage`), `aggregateReports` runs `coverage combine` and `coverage html` on the union, and the merged result is archived. Skipped silently when no row opted in.
+- `${FINN_CI_NFS_ROOT}/artifacts/ci_runs/<jobKey>/<BUILD_NUMBER>/zips/<hwTestType>/<board>.zip` per row with a `zipArtifacts` entry. `aggregateReports()` runs `assertZipArtifactsEmitted()`, which marks the build UNSTABLE (non-fatal) when an active row declared `zipArtifacts` but no `.READY` was written.
+- `${FINN_CI_NFS_ROOT}/artifacts/ci_runs/<jobKey>/<BUILD_NUMBER>/zips/<hwTestType>/<board>.zip.READY` per-board handshake marker, touched only after the zip is in place. Publishing is idempotent for same-build retries.
+- `${FINN_CI_NFS_ROOT}/artifacts/ci_runs/<jobKey>/<BUILD_NUMBER>/BUILD_INFO.txt` for human traceability.
+
+### shard_map.txt row format
+
+Each row of `reports/shard_map.txt` is grep-friendly:
+
+```text
+nodeid=<nodeid> stage=<stage> shard=<i>/<n> stash=<stash> group=<group> weight_s=<seconds> source=<known|fallback|pinned|round_robin|single>
+```
+
+### DSL environment variables
+
+These are the other env vars a job DSL typically sets for a build-pipeline job, on top of the CI-specific ones in "Infrastructure configuration" (`FINN_CI_NFS_ROOT` and the optional overrides). They are consumed by `run-docker.sh` and the FINN flow rather than by the pipeline itself, so the defaults and meanings match a normal local `run-docker.sh` run.
+
+| Env var               | What it sets                                                                                                                                                  |
+| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `FINN_XILINX_PATH`    | Path to the Xilinx tools install. `run-docker.sh` warns when unset, and Vivado/Vitis/HLS steps need it.                                                       |
+| `FINN_XILINX_VERSION` | Xilinx tool version (for example `2022.2`).                                                                                                                   |
+| `PLATFORM_REPO_PATHS` | Vitis platform (DSA) files, required for Vitis-based Alveo cards.                                                                                             |
+| `FINN_DOCKER_EXTRA`   | Extra `docker run` arguments (bind mounts, licence, network, and any `-e` vars the tool-dispatch layer needs). The pipeline appends a per-agent `--hostname` and the NFS cache mounts to whatever the DSL sets. |
+| `NUM_DEFAULT_WORKERS` | Default xdist worker count for ad-hoc runs. Per-shard worker counts come from `STAGES`, not this.                                                             |
+
+A site that offloads the heavy Xilinx tools to a compute farm (see "Running tools on LSF") needs no pipeline changes. The tool wrapper and its configuration ride into the container through `FINN_DOCKER_EXTRA`, and the only variable FINN itself reads is the shim-directory override below:
+
+| Env var                  | What it sets                                                                                                                                        |
+| ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `FINN_TOOL_DIR_OVERRIDE` | Shim directory. `finn.util.basic.resolve_xilinx_tool()` resolves `vivado`/`v++`/`vitis_hls`/`vitis-run`/`xelab` to `<dir>/<tool>` when set.        |
+
+The wrapper's own variables are deployment-specific.
diff --git a/ci/common.groovy b/ci/common.groovy
new file mode 100644
index 0000000000..695dc1cd1a
--- /dev/null
+++ b/ci/common.groovy
@@ -0,0 +1,121 @@
+// Shared helpers loaded by the build pipeline Jenkinsfile.
+
+boolean paramBool(String name) {
+  def v = params.get(name)
+  if (v == null) { return false }
+  if (v instanceof Boolean) { return v }
+  return v.toString().toBoolean()
+}
+
+String paramString(String name) {
+  def v = params.get(name)
+  return v == null ? '' : v.toString()
+}
+
+// Sole shell-quoting primitive. Wraps the argument in single quotes and
+// escapes embedded single quotes.
+String shellQuote(String s) {
+  return "'" + (s ?: '').replace("'", "'\"'\"'") + "'"
+}
+
+// Sets FINN_DOCKER_PREBUILT=1 when a shared image is configured so non-builder
+// agents load the image from NFS instead of rebuilding.
+void runDockerCommand(String command) {
+  if (env.FINN_DOCKER_SHARED_IMAGE_DIR) {
+    withEnv(['FINN_DOCKER_PREBUILT=1']) {
+      sh command
+    }
+  } else {
+    sh command
+  }
+}
+
+void unstashIfPresent(String stashName) {
+  try {
+    unstash stashName
+  } catch (Exception ignored) {
+    echo "No stash '${stashName}' (stage skipped or failed before publishing)"
+  }
+}
+
+// Single stash-with-catchError primitive. requireFile, if given, gates the
+// stash on that file existing. allowEmpty controls the stash step.
+void _stashReport(String stashName, String includes, boolean allowEmpty, String requireFile) {
+  catchError(buildResult: null, stageResult: null,
+             message: "safeStashReport(${stashName}) failed, aggregation may be partial") {
+    if (requireFile && !fileExists(requireFile)) { return }
+    stash name: stashName, includes: includes, allowEmpty: allowEmpty
+  }
+}
+
+// Build pipeline stashes the full per-shard report sidecar set. Some are
+// missing when a shard fails early, so allowEmpty is true. The .coverage
+// entry only exists on rows that opted into coverage in STAGES.
+void safeStashShardReport(String stashName) {
+  _stashReport(
+    stashName,
+    "${stashName}.xml,${stashName}.html,${stashName}.timings.json," +
+    "${stashName}.shardmap.txt,${stashName}.shardmap.json,${stashName}.stagemap," +
+    "${stashName}.empty-shard,${stashName}.coverage",
+    true,
+    null,
+  )
+}
+
+// Hard-fail on root-owned residue. Factored out so the build forms below
+// cannot diverge on the error message or detection logic.
+void _assertNoResidue(String caller, String q) {
+  sh """
+    if [ -d ${q} ]; then
+      echo "${caller}: ${q} still exists after rm. Likely root-owned residue. Ask an admin to 'sudo rm -rf' the directory on this agent."
+      ls -la ${q} | head -40
+      exit 1
+    fi
+  """
+}
+
+// Tolerant rm, hard-fail on root-owned residue, then pre-create as the
+// unprivileged user so docker -v does not bind the mount as root.
+void cleanPreviousBuildFiles(String buildDir) {
+  if (!buildDir || buildDir.empty) { return }
+  String q = shellQuote(buildDir)
+  sh "rm -rf ${q} 2>/dev/null || true"
+  _assertNoResidue('cleanPreviousBuildFiles', q)
+  sh "mkdir -p ${q}"
+}
+
+// All shared NFS subtrees derive from FINN_CI_NFS_ROOT. Returning '' from any
+// resolver means "no NFS available". Callers must handle that as a fallback.
+String finnCiNfsRoot() { return (env.FINN_CI_NFS_ROOT ?: '').trim() }
+
+String finnSubdir(String... segments) {
+  String r = finnCiNfsRoot()
+  if (!r) { return '' }
+  for (int i = 0; i < segments.length; i++) {
+    if (!segments[i]) { return '' }
+  }
+  return ([r] + (segments as List)).join('/')
+}
+
+String finnAgentCachesDir(String node)    { return finnSubdir('agent_caches', node) }
+String finnDockerImagesRoot()             { return finnSubdir('docker_images') }
+String finnDockerImagesDir(String jobKey) { return finnSubdir('docker_images', jobKey) }
+String finnArtifactsRoot()                { return finnSubdir('artifacts') }
+String finnCiStateRoot()                  { return finnSubdir('_ci_state') }
+String finnCiStateDir(String jobKey)      { return finnSubdir('_ci_state', jobKey) }
+
+// Append `value` to the list at `map[key]`, creating the list lazily.
+// Replaces the inline `Map.computeIfAbsent` idiom because CPS does not
+// reliably transform SAM closures to java.util.function.Function. Returns
+// the (possibly newly created) list so callers can chain.
+List mapAppend(Map map, Object key, Object value) {
+  def existing = map.get(key)
+  if (existing == null) {
+    existing = []
+    map.put(key, existing)
+  }
+  existing << value
+  return existing
+}
+
+return this
diff --git a/ci/finn_ci/__init__.py b/ci/finn_ci/__init__.py
index 4c79d60c62..cfb3b5a6fe 100644
--- a/ci/finn_ci/__init__.py
+++ b/ci/finn_ci/__init__.py
@@ -6,11 +6,17 @@
 """FINN CI helpers.
 
 A small package, importable without the finn package installed, that backs the
-FINN Jenkins CI pipeline and the pytest sharding plugin.
+FINN Jenkins CI pipeline and the pytest sharding plugin. The build pipeline
+drives the CLI with ``PYTHONPATH=ci python3 -m finn_ci <subcommand>``.
 
 Submodules:
-  config   - the CI board and stage tables and the pure helpers over them
-  sharding - deterministic weight-balanced group-to-shard assignment
-  jsonio   - the JSON read helper shared across the package
-  plugin   - the pytest plugin that selects a shard and captures timings
+  config    - the CI board and stage tables and the pure helpers over them
+  sharding  - deterministic weight-balanced group-to-shard assignment
+  jsonio    - the JSON read/write helpers shared across the package
+  plugin    - the pytest plugin that selects a shard and captures timings
+  timing    - the self-maintaining per-group timing master and summaries
+  retention - image/artifact/snapshot tree rotation and pip-cache pruning
+  lsf       - bjobs orphan-job parsing for the build reaper
+  failures  - the stdlib JUnit failure printer
+  __main__  - the CLI dispatched by python3 -m finn_ci
 """
diff --git a/ci/finn_ci/__main__.py b/ci/finn_ci/__main__.py
new file mode 100644
index 0000000000..bc0b2030ba
--- /dev/null
+++ b/ci/finn_ci/__main__.py
@@ -0,0 +1,158 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""CLI for the FINN CI pipeline: python3 -m finn_ci <subcommand>.
+
+Run from a checkout with ci/ on PYTHONPATH (the Jenkinsfile uses
+PYTHONPATH=ci python3 -m finn_ci ...). Each subcommand is a thin wrapper over a
+finn_ci submodule so the Groovy side never re-implements the config, timing,
+retention, or LSF parsing logic.
+"""
+
+import argparse
+import json
+import sys
+from finn_ci import config, failures, lsf, retention, timing
+
+
+def main(argv=None):
+    """CLI entry point. Catches validate_* failures so a malformed STAGES row
+    surfaces in the Validate Jenkins console as a one-line "ci_sharding:"
+    message instead of a Python traceback.
+    """
+    try:
+        return _dispatch(argv)
+    except (ValueError, AssertionError) as exc:
+        print("ci_sharding: %s" % exc, file=sys.stderr)
+        return 2
+
+
+def _dispatch(argv):
+    parser = argparse.ArgumentParser(prog="finn_ci", description=__doc__)
+    sub = parser.add_subparsers(dest="cmd")
+
+    sub.add_parser("stage-choices-json")
+
+    # validate-config is the one entry point the Validate stage in Jenkins
+    # delegates to. Folds enabled_params / job_key / shard_plan into a single
+    # subprocess and runs validate_config() first so a malformed row or orphan
+    # zipArtifact board fails Validate loudly.
+    p = sub.add_parser("validate-config")
+    p.add_argument("--choice", required=True)
+    p.add_argument("--job-name", required=True)
+    p.add_argument("--stage-filter", default="")
+
+    p = sub.add_parser("job-key")
+    p.add_argument("name")
+
+    p = sub.add_parser("lsf-parse-jobs")
+    p.add_argument("--prefix", required=True)
+
+    p = sub.add_parser("prune-pip-cache")
+    p.add_argument("root")
+    p.add_argument("keep")
+    p.add_argument("max_age_days", type=int)
+    p.add_argument("--dry-run", action="store_true")
+
+    p = sub.add_parser("prepare")
+    p.add_argument("--master", required=True)
+    p.add_argument("--snapshot", required=True)
+
+    p = sub.add_parser("summarize")
+    p.add_argument("reports_dir")
+
+    p = sub.add_parser("update")
+    p.add_argument("--reports", required=True)
+    p.add_argument("--master", default="")
+    p.add_argument("--out", required=True)
+    p.add_argument("--job", default="")
+    p.add_argument("--build", default="")
+    p.add_argument("--update-master", action="store_true")
+
+    p = sub.add_parser("merge-maps")
+    p.add_argument("reports_dir")
+
+    p = sub.add_parser("print-failures")
+    p.add_argument("junit_xml")
+    p.add_argument("stash")
+    p.add_argument("lines_per", type=int)
+    p.add_argument("max_fails", type=int)
+
+    # One numbered-tree rotation for the image / artifact / snapshot trees.
+    # retain_n and max_age_days come from RETENTION[kind], so a caller cannot
+    # pass a window that disagrees with the documented policy.
+    p = sub.add_parser("prune")
+    p.add_argument("--kind", required=True, choices=tuple(retention.RETENTION))
+    p.add_argument("root")
+    p.add_argument("job_key")
+    p.add_argument("current_build")
+    p.add_argument("--dry-run", action="store_true")
+
+    args = parser.parse_args(argv)
+    if args.cmd == "stage-choices-json":
+        print(json.dumps(config.jenkins_stage_choices()))
+        return 0
+    if args.cmd == "validate-config":
+        config.validate_config()
+        print(
+            json.dumps(
+                {
+                    "enabled_params": config.enabled_params_for_choice(args.choice),
+                    "job_key": config.job_key(args.job_name),
+                    "shard_plan": config.shard_plan(args.choice, args.stage_filter),
+                }
+            )
+        )
+        return 0
+    if args.cmd == "job-key":
+        print(config.job_key(args.name))
+        return 0
+    if args.cmd == "lsf-parse-jobs":
+        print(json.dumps(lsf.parse_lsf_jobs(args.prefix, sys.stdin.read())))
+        return 0
+    if args.cmd == "prune-pip-cache":
+        retention.prune_pip_cache(args.root, args.keep, args.max_age_days, args.dry_run)
+        return 0
+    if args.cmd == "prepare":
+        return timing.prepare_timing_snapshot(args.master, args.snapshot)
+    if args.cmd == "summarize":
+        return timing.summarize_timings(args.reports_dir)
+    if args.cmd == "update":
+        return timing.update_master(
+            args.reports,
+            args.master,
+            args.out,
+            update_persistent=args.update_master,
+            metadata={
+                "job": args.job,
+                "build": args.build,
+            },
+        )
+    if args.cmd == "merge-maps":
+        return timing.merge_maps(args.reports_dir)
+    if args.cmd == "print-failures":
+        return failures.print_failures(args.junit_xml, args.stash, args.lines_per, args.max_fails)
+    if args.cmd == "prune":
+        policy = retention.RETENTION[args.kind]
+        prune_fn = {
+            "image": retention.prune_images,
+            "artifact": retention.prune_artifacts,
+            "snapshot": retention.prune_snapshots,
+        }[args.kind]
+        prune_fn(
+            args.root,
+            config.job_key(args.job_key),
+            args.current_build,
+            policy["retain"],
+            policy["ageDays"],
+            args.dry_run,
+        )
+        return 0
+    parser.print_help()
+    return 2
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/ci/scripts/print_pytest_failures.py b/ci/finn_ci/failures.py
old mode 100755
new mode 100644
similarity index 81%
rename from ci/scripts/print_pytest_failures.py
rename to ci/finn_ci/failures.py
index e9173ccbdd..41d5e9c1e6
--- a/ci/scripts/print_pytest_failures.py
+++ b/ci/finn_ci/failures.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # Copyright (C) 2026, Advanced Micro Devices, Inc.
 # All rights reserved.
 #
@@ -9,26 +8,13 @@
 Used by Jenkins to surface per-test failure context when there is no tool
 log to tail (notebook timeouts, asserts, fixture errors). Pure stdlib so
 it runs on any agent.
-
-Usage: print_pytest_failures.py <junit_xml> <stash> <lines_per_failure> <max_failures>
 """
+
 import re
-import sys
 import xml.etree.ElementTree as ET
 
 
-def main(argv):
-    if len(argv) != 5:
-        print(
-            "Usage: print_pytest_failures.py <junit_xml> <stash> "
-            "<lines_per_failure> <max_failures>",
-            file=sys.stderr,
-        )
-        return 2
-    xml_path = argv[1]
-    stash = argv[2]
-    lines_per = int(argv[3])
-    max_fails = int(argv[4])
+def print_failures(xml_path, stash, lines_per, max_fails):
     tag = "[pytest-failures %s]" % stash
     try:
         root = ET.parse(xml_path).getroot()
@@ -81,7 +67,3 @@ def main(argv):
         for ln in body_lines:
             print("  %s" % ln)
     return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv))
diff --git a/ci/finn_ci/jsonio.py b/ci/finn_ci/jsonio.py
index 62db0b0baa..90e7cccc63 100644
--- a/ci/finn_ci/jsonio.py
+++ b/ci/finn_ci/jsonio.py
@@ -3,10 +3,12 @@
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
-"""JSON read helper shared across the finn_ci package."""
+"""JSON read/write helpers shared across the finn_ci package."""
 
 import json
+import os
 import sys
+import tempfile
 
 
 def read_json(path, default=None):
@@ -25,3 +27,22 @@ def read_json(path, default=None):
             file=sys.stderr,
         )
         return default
+
+
+def write_json_atomic(path, data):
+    parent = os.path.dirname(os.path.abspath(path))
+    # exist_ok=True so two concurrent first-time callers on a shared NFS root
+    # cannot race on mkdir.
+    os.makedirs(parent, exist_ok=True)
+    fd, tmp = tempfile.mkstemp(prefix=".tmp-", suffix=".json", dir=parent)
+    try:
+        with os.fdopen(fd, "w") as f:
+            json.dump(data, f, indent=2, sort_keys=True)
+            f.write("\n")
+        os.rename(tmp, path)
+    except Exception:
+        try:
+            os.unlink(tmp)
+        except OSError:
+            pass
+        raise
diff --git a/ci/finn_ci/lsf.py b/ci/finn_ci/lsf.py
new file mode 100644
index 0000000000..8d59bd654f
--- /dev/null
+++ b/ci/finn_ci/lsf.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""LSF orphan-job parsing (irrelevant for CI setups that don't use LSF).
+
+The CI pipeline's reaper still owns the "is this build still running" decision
+(it needs the Jenkins API) and the bkill call, but the fragile job-name parsing
+lives here so a bjobs format change is a one-file, tested fix instead of a
+Groovy regex tweak.
+"""
+
+import collections
+import json
+import re
+
+LSF_JOB_BUILD_RE = re.compile(r"^(\d+)_")
+
+
+def parse_lsf_jobs(prefix, raw):
+    """Group bjobs output into a {build_number: [jobid, ...]} mapping.
+
+    raw may be either of two forms, and both are accepted so the caller
+    does not need to know which one the local LSF build supports:
+
+      - JSON, as emitted by:  bjobs -json -o 'jobid job_name'
+      - plain "jobid job_name" lines, as emitted by:  bjobs -noheader
+
+    Only jobs whose name starts with prefix followed by "<build>_" are
+    kept. Everything else is ignored.
+    """
+    records = _lsf_records(raw)
+    out = collections.OrderedDict()
+    for jobid, name in records:
+        if not jobid or not name or not name.startswith(prefix):
+            continue
+        tail = name[len(prefix) :]
+        m = LSF_JOB_BUILD_RE.match(tail)
+        if not m:
+            continue
+        out.setdefault(m.group(1), []).append(jobid)
+    return out
+
+
+def _lsf_records(raw):
+    """Return (jobid, job_name) pairs from bjobs JSON or text output."""
+    raw = (raw or "").strip()
+    if not raw:
+        return []
+    if raw[0] in "{[":
+        try:
+            doc = json.loads(raw)
+        except ValueError:
+            return []
+        records = doc.get("RECORDS", doc) if isinstance(doc, dict) else doc
+        pairs = []
+        for rec in records or []:
+            if isinstance(rec, dict):
+                jobid = str(rec.get("JOBID", "")).strip()
+                name = str(rec.get("JOB_NAME", "")).strip()
+                pairs.append((jobid, name))
+        return pairs
+    pairs = []
+    for line in raw.split("\n"):
+        toks = line.split(None, 1)
+        if len(toks) < 2:
+            continue
+        pairs.append((toks[0].strip(), toks[1].strip()))
+    return pairs
diff --git a/ci/finn_ci/retention.py b/ci/finn_ci/retention.py
new file mode 100644
index 0000000000..618706ac34
--- /dev/null
+++ b/ci/finn_ci/retention.py
@@ -0,0 +1,222 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Retention and pruning for the shared CI image, artifact and snapshot trees.
+
+One numbered-tree rotation core (_prune_numbered) backs the image, artifact and
+snapshot trees. They differ only in the file lister and the remover. RETENTION
+holds the per-tree window so a caller never restates the policy. Deletions
+tolerate concurrent removal on a shared NFS parent.
+"""
+
+import errno
+import os
+import re
+import shutil
+import time
+
+# Per-tree retention for the "prune --kind {image,artifact,snapshot}" CLI
+# subcommand. Artifacts are the per-board fallback used when a board's most
+# recent build regresses, so they are kept deep. Snapshots are the small
+# per-build timing inputs, so a shallow window is enough.
+RETENTION = {
+    "image": {"retain": 3, "ageDays": 14},
+    "artifact": {"retain": 30, "ageDays": 30},
+    "snapshot": {"retain": 3, "ageDays": 2},
+}
+
+
+def robust_rmtree(path, retries=6, initial_delay=0.1, backoff=2.0):
+    """remove a directory tree with retries for transient NFS cleanup races.
+
+    mirror of finn.util.basic.robust_rmtree so ci/ stays importable on
+    bare agents that have no finn package.
+    """
+    if not path or not os.path.exists(path):
+        return
+    delay = initial_delay
+    for attempt in range(retries):
+        try:
+            shutil.rmtree(path)
+            return
+        except FileNotFoundError:
+            return
+        except OSError as exc:
+            if exc.errno not in (errno.ENOTEMPTY, errno.EBUSY) or attempt == retries - 1:
+                raise
+            time.sleep(delay)
+            delay *= backoff
+
+
+SNAPSHOT_FILE_RE = re.compile(r"^build_(\d+)_timings_input\.json$")
+
+
+def _coerce_current_build(value, tag):
+    try:
+        return int(str(value))
+    except (TypeError, ValueError):
+        raise ValueError(
+            "ci_sharding %s: current_build must be an integer-like string, got %r" % (tag, value)
+        )
+
+
+def _list_numeric_dirs(parent):
+    """Return (build, name) for digit-named subdirectories of parent."""
+    return [(int(d), d) for d in os.listdir(parent) if d.isdigit()]
+
+
+def _list_snapshot_files(parent):
+    """Return (build, name) for build_<N>_timings_input.json files."""
+    out = []
+    for name in os.listdir(parent):
+        m = SNAPSHOT_FILE_RE.match(name)
+        if m:
+            out.append((int(m.group(1)), name))
+    return out
+
+
+def _prune_numbered(
+    parent,
+    current_build,
+    retain_n,
+    max_age_days,
+    dry_run,
+    *,
+    tag,
+    list_entries=_list_numeric_dirs,
+    remove=robust_rmtree,
+):
+    """Delete build-numbered entries of parent outside the newest retain_n.
+
+    list_entries maps the parent to (build, name) pairs and remove deletes
+    one path. Both default to the numeric build-dir tree (rmtree), and
+    prune_snapshots overrides them for the snapshot files (unlink).
+
+    The newest retain_n builds and current_build are always kept. An older
+    entry is removed only once it is past max_age_days. Concurrent deletion on
+    a shared NFS parent is tolerated at both probe sites: an entry that
+    vanishes during the age check or the remove is treated as already-pruned.
+    Returns the number matched.
+    """
+    retain_n = int(retain_n)
+    max_age_days = int(max_age_days)
+    if retain_n < 1:
+        raise ValueError("retain_n must be >= 1")
+    if max_age_days < 0:
+        raise ValueError("max_age_days must be >= 0")
+    current_build_int = _coerce_current_build(current_build, tag)
+    if not os.path.isdir(parent):
+        print("ci_sharding %s: %s not present, skipping" % (tag, parent))
+        return 0
+    cutoff = time.time() - (max_age_days * 24 * 60 * 60)
+    # Compare by int so an on-disk "0123" matches a BUILD_NUMBER of "123".
+    entries = sorted(list_entries(parent))
+    keep = {build for build, _ in entries[-retain_n:]}
+    keep.add(current_build_int)
+    matched = 0
+    for build, name in entries:
+        if build in keep:
+            continue
+        path = os.path.join(parent, name)
+        if max_age_days > 0:
+            try:
+                if os.path.getmtime(path) >= cutoff:
+                    continue
+            except FileNotFoundError:
+                continue
+        matched += 1
+        if dry_run:
+            print("ci_sharding %s: would delete %s" % (tag, path))
+        else:
+            print("ci_sharding %s: deleting %s" % (tag, path))
+            try:
+                remove(path)
+            except FileNotFoundError:
+                pass
+    print(
+        "ci_sharding %s: done (parent=%s current=%s retain_n=%s "
+        "max_age_days=%s dry_run=%s matched=%d)"
+        % (tag, parent, current_build_int, retain_n, max_age_days, int(dry_run), matched)
+    )
+    return matched
+
+
+def prune_images(shared_dir, job_key, current_build, retain_n, max_age_days, dry_run=False):
+    parent = os.path.join(shared_dir, job_key)
+    return _prune_numbered(
+        parent, current_build, retain_n, max_age_days, dry_run, tag="prune-images"
+    )
+
+
+def prune_artifacts(artifact_dir, job_key, current_build, retain_n, max_age_days, dry_run=False):
+    """Rotate ${FINN_CI_NFS_ROOT}/artifacts/ci_runs/<job_key>/ for this build job.
+
+    Keeps the newest retain_n builds plus any younger than max_age_days. HW
+    resolves per board to the newest READY zip, so pruning is safe as long as
+    a board's most recent READY still falls inside the retained window. The
+    window is kept deep for that reason (see RETENTION["artifact"]).
+    """
+    parent = os.path.join(artifact_dir, "ci_runs", job_key)
+    return _prune_numbered(
+        parent, current_build, retain_n, max_age_days, dry_run, tag="prune-artifacts"
+    )
+
+
+def prune_snapshots(state_root, job_key, current_build, retain_n, max_age_days, dry_run=False):
+    """Rotate per-build timing snapshot files under _ci_state/<job_key>/.
+
+    The snapshots are named build_<N>_timings_input.json and live alongside
+    the persistent ci_timings_master.json, which is left untouched. Only the
+    build-numbered files are eligible. This shares the numbered-rotation core
+    with the image and artifact trees, differing only in the file lister and
+    the os.unlink remover.
+    """
+    parent = os.path.join(state_root, job_key)
+    return _prune_numbered(
+        parent,
+        current_build,
+        retain_n,
+        max_age_days,
+        dry_run,
+        tag="prune-snapshots",
+        list_entries=_list_snapshot_files,
+        remove=os.unlink,
+    )
+
+
+def prune_pip_cache(root, keep, max_age_days, dry_run=False):
+    """Delete cache-key subdirs of root older than max_age_days.
+
+    The directory pointed at by keep is always retained. A subdir's own mtime
+    is the age key, so an actively reused cache dir is bumped on write and
+    survives. Returns the number matched.
+    """
+    max_age_days = int(max_age_days)
+    if max_age_days < 0:
+        raise ValueError("max_age_days must be >= 0")
+    if not os.path.isdir(root):
+        return 0
+    keep_abs = os.path.abspath(keep) if keep else None
+    cutoff = time.time() - (max_age_days * 24 * 60 * 60)
+    matched = 0
+    for name in os.listdir(root):
+        path = os.path.join(root, name)
+        if not os.path.isdir(path):
+            continue
+        if keep_abs and os.path.abspath(path) == keep_abs:
+            continue
+        if max_age_days > 0:
+            try:
+                if os.path.getmtime(path) >= cutoff:
+                    continue
+            except FileNotFoundError:
+                continue
+        matched += 1
+        if dry_run:
+            print("ci_sharding prune-pip-cache: would delete %s" % path)
+        else:
+            print("ci_sharding prune-pip-cache: deleting %s" % path)
+            robust_rmtree(path)
+    return matched
diff --git a/ci/finn_ci/timing.py b/ci/finn_ci/timing.py
new file mode 100644
index 0000000000..1a7d8ed046
--- /dev/null
+++ b/ci/finn_ci/timing.py
@@ -0,0 +1,265 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Self-maintaining per-group timing master and per-shard report summaries.
+
+The persistent master at
+${FINN_CI_NFS_ROOT}/_ci_state/<jobKey>/ci_timings_master.json holds the last
+MAX_SAMPLES observations per group. update_master appends a build's
+observations. Persisting them to the master is opt-in (the caller persists for
+builds that ran to a normal end). The bin packer (finn_ci.sharding) weights
+each group by the window max. This module also merges the per-shard shard-map
+sidecars and prints the per-shard wall-clock summary used for triage.
+"""
+
+import collections
+import glob
+import os
+import re
+import sys
+import time
+from finn_ci import jsonio, sharding
+
+# Per-group rolling window for the timing master. The bin packer reads the
+# max of the window, meaning each group is weighted by its slowest recent run.
+MAX_SAMPLES = 5
+
+# summarize-timings flags shards exceeding this multiple of the family median.
+SLOW_FACTOR = 1.5
+
+
+# =============================================================================
+# Reports I/O (merge maps, per-shard summary)
+# =============================================================================
+
+
+def load_map_rows(path):
+    data = jsonio.read_json(path, default=[])
+    if isinstance(data, list):
+        return data
+    return []
+
+
+def merge_maps(reports_dir):  # merges *.shardmap.json sidecars; always returns 0
+    rows = []
+    for path in sorted(glob.glob(os.path.join(reports_dir, "*.shardmap.json"))):
+        rows.extend(load_map_rows(path))
+    rows.sort(  # deterministic order: stage, then shard id, then node id
+        key=lambda r: (
+            str(r.get("stage", "")),
+            int(r.get("shard_id", 0)),
+            str(r.get("nodeid", "")),
+        )
+    )
+    json_path = os.path.join(reports_dir, "shard_map.json")
+    txt_path = os.path.join(reports_dir, "shard_map.txt")
+    jsonio.write_json_atomic(json_path, rows)
+    with open(txt_path, "w") as f:  # plain-text twin of shard_map.json, one line per row
+        for row in rows:
+            f.write(
+                "nodeid={nodeid} stage={stage} shard={shard_num}/{shard_count} "
+                "stash={stash} group={group} weight_s={weight_s:.3f} source={source}\n".format(
+                    nodeid=row.get("nodeid", ""),
+                    stage=row.get("stage", ""),
+                    shard_num=int(row.get("shard_id", 0)) + 1,  # printed as shard_id + 1
+                    shard_count=int(row.get("num_shards", 1)),
+                    stash=row.get("stash", ""),
+                    group=row.get("group", ""),
+                    weight_s=float(row.get("weight_s", 0.0) or 0.0),  # None -> 0.0
+                    source=row.get("source", ""),
+                )
+            )
+    print("ci_sharding merge-maps: wrote %d row(s)" % len(rows))
+    return 0
+
+
+def timing_rows(reports_dir):
+    rows = []
+    pattern = os.path.join(reports_dir, "*.timings.json")
+    for path in sorted(glob.glob(pattern)):
+        data = jsonio.read_json(path, default={})
+        if not isinstance(data, dict):
+            print("ci_sharding summarize: could not parse %s" % path, file=sys.stderr)
+            continue
+        stash = data.get("stash") or os.path.basename(path).split(".")[0]
+        groups = data.get("groups") or []
+        top = groups[0] if groups else {"name": "(none)", "seconds": 0.0}
+        rows.append(
+            (
+                stash,
+                int(data.get("shard", {}).get("id", 0)),
+                float(data.get("wall_seconds", 0.0) or 0.0),
+                float(top.get("seconds", 0.0) or 0.0),
+                str(top.get("name", "")),
+            )
+        )
+    return rows
+
+
+def family(stash):
+    return re.sub(r"_\d+$", "", stash)  # drop one trailing "_<digits>" suffix
+
+
+def summarize_timings(reports_dir):
+    rows = timing_rows(reports_dir)
+    if not rows:
+        print("ci_sharding summarize: no parseable timings.json files in %s" % reports_dir)
+        return 0
+    by_family = collections.defaultdict(list)
+    for row in rows:
+        by_family[family(row[0])].append(row)
+    print()
+    print("=== per-shard wall-clock ===")
+    print("%-36s %3s %10s %12s  %s" % ("stash", "id", "wall_s", "max_group_s", "max_group"))
+    print("-" * 100)
+    slow_found = False
+    for fam in sorted(by_family):
+        fam_rows = sorted(by_family[fam], key=lambda r: r[1])
+        walls = sorted(r[2] for r in fam_rows)
+        median = walls[len(walls) // 2] if walls else 0.0
+        for stash, sid, wall, mx_sec, mx_name in fam_rows:
+            flag = ""
+            if median > 0.0 and wall > SLOW_FACTOR * median:
+                flag = "  <<< SLOW SHARD (%.1fx median)" % (wall / median)
+                slow_found = True
+            print("%-36s %3d %10.1f %12.1f  %s%s" % (stash, sid, wall, mx_sec, mx_name, flag))
+        print()
+    if slow_found:
+        print(
+            "ci_sharding summarize: one or more shards exceeded %.1fx family median. "
+            "A trusted full build refreshes the timing master from these observations."
+            % SLOW_FACTOR
+        )
+    return 0
+
+
+# =============================================================================
+# Timing master state machine
+# =============================================================================
+# Schema v1:
+#
+#   {"schema_version": 1, "updated_at": str, "last_update": {...},
+#    "groups": {<name>: {"samples": [s1, ..., sMAX_SAMPLES]}}}
+#
+# Bump SCHEMA_VERSION when the master layout changes incompatibly: an
+# unrecognised version is discarded and the timings cold-start, which one
+# non-aborted build repopulates.
+
+SCHEMA_VERSION = 1
+
+
+def normalise_master(data):
+    """Coerce arbitrary input to the master schema; only updated_at and groups survive."""
+    if not isinstance(data, dict):
+        data = {}
+    schema_version = data.get("schema_version")  # a missing version is accepted as-is
+    if schema_version is not None and schema_version != SCHEMA_VERSION:
+        print(
+            "ci_sharding normalise_master: unrecognised schema_version %r, "
+            "treating as empty (expected %d)" % (schema_version, SCHEMA_VERSION),
+            file=sys.stderr,
+        )
+        data = {}
+    groups = data.get("groups")
+    if not isinstance(groups, dict):
+        groups = {}
+    return {
+        "schema_version": SCHEMA_VERSION,  # always stamped with the current version
+        "updated_at": data.get("updated_at"),
+        "groups": dict(groups),  # shallow copy of the groups mapping
+    }
+
+
+def observed_groups_from_reports(reports_dir):
+    """Return {canonical_group_name: max_seconds} over this build's *.timings.json sidecars."""
+    observed = {}
+    for path in sorted(glob.glob(os.path.join(reports_dir, "*.timings.json"))):
+        data = jsonio.read_json(path, default={})
+        if not isinstance(data, dict):  # ignore sidecars whose top level is not an object
+            continue
+        for entry in data.get("groups") or []:
+            name = sharding.canonical_key(str(entry.get("name", "")))
+            if not name:
+                continue
+            try:
+                seconds = float(entry.get("seconds", 0.0) or 0.0)
+            except (TypeError, ValueError):  # non-numeric timing: skip this entry
+                continue
+            if seconds > observed.get(name, 0.0):  # keep the max; values <= 0 are never stored
+                observed[name] = round(seconds, 3)
+    return observed
+
+
+def _apply_per_group_update(observed_seconds, current_entry):
+    """Append the observation to current_entry's window, trimmed to MAX_SAMPLES."""
+    prior_samples = sharding._samples_from_entry(current_entry)
+    new_samples = (prior_samples + [round(float(observed_seconds), 3)])[-MAX_SAMPLES:]
+    return {"samples": new_samples}
+
+
+def update_master(reports_dir, master_path, out_path, update_persistent=False, metadata=None):
+    """Merge observed timings into a per-build preview and optionally the master.
+
+    Writes out_path when one is given. Updating the persistent master is opt-in
+    via update_persistent, which the caller passes for any non-aborted build.
+    Preview mode (update_persistent off) leaves the on-disk master untouched.
+    Either way, every observation in this build is appended to its group's
+    samples and the window is trimmed to MAX_SAMPLES. Always returns 0.
+    """
+    observed_seconds = observed_groups_from_reports(reports_dir)
+    metadata = metadata or {}
+    now_iso = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())  # UTC timestamp
+
+    def apply(current, persist=False):
+        master = normalise_master(current)
+        master["updated_at"] = now_iso
+        for name, seconds in observed_seconds.items():
+            master["groups"][name] = _apply_per_group_update(seconds, master["groups"].get(name))
+        master["last_update"] = {
+            "job": metadata.get("job"),
+            "build": metadata.get("build"),
+            "persistent_update": bool(persist),
+            "observed_groups": len(observed_seconds),
+        }
+        return master
+
+    persistent_updated = False
+    if master_path and update_persistent:
+        # No cross-agent lock. write_json_atomic renames into place, so a
+        # reader never sees a half-written file and overlapping writers settle
+        # to last-writer-wins. A sample dropped by a concurrent
+        # read-modify-write is re-added by the next build that observes the
+        # group, and since the bin packer only reads the window max, an
+        # occasional missing sample makes no practical difference.
+        master = apply(jsonio.read_json(master_path, default={}), persist=True)
+        jsonio.write_json_atomic(master_path, master)
+        persistent_updated = True
+    elif master_path:  # preview on top of the current master; nothing is persisted
+        master = apply(jsonio.read_json(master_path, default={}), persist=False)
+    else:  # no master path: preview built from this build's observations only
+        master = apply({}, persist=False)
+    if out_path:
+        jsonio.write_json_atomic(out_path, master)
+    print(
+        "ci_sharding update: %d observed, %d in master, persistent_update=%s"
+        % (len(observed_seconds), len(master.get("groups", {})), persistent_updated)
+    )
+    return 0
+
+
+def prepare_timing_snapshot(master_path, snapshot_path):
+    """Copy the persistent master to a per-build snapshot for shard consumption.
+
+    Cold start writes an empty snapshot so sharding falls back to
+    deterministic round-robin until the first build populates the master.
+    """
+    master = jsonio.read_json(master_path, default=None)
+    master = normalise_master(master)
+    jsonio.write_json_atomic(snapshot_path, master)
+    print(
+        "ci_sharding prepare: wrote %s with %d group(s)"
+        % (snapshot_path, len(master.get("groups", {})))
+    )
+    return 0
diff --git a/ci/scripts/archive_failure_logs.sh b/ci/scripts/archive_failure_logs.sh
new file mode 100755
index 0000000000..09746a8264
--- /dev/null
+++ b/ci/scripts/archive_failure_logs.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# archive_failure_logs.sh <build_dir> <tarball_path> [start_marker]
+#
+# One tarball of tool logs per failed shard. LSF staging logs live outside
+# the build dir under FINN_LSF_NFS_STAGING. They are scoped to files newer
+# than the start_marker if provided.
+# Best-effort: failures are logged but never abort the pipeline (the real
+# test result is owned by pytest).
+set +e
+
+if [ "$#" -lt 2 ] || [ "$#" -gt 3 ]; then
+  echo "Usage: archive_failure_logs.sh <build_dir> <tarball_path> [start_marker]" >&2
+  exit 2
+fi
+
+bd=$1
+tarball=$2
+start_marker=${3:-}
+lsf_staging="${FINN_LSF_NFS_STAGING:-}"
+
+mkdir -p "$(dirname "$tarball")"
+if [ ! -d "$bd" ]; then
+  exit 0
+fi
+
+# Use absolute paths so tar can stat them from its own cwd.
+abs_bd=$(cd "$bd" && pwd)
+# The LSF staging scan is only useful when scoped to files newer than the
+# real per-shard start. Without a marker the find would walk the entire
+# shared staging dir, so we skip the LSF block entirely instead.
+newer_ref=
+if [ -n "$start_marker" ] && [ -e "$start_marker" ]; then
+  newer_ref=$start_marker
+fi
+
+# Collect to a temp file so we can both count and tar, and so a healthy
+# "no candidates" run does not produce an empty tarball indistinguishable
+# from a tar failure.
+list=$(mktemp)
+trap 'rm -f "$list"' EXIT
+
+# Two-pass capture. First pass: basenames that are unambiguously FINN/Vitis
+# artefacts anywhere in the build dir. Second pass: generic names (config.txt
+# etc.) gated by grep to the build-subdir families, so they only match inside
+# those subtrees. The leading-slash anchor stops 'myvitis_proj' matching
+# 'vitis_proj'; trailing '/' marks a fixed dir name, trailing '_' a
+# '<name>_<hash>' prefix.
+build_subdirs='/(project_|finn_zynqbuild_|vitis_proj/|vivado_stitch_proj_|vitis_link_proj_)'
+{
+  find "$abs_bd" -type f \( \
+      -name 'vitis_hls.log' -o \
+      -name 'build_dataflow.log' -o \
+      -name 'vivado.log' -o \
+      -name 'v++_a.log' -o \
+      -name 'v++.link_summary' -o \
+      -name 'link.steps.log' -o \
+      -name '*runme.log' \
+    \) -print0 2>/dev/null
+  find "$abs_bd" -type f \( \
+      -name 'config.txt' -o \
+      -name 'run_vitis_link.sh' -o \
+      -name '*.fcnmap.xml' -o \
+      -name 'xd_ip_index.xml' \
+    \) -print0 2>/dev/null | grep -zE "$build_subdirs"
+  if [ -n "$newer_ref" ] && [ -d "$lsf_staging" ]; then
+    find "$lsf_staging" -mindepth 2 -maxdepth 3 -type f -newer "$newer_ref" \( \
+        -name 'lsf.stdout' -o \
+        -name 'lsf.stderr' -o \
+        -name 'remote_runner.sh' \
+      \) -print0 2>/dev/null
+  fi
+} > "$list"
+
+n=$(tr -cd '\0' < "$list" | wc -c)
+echo "[archive-failure-logs] ${n} candidate file(s) for ${tarball}"
+if [ "$n" = "0" ]; then
+  exit 0  # nothing to archive: no tarball is written
+fi
+tar --null --create --gzip --file "$tarball" --files-from "$list" 2>/dev/null || echo "[archive-failure-logs] tar exited $? for ${tarball}" >&2
diff --git a/ci/scripts/failure_tails.sh b/ci/scripts/failure_tails.sh
new file mode 100755
index 0000000000..fddcaf2944
--- /dev/null
+++ b/ci/scripts/failure_tails.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# failure_tails.sh <build_dir> <stash> <tail_lines>
+#
+# Tail every tool log with an ERROR: marker so the Jenkins console shows the
+# actual error without downloading the artifact tarball.
+# Best-effort: failures are logged but never abort the pipeline (the real
+# test result is owned by pytest).
+set +e
+
+if [ "$#" -ne 3 ]; then
+  echo "Usage: failure_tails.sh <build_dir> <stash> <tail_lines>" >&2
+  exit 2
+fi
+
+bd=$1
+stash=$2
+tail_lines=$3
+tag="[failure-tails ${stash}]"  # prefix for this script's status lines
+
+if [ ! -d "$bd" ]; then
+  echo "$tag no build dir"
+  exit 0
+fi
+
+mapfile -t failed < <(find "$bd" -type f \( \
+    -name 'vitis_hls.log' -o \
+    -name 'build_dataflow.log' -o \
+    -name 'vivado.log' -o \
+    -name 'v++_a.log' -o \
+    -name 'v++.link_summary' \
+  \) -exec grep -l 'ERROR:' {} + 2>/dev/null)  # keep only logs that contain ERROR:
+
+if [ "${#failed[@]}" = "0" ]; then
+  echo "$tag no logs with ERROR: markers found"
+  exit 0
+fi
+
+echo "$tag ${#failed[@]} log file(s) with ERROR: markers"
+for f in "${failed[@]}"; do
+  rel="${f#"$bd"/}"  # strip the "<build_dir>/" prefix for display
+  echo ""
+  echo "=== FAIL: $rel (tail ${tail_lines}) ==="
+  tail -n "$tail_lines" "$f"
+done
diff --git a/ci/scripts/find_copy_zip.sh b/ci/scripts/find_copy_zip.sh
new file mode 100755
index 0000000000..7e8666e51f
--- /dev/null
+++ b/ci/scripts/find_copy_zip.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# find_copy_zip.sh <test_type> <board> <find_dir> <stage_dir>
+#
+# Walks <find_dir> for per-shard hw_deployment_*/<board>/* directories and
+# stages each model dir under <stage_dir>/<board>/. The Jenkinsfile calls
+# this once per (testType, board) shard from runShardBody.
+# NOT best-effort. A half-staged deployment tree would silently lose models
+# at aggregate time, so strict mode is on and any error aborts the step.
+# See also: publish_board_zip_stage.sh, which performs the matching
+# aggregate-time walk over the staged trees this script produced.
+set -euo pipefail
+
+if [ "$#" -ne 4 ]; then
+  echo "Usage: find_copy_zip.sh <test_type> <board> <find_dir> <stage_dir>" >&2
+  exit 2
+fi
+
+test_type=$1
+board=$2
+find_dir=$3
+stage_dir=$4
+tag="findCopyZip(${test_type}/${board})"
+
+if [ ! -d "$find_dir" ]; then
+  exit 0
+fi
+
+mkdir -p "$stage_dir/$board"
+# u+w so a previous run's read-only residue can be removed. Failures are
+# ignored (|| true) so strict mode does not abort on a freshly-created dir.
+chmod -R u+w "$stage_dir/$board" 2>/dev/null || true
+# find -mindepth 1 catches dotfiles a glob would miss; ':?' aborts if a var is empty.
+find "${stage_dir:?}/${board:?}" -mindepth 1 -maxdepth 1 -exec rm -rf {} +
+
+found=0  # flips to 1 once any matching board dir is seen
+while IFS= read -r -d '' board_dir; do
+  found=1
+  for model_dir in "$board_dir"/*; do
+    [ -d "$model_dir" ] || continue
+    name=$(basename "$model_dir")
+    if [ -e "$stage_dir/$board/$name" ]; then
+      echo "$tag: duplicate deployment $name across hw_deployment dirs" >&2
+      exit 1
+    fi
+    # cp not mv: the per-shard build dir is kept for failure triage. Trees
+    # are small (per-board deployment dirs).
+    cp -a "$model_dir" "$stage_dir/$board/"
+  done
+done < <(find "$find_dir" -maxdepth 2 -type d -name "$board" -path '*/hw_deployment_*/*' -print0)
+
+if [ "$found" = "0" ]; then
+  exit 0
+fi
+
+echo "$tag: staged deployments under $stage_dir"
diff --git a/ci/scripts/lsf_summary.sh b/ci/scripts/lsf_summary.sh
new file mode 100755
index 0000000000..f9263c738b
--- /dev/null
+++ b/ci/scripts/lsf_summary.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# lsf_summary.sh <build_dir> <agent> <stash>
+#
+# Per-tool run counts and hosts so Blue Ocean shows LSF fan-out.
+# Best-effort: failures are logged but never abort the pipeline (the real
+# test result is owned by pytest).
+set +e
+
+if [ "$#" -ne 3 ]; then
+  echo "Usage: lsf_summary.sh <build_dir> <agent> <stash>" >&2
+  exit 2
+fi
+
+bd=$1
+agent=$2
+stash=$3
+tag="[lsf-summary ${stash}]"
+
+if [ ! -d "$bd" ]; then
+  echo "$tag no build dir"
+  exit 0
+fi
+
+# vitis_hls log line: "INFO: [HLS 200-10] ... on host '<host>'"
+# vivado log header:  "# Running On: <host>,"
+hls_hosts=$(find "$bd" -name vitis_hls.log -exec grep -h "INFO: \[HLS 200-10\] .* on host '" {} + 2>/dev/null \
+  | awk -F"on host '" '{print $2}' | awk -F"'" '{print $1}')
+viv_hosts=$(find "$bd" -name vivado.log -exec grep -h '^# Running On:' {} + 2>/dev/null \
+  | awk -F'Running On: *' '{print $2}' | awk -F',' '{print $1}')
+
+all_hosts=$(printf '%s\n%s\n' "$hls_hosts" "$viv_hosts" | grep -v '^$')
+n_runs=$(echo "$all_hosts" | grep -c .)
+if [ "$n_runs" = "0" ]; then
+  # warn loudly when logs exist but the host-line format has drifted; a
+  # silent "no tool runs" otherwise masks a parsing regression after a
+  # Vivado/Vitis version bump.
+  n_logs=$(find "$bd" \( -name vitis_hls.log -o -name vivado.log \) 2>/dev/null | wc -l)
+  if [ "$n_logs" -gt 0 ]; then
+    echo "$tag no parseable host found in ${n_logs} log(s) (format change?)"
+  else
+    echo "$tag no tool runs"
+  fi
+  exit 0
+fi
+n_remote=$(echo "$all_hosts" | grep -Fvcx -- "$agent")  # -F: dots in the hostname are literal
+n_local=$(echo "$all_hosts" | grep -Fcx -- "$agent")
+buckets=$(echo "$all_hosts" | sort | uniq -c | sort -rn)
+n_hosts=$(echo "$buckets" | grep -c .)
+
+if [ "$n_remote" = "0" ]; then
+  echo "$tag $n_runs tool run(s) on this agent (no LSF dispatch)"
+else
+  echo "$tag $n_runs tool run(s) across $n_hosts host(s) ($n_remote remote / $n_local local)"
+fi
+top=$(echo "$buckets" | head -5 | awk '{printf "%s(%d) ", $2, $1}')
+echo "                          top: $top"
diff --git a/ci/scripts/publish_board_zip_stage.sh b/ci/scripts/publish_board_zip_stage.sh
new file mode 100755
index 0000000000..a1b295a657
--- /dev/null
+++ b/ci/scripts/publish_board_zip_stage.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# publish_board_zip_stage.sh <src_root> <work_board> <board> <test_type>
+#
+# Stages per-shard board deployments into work_board ahead of zipping.
+# Hard-fails on duplicate model names across shards (a real conflict).
+# Touches <work_board>/.NO_DEPLOYMENTS when nothing was found so the caller
+# can short-circuit cleanly.
+# NOT best-effort. A half-staged board zip is worse than no zip, so strict
+# mode is on and any error aborts the publish step.
+# See also: find_copy_zip.sh, which produced the per-shard staged trees
+# this script aggregates.
+set -euo pipefail
+
+if [ "$#" -ne 4 ]; then
+  echo "Usage: publish_board_zip_stage.sh <src_root> <work_board> <board> <test_type>" >&2
+  exit 2
+fi
+
+src_root=$1
+work_board=$2
+board=$3
+test_type=$4
+tag="publishBoardZip(${test_type}/${board})"
+
+found=0  # flips to 1 once any <src_root>/*/<board> dir is seen
+while IFS= read -r -d '' board_dir; do
+  found=1
+  while IFS= read -r -d '' model_dir; do
+    name=$(basename "$model_dir")
+    if [ -e "$work_board/$name" ]; then
+      echo "$tag: duplicate deployment $name under $src_root" >&2
+      exit 1
+    fi
+    cp -a "$model_dir" "$work_board/"
+  done < <(find "$board_dir" -mindepth 1 -maxdepth 1 -type d -print0)
+done < <(find "$src_root" -mindepth 2 -maxdepth 2 -type d -name "$board" -print0)
+
+if [ "$found" = "0" ] || [ -z "$(find "$work_board" -mindepth 1 -maxdepth 1 -type d -print -quit)" ]; then
+  echo "$tag: no deployment directories found under $src_root"
+  touch "$work_board/.NO_DEPLOYMENTS"
+  exit 0
+fi
diff --git a/ci/scripts/publish_docker_image.sh b/ci/scripts/publish_docker_image.sh
new file mode 100755
index 0000000000..76fa7c77b3
--- /dev/null
+++ b/ci/scripts/publish_docker_image.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# publish_docker_image.sh <image_dir> <tag> <build_number>
+#
+# Save the named Docker image to <image_dir>/finn-docker-image.tar.gz and
+# write its tag to a sibling file. image_dir is per-build, so the only
+# concurrency is a same-build retry whose bytes are identical. Unique
+# per-invocation temp files plus the atomic rename are the serialisation
+# point (no NFS lock, which flock cannot provide cross-host anyway). A
+# concurrent retry may redo the docker save, which is rare and harmless.
+set -eo pipefail  # pipefail: a failing docker save fails the step even if the compressor exits 0
+
+if [ "$#" -ne 3 ]; then
+  echo "Usage: publish_docker_image.sh <image_dir> <tag> <build_number>" >&2
+  exit 2
+fi
+
+image_dir=$1
+tag=$2
+build=$3
+
+final_img="${image_dir}/finn-docker-image.tar.gz"
+final_tag="${image_dir}/finn-docker-tag.txt"
+# Unique per invocation (pid + host) so a concurrent same-build retry cannot
+# clobber our half-written temp; the trap removes it on any early exit.
+uniq="${build}.$$.$(hostname -s 2>/dev/null || echo host)"
+tmp_img="${final_img}.tmp-${uniq}"
+tmp_tag="${final_tag}.tmp-${uniq}"
+trap 'rm -f "$tmp_img" "$tmp_tag"' EXIT
+
+if command -v pigz >/dev/null 2>&1; then
+  docker save "$tag" | pigz -p "$(nproc)" > "$tmp_img"
+else
+  docker save "$tag" | gzip > "$tmp_img"
+fi
+printf '%s\n' "$tag" > "$tmp_tag"
+sync "$tmp_img" "$tmp_tag"  # flush both temps to disk before the renames publish them
+# image last so a reader that gates on both files never sees the image without
+# its tag (the loader keys off finn-docker-image.tar.gz being present).
+mv -f "$tmp_tag" "$final_tag"
+mv -f "$tmp_img" "$final_img"
diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn
index 4614019134..ebe2a3dcbf 100644
--- a/docker/Dockerfile.finn
+++ b/docker/Dockerfile.finn
@@ -79,7 +79,9 @@ RUN apt-get update && \
     libgetdata-dev \
     libtinfo5 \
     g++-10 \
-    cmake
+    cmake \
+    tcsh
+# tcsh: required by LSF esub scripts (they ship with a /bin/csh shebang).
 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
 RUN locale-gen "en_US.UTF-8"
 
diff --git a/docker/jenkins/Jenkinsfile b/docker/jenkins/Jenkinsfile
deleted file mode 100644
index f4b0b8220a..0000000000
--- a/docker/jenkins/Jenkinsfile
+++ /dev/null
@@ -1,422 +0,0 @@
-pipeline {
-  agent none
-  options {
-    timeout(time: 72, unit: 'HOURS')
-  }
-  parameters {
-    booleanParam(name: 'fpgadataflow', defaultValue: false, description: 'Run fpgadataflow tests')
-    booleanParam(name: 'sanity', defaultValue: true, description: 'Run sanity hardware and unit tests')
-    booleanParam(name: 'end2end', defaultValue: false, description: 'Run end2end tests')
-    booleanParam(name: 'local_setup', defaultValue: false, description: 'Run local (non-Docker) setup test with Vivado')
-  }
-  stages {
-    stage('Build Docker Image') {
-      agent {
-        label 'finn-build'
-      }
-      environment {
-        // Override any DSL-level FINN_DOCKER_PREBUILT setting — this
-        // stage must always build so the image exists for test stages
-        FINN_DOCKER_PREBUILT = "0"
-      }
-      steps {
-        script {
-          // Prune old docker containers
-          sh "docker system prune -a -f"
-          // Build Docker image so parallel stages don't each rebuild it
-          sh "./run-docker.sh echo 'Docker image build complete'"
-          // If FINN_DOCKER_SHARED_DIR is set, export the image so other
-          // agents can load it via run-docker.sh instead of rebuilding
-          if (env.FINN_DOCKER_SHARED_DIR) {
-            def dockerTag = sh(returnStdout: true, script: '''
-              XRT_DEB_VERSION="${XRT_DEB_VERSION:-xrt_202220.2.14.354_22.04-amd64-xrt}"
-              echo "xilinx/finn:$(git describe --always --tags --dirty).${XRT_DEB_VERSION}"
-            ''').trim()
-            sh "mkdir -p ${env.FINN_DOCKER_SHARED_DIR}"
-            sh "echo '${dockerTag}' > ${env.FINN_DOCKER_SHARED_DIR}/finn-docker-tag.txt"
-            sh "bash -c 'set -o pipefail; docker save \"${dockerTag}\" | gzip > ${env.FINN_DOCKER_SHARED_DIR}/finn-docker-image.tar.gz'"
-          }
-        }
-      }
-    }
-    stage('Run Tests') {
-      parallel {
-        stage('Sanity - Build Hardware') {
-          when {
-            expression { return params['sanity'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            TEST_NAME = "bnn_build_sanity"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                // Creates dir in finn clone to store build files for stashing
-                sh "mkdir -p ${env.TEST_NAME}"
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Pass in the marker to run with pytest and the XML test results filename
-                runDockerPytestWithMarker("sanity_bnn", "${env.TEST_NAME}", '')
-
-                // Find the board's build files (bitstreams/xclbins) and zip for use on the boards themselves
-                findCopyZip("Pynq-Z1", env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-                findCopyZip("ZCU104", env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-                findCopyZip("KV260_SOM", env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-                findCopyZip("U250", env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-
-                // Stash the test results file(s)
-                stash name: "${env.TEST_NAME}", includes: "${env.TEST_NAME}.xml,${env.TEST_NAME}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.BNN_BUILD_SANITY = "SUCCESS"
-              }
-            }
-          }
-        }
-        stage('Sanity - Unit Tests') {
-          when {
-            expression { params['sanity'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            TEST_NAME = "sanity_ut"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Multiple markers with pytest needs its own script
-                createMultiMarkerScript("util or brevitas_export or streamline or transform or notebooks", "${env.TEST_NAME}", "-n ${env.NUM_PYTEST_WORKERS} --dist worksteal --cov --cov-report=html:coverage_sanity_ut")
-                sh './run-docker.sh ./run-tests.sh'
-
-                // Stash the test results file(s)
-                stash name: env.TEST_NAME, includes: "${env.TEST_NAME}.xml,${env.TEST_NAME}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.SANITY_UT = "SUCCESS"
-
-                // Archive coverage report if successful
-                archiveSuccessfulStage(env.SANITY_UT, "coverage_sanity_ut")
-              }
-            }
-          }
-        }
-        stage('fpgadataflow Tests') {
-          when {
-            expression { params['fpgadataflow'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            TEST_NAME = "fpgadataflow"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Pass in the marker to run with pytest and the XML test results filename
-                runDockerPytestWithMarker("fpgadataflow", "${env.TEST_NAME}", "--cov --cov-report=html:coverage_fpgadataflow -n ${env.NUM_PYTEST_WORKERS} --dist worksteal")
-
-                // Stash the test results file(s)
-                stash name: env.TEST_NAME, includes: "${env.TEST_NAME}.xml,${env.TEST_NAME}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.FPGADATAFLOW_RESULT = "SUCCESS"
-
-                // Archive coverage report if successful
-                archiveSuccessfulStage(env.FPGADATAFLOW_RESULT, "coverage_fpgadataflow")
-              }
-            }
-          }
-        }
-        stage('End2end') {
-          when {
-            expression { params['end2end'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            TEST_NAME = "end2end"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                // Delete any build files from a previous build
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Pass in the marker to run with pytest and the XML test results filename
-                runDockerPytestWithMarker(env.TEST_NAME, "${env.TEST_NAME}", '')
-
-                // Stash the test results file(s)
-                stash name: env.TEST_NAME, includes: "${env.TEST_NAME}.xml,${env.TEST_NAME}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.END2END_RESULT = "SUCCESS"
-              }
-            }
-          }
-        }
-        stage('BNN end2end - U250') {
-          when {
-            expression { return params['end2end'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            BOARD = "U250"
-            TEST_NAME = "bnn_build_full"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}_${env.BOARD}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                // Creates dir in finn clone to store build files for stashing
-                sh "mkdir -p ${env.TEST_NAME}"
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Pass in the marker to run with pytest and the XML test results filename
-                runDockerPytestWithMarker("bnn_u250", "${env.TEST_NAME}_${env.BOARD}", '')
-                findCopyZip(env.BOARD, env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-
-                // Stash the test results file(s)
-                stash name: "${env.TEST_NAME}_${env.BOARD}", includes: "${env.TEST_NAME}_${env.BOARD}.xml,${env.TEST_NAME}_${env.BOARD}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.BNN_BUILD_U250 = "SUCCESS"
-              }
-            }
-          }
-        }
-        stage('BNN end2end - Pynq-Z1') {
-          when {
-            expression { return params['end2end'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            BOARD = "Pynq-Z1"
-            TEST_NAME = "bnn_build_full"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}_${env.BOARD}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                // Creates dir in finn clone to store build files for stashing
-                sh "mkdir -p ${env.TEST_NAME}"
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Pass in the marker to run with pytest and the XML test results filename
-                runDockerPytestWithMarker("bnn_pynq", "${env.TEST_NAME}_${env.BOARD}", '')
-                findCopyZip(env.BOARD, env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-
-                // Stash the test results file(s)
-                stash name: "${env.TEST_NAME}_PynqZ1", includes: "${env.TEST_NAME}_${env.BOARD}.xml,${env.TEST_NAME}_${env.BOARD}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.BNN_BUILD_PYNQZ1 = "SUCCESS"
-              }
-            }
-          }
-        }
-        stage('BNN end2end - ZCU104') {
-          when {
-            expression { return params['end2end'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            BOARD = "ZCU104"
-            TEST_NAME = "bnn_build_full"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}_${env.BOARD}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                // Creates dir in finn clone to store build files for stashing
-                sh "mkdir -p ${env.TEST_NAME}"
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Pass in the marker to run with pytest and the XML test results filename
-                runDockerPytestWithMarker("bnn_zcu104", "${env.TEST_NAME}_${env.BOARD}", '')
-                findCopyZip(env.BOARD, env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-
-                // Stash the test results file(s)
-                stash name: "${env.TEST_NAME}_${env.BOARD}", includes: "${env.TEST_NAME}_${env.BOARD}.xml,${env.TEST_NAME}_${env.BOARD}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.BNN_BUILD_ZCU104 = "SUCCESS"
-              }
-            }
-          }
-        }
-        stage('BNN end2end - KV260_SOM') {
-          when {
-            expression { return params['end2end'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            BOARD = "KV260_SOM"
-            TEST_NAME = "bnn_build_full"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}_${env.BOARD}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                // Creates dir in finn clone to store build files for stashing
-                sh "mkdir -p ${env.TEST_NAME}"
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Pass in the marker to run with pytest and the XML test results filename
-                runDockerPytestWithMarker("bnn_kv260", "${env.TEST_NAME}_${env.BOARD}", '')
-                findCopyZip(env.BOARD, env.FINN_HOST_BUILD_DIR, env.TEST_NAME)
-
-                // Stash the test results file(s)
-                stash name: "${env.TEST_NAME}_${env.BOARD}", includes: "${env.TEST_NAME}_${env.BOARD}.xml,${env.TEST_NAME}_${env.BOARD}.html"
-
-                // Use an env variable to help collect test results later in pipeline
-                env.BNN_BUILD_KV260_SOM = "SUCCESS"
-              }
-            }
-          }
-        }
-        stage('Local Setup (non-Docker) with Vivado') {
-          when {
-            expression { return params['local_setup'] }
-          }
-          agent {
-            label 'finn-build'
-          }
-          environment {
-            TEST_NAME = "local_setup"
-            FINN_HOST_BUILD_DIR = "${env.FINN_HOST_BUILD_DIR}/${env.TEST_NAME}"
-          }
-          steps {
-            catchError(stageResult: 'FAILURE') {
-              script {
-                cleanPreviousBuildFiles(env.FINN_HOST_BUILD_DIR)
-
-                // Note: System dependencies must be pre-installed on Jenkins server
-                // Run: sudo ./scripts/install-system-deps.sh
-
-                // Setup local FINN environment
-                sh """
-                  export FINN_HOST_BUILD_DIR=${env.FINN_HOST_BUILD_DIR}
-                  ./setup-local.sh
-                """
-
-                // Run quicktest with Vivado integration tests
-                sh """
-                  export FINN_HOST_BUILD_DIR=${env.FINN_HOST_BUILD_DIR}
-                  source scripts/finn-env.sh
-                  ./scripts/quicktest-local.sh vivado
-                """
-
-                // Use an env variable to help collect test results later in pipeline
-                env.LOCAL_SETUP_RESULT = "SUCCESS"
-              }
-            }
-          }
-        }
-      }
-    }
-    stage('Check Stage Results') {
-      agent {
-        label 'finn-build'
-      }
-      steps {
-        script {
-          sh 'mkdir -p reports'
-          cleanPreviousBuildFiles('reports')
-          dir('reports') {
-            // Only unstash for stages that ran
-            unstashSuccessfulStage(env.SANITY_UT, "sanity_ut")
-            unstashSuccessfulStage(env.FPGADATAFLOW_RESULT, "fpgadataflow")
-            unstashSuccessfulStage(env.BNN_BUILD_SANITY, "bnn_build_sanity")
-            unstashSuccessfulStage(env.END2END_RESULT, "end2end")
-            unstashSuccessfulStage(env.BNN_BUILD_U250, "bnn_build_full_U250")
-            unstashSuccessfulStage(env.BNN_BUILD_PYNQZ1, "bnn_build_full_PynqZ1")
-            unstashSuccessfulStage(env.BNN_BUILD_ZCU104, "bnn_build_full_ZCU104")
-            unstashSuccessfulStage(env.BNN_BUILD_KV260_SOM, "bnn_build_full_KV260_SOM")
-          }
-
-          // Combine individual HTML files to one single report
-          sh './run-docker.sh pytest_html_merger -i reports/ -o reports/test_report_final.html'
-
-          // Archive the XML & HTML test results
-          archiveArtifacts artifacts: "reports/*.xml"
-          archiveArtifacts artifacts: "reports/*.html"
-
-          // Plot what XML files were created during the test run
-          junit 'reports/*.xml'
-        }
-      }
-    }
-  }
-}
-
-void cleanPreviousBuildFiles(String buildDir) {
-  // Delete any build files from a previous build
-  // Previous build folders affect findCopyZip() and can cause the stage to fail
-  if (!buildDir.empty) {
-      sh "rm -rf ${buildDir}"
-  }
-}
-
-void createMultiMarkerScript(String markers, String testResultsFilename, String additionalOptions) {
-  // Passing multiple markers when running ./run-docker.sh does not work with bash.
-  // Therefore, create a script to maintain the single quotes that surround the markers
-  sh """echo "#!/bin/bash
-python -m pytest -m \'${markers}\' --forked --junitxml=${testResultsFilename}.xml --html=${testResultsFilename}.html --self-contained-html ${additionalOptions}" >> run-tests.sh
-    """
-
-  // Give permissions to script
-  sh 'chmod 777 run-tests.sh'
-}
-
-void runDockerPytestWithMarker(String marker, String testResultsFilename, String additionalOptions) {
-  sh """./run-docker.sh python -m pytest -m ${marker} --forked --junitxml=${testResultsFilename}.xml --html=${testResultsFilename}.html --self-contained-html ${additionalOptions}"""
-}
-
-void findCopyZip(String board, String findDir, String copyDir) {
-  sh "mkdir -p ${copyDir}"
-  try {
-    sh "cp -r ${findDir}/hw_deployment_*/${board} ${copyDir}/"
-    dir(copyDir) {
-      sh "zip -r ${board}.zip ${board}/"
-      sh "mkdir -p ${env.ARTIFACT_DIR}/${copyDir}/"
-      sh "cp ${board}.zip ${env.ARTIFACT_DIR}/${copyDir}/"
-    }
-  } catch (err) {
-    error "No ${board} hw_deployment_* build artifacts found in ${findDir}"
-  }
-}
-
-void unstashSuccessfulStage(String stageEnvVariableSet, String stashName) {
-  if (stageEnvVariableSet) {
-    unstash stashName
-  }
-}
-
-void archiveSuccessfulStage(String stageEnvVariableSet, String folder) {
-  if (stageEnvVariableSet) {
-    archiveArtifacts artifacts: "${folder}/**/*"
-  }
-}
diff --git a/requirements.txt b/requirements.txt
index 19007b68ac..4a4d0ff70e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,7 +14,6 @@ protobuf==5.29.6
 psutil==5.9.4
 pybind11==2.10.0
 pyscaffold==4.6
-pytest-forked==1.6.0
 scipy==1.10.1
 setupext-janitor>=1.1.2
 sigtools==4.0.1
diff --git a/run-docker.sh b/run-docker.sh
index dcda41f428..a9f061ae64 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -41,26 +41,6 @@ recho () {
   echo -e "${RED}$1${NC}"
 }
 
-if [ -z "$FINN_XILINX_PATH" ];then
-  recho "Please set the FINN_XILINX_PATH environment variable to the path to your Xilinx tools installation directory (e.g. /opt/Xilinx)."
-  recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
-fi
-
-if [ -z "$FINN_XILINX_VERSION" ];then
-  recho "Please set the FINN_XILINX_VERSION to the version of the Xilinx tools to use (e.g. 2022.2)"
-  recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
-fi
-
-if [ -z "$PLATFORM_REPO_PATHS" ];then
-  recho "Please set PLATFORM_REPO_PATHS pointing to Vitis platform files (DSAs)."
-  recho "This is required to be able to use Vitis-based Alveo PCIe cards."
-fi
-
-if [ -z "$V80PP_DEB_PACKAGE" ];then
-  recho "Please set V80PP_DEB_PACKAGE pointing to the SLASH v80++ .deb package."
-  recho "This is required to be able to use the Alveo V80 card."
-fi
-
 DOCKER_GID=$(id -g)
 DOCKER_GNAME=$(id -gn)
 DOCKER_UNAME=$(id -un)
@@ -100,11 +80,54 @@ SCRIPTPATH=$(dirname "$SCRIPT")
 : ${FINN_XRT_PATH=""}
 : ${FINN_DOCKER_NO_CACHE="0"}
 
+# print-tag emits the Docker image tag and exits, so the Jenkins publish step
+# has one source of truth for the tag (FINN_DOCKER_TAG). Placed before any
+# side effects so the invocation is read-only.
+if [ "$1" = "print-tag" ]; then
+  if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 print-tag" >&2
+    exit 2
+  fi
+  echo "$FINN_DOCKER_TAG"
+  exit 0
+fi
+
 DOCKER_INTERACTIVE=""
 
 # Catch FINN_DOCKER_EXTRA options being passed in without a trailing space
 FINN_DOCKER_EXTRA+=" "
 
+if [ -z "$FINN_XILINX_PATH" ];then
+  recho "Please set the FINN_XILINX_PATH environment variable to the path to your Xilinx tools installation directory (e.g. /opt/Xilinx)."
+  recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
+fi
+
+if [ -z "$FINN_XILINX_VERSION" ];then
+  recho "Please set the FINN_XILINX_VERSION to the version of the Xilinx tools to use (e.g. 2022.2)"
+  recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
+fi
+
+if [ -z "$PLATFORM_REPO_PATHS" ];then
+  recho "Please set PLATFORM_REPO_PATHS pointing to Vitis platform files (DSAs)."
+  recho "This is required to be able to use Vitis-based Alveo PCIe cards."
+fi
+
+if [ -z "$V80PP_DEB_PACKAGE" ];then
+  recho "Please set V80PP_DEB_PACKAGE pointing to the SLASH v80++ .deb package."
+  recho "This is required to be able to use the Alveo V80 card."
+fi
+
+# Mirror the Jenkinsfile's local-fallback banner, but only inside a real
+# Jenkins run (JENKINS_URL + BUILD_NUMBER) so unrelated CI systems and
+# developer shells that happen to export BUILD_NUMBER stay quiet.
+if [ -n "$JENKINS_URL" ] && [ -n "$BUILD_NUMBER" ] \
+   && [ -z "$FINN_CI_NFS_ROOT" ] && [ -z "$FINN_DOCKER_SHARED_IMAGE_DIR" ]; then
+  recho "FINN_CI_NFS_ROOT and FINN_DOCKER_SHARED_IMAGE_DIR are unset. Running in local-fallback mode."
+  recho "  - no shared Docker image cache (this agent will build locally)"
+  recho "  - no build-to-HW artifact handoff (the HW pipeline cannot test this build)"
+  recho "Set FINN_CI_NFS_ROOT in the Jenkins job DSL to enable the shared cache."
+fi
+
 if [ "$1" = "test" ]; then
   gecho "Running test suite (all tests)"
   DOCKER_CMD="pytest"
@@ -165,10 +188,12 @@ if [ "$FINN_SKIP_DEP_REPOS" = "0" ]; then
   ./fetch-repos.sh || exit 1
 fi
 
-# If xrt path given, copy .deb file to this repo
-# Be aware that we assume a certain name of the xrt deb version
-if [ -d "$FINN_XRT_PATH" ];then
-  cp $FINN_XRT_PATH/$XRT_DEB_VERSION.deb .
+# If an XRT .deb is available, copy it into the build context. Gate on
+# the .deb itself, not the dir: an empty cache dir would set LOCAL_XRT=1
+# with no .deb present and skip the wget branch, breaking docker build.
+# A failed copy aborts here for the same reason.
+if [ -f "$FINN_XRT_PATH/$XRT_DEB_VERSION.deb" ]; then
+  cp "$FINN_XRT_PATH/$XRT_DEB_VERSION.deb" . || { recho "Failed to copy $XRT_DEB_VERSION.deb from $FINN_XRT_PATH"; exit 1; }
   export LOCAL_XRT=1
 fi
 
@@ -181,32 +206,50 @@ if [ "$FINN_DOCKER_NO_CACHE" = "1" ]; then
   FINN_DOCKER_BUILD_EXTRA+="--no-cache "
 fi
 
-# If the image isn't available locally, try loading from shared storage.
-# This is independent of FINN_DOCKER_PREBUILT: loading is an image
-# acquisition step, not a build step. With PREBUILT=1 it provides the
-# image so the build below is skipped; with PREBUILT=0 it warms the
-# layer cache so the build below runs faster.
-if [ ! -z "$FINN_DOCKER_SHARED_DIR" ] && \
-   ! docker image inspect "$FINN_DOCKER_TAG" > /dev/null 2>&1; then
-  SHARED_IMG="$FINN_DOCKER_SHARED_DIR/finn-docker-image.tar.gz"
-  SHARED_TAG_FILE="$FINN_DOCKER_SHARED_DIR/finn-docker-tag.txt"
+# fail fast on PREBUILT=1 with no usable image source: with no shared dir
+# configured and no local image, docker run further down would fail with
+# a generic "Unable to find image" much later in the pipeline.
+if [ "$FINN_DOCKER_PREBUILT" = "1" ] && [ -z "$FINN_DOCKER_SHARED_IMAGE_DIR" ] \
+   && ! docker image inspect "$FINN_DOCKER_TAG" > /dev/null 2>&1; then
+  recho "FINN_DOCKER_PREBUILT=1 but FINN_DOCKER_SHARED_IMAGE_DIR is unset and tag $FINN_DOCKER_TAG is not loaded locally"
+  recho "Set FINN_DOCKER_SHARED_IMAGE_DIR to a directory containing finn-docker-image.tar.gz, or unset FINN_DOCKER_PREBUILT to build locally."
+  exit 1
+fi
+
+# If a shared-image dir is configured, load from there. In prebuilt mode
+# the shared image is authoritative and any same-tag local image is ignored.
+if [ -n "$FINN_DOCKER_SHARED_IMAGE_DIR" ] && \
+   { [ "$FINN_DOCKER_PREBUILT" = "1" ] || ! docker image inspect "$FINN_DOCKER_TAG" > /dev/null 2>&1; }; then
+  SHARED_DIR="$FINN_DOCKER_SHARED_IMAGE_DIR"
+  SHARED_LOADED="0"
+  SHARED_IMG="$SHARED_DIR/finn-docker-image.tar.gz"
+  SHARED_TAG_FILE="$SHARED_DIR/finn-docker-tag.txt"
   if [ -f "$SHARED_IMG" ] && [ -f "$SHARED_TAG_FILE" ]; then
-    gecho "Loading Docker image from shared storage ($FINN_DOCKER_SHARED_DIR)..."
+    gecho "Loading Docker image from shared storage ($SHARED_DIR)..."
     SHARED_TAG=$(cat "$SHARED_TAG_FILE")
-    # Lock is local (/tmp) to serialize loads on the same host. Do not move to NFS.
-    if flock /tmp/finn-docker-load.lock bash -c "set -o pipefail; gunzip -c '$SHARED_IMG' | docker load"; then
+    if [ "$FINN_DOCKER_PREBUILT" = "1" ] && [ "$SHARED_TAG" != "$FINN_DOCKER_TAG" ]; then
+      recho "Shared Docker tag $SHARED_TAG does not match requested tag $FINN_DOCKER_TAG"
+      exit 1
+    fi
+    # local /tmp lock to serialise concurrent loads on the same host
+    if flock /tmp/finn-docker-load.lock \
+         bash -c 'set -o pipefail; gunzip -c "$1" | docker load' _ "$SHARED_IMG"; then
+      SHARED_LOADED="1"
       if [ "$SHARED_TAG" != "$FINN_DOCKER_TAG" ]; then
         gecho "Tagging $SHARED_TAG as $FINN_DOCKER_TAG"
         docker tag "$SHARED_TAG" "$FINN_DOCKER_TAG"
       fi
     else
-      gecho "WARNING: Failed to load Docker image from shared storage"
-      if [ "$FINN_DOCKER_PREBUILT" = "1" ]; then
-        gecho "Falling back to local Docker build"
-        FINN_DOCKER_PREBUILT="0"
-      fi
+      gecho "WARNING: Failed to load Docker image from shared storage ($SHARED_DIR)"
     fi
   fi
+  if [ "$SHARED_LOADED" != "1" ] && [ "$FINN_DOCKER_PREBUILT" != "1" ]; then
+    gecho "WARNING: No usable shared Docker image found at FINN_DOCKER_SHARED_IMAGE_DIR=$SHARED_DIR. Falling back to local build"
+  fi
+  if [ "$FINN_DOCKER_PREBUILT" = "1" ] && [ "$SHARED_LOADED" != "1" ]; then
+    recho "FINN_DOCKER_PREBUILT=1 but no usable shared Docker image at FINN_DOCKER_SHARED_IMAGE_DIR=$SHARED_DIR (expected finn-docker-image.tar.gz and finn-docker-tag.txt)"
+    exit 1
+  fi
 fi
 
 # Build the FINN Docker image
@@ -227,7 +270,7 @@ if [ "$FINN_DOCKER_PREBUILT" = "0" ] && [ -z "$FINN_SINGULARITY" ]; then
     --build-arg GROUPNAME=$DOCKER_GNAME \
     --build-arg USERNAME=$DOCKER_UNAME \
     --build-arg USER_UID=$DOCKER_UID \
-    .
+    . || { recho "docker build failed"; exit 1; }
   cd $OLD_PWD
 fi
 
@@ -259,6 +302,16 @@ DOCKER_EXEC+="-e LD_PRELOAD=/lib/x86_64-linux-gnu/libudev.so.1 "
 # Workaround for running multiple Vivado instances simultaneously, see:
 # https://adaptivesupport.amd.com/s/article/63253?language=en_US
 DOCKER_EXEC+="-e XILINX_LOCAL_USER_DATA=no "
+# Optional host cache for torch.hub / huggingface weights to avoid CDN 504s
+# on parallel CI runs. Bind target is /finn_cache (NOT $HOME, because docker
+# creates bind parents as root and that would break pip install --user).
+: ${FINN_DOCKER_CACHE_DIR=""}
+if [ -n "$FINN_DOCKER_CACHE_DIR" ]; then
+  mkdir -p "$FINN_DOCKER_CACHE_DIR/torch" "$FINN_DOCKER_CACHE_DIR/huggingface"
+  DOCKER_EXEC+="-v $FINN_DOCKER_CACHE_DIR:/finn_cache "
+  DOCKER_EXEC+="-e TORCH_HOME=/finn_cache/torch "
+  DOCKER_EXEC+="-e HF_HOME=/finn_cache/huggingface "
+fi
 if [ "$FINN_DOCKER_RUN_AS_ROOT" = "0" ] && [ -z "$FINN_SINGULARITY" ];then
   DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:$HOME/.ssh "
   DOCKER_EXEC+="--user $DOCKER_UID:$DOCKER_GID "
diff --git a/tests/util/test_ci_config_sync.py b/tests/util/test_ci_config_sync.py
new file mode 100644
index 0000000000..8a9e62b970
--- /dev/null
+++ b/tests/util/test_ci_config_sync.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import pytest
+
+import os
+import re
+from finn_ci import config
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+pytestmark = pytest.mark.util
+
+
+def test_jenkinsfile_stage_choices_match_python_source():
+    """Check the Jenkinsfile STAGES choice list equals config.jenkins_stage_choices()."""
+    # Anchor on the STAGES choice block so a future ``choice(name: 'XYZ', ...)``
+    # cannot match instead. Accept both single- and double-quoted Groovy strings.
+    # Read via ``with`` and a pinned encoding: no leaked handle, no locale dependence.
+    with open(os.path.join(REPO_ROOT, "ci", "Jenkinsfile"), encoding="utf-8") as fh:
+        text = fh.read()
+    match = re.search(
+        r"""choice\(\s*name:\s*['"]STAGES['"],\s*choices:\s*\[([^\]]+)\]""",
+        text,
+    )
+    assert match is not None, "could not locate STAGES choice block in Jenkinsfile"
+    choices = re.findall(r"""['"]([^'"]+)['"]""", match.group(1))
+    expected = config.jenkins_stage_choices()
+    assert choices == expected, (
+        "Jenkinsfile STAGES choices %r drifted from finn_ci.config.jenkins_stage_choices() %r"
+        % (choices, expected)
+    )
+
+
+def test_readme_stages_table_matches_python_source():
+    """Check the ci/README.md STAGES table equals config.jenkins_stage_choices()."""
+    # Read via ``with`` and a pinned encoding: no leaked handle, no locale dependence.
+    with open(os.path.join(REPO_ROOT, "ci", "README.md"), encoding="utf-8") as fh:
+        text = fh.read()
+    # Parse the values column of the "| STAGES value | ... |" table.
+    table_rows = re.findall(r"^\|\s*`([a-z0-9_]+)`(?:\s*\(default\))?\s*\|", text, re.MULTILINE)
+    expected = config.jenkins_stage_choices()
+    assert table_rows == expected, (
+        "README STAGES table %r drifted from finn_ci.config.jenkins_stage_choices() %r"
+        % (table_rows, expected)
+    )
diff --git a/tests/util/test_finn_ci_cli.py b/tests/util/test_finn_ci_cli.py
new file mode 100644
index 0000000000..0f86df8d0f
--- /dev/null
+++ b/tests/util/test_finn_ci_cli.py
@@ -0,0 +1,72 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import pytest
+
+import json
+from finn_ci import __main__ as cli
+from finn_ci import config
+
+pytestmark = pytest.mark.util
+
+
+def test_stage_choices_json_cli(capsys):
+    """The stage-choices-json subcommand prints jenkins_stage_choices() as JSON, exit 0."""
+    assert cli.main(["stage-choices-json"]) == 0
+    assert json.loads(capsys.readouterr().out) == config.jenkins_stage_choices()
+
+
+def test_validate_config_single_invocation_returns_full_payload(capsys):
+    """validate-config emits one JSON payload carrying every Validate-time key."""
+    # The Jenkinsfile collapses the Validate-time config into this one call.
+    # Keys are checked with a superset comparison, so a key added later does
+    # not break this test; the payload must also parse as plain JSON.
+    argv = ["validate-config", "--choice", "sanity", "--job-name", "finn.dev"]
+    assert cli.main(argv) == 0
+    payload = json.loads(capsys.readouterr().out)
+    assert set(payload) >= {"enabled_params", "job_key", "shard_plan"}
+    assert payload["enabled_params"] == ["sanity"]
+    # job-key sanitiser is shared with the standalone subcommand.
+    assert payload["job_key"] == "finn.dev"
+    # the shard plan is the build pipeline's single source for the branch list
+    plan = payload["shard_plan"]
+    assert set(plan) >= {"shards", "candidates", "zipArtifacts"}
+    assert plan["shards"], "sanity choice must produce at least one shard"
+
+
+def test_validate_config_rejects_orphan_zipartifact_board(monkeypatch, capsys):
+    """validate-config exits 2 and names the board when zipArtifacts lists an unknown one."""
+    # validate_config() runs inside the subcommand so a STAGES row with
+    # an orphan board fails Validate loudly, not three stages later when
+    # the HW pipeline tries to look it up.
+    orphan_row = {
+        "param": "sanity",
+        "stage": "Bad",
+        "marker": "sanity_bnn",
+        "shards": 1,
+        "workers": 1,
+        "zipArtifacts": {"hwTestType": "bnn_build_sanity", "boards": ["NotABoard"]},
+    }
+    bad_stages = list(config.STAGES) + [orphan_row]
+    monkeypatch.setattr(config, "STAGES", bad_stages)
+    exit_code = cli.main(["validate-config", "--choice", "full", "--job-name", "j"])
+    assert exit_code == 2
+    assert "NotABoard" in capsys.readouterr().err
+
+
+def test_validate_config_runs_validate_stage_row_for_every_entry(monkeypatch, capsys):
+    """A STAGES row with an unsafe marker makes validate-config exit 2 without a traceback."""
+    # CLI form: main() catches ValueError, prints a one-line ci_sharding:
+    # message to stderr, and exits 2 instead of leaking a Python traceback
+    # into the Jenkins Validate console.
+    unsafe_row = {"param": "p", "stage": "Bad", "marker": "a and b", "shards": 1, "workers": 1}
+    monkeypatch.setattr(config, "STAGES", [unsafe_row])
+    exit_code = cli.main(["validate-config", "--choice", "p", "--job-name", "j"])
+    assert exit_code == 2
+    err = capsys.readouterr().err
+    # Check the prefix, the reason, and the absence of a raw traceback.
+    assert err.startswith("ci_sharding: ")
+    assert "unsafe marker" in err
+    assert "Traceback" not in err
diff --git a/tests/util/test_print_pytest_failures.py b/tests/util/test_finn_ci_failures.py
similarity index 58%
rename from tests/util/test_print_pytest_failures.py
rename to tests/util/test_finn_ci_failures.py
index 6667bf802c..f942b61c8c 100644
--- a/tests/util/test_print_pytest_failures.py
+++ b/tests/util/test_finn_ci_failures.py
@@ -5,12 +5,10 @@
 
 import pytest
 
-import os
-import subprocess
-import sys
+from finn_ci import __main__ as cli
+from finn_ci import failures
 
-REPO_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-SCRIPT = os.path.join(REPO_ROOT, "ci", "scripts", "print_pytest_failures.py")
+pytestmark = pytest.mark.util
 
 
 JUNIT_WITH_FAILURES = """\
@@ -32,23 +30,14 @@
 """
 
 
-def _run(xml_path, stash, lines_per, max_fails):
-    return subprocess.run(
-        [sys.executable, SCRIPT, str(xml_path), stash, str(lines_per), str(max_fails)],
-        capture_output=True,
-        text=True,
-        check=True,
-    )
-
-
-@pytest.mark.util
-def test_print_pytest_failures_emits_per_failure_blocks(tmp_path):
+def test_print_failures_emits_per_failure_blocks(tmp_path, capsys):
     xml = tmp_path / "stage.xml"
     xml.write_text(JUNIT_WITH_FAILURES)
 
-    result = _run(xml, "stage", lines_per=10, max_fails=10)
+    rc = failures.print_failures(str(xml), "stage", 10, 10)
 
-    out = result.stdout
+    out = capsys.readouterr().out
+    assert rc == 0
     assert "[pytest-failures stage] 2 test failure(s)" in out
     assert "FAILURE: pkg.mod::test_fails" in out
     assert "assert 1 == 2" in out
@@ -57,8 +46,7 @@ def test_print_pytest_failures_emits_per_failure_blocks(tmp_path):
     assert "trace line 2" in out
 
 
-@pytest.mark.util
-def test_print_pytest_failures_truncates_long_bodies(tmp_path):
+def test_print_failures_truncates_long_bodies(tmp_path, capsys):
     body_lines = "\n".join("line %02d" % i for i in range(50))
     xml = tmp_path / "stage.xml"
     xml.write_text(
@@ -69,15 +57,15 @@ def test_print_pytest_failures_truncates_long_bodies(tmp_path):
         "</testcase></testsuite></testsuites>\n" % body_lines
     )
 
-    result = _run(xml, "stage", lines_per=5, max_fails=10)
+    failures.print_failures(str(xml), "stage", 5, 10)
 
-    assert "earlier lines elided" in result.stdout
-    assert "line 49" in result.stdout
-    assert "line 04" not in result.stdout
+    out = capsys.readouterr().out
+    assert "earlier lines elided" in out
+    assert "line 49" in out
+    assert "line 04" not in out
 
 
-@pytest.mark.util
-def test_print_pytest_failures_caps_to_max_failures(tmp_path):
+def test_print_failures_caps_to_max_failures(tmp_path, capsys):
     cases = "\n".join(
         "<testcase classname='c' name='t%d'><failure message='m'>x</failure></testcase>" % i
         for i in range(5)
@@ -89,14 +77,14 @@ def test_print_pytest_failures_caps_to_max_failures(tmp_path):
         "%s\n</testsuite></testsuites>\n" % cases
     )
 
-    result = _run(xml, "stage", lines_per=10, max_fails=2)
+    failures.print_failures(str(xml), "stage", 10, 2)
 
-    assert "5 test failure(s)" in result.stdout
-    assert "and 3 more failure(s) elided" in result.stdout
+    out = capsys.readouterr().out
+    assert "5 test failure(s)" in out
+    assert "and 3 more failure(s) elided" in out
 
 
-@pytest.mark.util
-def test_print_pytest_failures_handles_no_failures(tmp_path):
+def test_print_failures_handles_no_failures(tmp_path, capsys):
     xml = tmp_path / "stage.xml"
     xml.write_text(
         "<?xml version='1.0'?>\n"
@@ -104,16 +92,24 @@ def test_print_pytest_failures_handles_no_failures(tmp_path):
         "<testcase classname='c' name='t'/></testsuite></testsuites>\n"
     )
 
-    result = _run(xml, "stage", lines_per=10, max_fails=10)
+    failures.print_failures(str(xml), "stage", 10, 10)
 
-    assert "no test failures recorded" in result.stdout
+    assert "no test failures recorded" in capsys.readouterr().out
 
 
-@pytest.mark.util
-def test_print_pytest_failures_handles_unparseable_xml(tmp_path):
+def test_print_failures_handles_unparseable_xml(tmp_path, capsys):
     xml = tmp_path / "stage.xml"
     xml.write_text("not actually xml")
 
-    result = _run(xml, "stage", lines_per=10, max_fails=10)
+    failures.print_failures(str(xml), "stage", 10, 10)
+
+    assert "failed to parse" in capsys.readouterr().out
 
-    assert "failed to parse" in result.stdout
+
+def test_print_failures_cli_smoke(tmp_path, capsys):
+    """Smoke-test the ``print-failures`` subcommand wiring in finn_ci.__main__."""
+    junit_path = tmp_path / "stage.xml"
+    junit_path.write_text(JUNIT_WITH_FAILURES)
+    argv = ["print-failures", str(junit_path), "stage", "10", "10"]
+    assert cli.main(argv) == 0
+    assert "[pytest-failures stage] 2 test failure(s)" in capsys.readouterr().out
diff --git a/tests/util/test_finn_ci_lsf.py b/tests/util/test_finn_ci_lsf.py
new file mode 100644
index 0000000000..93a58a9411
--- /dev/null
+++ b/tests/util/test_finn_ci_lsf.py
@@ -0,0 +1,49 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import pytest
+
+import json
+from finn_ci import lsf
+
+pytestmark = pytest.mark.util
+
+
+def test_parse_lsf_jobs_text_form_groups_by_build():
+    """Text-form listing: job ids are grouped per build; non-matching lines are dropped."""
+    listing = [
+        "1001 finn_ci_finn_42_vivado_abc",
+        "1002 finn_ci_finn_42_vitis_hls_def",
+        "1003 finn_ci_finn_43_xelab_ghi",
+        "9999 unrelated_job_name",
+        "malformed-line-no-jobname",
+    ]
+    raw = "\n".join(listing)
+    grouped = lsf.parse_lsf_jobs("finn_ci_finn_", raw)
+    assert grouped == {"42": ["1001", "1002"], "43": ["1003"]}
+
+
+def test_parse_lsf_jobs_json_form():
+    """JSON-form listing: job ids under RECORDS are grouped by build number."""
+    # The third record's JOB_NAME does not start with the prefix, so the
+    # expected grouping below contains only the first two job ids.
+    records = [
+        {"JOBID": "2001", "JOB_NAME": "finn_ci_finn_7_vivado_x"},
+        {"JOBID": "2002", "JOB_NAME": "finn_ci_finn_7_v++_y"},
+        {"JOBID": "2003", "JOB_NAME": "other"},
+    ]
+    raw = json.dumps({"RECORDS": records})
+    grouped = lsf.parse_lsf_jobs("finn_ci_finn_", raw)
+    assert grouped == {"7": ["2001", "2002"]}
+
+
+def test_parse_lsf_jobs_ignores_non_numeric_build_token():
+    """A job whose build token is not numeric yields no groups."""
+    assert lsf.parse_lsf_jobs("finn_ci_finn_", "3001 finn_ci_finn_notanumber_vivado_x") == {}
+
+
+def test_parse_lsf_jobs_empty_input():
+    for blank in ("", "   \n  "):  # empty and whitespace-only listings
+        assert lsf.parse_lsf_jobs("finn_ci_finn_", blank) == {}
diff --git a/tests/util/test_finn_ci_retention.py b/tests/util/test_finn_ci_retention.py
new file mode 100644
index 0000000000..2f731d50c9
--- /dev/null
+++ b/tests/util/test_finn_ci_retention.py
@@ -0,0 +1,389 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import pytest
+
+import os
+from finn_ci import __main__ as cli
+from finn_ci import retention
+
+pytestmark = pytest.mark.util  # pytest applies the "util" mark to every test in this module
+
+
+@pytest.mark.parametrize(
+    ("prune_fn", "rel_parent"),
+    [
+        (retention.prune_images, "job"),
+        (retention.prune_artifacts, "ci_runs/job"),
+    ],
+)
+def test_prune_keeps_current_build_and_newest(tmp_path, prune_fn, rel_parent):
+    """Both numbered pruners drop builds 1 and 2 and keep 3 and 4 when retain_n is 2."""
+    # prune_images and prune_artifacts share the numbered-rotation core, so one
+    # parametrised body covers both. Only the directory holding the build dirs
+    # differs (<root>/job vs <root>/ci_runs/job).
+    parent = tmp_path.joinpath(*rel_parent.split("/"))
+    build_dirs = {name: parent / name for name in ("1", "2", "3", "4")}
+    for build_dir in build_dirs.values():
+        build_dir.mkdir(parents=True)
+        os.utime(str(build_dir), (1, 1))  # atime and mtime set to 1 s after the epoch
+
+    prune_fn(str(tmp_path), "job", "5", retain_n=2, max_age_days=0)
+
+    survivors = {name for name, build_dir in build_dirs.items() if build_dir.exists()}
+    assert survivors == {"3", "4"}
+
+
+def test_prune_images_skips_when_parent_missing(tmp_path, capsys):
+    """A root that does not exist yields return code 0 plus a "skipping" note on stdout."""
+    missing_root = tmp_path / "absent"
+    assert retention.prune_images(str(missing_root), "job", "1", 1, 0) == 0
+    assert "not present, skipping" in capsys.readouterr().out
+
+
+def test_prune_numbered_dry_run_matches_real_run_count(tmp_path):
+    """dry_run reports the same match count as a real run but leaves every dir in place."""
+    parent = tmp_path / "p"
+    parent.mkdir()
+    for name in ("1", "2", "3", "4"):
+        build_dir = parent / name
+        build_dir.mkdir()
+        os.utime(str(build_dir), (1, 1))
+
+    def prune(dry_run):
+        # Identical arguments for both passes, so only dry_run can explain a difference.
+        return retention._prune_numbered(
+            str(parent),
+            current_build="5",
+            retain_n=1,
+            max_age_days=0,
+            dry_run=dry_run,
+            tag="t",
+        )
+
+    def on_disk():
+        return sorted(entry.name for entry in parent.iterdir())
+
+    assert prune(dry_run=True) == 3
+    assert on_disk() == ["1", "2", "3", "4"]
+
+    assert prune(dry_run=False) == 3
+    assert on_disk() == ["4"]
+
+
+def test_prune_numbered_rejects_non_numeric_current(tmp_path):
+    """Reject a current_build that is not integer-like.
+
+    Both a non-numeric word and None must raise ValueError with the same message.
+    """
+    # Outside Jenkins (manual CLI use) or with a broken BUILD_NUMBER env, the
+    # value may not be a number. The sibling filter only matches numeric names,
+    # so such a value could never match and retention would silently degrade
+    # to "newest N". The pruner must fail loudly instead.
+    parent = tmp_path / "p"
+    parent.mkdir()
+    for name in ("1", "2"):
+        (parent / name).mkdir()
+
+    expected = "current_build must be an integer-like string"
+    for bad_build in ("not-a-number", None):
+        with pytest.raises(ValueError, match=expected):
+            # dry_run is on as well, so the call is not meant to remove anything.
+            retention._prune_numbered(
+                str(parent),
+                current_build=bad_build,
+                retain_n=1,
+                max_age_days=0,
+                dry_run=True,
+                tag="t",
+            )
+
+
+def test_prune_numbered_canonicalises_leading_zeros(tmp_path):
+    """Dir "0123" is treated as current build "123" and therefore survives pruning."""
+    # The on-disk name carries leading zeros while BUILD_NUMBER does not, yet
+    # both denote the same build. Comparing the raw strings would leave "0123"
+    # out of the keep set and make the current build eligible for pruning.
+    parent = tmp_path / "p"
+    parent.mkdir()
+    for padded in ("0123", "0124", "0125"):
+        build_dir = parent / padded
+        build_dir.mkdir()
+        os.utime(str(build_dir), (1, 1))
+
+    matched = retention._prune_numbered(
+        str(parent),
+        current_build="123",
+        retain_n=1,
+        max_age_days=0,
+        dry_run=False,
+        tag="t",
+    )
+    # "0125" is the newest and is kept by retain_n=1. "0123" is kept because it
+    # is the current build. That leaves "0124" as the single pruned entry.
+    leftover = sorted(entry.name for entry in parent.iterdir())
+    assert (matched, leftover) == (1, ["0123", "0125"])
+
+
+def test_prune_numbered_tolerates_concurrent_delete(tmp_path):
+    """A FileNotFoundError from the remove callback is absorbed and pruning continues."""
+    parent = tmp_path / "p"
+    parent.mkdir()
+    for name in ("1", "2", "3"):
+        (parent / name).mkdir()
+        os.utime(str(parent / name), (1, 1))
+
+    delegate = retention.robust_rmtree
+    calls = []
+
+    def flaky_rmtree(path, *args, **kwargs):
+        # Fail only the first removal, as if another CI run had pruned that dir already.
+        calls.append(path)
+        if len(calls) == 1:
+            raise FileNotFoundError(path)
+        return delegate(path, *args, **kwargs)
+
+    matched = retention._prune_numbered(
+        str(parent),
+        current_build="5",
+        retain_n=1,
+        max_age_days=0,
+        dry_run=False,
+        tag="t",
+        remove=flaky_rmtree,
+    )
+    # retain_n=1 keeps the newest build '3'. '1' was the simulated race victim
+    # and the error was tolerated, while '2' was removed for real.
+    leftover = {entry.name for entry in parent.iterdir()}
+    assert matched == 2
+    assert "3" in leftover and "2" not in leftover
+
+
+def test_prune_numbered_tolerates_concurrent_delete_in_age_check(tmp_path, monkeypatch):
+    """A FileNotFoundError raised by the mtime probe skips that entry without aborting."""
+    parent = tmp_path / "p"
+    parent.mkdir()
+    # retain_n=1 keeps the newest ('3'). '1' and '2' are both deletion
+    # candidates. age cutoff is in the past so both qualify on mtime.
+    for build in ("1", "2", "3"):
+        (parent / build).mkdir()
+        os.utime(str(parent / build), (1, 1))
+
+    real_getmtime = retention.os.path.getmtime
+
+    def flaky_getmtime(path):
+        # Match the final path component, so the stub is independent of the path separator.
+        if os.path.basename(path) == "1":
+            raise FileNotFoundError(path)
+        return real_getmtime(path)
+
+    monkeypatch.setattr(retention.os.path, "getmtime", flaky_getmtime)
+    matched = retention._prune_numbered(
+        str(parent),
+        current_build="9",
+        retain_n=1,
+        max_age_days=7,
+        dry_run=False,
+        tag="t",
+    )
+    # '1' vanished during the age probe, so it counts as already pruned and is not
+    # matched. '2' was still processed and deleted: the error did not abort the loop.
+    surviving = sorted(p.name for p in parent.iterdir())
+    assert matched == 1
+    assert "2" not in surviving and "3" in surviving
+
+
+def test_prune_snapshots_keeps_current_build_and_newest(tmp_path):
+    """Snapshots 1 and 2 are pruned. Build 3 (current), 4, 5 and unnumbered files stay."""
+    state_root = tmp_path / "state"
+    job_dir = state_root / "finn"
+    job_dir.mkdir(parents=True)
+    # Files without a build number (the master itself, corrupt backups) must
+    # survive even though they sort lexicographically alongside snapshots.
+    unnumbered = ["ci_timings_master.json", "ci_timings_master.json.corrupt-1"]
+    snapshots = ["build_%d_timings_input.json" % n for n in (1, 2, 3, 4, 5)]
+    for filename in snapshots + unnumbered:
+        (job_dir / filename).write_text("{}")
+
+    retention.prune_snapshots(
+        str(state_root), "finn", current_build="3", retain_n=2, max_age_days=0
+    )
+
+    remaining = {entry.name for entry in job_dir.iterdir()}
+    assert set(snapshots[2:] + unnumbered) <= remaining
+    assert remaining.isdisjoint(snapshots[:2])
+
+
+def test_prune_snapshots_skips_when_parent_missing(tmp_path, capsys):
+    """A state root that was never created gives return code 0 and a "skipping" note."""
+    absent_root = str(tmp_path / "nope")
+    window = dict(current_build="1", retain_n=2, max_age_days=0)
+    status = retention.prune_snapshots(absent_root, "finn", **window)
+    assert status == 0
+    assert "not present, skipping" in capsys.readouterr().out
+
+
+def test_prune_snapshots_rejects_non_numeric_current(tmp_path):
+    """current_build="x" is rejected with a ValueError whose message names prune-snapshots."""
+    window = dict(current_build="x", retain_n=1, max_age_days=0)
+    with pytest.raises(ValueError, match="prune-snapshots: current_build must be"):
+        retention.prune_snapshots(str(tmp_path), "finn", **window)
+
+
+def test_prune_snapshots_honours_age_gating(tmp_path):
+    """The back-dated snapshot is pruned while the freshly written one is kept."""
+    state_root = tmp_path / "state"
+    snapshot_dir = state_root / "finn"
+    snapshot_dir.mkdir(parents=True)
+    stale, recent = (snapshot_dir / ("build_%d_timings_input.json" % n) for n in (1, 2))
+    for snapshot in (stale, recent):
+        snapshot.write_text("{}")
+    os.utime(str(stale), (1, 1))  # back-date only build 1; build 2 keeps its fresh mtime
+
+    retention.prune_snapshots(
+        str(state_root), "finn", current_build="3", retain_n=1, max_age_days=1
+    )
+
+    assert (stale.exists(), recent.exists()) == (False, True)
+
+
+def test_prune_snapshots_tolerates_concurrent_delete_on_unlink(tmp_path, monkeypatch):
+    """A FileNotFoundError from the first unlink is tolerated and pruning carries on."""
+    state_root = tmp_path / "state"
+    job = "finn"
+    parent = state_root / job
+    parent.mkdir(parents=True)
+    for n in (1, 2, 3):
+        path = parent / ("build_%d_timings_input.json" % n)
+        path.write_text("{}")
+        os.utime(str(path), (1, 1))
+    real_unlink = retention.os.unlink
+    state = {"first": True}
+
+    def flaky_unlink(path, *args, **kwargs):
+        # This replaces os.unlink as seen through retention.os, so forward extras (dir_fd).
+        if state.pop("first", False):
+            raise FileNotFoundError(path)
+        return real_unlink(path, *args, **kwargs)
+
+    monkeypatch.setattr(retention.os, "unlink", flaky_unlink)
+    retention.prune_snapshots(str(state_root), job, current_build="9", retain_n=1, max_age_days=0)
+    assert not (parent / "build_2_timings_input.json").exists()
+    assert (parent / "build_3_timings_input.json").exists()
+
+
+def test_prune_snapshots_dry_run_does_not_delete(tmp_path):
+    """With dry_run=True all three snapshots are still on disk after the call."""
+    state_root = tmp_path / "state"
+    snapshot_dir = state_root / "finn"
+    snapshot_dir.mkdir(parents=True)
+    names = ["build_%d_timings_input.json" % n for n in (1, 2, 3)]
+    for name in names:
+        (snapshot_dir / name).write_text("{}")
+
+    window = dict(current_build="3", retain_n=1, max_age_days=0)
+    retention.prune_snapshots(str(state_root), "finn", dry_run=True, **window)
+
+    # names is already in sorted order, so it doubles as the expected listing.
+    leftover = sorted(entry.name for entry in snapshot_dir.iterdir())
+    assert leftover == names
+
+
+def test_prune_snapshots_cli_smoke(tmp_path):
+    """cli.main returns 0 for `prune --kind snapshot ... --dry-run`."""
+    snapshot = tmp_path / "state" / "finn" / "build_1_timings_input.json"
+    snapshot.parent.mkdir(parents=True)
+    snapshot.write_text("{}")
+    argv = ["prune", "--kind", "snapshot", str(tmp_path / "state"), "finn", "1", "--dry-run"]
+    assert cli.main(argv) == 0
+
+
+def test_prune_cli_kind_image_reads_retention_window(tmp_path):
+    """`prune --kind image` is given no window on the command line yet keeps three builds."""
+    # The CLI takes retain_n/max_age_days from RETENTION[kind], so a caller
+    # cannot pass a window that disagrees with the documented policy.
+    parent = tmp_path / "job"
+    for number in range(1, 6):
+        build_dir = parent / str(number)
+        build_dir.mkdir(parents=True)
+        os.utime(str(build_dir), (1, 1))  # ancient mtime, so the age gate never protects it
+    assert cli.main(["prune", "--kind", "image", str(tmp_path), "job", "9"]) == 0
+    # RETENTION["image"]["retain"] == 3 keeps the newest three (3, 4, 5). The
+    # current build 9 has no directory, so 1 and 2 are the only ones pruned.
+    assert sorted(entry.name for entry in parent.iterdir()) == ["3", "4", "5"]
+
+
+def test_prune_cli_rejects_unknown_kind(tmp_path):
+    argv = ["prune", "--kind", "bogus", str(tmp_path), "job", "1"]
+    pytest.raises(SystemExit, cli.main, argv)  # "bogus" is the unknown --kind value
+
+
+def test_prune_cli_sanitises_job_key(tmp_path):
+    """A JOB_NAME of ".." prunes under root/job and leaves a sibling directory alone."""
+    # The prune root is os.path.join(root, job_key(JOB_NAME)). ".." has to
+    # collapse to "job", otherwise the parent would resolve to root's parent
+    # and the destructive rmtree could climb out of root.
+    root = tmp_path / "images"
+    job_parent, sibling = root / "job", root / "sibling"
+    for number in range(1, 6):
+        build_dir = job_parent / str(number)
+        build_dir.mkdir(parents=True)
+        os.utime(str(build_dir), (1, 1))
+    sibling.mkdir()
+
+    assert cli.main(["prune", "--kind", "image", str(root), "..", "9"]) == 0
+    # RETENTION["image"]["retain"] == 3 keeps the newest three under root/job,
+    # and the ".." never escaped to touch the sibling tree.
+    assert sorted(entry.name for entry in job_parent.iterdir()) == ["3", "4", "5"]
+    assert sibling.exists()
+
+
+def test_prune_pip_cache_keeps_current_and_fresh(tmp_path):
+    """Only the back-dated directory is pruned. The keep path and a fresh dir remain."""
+    root = tmp_path / "pip"
+    current, stale, recent = (root / name for name in ("keepme", "stale", "recent"))
+    for cache_dir in (current, stale, recent):
+        cache_dir.mkdir(parents=True)
+    os.utime(str(stale), (1, 1))  # only "stale" gets an ancient mtime
+
+    matched = retention.prune_pip_cache(str(root), str(current), max_age_days=14)
+
+    # "stale" is pruned for its age, "keepme" is the excluded path passed as the
+    # second argument, and "recent" is still fresh.
+    assert matched == 1
+    assert [d.exists() for d in (current, stale, recent)] == [True, False, True]
+
+
+def test_prune_pip_cache_dry_run_deletes_nothing(tmp_path):
+    """dry_run counts the stale directory as a match but leaves it on disk."""
+    stale = tmp_path / "pip" / "stale"
+    stale.mkdir(parents=True)
+    os.utime(str(stale), (1, 1))
+    # An empty string stands in for the keep path here.
+    matched = retention.prune_pip_cache(str(stale.parent), "", max_age_days=14, dry_run=True)
+    assert (matched, stale.exists()) == (1, True)
+
+
+def test_prune_pip_cache_missing_root_is_noop(tmp_path):
+    assert retention.prune_pip_cache(str(tmp_path / "absent"), "", 14) == 0  # root never created
+
+
+def test_prune_numbered_rejects_negative_max_age_days(tmp_path):
+    """max_age_days=-1 makes _prune_numbered raise ValueError."""
+    # Failing loudly matters: a negative window that silently disabled the age
+    # gate would delete every eligible entry regardless of its age.
+    build_dir = tmp_path / "p" / "1"
+    build_dir.mkdir(parents=True)
+    kwargs = dict(current_build="9", retain_n=1, max_age_days=-1, dry_run=True, tag="t")
+
+    with pytest.raises(ValueError, match="max_age_days must be >= 0"):
+        retention._prune_numbered(str(build_dir.parent), **kwargs)
+
+
+def test_prune_pip_cache_rejects_negative_max_age_days(tmp_path):
+    """prune_pip_cache raises ValueError for max_age_days=-1, even on an empty cache root."""
+    (tmp_path / "pip").mkdir()
+    with pytest.raises(ValueError, match="max_age_days must be >= 0"):
+        retention.prune_pip_cache(str(tmp_path / "pip"), "", max_age_days=-1)
diff --git a/tests/util/test_finn_ci_timing.py b/tests/util/test_finn_ci_timing.py
new file mode 100644
index 0000000000..dd44d497ae
--- /dev/null
+++ b/tests/util/test_finn_ci_timing.py
@@ -0,0 +1,273 @@
+# Copyright (C) 2026, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import pytest
+
+import json
+from finn_ci import jsonio, sharding, timing
+
+pytestmark = pytest.mark.util  # pytest applies the "util" mark to every test in this module
+
+
+def write_json(path, payload):  # test helper: dump payload as JSON text into path
+    path.write_text(json.dumps(payload))  # path must offer write_text()
+
+
+def _seed_master_with_group(path, name, samples, **extra):
+    """Write a master file at `path` that holds exactly one group.
+
+    The group entry is {"samples": [...]} merged with any `extra` keyword fields.
+    """
+    group = {"samples": list(samples)}
+    group.update(extra)
+    write_json(path, {"schema_version": timing.SCHEMA_VERSION, "groups": {name: group}})
+
+
+def _write_observation(reports_dir, stash, name, seconds):
+    """Drop `<stash>.timings.json` into reports_dir with one group observation."""
+    # Job "j" and build "1" are fixed placeholders. The stash doubles as the stage.
+    report = {
+        "stash": stash,
+        "metadata": {"job": "j", "build": "1", "stage": stash},
+        "groups": [{"name": name, "seconds": seconds, "count": 1}],
+    }
+    write_json(reports_dir / ("%s.timings.json" % stash), report)
+
+
+def test_update_master_preserves_unseen_and_appends_seen(tmp_path):
+    """Persisting appends the new sample to "seen" and keeps the samples of "unseen"."""
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    master = tmp_path / "master.json"
+    out = reports / "ci_timings_master.json"
+
+    # The stored master already knows two groups.
+    stored_groups = {
+        "seen": {"samples": [1.0], "count": 1},
+        "unseen": {"samples": [7.0], "count": 2},
+    }
+    write_json(master, {"schema_version": timing.SCHEMA_VERSION, "groups": stored_groups})
+
+    # This build reports a single observation, and only for the "seen" group.
+    observation = {
+        "stash": "stage",
+        "metadata": {"job": "job", "build": "12", "stage": "Stage"},
+        "groups": [{"name": "seen", "seconds": 3.5, "count": 4}],
+    }
+    write_json(reports / "stage.timings.json", observation)
+
+    timing.update_master(str(reports), str(master), str(out), update_persistent=True)
+
+    persisted_groups = json.loads(master.read_text())["groups"]
+    merged = json.loads(out.read_text())
+    # Observed groups are always appended, and unseen groups are left untouched.
+    assert persisted_groups["seen"]["samples"] == [1.0, 3.5]
+    assert persisted_groups["unseen"]["samples"] == [7.0]
+    assert merged["groups"]["seen"]["samples"] == [1.0, 3.5]
+
+    # last_update describes this run: one observed group, written persistently.
+    assert merged["last_update"]["observed_groups"] == 1
+    assert merged["last_update"]["persistent_update"] is True
+
+
+def test_merge_maps_writes_searchable_text(tmp_path):
+    """merge_maps renders a shard-map row as key=value text in shard_map.txt."""
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    row = {
+        "nodeid": "tests/foo.py::test_bar",
+        "stage": "Stage",
+        "stash": "stage",
+        "shard_id": 0,
+        "num_shards": 2,
+        "group": "grp",
+        "weight_s": 1.25,
+        "source": "known",
+    }
+    write_json(reports / "stage.shardmap.json", [row])
+
+    timing.merge_maps(str(reports))
+
+    rendered = (reports / "shard_map.txt").read_text()
+    expected_fragments = [
+        "nodeid=tests/foo.py::test_bar",
+        "stage=Stage",
+        "shard=1/2",  # shard_id 0 of num_shards 2 is shown one-based
+        "source=known",
+    ]
+    assert all(fragment in rendered for fragment in expected_fragments)
+
+
+def test_prepare_timing_snapshot_empty_when_master_missing(tmp_path):
+    """Without a master file the snapshot is still written, with no groups in it."""
+    missing, snapshot = tmp_path / "missing-master.json", tmp_path / "snapshot.json"
+    timing.prepare_timing_snapshot(str(missing), str(snapshot))
+    written = json.loads(snapshot.read_text())
+    assert (written["groups"], written["schema_version"]) == ({}, timing.SCHEMA_VERSION)
+
+
+def test_prepare_timing_snapshot_copies_master_when_present(tmp_path):
+    """An existing master's group samples are carried over into the snapshot file."""
+    master = tmp_path / "master.json"
+    snapshot = tmp_path / "snapshot.json"
+    seeded = {
+        "schema_version": timing.SCHEMA_VERSION,
+        "groups": {"slow": {"samples": [12.0]}},
+    }
+    write_json(master, seeded)
+
+    timing.prepare_timing_snapshot(str(master), str(snapshot))
+
+    copied = json.loads(snapshot.read_text())
+    # Only the samples of the seeded group are checked here.
+    assert copied["groups"]["slow"]["samples"] == [12.0]
+
+
+def test_update_master_raises_on_persistent_write_failure(tmp_path, monkeypatch):
+    """An error raised by jsonio.write_json_atomic propagates out of update_master."""
+    # Letting the failure propagate allows the calling pipeline to mark the
+    # build UNSTABLE instead of silently leaving a stale master behind.
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    master = tmp_path / "master.json"
+    out = reports / "ci_timings_master.json"
+    write_json(master, {"schema_version": timing.SCHEMA_VERSION, "groups": {}})
+    observation = {
+        "stash": "stage",
+        "metadata": {"job": "j", "build": "1", "stage": "Stage"},
+        "groups": [{"name": "seen", "seconds": 1.0, "count": 1}],
+    }
+    write_json(reports / "stage.timings.json", observation)
+
+    def fail_write(*_args, **_kwargs):
+        raise IOError("simulated NFS write failure")
+
+    # From here on every jsonio.write_json_atomic call raises, like a failing NFS write.
+    monkeypatch.setattr(jsonio, "write_json_atomic", fail_write)
+
+    with pytest.raises(IOError, match="simulated NFS write failure"):
+        timing.update_master(
+            str(reports),
+            str(master),
+            str(out),
+            update_persistent=True,
+            metadata={"job": "j", "build": "1"},
+        )
+
+
+def test_update_master_no_master_path_writes_preview(tmp_path):
+    """With master_path="" the preview is still written and flagged as non-persistent."""
+    # Local-fallback mode (no NFS): there is nowhere for the master to live,
+    # but the per-build preview at out_path must be produced regardless.
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    out = reports / "ci_timings_master.json"
+    observation = {
+        "stash": "stage",
+        "metadata": {"job": "j", "build": "1", "stage": "Stage"},
+        "groups": [{"name": "seen", "seconds": 1.0, "count": 1}],
+    }
+    write_json(reports / "stage.timings.json", observation)
+
+    rc = timing.update_master(
+        str(reports),
+        master_path="",
+        out_path=str(out),
+        metadata={"job": "j", "build": "1"},
+    )
+
+    assert rc == 0
+    preview = json.loads(out.read_text())
+    last_update = preview["last_update"]
+    assert preview["groups"]["seen"]["samples"] == [1.0]
+    assert last_update["observed_groups"] == 1
+    assert last_update["persistent_update"] is False
+
+
+def test_observed_groups_uses_max_seconds_for_duplicate_group(tmp_path):
+    """When two reports name the same group, the larger seconds value (9.0) is kept."""
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    for stash, seconds in (("fast", 1.0), ("slow", 9.0)):
+        _write_observation(reports, stash, "same", seconds)
+    assert timing.observed_groups_from_reports(str(reports))["same"] == 9.0
+
+
+def test_update_master_cold_start_accepts_first_observation(tmp_path):
+    """A group absent from an empty master is created from its first observation."""
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    master, out = tmp_path / "master.json", reports / "ci_timings_master.json"
+    write_json(master, {"schema_version": timing.SCHEMA_VERSION, "groups": {}})
+    _write_observation(reports, "stage", "newgroup", 42.0)
+    timing.update_master(str(reports), str(master), str(out), update_persistent=True)
+    after = json.loads(master.read_text())
+    assert after["groups"]["newgroup"]["samples"] == [42.0]
+    assert after["last_update"]["observed_groups"] == 1
+
+
+def test_update_master_grows_samples_to_max_then_trims(tmp_path):
+    """The sample window fills up to five entries, then drops the oldest on the next run."""
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    master, out = tmp_path / "master.json", reports / "ci_timings_master.json"
+    _seed_master_with_group(master, "g", [10.0] * 4)
+
+    def samples_after_observing(seconds):
+        _write_observation(reports, "stage", "g", seconds)
+        timing.update_master(str(reports), str(master), str(out), update_persistent=True)
+        return json.loads(master.read_text())["groups"]["g"]["samples"]
+
+    # The 5th sample is appended: the window is full but not yet trimmed.
+    assert samples_after_observing(11.0) == [10.0, 10.0, 10.0, 10.0, 11.0]
+    # The next observation evicts the oldest sample (FIFO ring).
+    assert samples_after_observing(12.0) == [10.0, 10.0, 10.0, 11.0, 12.0]
+
+
+def test_update_master_uses_max_so_slowest_recent_run_sets_weight(tmp_path):
+    """The weight loaded for a group equals the largest sample in its window (35.0)."""
+    # Taking the max weighs the group at its slowest recent run, which is the
+    # conservative estimate. The spike leaves the window after MAX_SAMPLES
+    # newer runs, so a one-off outlier does not pin the weight forever.
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    master, out = tmp_path / "master.json", reports / "ci_timings_master.json"
+    _seed_master_with_group(master, "g", [10.0] * 4)
+    _write_observation(reports, "stage", "g", 35.0)
+
+    timing.update_master(str(reports), str(master), str(out), update_persistent=True)
+    stored = json.loads(master.read_text())["groups"]["g"]["samples"]
+    assert stored == [10.0, 10.0, 10.0, 10.0, 35.0]
+    assert sharding.load_group_weights(str(master))["g"] == 35.0
+
+
+def test_update_master_preview_leaves_persistent_master_untouched(tmp_path):
+    """Without update_persistent the preview gains the new sample, the master does not."""
+    # Non-persist mode writes the per-build preview to out_path and must
+    # never touch the master that is already on disk.
+    reports = tmp_path / "reports"
+    reports.mkdir()
+    master = tmp_path / "master.json"
+    out = reports / "ci_timings_master.json"
+    _seed_master_with_group(master, "g", [10.0])
+    _write_observation(reports, "stage", "g", 25.0)
+
+    timing.update_master(str(reports), str(master), str(out))
+
+    def load(path):
+        return json.loads(path.read_text())
+
+    # The master still holds only its seeded sample.
+    assert load(master)["groups"]["g"]["samples"] == [10.0]
+    preview = load(out)
+    assert preview["groups"]["g"]["samples"] == [10.0, 25.0]
+    assert preview["last_update"]["persistent_update"] is False
+
+
+def test_normalise_master_drops_unknown_schema_version(capsys):
+    """schema_version 99 is unknown: groups are discarded and a warning goes to stderr."""
+    got = timing.normalise_master({"schema_version": 99, "groups": {"g": {"samples": [1.0]}}})
+    assert (got["schema_version"], got["groups"]) == (timing.SCHEMA_VERSION, {})
+    assert "unrecognised schema_version" in capsys.readouterr().err