diff --git a/.ci/publish-prebuilt.sh b/.ci/publish-prebuilt.sh
index 4b48abab..2eed5784 100755
--- a/.ci/publish-prebuilt.sh
+++ b/.ci/publish-prebuilt.sh
@@ -14,7 +14,12 @@
 # Outputs (in cwd):
 #   Image.bz2
 #   rootfs.cpio.bz2
-#   prebuilt.sha1     -- two-line manifest in standard `sha1sum` format
+#   prebuilt.sha1     -- three-line manifest in sha1sum format. The
+#                        first two lines verify the published archives;
+#                        the third uses the virtual name 'inputs' to
+#                        publish the SHA-1 of the concatenated input
+#                        files so drift-detection consumers can read it
+#                        directly from the release.
 #
 # Stdout (machine-readable, one assignment per line):
 #   kernel_sha1=
@@ -23,8 +28,8 @@
 
 set -euo pipefail
 
-# Pick a SHA1 tool. macOS dropped `sha1sum` from the base system; the
-# coreutils-style `shasum -a 1` is the portable fallback.
+# Pick a SHA1 tool. macOS dropped sha1sum from the base system; the
+# coreutils-style 'shasum -a 1' is the portable fallback.
 if command -v sha1sum >/dev/null 2>&1; then
     SHA1=(sha1sum)
 elif command -v shasum >/dev/null 2>&1; then
@@ -35,7 +40,7 @@ else
 fi
 
 # Keep this list in sync with PREBUILT_INPUTS in mk/external.mk and the
-# `paths:` filter in .github/workflows/prebuilt.yml.
+# 'paths:' filter in .github/workflows/prebuilt.yml.
 INPUTS=(
     configs/linux.config
     configs/busybox.config
@@ -61,11 +66,15 @@ INITRD_SHA1=$("${SHA1[@]}" rootfs.cpio.bz2 | awk '{print $1}')
 # the make-time computation in mk/external.mk so they compare directly.
 INPUTS_SHA1=$(cat "${INPUTS[@]}" | "${SHA1[@]}" | awk '{print $1}')
 
-# Write the human-friendly checksum manifest. Format matches `sha1sum -c`
-# so the file works as input to that tool unchanged.
+# Write the manifest. The first two lines match 'sha1sum -c' format for
+# the real archives; the third line uses the virtual filename 'inputs'
+# to publish the input-fingerprint hash so consumers (mk/external.mk's
+# drift warning, .github/workflows/main.yml's PR drift detection) can
+# read it from the release without parsing the release-body markdown.
 {
     echo "$KERNEL_SHA1  Image.bz2"
     echo "$INITRD_SHA1  rootfs.cpio.bz2"
+    echo "$INPUTS_SHA1  inputs"
 } > prebuilt.sha1
 
 # Echo the manifest + inputs hash to stderr for visibility in CI logs
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index dba2dbb6..2681524e 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -40,7 +40,22 @@ jobs:
             echo "should_build=false" >> "$GITHUB_OUTPUT"
             exit 0
           fi
-          expected=$(awk '/^PREBUILT_INPUTS_SHA1/ {print $3}' mk/external.mk)
+          # The prebuilt's input-fingerprint lives on the release as the
+          # third line of prebuilt.sha1 (virtual name 'inputs'). Pull it
+          # from there instead of mirroring it in the source tree.
+          manifest_url="https://github.com/sysprog21/semu/releases/download/prebuilt/prebuilt.sha1"
+          curl --fail --silent --show-error --retry 3 --retry-delay 1 \
+            -L -o prebuilt.sha1 "$manifest_url"
+          expected=$(awk '$2 == "inputs" {print $1}' prebuilt.sha1)
+          if [ -z "$expected" ]; then
+            # Manifest predates the inputs line (transition window after
+            # this workflow lands). Skip drift detection until prebuilt.yml
+            # republishes with the new format; treat as no drift to avoid
+            # gratuitous 90-minute from-source rebuilds on every PR.
+            echo "manifest has no inputs hash yet; skipping drift detection"
+            echo "should_build=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
           live=$(cat \
             configs/linux.config \
             configs/busybox.config \
diff --git a/.github/workflows/prebuilt.yml b/.github/workflows/prebuilt.yml
index 5103613b..e22e600e 100644
--- a/.github/workflows/prebuilt.yml
+++ b/.github/workflows/prebuilt.yml
@@ -7,8 +7,10 @@ name: Publish prebuilt images
 #
 # Triggers automatically on master pushes that touch any input listed
 # in the paths filter below, and can be invoked manually via
-# workflow_dispatch. The resulting SHA1 sums must be reflected in
-# mk/external.mk (KERNEL_DATA_SHA1, INITRD_DATA_SHA1, PREBUILT_INPUTS_SHA1).
+# workflow_dispatch. The archive SHA1 sums and the input-fingerprint
+# hash are all read at make-time / CI-time from the prebuilt.sha1
+# manifest uploaded with the archives, so no source-tree edit is
+# needed on a republish.
 
 on:
   workflow_dispatch:
@@ -24,6 +26,7 @@ on:
       - 'scripts/build-image.sh'
       - 'scripts/rootfs_ext4.sh'
       - 'target/**'
+      - '.ci/publish-prebuilt.sh'
       - '.github/workflows/prebuilt.yml'
 
 permissions:
@@ -88,18 +91,10 @@ jobs:
             consumed by `mk/external.mk`. Re-published whenever any input
             that defines the kernel/rootfs content changes.
 
-            ## Update `mk/external.mk`
-
-            Paste these three lines into `mk/external.mk` to pin a fresh
-            `make` checkout to this build. `PREBUILT_INPUTS_SHA1` is what
-            local checkouts compare against to detect when their configs
-            have drifted from the prebuilt.
-
-            ```make
-            KERNEL_DATA_SHA1 = ${{ steps.checksum.outputs.kernel_sha1 }}
-            INITRD_DATA_SHA1 = ${{ steps.checksum.outputs.initrd_sha1 }}
-            PREBUILT_INPUTS_SHA1 = ${{ steps.checksum.outputs.inputs_sha1 }}
-            ```
+            All checksums (archive hashes plus the source-input
+            fingerprint used for drift detection) are published in the
+            `prebuilt.sha1` manifest below; consumers read it directly,
+            so no source-tree edit is needed on a republish.
 
             ## Raw checksums
 
diff --git a/.gitignore b/.gitignore
index 7aa23f79..f9f8f9d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ semu
 Image
 ext4.img
 rootfs.cpio
+prebuilt.sha1
 
 # intermediate
 riscv-harts.dtsi
diff --git a/Makefile b/Makefile
index eebfc59e..10fad261 100644
--- a/Makefile
+++ b/Makefile
@@ -362,7 +362,7 @@ distclean: clean
 	$(Q)$(RM) minimal.dtb
 	$(Q)$(RM) .dtb-config.stamp
 	$(Q)$(RM) .build-config.stamp
-	$(Q)$(RM) Image rootfs.cpio
+	$(Q)$(RM) Image rootfs.cpio prebuilt.sha1
 	$(Q)$(RM) ext4.img
 
 -include $(deps)
diff --git a/mk/external.mk b/mk/external.mk
index 0c932100..ec7e9139 100644
--- a/mk/external.mk
+++ b/mk/external.mk
@@ -1,24 +1,43 @@
 # For each external target, the following must be defined in advance:
 #   _DATA_URL : the hyperlink which points to archive.
 #   _DATA : the file to be read by specific executable.
-#   _DATA_SHA1 : the checksum of the content in _DATA
 #
 # Artifacts are published as assets on the fixed-tag prebuilt GitHub
-# prerelease by .github/workflows/prebuilt.yml. Update the SHA1 values
-# below from the release body whenever the workflow republishes a new
-# build.
+# prerelease by .github/workflows/prebuilt.yml. The expected SHA-1 of
+# each archive is read from the prebuilt.sha1 manifest published
+# alongside the archives, so checksum updates require no edit here.
 
 COMMON_URL = https://github.com/sysprog21/semu/releases/download/prebuilt
 
+PREBUILT_MANIFEST = prebuilt.sha1
+PREBUILT_MANIFEST_URL = $(COMMON_URL)/$(PREBUILT_MANIFEST)
+
 # kernel
 KERNEL_DATA_URL = $(COMMON_URL)/Image.bz2
 KERNEL_DATA = Image
-KERNEL_DATA_SHA1 = 39d273097f21a1bf38fd93b96a3d7459f843bc84
 
 # initrd
 INITRD_DATA_URL = $(COMMON_URL)/rootfs.cpio.bz2
 INITRD_DATA = rootfs.cpio
-INITRD_DATA_SHA1 = 9df154cdf58103e953ccdf0d40736cadf9318b12
+
+$(PREBUILT_MANIFEST): FORCE
+	$(VECHO) "  GET\t$@\n"
+	$(Q)if curl --fail --retry 3 --retry-delay 1 --progress-bar \
+	        -L -o "$@.part" "$(PREBUILT_MANIFEST_URL)"; then \
+	    if [ -f "$@" ] && cmp -s "$@" "$@.part"; then \
+	        rm -f "$@.part"; \
+	    else \
+	        mv "$@.part" "$@"; \
+	    fi; \
+	else \
+	    rm -f "$@.part"; \
+	    if [ -f "$@" ]; then \
+	        $(PRINTF) "  KEEP\t$@ (offline; using cached manifest)\n"; \
+	    else \
+	        echo "manifest fetch failed and no cached manifest; cannot proceed" >&2; \
+	        exit 1; \
+	    fi; \
+	fi
 
 define download
 # Download to a .part file so an interrupted curl never lands a
@@ -29,16 +48,19 @@ define download
 # HTTP 416, and curl exits non-zero, a permanent self-inflicted
 # deadlock. These files are 5 to 7 MiB; a fresh GET is cheap.
 #
-# Decompress to a .tmp file and rename only on success, so an
-# interrupted bunzip2 cannot leave a half-decompressed Image or
-# rootfs.cpio that make would treat as a valid up-to-date target on the
-# next invocation.
-$($(T)_DATA):
+# Look up the expected SHA-1 by archive basename in the release
+# manifest, then verify the .part against it. Decompress to a .tmp
+# file and rename only on success, so an interrupted bunzip2 cannot
+# leave a half-decompressed Image or rootfs.cpio that make would treat
+# as a valid up-to-date target on the next invocation.
+$($(T)_DATA): $(PREBUILT_MANIFEST) | prebuilt-check
 	$(VECHO) "  GET\t$$@\n"
 	$(Q)curl --fail --retry 3 --retry-delay 1 --progress-bar \
 	    -L -o "$$@.bz2.part" "$(strip $($(T)_DATA_URL))" \
 	    || { rm -f "$$@.bz2.part"; exit 1; }
-	$(Q)echo "$(strip $$($(T)_DATA_SHA1))  $$@.bz2.part" | $(SHA1SUM) -c - \
+	$(Q)expected=$$$$(awk -v f="$(notdir $($(T)_DATA_URL))" '$$$$2==f{print $$$$1}' $(PREBUILT_MANIFEST)); \
+	    [ -n "$$$$expected" ] || { echo "verify: no $(notdir $($(T)_DATA_URL)) entry in $(PREBUILT_MANIFEST)" >&2; rm -f "$$@.bz2.part"; exit 1; }; \
+	    echo "$$$$expected  $$@.bz2.part" | $(SHA1SUM) -c - \
 	    || { rm -f "$$@.bz2.part"; exit 1; }
 	$(Q)mv "$$@.bz2.part" "$$@.bz2"
 	$(Q)bunzip2 -c "$$@.bz2" > "$$@.tmp" \
@@ -56,8 +78,9 @@ $(foreach T,$(EXTERNAL_DATA),$(eval $(download)))
 # input files (kernel/buildroot/busybox configs, the build script, and
 # the init stub). When any of those change locally the prebuilt may no
 # longer reflect the user's intent, so we compute the SHA1 of those
-# inputs and compare against PREBUILT_INPUTS_SHA1, the value the
-# Publish prebuilt images workflow recorded for the live release.
+# inputs and compare against the publisher's recorded inputs hash --
+# the third line of prebuilt.sha1, written by .ci/publish-prebuilt.sh
+# under the virtual name 'inputs'.
 #
 # Mismatch -> warn but do not auto-rebuild: a buildroot run takes the
 # better part of an hour, so we let the user opt in via make build-image.
@@ -71,29 +94,22 @@ PREBUILT_INPUTS := \
     scripts/rootfs_ext4.sh \
     target/init
 
-PREBUILT_INPUTS_SHA1 = 1ae09da49a6d7ce44e10d04a682950b295b3b77c
-
-# Compute the live hash only when *every* input file exists. A partial
-# tree would otherwise silently hash the present subset and trip a bogus
-# "stale" warning instead of the more useful "your tree is incomplete"
-# signal. The shell-side count compare is portable across BSD/GNU.
-LIVE_INPUTS_SHA1 := $(shell \
-    expected=$(words $(PREBUILT_INPUTS)); \
-    found=0; \
-    for f in $(PREBUILT_INPUTS); do [ -f "$$f" ] && found=$$((found + 1)); done; \
-    if [ "$$found" -eq "$$expected" ]; then \
-        cat $(PREBUILT_INPUTS) | $(SHA1SUM) | awk '{print $$1}'; \
-    fi)
-
-# Skip the comparison until PREBUILT_INPUTS_SHA1 is real (the all-zero
-# placeholder is the bootstrap state before the first prebuilt run).
-ifneq ($(PREBUILT_INPUTS_SHA1),0000000000000000000000000000000000000000)
-ifneq ($(LIVE_INPUTS_SHA1),)
-ifneq ($(LIVE_INPUTS_SHA1),$(PREBUILT_INPUTS_SHA1))
-$(warning Local kernel/rootfs inputs ($(LIVE_INPUTS_SHA1)) differ from)
-$(warning the prebuilt's recorded inputs ($(PREBUILT_INPUTS_SHA1)).)
-$(warning The downloaded Image/rootfs.cpio do not reflect your local)
-$(warning configs. Run `make build-image` to rebuild from source.)
-endif
-endif
-endif
+# Read the publisher's inputs hash from the downloaded manifest at
+# recipe time, after the manifest refresh above has had a chance to run.
+.PHONY: prebuilt-check
+prebuilt-check: $(PREBUILT_MANIFEST)
+	$(Q)manifest_sha1=$$(awk '$$2 == "inputs" {print $$1}' $(PREBUILT_MANIFEST)); \
+	if [ -n "$$manifest_sha1" ]; then \
+	    expected=$(words $(PREBUILT_INPUTS)); \
+	    found=0; \
+	    for f in $(PREBUILT_INPUTS); do [ -f "$$f" ] && found=$$((found + 1)); done; \
+	    if [ "$$found" -eq "$$expected" ]; then \
+	        live_sha1=$$(cat $(PREBUILT_INPUTS) | $(SHA1SUM) | awk '{print $$1}'); \
+	        if [ "$$live_sha1" != "$$manifest_sha1" ]; then \
+	            echo "warning: Local kernel/rootfs inputs ($$live_sha1) differ from" >&2; \
+	            echo "warning: the prebuilt's recorded inputs ($$manifest_sha1)." >&2; \
+	            echo "warning: The downloaded Image/rootfs.cpio do not reflect your local" >&2; \
+	            echo "warning: configs. Run \`make build-image\` to rebuild from source." >&2; \
+	        fi; \
+	    fi; \
+	fi