diff --git a/.ci/common.sh b/.ci/common.sh index 0aab566e..e6f88cf3 100755 --- a/.ci/common.sh +++ b/.ci/common.sh @@ -7,9 +7,6 @@ set -euo pipefail MACHINE_TYPE="$(uname -m)" OS_TYPE="$(uname -s)" -# Enable SDL headless mode explicitly. -export SDL_VIDEODRIVER=offscreen - # Cleanup function - kills all semu processes cleanup() { sleep 1 diff --git a/.ci/publish-prebuilt.sh b/.ci/publish-prebuilt.sh index 2eed5784..d4eeaeaf 100755 --- a/.ci/publish-prebuilt.sh +++ b/.ci/publish-prebuilt.sh @@ -1,22 +1,24 @@ #!/usr/bin/env bash # -# Compress the prebuilt Image and rootfs.cpio in cwd, write a sha1 -# manifest, hash the input files that define the prebuilt's contents, -# and print all three sums in KEY=VAL form on stdout so callers can -# splice them into release notes, GITHUB_OUTPUT, or whatever else. +# Compress the prebuilt Image, rootfs.cpio, and test-tools.img in cwd, write a +# sha1 manifest, hash the input files that define the prebuilt's contents, and +# print all four sums in KEY=VAL form on stdout so callers can splice them into +# release notes, GITHUB_OUTPUT, or whatever else. # # Inputs (in cwd): # Image # rootfs.cpio +# test-tools.img # plus the source inputs listed in INPUTS below (config + scripts + -# target/init that define the buildroot/kernel content) +# target files that define the prebuilt content) # # Outputs (in cwd): # Image.bz2 # rootfs.cpio.bz2 -# prebuilt.sha1 -- three-line manifest in sha1sum format. The -# first two lines verify the published archives; -# the third uses the virtual name 'inputs' to +# test-tools.img.bz2 +# prebuilt.sha1 -- four-line manifest in sha1sum format. The +# first three lines verify the published archives; +# the fourth uses the virtual name 'inputs' to # publish the SHA-1 of the concatenated input # files so drift-detection consumers can read it # directly from the release. 
@@ -24,6 +26,7 @@ # Stdout (machine-readable, one assignment per line): # kernel_sha1= # initrd_sha1= +# test_tools_sha1= # inputs_sha1= set -euo pipefail @@ -45,35 +48,41 @@ INPUTS=( configs/linux.config configs/busybox.config configs/buildroot.config + configs/x11.config + configs/riscv-cross-file scripts/build-image.sh scripts/rootfs_ext4.sh target/init + target/local-env.sh ) -for f in Image rootfs.cpio "${INPUTS[@]}"; do +for f in Image rootfs.cpio test-tools.img "${INPUTS[@]}"; do if [ ! -f "$f" ]; then - echo "[!] Missing $f -- run scripts/build-image.sh --all first" >&2 + echo "[!] Missing $f -- run scripts/build-image.sh --all --x11 --directfb2-test first" >&2 exit 1 fi done bzip2 -k -f Image bzip2 -k -f rootfs.cpio +bzip2 -k -f test-tools.img KERNEL_SHA1=$("${SHA1[@]}" Image.bz2 | awk '{print $1}') INITRD_SHA1=$("${SHA1[@]}" rootfs.cpio.bz2 | awk '{print $1}') +TEST_TOOLS_SHA1=$("${SHA1[@]}" test-tools.img.bz2 | awk '{print $1}') # Concatenate inputs in deterministic order and hash the stream. Matches # the make-time computation in mk/external.mk so they compare directly. INPUTS_SHA1=$(cat "${INPUTS[@]}" | "${SHA1[@]}" | awk '{print $1}') -# Write the manifest. The first two lines match 'sha1sum -c' format for -# the real archives; the third line uses the virtual filename 'inputs' +# Write the manifest. The first three lines match 'sha1sum -c' format for +# the real archives; the fourth line uses the virtual filename 'inputs' # to publish the input-fingerprint hash so consumers (mk/external.mk's # drift warning, .github/workflows/main.yml's PR drift detection) can # read it from the release without parsing the release-body markdown. 
{ echo "$KERNEL_SHA1 Image.bz2" echo "$INITRD_SHA1 rootfs.cpio.bz2" + echo "$TEST_TOOLS_SHA1 test-tools.img.bz2" echo "$INPUTS_SHA1 inputs" } > prebuilt.sha1 @@ -86,4 +95,5 @@ INPUTS_SHA1=$(cat "${INPUTS[@]}" | "${SHA1[@]}" | awk '{print $1}') echo "kernel_sha1=$KERNEL_SHA1" echo "initrd_sha1=$INITRD_SHA1" +echo "test_tools_sha1=$TEST_TOOLS_SHA1" echo "inputs_sha1=$INPUTS_SHA1" diff --git a/.ci/test-gpu.sh b/.ci/test-gpu.sh new file mode 100755 index 00000000..cda6762d --- /dev/null +++ b/.ci/test-gpu.sh @@ -0,0 +1,185 @@ +#!/usr/bin/env bash + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +. "${SCRIPT_DIR}/common.sh" + +# Override timeout and sleep duration for macOS - emulation is significantly slower +case "${OS_TYPE}" in + Darwin) + TIMEOUT=10800 + DFB_SLEEP=180 + ;; + *) + DFB_SLEEP=5 + ;; +esac +export DFB_SLEEP +SEMU_DIRECTFB2_TEST="${SEMU_DIRECTFB2_TEST:-1}" +export SEMU_DIRECTFB2_TEST +MAKE_CHECK_DISKIMG_ARG="" + +cleanup +trap cleanup EXIT + +# Feature toggles are passed through environment variables, which do not +# participate in normal dependency tracking by 'make'. Force a rebuild here so +# one-feature-at-a-time test runs never reuse a stale 'semu' binary or DTB. +make -B semu minimal.dtb + +if [ ! -f Image ] || [ ! -f rootfs.cpio ]; then + make Image rootfs.cpio +fi +if [[ "${SEMU_DIRECTFB2_TEST}" == "1" ]]; then + # The default ext4.img is intentionally small. DirectFB2 lives in the + # optional test tools disk, which is supplied by PR-built artifacts or downloaded + # like the other prebuilt artifacts. + if [ ! -f test-tools.img ]; then + make test-tools.img + fi + MAKE_CHECK_DISKIMG_ARG="DISKIMG_FILE=test-tools.img" +elif [ ! -f ext4.img ]; then + make ext4.img +fi +export MAKE_CHECK_DISKIMG_ARG + +# NOTE: We want to capture the 'expect' exit code and map +# it to our 'MESSAGES' array for meaningful error output. +# Temporarily disable 'errexit' for the 'expect' call. 
+set +e +expect <<'DONE' +set timeout $env(TIMEOUT) +if {$env(MAKE_CHECK_DISKIMG_ARG) eq ""} { + spawn make check +} else { + spawn make check $env(MAKE_CHECK_DISKIMG_ARG) +} + +# Boot and login +expect "buildroot login:" { send "root\r" } timeout { exit 1 } +expect "# " { send "uname -a\r" } timeout { exit 2 } +expect "riscv32 GNU/Linux" {} + +# ---------------- virtio-gpu basic checks ---------------- +expect "# " { send "ls -la /dev/dri/ 2>/dev/null || true\r" } +# Emit a shell-expanded status marker so 'expect' cannot match the echoed command. +expect "# " { send "if test -c /dev/dri/card0; then status=OK; else status=MISSING; fi; printf \"__VGPU_DRM_%s__\\n\" \"\$status\"\r" } timeout { exit 3 } +expect { + -exact "__VGPU_DRM_OK__" {} + -exact "__VGPU_DRM_MISSING__" { exit 3 } + timeout { exit 3 } +} + +# virtio transport may be 'virtio-mmio', binding check should look at the +# 'virtio_gpu' driver directory. +expect "# " { + send "sh -lc 'if ls /sys/bus/virtio/drivers/virtio_gpu/virtio* >/dev/null 2>&1; then status=OK; else status=BAD; fi; printf \"__VGPU_BIND_%s__\\n\" \"\u0024status\"'\r" +} timeout { exit 3 } +expect { + -exact "__VGPU_BIND_OK__" {} + -exact "__VGPU_BIND_BAD__" { + send "ls -l /sys/bus/virtio/drivers/virtio_gpu/ 2>/dev/null || true\r" + # Emit literal '$d' via '\u0024' to avoid Tcl variable substitution. 
+ send "sh -lc 'for d in /sys/bus/virtio/devices/virtio*; do echo \u0024d; ls -l \u0024d/driver 2>/dev/null || true; done'\r" + exit 3 + } + timeout { exit 3 } +} + +# Useful logs (non-fatal) +expect "# " { send "dmesg | grep -Ei 'virtio.*gpu|drm.*virtio|scanout|number of scanouts' | tail -n 80 || true\r" } + +if {$env(SEMU_DIRECTFB2_TEST) ne "1"} { + exit 0 +} + +# ---------------- DirectFB2 ---------------- +# Strategy: +# 1) Stop X11 if running (it holds the DRM device) +# 2) Check 'local-env.sh' exists at '/root/local-env.sh' +# 3) Source 'local-env.sh' to set 'PATH'/'LD_LIBRARY_PATH' +# 4) Verify 'df_drivertest' is in 'PATH' +# 5) Run 'df_drivertest' and check for DirectFB init messages +# +# NOTE: 'df_drivertest' may segfault when killed due to a race condition in +# DirectFB2's fusion module ('libfusion') during signal handling. When 'SIGTERM' +# is sent, the signal handler starts cleanup while the "Fusion Dispatch" thread +# may still be accessing shared state, leading to a use-after-free crash. The +# test passes if DirectFB init messages appear, even if the program crashes +# afterward during cleanup. + +# Step 0: Stop X11 to release DRM device (it holds '/dev/dri/card0') +# Use 'pidof' with fallback to 'ps'/'grep' if 'pidof' is unavailable. The awk field reference is backslash-escaped so the inner 'sh' does not expand it. +expect "# " { + send "sh -lc '\ + if command -v pidof >/dev/null 2>&1; then \ + pidof Xorg >/dev/null 2>&1 && kill \u0024(pidof Xorg) 2>/dev/null || true; \ + else \ + ps | grep Xorg | grep -v grep | awk \"{print \\\u00241}\" | xargs kill 2>/dev/null || true; \ + fi; \ + sleep 1; printf \"__X11_%s__\\n\" STOPPED'\r" +} +expect { + -exact "__X11_STOPPED__" {} + timeout { exit 4 } +} + +# Step 1: Check 'local-env.sh' exists. +expect "# " { send "if test -f /root/local-env.sh; then status=OK; else status=MISSING; fi; printf \"__LOCALENV_%s__\\n\" \"\$status\"\r" } +expect { + -exact "__LOCALENV_OK__" {} + -exact "__LOCALENV_MISSING__" { exit 4 } + timeout { exit 4 } +} + +# Step 2: Source 'local-env.sh'. 
+expect "# " { send "if . /root/local-env.sh >/dev/null 2>&1; then status=DONE; else status=FAIL; fi; printf \"__SRC_%s__\\n\" \"\$status\"\r" } +expect { + -exact "__SRC_DONE__" {} + -exact "__SRC_FAIL__" { exit 4 } + timeout { exit 4 } +} + +# Step 3: Verify 'df_drivertest' is available. +expect "# " { send "if command -v df_drivertest >/dev/null 2>&1; then status=OK; else status=MISS; fi; printf \"__APP_%s__\\n\" \"\$status\"\r" } +expect { + -exact "__APP_OK__" {} + -exact "__APP_MISS__" { exit 4 } + timeout { exit 4 } +} + +# Step 4: Run 'df_drivertest' and check output (run in background, kill after +# delay). +expect "# " { send "df_drivertest >/tmp/dfb.log 2>&1 & sleep $env(DFB_SLEEP); kill \u0024! 2>/dev/null; head -30 /tmp/dfb.log\r" } +# Check for 'DRMKMS' init message. +expect "# " { send "if grep -qi 'DRMKMS/System' /tmp/dfb.log; then status=OK; else status=FAIL; fi; printf \"__DFB_%s__\\n\" \"\$status\"\r" } +expect { + -exact "__DFB_OK__" {} + -exact "__DFB_FAIL__" { exit 4 } + timeout { exit 4 } +} +DONE + +ret="$?" +set -e # Re-enable 'errexit' after capturing 'expect' return code. 
+ +if [[ "${ret}" -eq 0 ]]; then + if [[ "${SEMU_DIRECTFB2_TEST}" == "1" ]]; then + print_success "PASS: headless virtio-gpu + DirectFB2 checks" + else + print_success "PASS: headless virtio-gpu checks" + fi + exit 0 +fi + +MESSAGES=( + "unused" + "FAIL: boot/login prompt not found" + "FAIL: shell prompt not found" + "FAIL: virtio-gpu basic checks failed (/dev/dri/card0 or virtio_gpu binding)" + "FAIL: DirectFB2 check failed (local-env.sh/df_drivertest missing or no DRMKMS init messages)" +) + +print_error "${MESSAGES[${ret}]:-FAIL: unknown error (exit code ${ret})}" +exit "${ret}" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2681524e..a651fde5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,8 +10,8 @@ permissions: contents: read jobs: - # PR-only: rebuild Image/rootfs.cpio from source when local kernel - # and rootfs inputs have drifted from the prebuilt's recorded inputs. + # PR-only: rebuild or restore Image/rootfs.cpio/test-tools.img when local + # guest-artifact inputs have drifted from the prebuilt's recorded inputs. # Without this, make check on a PR that touches configs, scripts, or # target would silently exercise the stale prebuilt instead of the # contributor's actual change. 
@@ -60,30 +60,45 @@ jobs: configs/linux.config \ configs/busybox.config \ configs/buildroot.config \ + configs/x11.config \ + configs/riscv-cross-file \ scripts/build-image.sh \ scripts/rootfs_ext4.sh \ target/init \ + target/local-env.sh \ | sha1sum | awk '{print $1}') + echo "live_hash=$live" >> "$GITHUB_OUTPUT" if [ "$live" = "$expected" ]; then echo "PR inputs match the prebuilt ($live); skipping rebuild" echo "should_build=false" >> "$GITHUB_OUTPUT" else - echo "PR inputs drifted ($live != $expected); will rebuild from source" + echo "PR inputs drifted ($live != $expected); will use PR artifacts" echo "should_build=true" >> "$GITHUB_OUTPUT" fi - - name: install build dependencies + - name: cache PR-built artifacts if: steps.detect.outputs.should_build == 'true' + id: pr_artifact_cache + uses: actions/cache@v4 + with: + path: | + Image + rootfs.cpio + test-tools.img + key: pr-prebuilt-${{ runner.os }}-${{ steps.detect.outputs.live_hash }} + - name: install build dependencies + if: steps.detect.outputs.should_build == 'true' && steps.pr_artifact_cache.outputs.cache-hit != 'true' run: | sudo apt-get update sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \ build-essential \ bc bison flex cpio fakeroot e2fsprogs \ - git python3 libssl-dev libelf-dev wget + git python3 libssl-dev libelf-dev wget \ + meson ninja-build pkg-config timeout-minutes: 5 - - name: build kernel and rootfs from source - if: steps.detect.outputs.should_build == 'true' - run: ./scripts/build-image.sh --all - timeout-minutes: 90 + - name: build kernel, rootfs, and test tools disk from source + if: steps.detect.outputs.should_build == 'true' && steps.pr_artifact_cache.outputs.cache-hit != 'true' + run: ./scripts/build-image.sh --all --x11 --directfb2-test + timeout-minutes: 180 - name: upload PR-built artifacts if: steps.detect.outputs.should_build == 'true' uses: actions/upload-artifact@v4 @@ -92,6 +107,7 @@ jobs: path: | Image rootfs.cpio + test-tools.img retention-days: 1 
if-no-files-found: error @@ -110,16 +126,16 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive - # Drift PR: pull the freshly-built Image/rootfs.cpio from + # Drift PR: pull the freshly-built Image/rootfs.cpio/test-tools.img from # pr-prebuilt-build via workflow artifact (reliable, not cache). - - name: download PR-built kernel/rootfs + - name: download PR-built external artifacts if: needs.pr-prebuilt-build.outputs.should_build == 'true' uses: actions/download-artifact@v4 with: name: prebuilt-pr # Non-drift PR or master push: cache the release-downloaded artifacts # across runs. Include mk/external.mk so checksum or input-pin bumps - # after a republish invalidate the old Image/rootfs.cpio pair. + # after a republish invalidate the old external artifacts. - name: cache external downloads if: needs.pr-prebuilt-build.outputs.should_build != 'true' uses: actions/cache@v4 @@ -127,7 +143,8 @@ jobs: path: | Image rootfs.cpio - key: external-${{ hashFiles('mk/external.mk', 'configs/linux.config', 'configs/busybox.config', 'configs/buildroot.config', 'scripts/build-image.sh', 'scripts/rootfs_ext4.sh', 'target/**') }} + test-tools.img + key: external-${{ hashFiles('mk/external.mk', 'configs/linux.config', 'configs/busybox.config', 'configs/buildroot.config', 'configs/x11.config', 'configs/riscv-cross-file', 'scripts/build-image.sh', 'scripts/rootfs_ext4.sh', 'target/**') }} - name: cache submodule builds uses: actions/cache@v4 with: @@ -171,6 +188,10 @@ jobs: run: .ci/test-vinput.sh shell: bash timeout-minutes: 5 + - name: virtio-gpu test + run: .ci/test-gpu.sh + shell: bash + timeout-minutes: 10 # Guard the legacy initramfs path so it does not bitrot now that the # default boot mode is /dev/vda. 
Single slim job: fresh build with @@ -183,7 +204,7 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive - - name: download PR-built kernel/rootfs + - name: download PR-built external artifacts if: needs.pr-prebuilt-build.outputs.should_build == 'true' uses: actions/download-artifact@v4 with: @@ -195,7 +216,8 @@ jobs: path: | Image rootfs.cpio - key: external-${{ hashFiles('mk/external.mk', 'configs/linux.config', 'configs/busybox.config', 'configs/buildroot.config', 'scripts/build-image.sh', 'scripts/rootfs_ext4.sh', 'target/**') }} + test-tools.img + key: external-${{ hashFiles('mk/external.mk', 'configs/linux.config', 'configs/busybox.config', 'configs/buildroot.config', 'configs/x11.config', 'configs/riscv-cross-file', 'scripts/build-image.sh', 'scripts/rootfs_ext4.sh', 'target/**') }} - name: cache submodule builds uses: actions/cache@v4 with: @@ -234,7 +256,7 @@ jobs: # pr-prebuilt-build's artifact (built on a linux runner) -- a cache # miss here would silently fall back to downloading the stale # release, defeating the whole point of the drift-detection logic. 
- - name: download PR-built kernel/rootfs + - name: download PR-built external artifacts if: needs.pr-prebuilt-build.outputs.should_build == 'true' uses: actions/download-artifact@v4 with: @@ -246,7 +268,8 @@ jobs: path: | Image rootfs.cpio - key: external-${{ hashFiles('mk/external.mk', 'configs/linux.config', 'configs/busybox.config', 'configs/buildroot.config', 'scripts/build-image.sh', 'scripts/rootfs_ext4.sh', 'target/**') }} + test-tools.img + key: external-${{ hashFiles('mk/external.mk', 'configs/linux.config', 'configs/busybox.config', 'configs/buildroot.config', 'configs/x11.config', 'configs/riscv-cross-file', 'scripts/build-image.sh', 'scripts/rootfs_ext4.sh', 'target/**') }} - name: cache submodule builds uses: actions/cache@v4 with: @@ -288,6 +311,10 @@ jobs: run: .ci/test-vinput.sh shell: bash timeout-minutes: 20 + - name: virtio-gpu test + run: .ci/test-gpu.sh + shell: bash + timeout-minutes: 20 coding_style: runs-on: ubuntu-24.04 diff --git a/.github/workflows/prebuilt.yml b/.github/workflows/prebuilt.yml index e22e600e..5b52ac44 100644 --- a/.github/workflows/prebuilt.yml +++ b/.github/workflows/prebuilt.yml @@ -1,9 +1,9 @@ name: Publish prebuilt images -# Builds the Linux kernel and Buildroot rootfs that the rest of CI and -# make on a fresh checkout consumes, then publishes them as assets on a -# fixed-tag GitHub prerelease so the download URL stays stable across -# rebuilds, keeping large binary artifacts out of the source tree. +# Builds the Linux kernel, Buildroot rootfs, and optional test tools disk that the +# rest of CI and make on a fresh checkout consume, then publishes them as +# assets on a fixed-tag GitHub prerelease so the download URL stays stable +# across rebuilds, keeping large binary artifacts out of the source tree. 
# # Triggers automatically on master pushes that touch any input listed # in the paths filter below, and can be invoked manually via @@ -23,6 +23,8 @@ on: - 'configs/linux.config' - 'configs/busybox.config' - 'configs/buildroot.config' + - 'configs/x11.config' + - 'configs/riscv-cross-file' - 'scripts/build-image.sh' - 'scripts/rootfs_ext4.sh' - 'target/**' @@ -59,10 +61,13 @@ jobs: python3 \ libssl-dev \ libelf-dev \ - wget + wget \ + meson \ + ninja-build \ + pkg-config - - name: Build Buildroot and Linux - run: ./scripts/build-image.sh --all + - name: Build Buildroot, Linux, and test tools disk + run: ./scripts/build-image.sh --all --x11 --directfb2-test - name: Compress and checksum artifacts id: checksum @@ -71,7 +76,7 @@ jobs: run: | set -euo pipefail .ci/publish-prebuilt.sh >> "$GITHUB_OUTPUT" - ls -la Image.bz2 rootfs.cpio.bz2 prebuilt.sha1 + ls -la Image.bz2 rootfs.cpio.bz2 test-tools.img.bz2 prebuilt.sha1 - name: Update prebuilt prerelease uses: softprops/action-gh-release@v2 @@ -85,11 +90,13 @@ jobs: files: | Image.bz2 rootfs.cpio.bz2 + test-tools.img.bz2 prebuilt.sha1 body: | - Rolling prerelease of the Linux kernel and Buildroot rootfs - consumed by `mk/external.mk`. Re-published whenever any - input that defines the kernel/rootfs content changes. + Rolling prerelease of the Linux kernel, Buildroot rootfs, + and optional test tools disk consumed by `mk/external.mk`. + Re-published whenever any input that defines the prebuilt + content changes. 
All checksums (archive hashes plus the source-input fingerprint used for drift detection) are published in the @@ -101,5 +108,6 @@ jobs: ``` ${{ steps.checksum.outputs.kernel_sha1 }} Image.bz2 ${{ steps.checksum.outputs.initrd_sha1 }} rootfs.cpio.bz2 - ${{ steps.checksum.outputs.inputs_sha1 }} inputs (configs + scripts + target/init, concatenated) + ${{ steps.checksum.outputs.test_tools_sha1 }} test-tools.img.bz2 + ${{ steps.checksum.outputs.inputs_sha1 }} inputs (configs + scripts + target files, concatenated) ``` diff --git a/.gitignore b/.gitignore index f9f8f9d3..f387cb41 100644 --- a/.gitignore +++ b/.gitignore @@ -7,14 +7,23 @@ semu *.dtb Image ext4.img +test-tools.img rootfs.cpio prebuilt.sha1 # intermediate riscv-harts.dtsi .smp_stamp +.dtb-config.stamp +.build-config.stamp # Build directories buildroot/ linux/ rootfs/ +directfb/ +extra_packages/ + +# DirectFB build +DirectFB2/ +DirectFB-examples/ diff --git a/Makefile b/Makefile index 10fad261..cd3347a8 100644 --- a/Makefile +++ b/Makefile @@ -191,7 +191,8 @@ ifeq ($(ENABLE_SDL),1) CFLAGS += $(shell sdl2-config --cflags) LDFLAGS += $(shell sdl2-config --libs) else - # Disable virtio-input if SDL is not set + # Disable window-backed virtio devices if SDL is not set. 
+ override ENABLE_VIRTIOGPU := 0 override ENABLE_VIRTIOINPUT := 0 endif @@ -203,6 +204,18 @@ $(call set-feature, VIRTIOINPUT) ifeq ($(call has, VIRTIOINPUT), 1) OBJS_EXTRA += virtio-input-event.o OBJS_EXTRA += virtio-input.o +endif + +# virtio-gpu +ENABLE_VIRTIOGPU ?= 1 +$(call set-feature, VIRTIOGPU) +ifeq ($(call has, VIRTIOGPU), 1) + OBJS_EXTRA += virtio-gpu.o + OBJS_EXTRA += virtio-gpu-sw.o + OBJS_EXTRA += vgpu-display.o +endif + +ifneq ($(filter 1,$(call has, VIRTIOGPU) $(call has, VIRTIOINPUT)),) OBJS_EXTRA += window-sw.o endif @@ -311,7 +324,7 @@ minimal.dtb: minimal.dts riscv-harts.dtsi .dtb-config.stamp .PHONY: FORCE FORCE: -# Rules for downloading prebuilt Linux kernel image +# Rules for downloading prebuilt guest artifacts include mk/external.mk ifeq ($(call has, EXTERNAL_ROOT), 1) @@ -347,8 +360,9 @@ check: $(BIN) minimal.dtb $(KERNEL_DATA) $(INITRD_DEP) $(DISKIMG_FILE) $(SHARED_ @$(call notice, Ready to launch Linux kernel. Please be patient.) $(Q)./$(BIN) -k $(KERNEL_DATA) -c $(SMP) -b minimal.dtb -H $(INITRD_OPT) $(if $(NETDEV),-n $(NETDEV)) $(OPTS) +BUILD_IMAGE_ARGS ?= --all build-image: - scripts/build-image.sh + scripts/build-image.sh $(BUILD_IMAGE_ARGS) clean: $(Q)$(RM) $(BIN) $(OBJS) $(deps) @@ -363,6 +377,6 @@ distclean: clean $(Q)$(RM) .dtb-config.stamp $(Q)$(RM) .build-config.stamp $(Q)$(RM) Image rootfs.cpio prebuilt.sha1 - $(Q)$(RM) ext4.img + $(Q)$(RM) ext4.img test-tools.img -include $(deps) diff --git a/README.md b/README.md index fd860103..e0ede423 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ A minimalist RISC-V system emulator capable of running Linux the kernel and corr - UART: 8250/16550 - PLIC (platform-level interrupt controller): 32 interrupts, no priority - Standard SBI, with the timer extension -- Four types of I/O support using VirtIO standard: +- I/O support using VirtIO standard: - virtio-blk acquires disk image from the host. - virtio-net is mapped as TAP interface. 
- virtio-snd uses [PortAudio](https://github.com/PortAudio/portaudio) for sound playback on the host with one limitations: @@ -19,7 +19,11 @@ A minimalist RISC-V system emulator capable of running Linux the kernel and corr - For instance, the following buffer/period size settings on `aplay` has been tested with broken and stutter effects yet complete with no any errors: `aplay --buffer-size=32768 --period-size=4096 /usr/share/sounds/alsa/Front_Center.wav`. - virtio-input exposes SDL-backed keyboard and mouse devices to the guest. - - You can exit the SDL window by pressing Ctrl+A+G + - virtio-gpu exposes a minimal 2D DRM/KMS device to the guest. Linux can + bind the `virtio_gpu` driver and create `/dev/dri/card0`. + - Only 2D scanout is currently supported; 3D, virgl, and blob resources + are not implemented yet. + - Press Ctrl+Alt+G to release the mouse cursor from the SDL window. ## Prerequisites @@ -54,11 +58,21 @@ Build the emulator: $ make ``` -Download prebuilt Linux kernel image: +Download the prebuilt guest artifacts and run the default check: ```shell $ make check ``` +With the default external-root build, `make check` uses `Image`, `minimal.dtb`, +and `ext4.img`, and boots `semu` headlessly with an equivalent command line: + +```shell +$ ./semu -k Image -c 1 -b minimal.dtb -H -d ext4.img +``` + +If `ENABLE_EXTERNAL_ROOT=0` is used, `make check` switches to the legacy +initramfs path and passes `-i rootfs.cpio` instead of `-d ext4.img`. + Please be patient while `semu` is running. Reference output: @@ -76,6 +90,32 @@ Enter `root` to access shell. You can exit the emulator using: \. (press Ctrl+A, leave it, afterwards press X) +To test virtio-gpu with a visible SDL window, run `semu` manually without `-H`. +Make sure `sdl2-config` is in `PATH`, then build the emulator, DTB, kernel, and +test tools disk. 
Press `Ctrl+Alt+G` to release the mouse cursor from the SDL +window: + +```shell +$ sdl2-config --version +$ make semu minimal.dtb Image test-tools.img +$ ./semu -k Image -c 1 -b minimal.dtb -d test-tools.img +``` + +Log in as `root`, source the test-tools image environment, and run one of +the DirectFB2 examples: + +``` +# . /root/local-env.sh +# df_drivertest +``` + +The installed DirectFB2 examples come from the upstream DirectFB-examples +project and can be listed in the guest with: + +``` +# ls /usr/local/bin/df_* +``` + ## Usage ```shell @@ -104,6 +144,10 @@ unpacking a large cpio, and matches how real systems deploy. The `ext4.img` is built from `rootfs.cpio` via `scripts/rootfs_ext4.sh`, which requires `fakeroot` and `mkfs.ext4`. +The rolling `prebuilt` release provides an optional `test-tools.img.bz2` for +larger test/user tools that should not inflate `rootfs.cpio` or the default +`ext4.img`. Use `make test-tools.img` to download it. + If `fakeroot` is missing, the build falls back to the legacy initramfs path (`-i rootfs.cpio`) automatically and prints a one-line warning. To force the legacy path explicitly: @@ -164,21 +208,23 @@ To build everything, simply run: $ make build-image ``` -This command invokes the underlying script: `scripts/build-image.sh`, which also offers more flexible usage options. +This command invokes the underlying script: `scripts/build-image.sh --all`, which also offers more flexible usage options. 
### Script Usage ``` -./scripts/build-image.sh [--buildroot] [--linux] [--all] [--no-ext4] [--clean-build] [--help] +./scripts/build-image.sh [--buildroot] [--linux] [--directfb2-test] [--all] [--no-ext4] [--clean-build] [--help] Options: --buildroot Build Buildroot userland (produces rootfs.cpio and, unless --no-ext4 is given, ext4.img for vda boot) + --directfb2-test Build test-tools.img with the DirectFB2 test payload --linux Build the Linux kernel --all Build both Buildroot and Linux --no-ext4 Skip ext4.img generation; produce only rootfs.cpio (matches the legacy ENABLE_EXTERNAL_ROOT=0 path) - --clean-build Remove buildroot/ and/or linux/ before building + --clean-build Remove buildroot/ and/or linux/ before building; + with --directfb2-test, also remove DirectFB2 build outputs --help Show this message ``` @@ -202,6 +248,28 @@ Build Buildroot for the legacy initramfs-only path (no ext4): $ scripts/build-image.sh --buildroot --no-ext4 ``` +`test-tools.img` is the shared optional disk for test payloads that should +not live in the default `rootfs.cpio` or `ext4.img`. This keeps the default +guest image small while still allowing larger tools to be collected in one +place. + +Build Buildroot and the test tools image with the DirectFB2 test payload. Add +`--x11` when the test tools image should use an X11-enabled rootfs: + +``` +$ scripts/build-image.sh --x11 --directfb2-test +``` + +To add a new test tool, extend the `test-tools.img` build path in +`scripts/build-image.sh` so the tool is staged into `extra_packages`, then +update `target/local-env.sh` if the tool needs an additional binary or library +search path. + +The build script copies `target/local-env.sh` to `/root/local-env.sh` in the +test tools image. After booting the VM, source it once to pick up paths such +as `/usr/local/bin` and `/usr/local/lib`, instead of running overlaid tools +through full paths like `/usr/local/bin/df_*`. 
+ Force a clean build: ``` diff --git a/configs/buildroot.config b/configs/buildroot.config index 315fccf5..6c9b179d 100644 --- a/configs/buildroot.config +++ b/configs/buildroot.config @@ -39,6 +39,8 @@ BR2_FORTIFY_SOURCE_1=y BR2_PACKAGE_ALSA_UTILS=y BR2_PACKAGE_ALSA_UTILS_APLAY=y BR2_PACKAGE_ALSA_UTILS_SPEAKER_TEST=y +BR2_PACKAGE_LIBDRM=y +# BR2_PACKAGE_LIBDRM_INSTALL_TESTS is not set # BR2_PACKAGE_URANDOM_SCRIPTS is not set BR2_TARGET_ROOTFS_CPIO=y BR2_TARGET_ROOTFS_CPIO_FULL=y diff --git a/configs/linux.config b/configs/linux.config index 3adeccda..0a32ab3c 100644 --- a/configs/linux.config +++ b/configs/linux.config @@ -911,13 +911,19 @@ CONFIG_MFD_SYSCON=y # # Graphics support # -# CONFIG_DRM is not set +CONFIG_DRM=y +CONFIG_DRM_KMS_HELPER=y # CONFIG_DRM_DEBUG_MODESET_LOCK is not set # # ARM devices # # end of ARM devices +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM_VIRTIO_GPU_KMS=y +CONFIG_DRM_PANEL=y +CONFIG_DRM_BRIDGE=y +CONFIG_DRM_PANEL_BRIDGE=y # # Frame buffer Devices @@ -1056,6 +1062,7 @@ CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y # CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set +CONFIG_VIRTIO_DMA_SHARED_BUFFER=y # CONFIG_VDPA is not set # CONFIG_VHOST_MENU is not set diff --git a/configs/riscv-cross-file b/configs/riscv-cross-file new file mode 100644 index 00000000..668f91f5 --- /dev/null +++ b/configs/riscv-cross-file @@ -0,0 +1,18 @@ +[binaries] + c = 'riscv32-buildroot-linux-gnu-gcc' + strip = 'riscv32-buildroot-linux-gnu-strip' + pkgconfig = 'pkg-config' + python = '/usr/bin/python3' + +[properties] + pkg_config_libdir = ['@GLOBAL_SOURCE_ROOT@' / '../buildroot/output/host/riscv32-buildroot-linux-gnu/sysroot/usr/local/lib/pkgconfig', + '@GLOBAL_SOURCE_ROOT@' / '../buildroot/output/host/riscv32-buildroot-linux-gnu/sysroot/usr/share/pkgconfig/', + '@GLOBAL_SOURCE_ROOT@' / '../buildroot/output/host/riscv32-buildroot-linux-gnu/sysroot/usr/lib/pkgconfig/' + ] + sys_root = '@GLOBAL_SOURCE_ROOT@' / 
'../buildroot/output/host/riscv32-buildroot-linux-gnu/sysroot' + +[host_machine] + system = 'linux' + cpu_family = 'riscv32' + cpu = 'riscv32-ima' + endian = 'little' diff --git a/configs/x11.config b/configs/x11.config new file mode 100644 index 00000000..3cab4aac --- /dev/null +++ b/configs/x11.config @@ -0,0 +1,35 @@ +BR2_TOOLCHAIN_BUILDROOT_CXX=y +BR2_INSTALL_LIBSTDCPP=y +BR2_PACKAGE_GLMARK2=y +BR2_PACKAGE_KMSCUBE=y +BR2_PACKAGE_MESA3D_DEMOS=y +BR2_PACKAGE_MESA3D=y +BR2_PACKAGE_MESA3D_GALLIUM_DRIVER=y +BR2_PACKAGE_MESA3D_DRIVER=y +BR2_PACKAGE_MESA3D_NEEDS_X11=y +BR2_PACKAGE_MESA3D_GALLIUM_DRIVER_SWRAST=y +BR2_PACKAGE_MESA3D_GALLIUM_DRIVER_VIRGL=n +BR2_PACKAGE_MESA3D_GBM=y +BR2_PACKAGE_MESA3D_OPENGL_GLX=y +BR2_PACKAGE_MESA3D_OPENGL_EGL=y +BR2_PACKAGE_MESA3D_OPENGL_ES=y +BR2_PACKAGE_PROVIDES_LIBGBM="mesa3d" +BR2_PACKAGE_XORG7=y +BR2_PACKAGE_XSERVER_XORG_SERVER=y +BR2_PACKAGE_XSERVER_XORG_SERVER_MODULAR=y +BR2_PACKAGE_XLIB_LIBX11=y +BR2_PACKAGE_XAPP_TWM=y +BR2_PACKAGE_XAPP_XAUTH=y +BR2_PACKAGE_XAPP_XCLOCK=y +BR2_PACKAGE_XAPP_XINIT=y +BR2_PACKAGE_XDRIVER_XF86_INPUT_LIBINPUT=y +BR2_PACKAGE_XTERM=y +BR2_PACKAGE_EUDEV=y +BR2_ROOTFS_DEVICE_CREATION_DYNAMIC_EUDEV=y +BR2_PACKAGE_PROVIDES_UDEV="eudev" +BR2_PACKAGE_HAS_UDEV=y +BR2_PACKAGE_LIBGLEW=y +BR2_PACKAGE_HAS_LIBGBM=y +BR2_PACKAGE_HAS_LIBGLES=y +BR2_PACKAGE_LIBINPUT=y +BR2_PACKAGE_LIBDRI2=y diff --git a/device.h b/device.h index 9d8d4cfc..02198be4 100644 --- a/device.h +++ b/device.h @@ -293,6 +293,60 @@ void virtio_input_drain_host_events(void); bool virtio_input_irq_pending(virtio_input_state_t *vinput); #endif /* SEMU_HAS(VIRTIOINPUT) */ +/* VirtIO-GPU */ + +#if SEMU_HAS(VIRTIOGPU) + +#define IRQ_VGPU 9 +#define IRQ_VGPU_BIT (1 << IRQ_VGPU) + +typedef struct { + uint32_t QueueNum; + uint32_t QueueDesc; + uint32_t QueueAvail; + uint32_t QueueUsed; + uint16_t last_avail; + bool ready; +} virtio_gpu_queue_t; + +typedef struct { + /* feature negotiation */ + uint32_t DeviceFeaturesSel; + uint32_t DriverFeatures; + 
uint32_t DriverFeaturesSel; + /* queue config */ + uint32_t QueueSel; + virtio_gpu_queue_t queues[2]; + /* status */ + uint32_t Status; + uint32_t InterruptStatus; + /* supplied by environment */ + uint32_t *ram; + /* implementation-specific */ + void *priv; +} virtio_gpu_state_t; + +void virtio_gpu_read(hart_t *vm, + virtio_gpu_state_t *vgpu, + uint32_t addr, + uint8_t width, + uint32_t *value); + +void virtio_gpu_write(hart_t *vm, + virtio_gpu_state_t *vgpu, + uint32_t addr, + uint8_t width, + uint32_t value); + +/* Initializes the process-wide virtio-gpu singleton. semu currently supports + * one in-process GPU instance; a second call is fatal. + */ +void virtio_gpu_init(virtio_gpu_state_t *vgpu); +uint32_t virtio_gpu_register_scanout(virtio_gpu_state_t *vgpu, + uint32_t width, + uint32_t height); +#endif /* SEMU_HAS(VIRTIOGPU) */ + /* ACLINT MTIMER */ typedef struct { /* A MTIMER device has two separate base addresses: one for the MTIME @@ -514,10 +568,6 @@ typedef struct { #if SEMU_HAS(VIRTIORNG) virtio_rng_state_t vrng; #endif - /* ACLINT */ - mtimer_state_t mtimer; - mswi_state_t mswi; - sswi_state_t sswi; #if SEMU_HAS(VIRTIOSND) virtio_snd_state_t vsnd; #endif @@ -527,21 +577,29 @@ typedef struct { #if SEMU_HAS(VIRTIOINPUT) virtio_input_state_t vkeyboard; virtio_input_state_t vmouse; - /* Use self-pipe trick to unblock the emulator loop when the - * window backend has queued work, such as input events or - * window shutdown. When all harts are idle, semu_run() calls - * poll(-1) and blocks indefinitely waiting for timer or UART - * events. The window-event thread has no way to wake that - * blocked poll() other than writing to a file descriptor it is +#endif +#if SEMU_HAS(VIRTIOGPU) + virtio_gpu_state_t vgpu; +#endif +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) + /* Use self-pipe trick to unblock the emulator loop when the window backend + * has queued work, such as input events or window shutdown. 
When all harts + * are idle, 'semu_run()' can call 'poll(-1)' and block indefinitely + * waiting for timer or UART events. The window-event thread has no way to + * wake that blocked 'poll()' other than writing to a file descriptor it is * watching. * - * wake_fd[0] (read end) is added to pfds[] so poll() monitors it. - * wake_fd[1] (write end) is handed to the window backend, which - * writes one byte when backend work arrives to make wake_fd[0] - * readable and return poll() immediately. + * 'wake_fd[0]' (read end) is added to 'pfds[]' so 'poll()' monitors it. + * 'wake_fd[1]' (write end) is handed to the window backend, which + * writes one byte when backend work arrives to make 'wake_fd[0]' + * readable and return 'poll()' immediately. */ int wake_fd[2]; #endif + /* ACLINT */ + mtimer_state_t mtimer; + mswi_state_t mswi; + sswi_state_t sswi; uint32_t peripheral_update_ctr; diff --git a/feature.h b/feature.h index 0ff4e1b0..bca97602 100644 --- a/feature.h +++ b/feature.h @@ -32,5 +32,10 @@ #define SEMU_FEATURE_EXTERNAL_ROOT 0 #endif +/* virtio-gpu */ +#ifndef SEMU_FEATURE_VIRTIOGPU +#define SEMU_FEATURE_VIRTIOGPU 1 +#endif + /* Feature test macro */ #define SEMU_HAS(x) SEMU_FEATURE_##x diff --git a/main.c b/main.c index 8e113a6f..a0b53334 100644 --- a/main.c +++ b/main.c @@ -28,10 +28,16 @@ #include "mini-gdbstub/include/gdbstub.h" #if SEMU_HAS(VIRTIOINPUT) #include "virtio-input-event.h" +#endif +#if SEMU_HAS(VIRTIOGPU) +#include "vgpu-display.h" +#endif +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) #include "window.h" #endif #include "riscv.h" #include "riscv_private.h" + #define PRIV(x) ((emu_state_t *) x->priv) /* Forward declarations for coroutine support */ @@ -139,6 +145,18 @@ static void emu_update_vinput_mouse_interrupts(vm_t *vm) } #endif +#if SEMU_HAS(VIRTIOGPU) +static void emu_update_vgpu_interrupts(vm_t *vm) +{ + emu_state_t *data = PRIV(vm->hart[0]); + if (data->vgpu.InterruptStatus) + data->plic.active |= IRQ_VGPU_BIT; + else + 
data->plic.active &= ~IRQ_VGPU_BIT; + plic_update_interrupts(vm, &data->plic); +} +#endif + static void emu_update_timer_interrupt(hart_t *hart) { emu_state_t *data = PRIV(hart); @@ -248,7 +266,8 @@ static inline void emu_tick_peripherals(emu_state_t *emu) if (virtio_input_irq_pending(&emu->vmouse)) emu_update_vinput_mouse_interrupts(vm); - +#endif +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) /* A closed window is treated like a frontend shutdown request. */ if (g_window.window_is_closed()) emu->stopped = true; @@ -320,12 +339,15 @@ static void mem_load(hart_t *hart, case 0x49: /* virtio-input keyboard */ virtio_input_read(hart, &data->vkeyboard, addr & 0xFFFFF, width, value); - emu_update_vinput_keyboard_interrupts(hart->vm); return; case 0x4A: /* virtio-input mouse */ virtio_input_read(hart, &data->vmouse, addr & 0xFFFFF, width, value); - emu_update_vinput_mouse_interrupts(hart->vm); + return; +#endif +#if SEMU_HAS(VIRTIOGPU) + case 0x4B: /* virtio-gpu */ + virtio_gpu_read(hart, &data->vgpu, addr & 0xFFFFF, width, value); return; #endif } @@ -414,6 +436,12 @@ static void mem_store(hart_t *hart, value); emu_update_vinput_mouse_interrupts(hart->vm); return; +#endif +#if SEMU_HAS(VIRTIOGPU) + case 0x4B: /* virtio-gpu */ + virtio_gpu_write(hart, &data->vgpu, addr & 0xFFFFF, width, value); + emu_update_vgpu_interrupts(hart->vm); + return; #endif } } @@ -848,7 +876,7 @@ static int semu_init(emu_state_t *emu, int argc, char **argv) handle_options(argc, argv, &kernel_file, &dtb_file, &initrd_file, &disk_file, &netdev, &hart_count, &debug, &headless, &shared_dir); -#if !SEMU_HAS(VIRTIOINPUT) +#if !SEMU_HAS(VIRTIOINPUT) && !SEMU_HAS(VIRTIOGPU) (void) headless; #endif @@ -993,25 +1021,48 @@ static int semu_init(emu_state_t *emu, int argc, char **argv) #endif #if SEMU_HAS(VIRTIOINPUT) - g_window.window_init(headless); - emu->vkeyboard.ram = emu->ram; virtio_input_init(&(emu->vkeyboard)); emu->vmouse.ram = emu->ram; virtio_input_init(&(emu->vmouse)); +#endif + +#if 
SEMU_HAS(VIRTIOGPU) + emu->vgpu.ram = emu->ram; + virtio_gpu_init(&(emu->vgpu)); + uint32_t scanout_id = + virtio_gpu_register_scanout(&(emu->vgpu), SCREEN_WIDTH, SCREEN_HEIGHT); + vgpu_display_set_scanout_count(scanout_id + 1U); +#endif + +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) + g_window.window_init(headless, SCREEN_WIDTH, SCREEN_HEIGHT); emu->wake_fd[0] = emu->wake_fd[1] = -1; if (vm->n_hart > 1 && g_window.window_main_loop) { if (pipe(emu->wake_fd) < 0) { - perror("pipe"); - return 2; + perror("failed to create emulator wake pipe"); + g_window.window_cleanup(); + return EXIT_FAILURE; } - /* Make the write end non-blocking so window_shutdown_sw() never - * stalls. Single-hart mode never blocks in poll(-1), so it does not - * need the wake pipe at all. + + /* Make the write end non-blocking so 'window_shutdown_sw()' never + * stalls. The read end remains blocking because 'semu_run()' reads it + * only after 'poll()' reports 'POLLIN' on the same emulator thread. */ - fcntl(emu->wake_fd[1], F_SETFL, O_NONBLOCK); + int flags = fcntl(emu->wake_fd[1], F_GETFL, 0); + if (flags < 0 || + fcntl(emu->wake_fd[1], F_SETFL, flags | O_NONBLOCK) < 0) { + perror( + "failed to configure emulator wake pipe write end as " + "non-blocking"); + close(emu->wake_fd[0]); + close(emu->wake_fd[1]); + emu->wake_fd[0] = emu->wake_fd[1] = -1; + g_window.window_cleanup(); + return EXIT_FAILURE; + } } #endif @@ -1028,6 +1079,14 @@ static int semu_init(emu_state_t *emu, int argc, char **argv) if (!coro_init(total_slots, vm->n_hart)) { fprintf(stderr, "Failed to initialize coroutine subsystem\n"); fflush(stderr); +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) + if (emu->wake_fd[0] >= 0) + close(emu->wake_fd[0]); + if (emu->wake_fd[1] >= 0) + close(emu->wake_fd[1]); + emu->wake_fd[0] = emu->wake_fd[1] = -1; + g_window.window_cleanup(); +#endif return 1; } @@ -1036,6 +1095,14 @@ static int semu_init(emu_state_t *emu, int argc, char **argv) if (!coro_create_hart(i, hart_exec_loop, 
vm->hart[i])) { fprintf(stderr, "Failed to create coroutine for hart %u\n", i); coro_cleanup(); +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) + if (emu->wake_fd[0] >= 0) + close(emu->wake_fd[0]); + if (emu->wake_fd[1] >= 0) + close(emu->wake_fd[1]); + emu->wake_fd[0] = emu->wake_fd[1] = -1; + g_window.window_cleanup(); +#endif return 1; } } @@ -1233,6 +1300,24 @@ static void signal_handler(int sig UNUSED) } } +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) +static void semu_close_wake_pipe(emu_state_t *emu) +{ + signal_wake_fd = -1; + if (g_window.window_set_wake_fd) + g_window.window_set_wake_fd(-1); + + if (emu->wake_fd[0] >= 0) { + close(emu->wake_fd[0]); + emu->wake_fd[0] = -1; + } + if (emu->wake_fd[1] >= 0) { + close(emu->wake_fd[1]); + emu->wake_fd[1] = -1; + } +} +#endif + #ifdef MMU_CACHE_STATS static void print_mmu_cache_stats(vm_t *vm) { @@ -1404,10 +1489,10 @@ static void semu_run(emu_state_t *emu) if (signal_received) break; /* Only need fds for timer and UART (no coroutine I/O), - * plus an optional wake pipe when VIRTIOINPUT is enabled. + * plus an optional wake pipe when a window backend is enabled. */ size_t needed = 2; -#if SEMU_HAS(VIRTIOINPUT) +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) if (emu->wake_fd[0] >= 0) needed++; #endif @@ -1422,15 +1507,6 @@ static void semu_run(emu_state_t *emu) close(kq); #else close(wfi_timer_fd); -#endif -#if SEMU_HAS(VIRTIOINPUT) - /* Mirror the normal-exit cleanup so the wake pipe - * does not leak across the early return. - */ - if (emu->wake_fd[0] >= 0) - close(emu->wake_fd[0]); - if (emu->wake_fd[1] >= 0) - close(emu->wake_fd[1]); #endif emu->exit_code = -1; return; @@ -1517,9 +1593,9 @@ static void semu_run(emu_state_t *emu) pfd_count++; } -#if SEMU_HAS(VIRTIOINPUT) +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) /* Always watch the wake pipe so that backend work such as input - * events or SDL window close unblocks poll(-1) immediately. 
+ * events or SDL window close unblocks 'poll(-1)' immediately. */ int wake_pfd_index = -1; if (emu->wake_fd[0] >= 0 && pfd_count < poll_capacity) { @@ -1580,17 +1656,17 @@ static void semu_run(emu_state_t *emu) #endif } } else if (nevents < 0 && errno != EINTR) { - perror("poll"); + perror("failed to poll emulator events"); } -#if SEMU_HAS(VIRTIOINPUT) +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) /* Drain one wake byte if the pipe fired. The virtio-input path * coalesces backend wakeups behind a bool gate, so it contributes * at most one queued notification byte before the emulator thread * drains pending work. Extra shutdown wake bytes do not need to be * fully consumed here because the first one is enough to make - * emu_tick_peripherals() observe g_window.window_is_closed() and - * stop the emulator. + * 'emu_tick_peripherals()' observe 'g_window.window_is_closed()' + * and stop the emulator. */ if (wake_pfd_index >= 0 && (pfds[wake_pfd_index].revents & POLLIN)) { @@ -1626,12 +1702,6 @@ static void semu_run(emu_state_t *emu) close(kq); #else close(wfi_timer_fd); -#endif -#if SEMU_HAS(VIRTIOINPUT) - if (emu->wake_fd[0] >= 0) - close(emu->wake_fd[0]); - if (emu->wake_fd[1] >= 0) - close(emu->wake_fd[1]); #endif /* Free coroutine stacks/contexts from coro_init() above so the * graceful-exit path matches what coro_create_hart()'s failure @@ -1640,7 +1710,7 @@ static void semu_run(emu_state_t *emu) coro_cleanup(); /* A closed window is a normal user action, not an error. */ -#if SEMU_HAS(VIRTIOINPUT) +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) if (emu->stopped && !g_window.window_is_closed()) #else if (emu->stopped) @@ -1748,7 +1818,7 @@ static gdb_action_t semu_cont(void *args) * commands can run guest code again. 
*/ signal_received = 0; -#if SEMU_HAS(VIRTIOINPUT) +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) while (!semu_is_interrupt(emu) && !g_window.window_is_closed()) { #else while (!semu_is_interrupt(emu)) { @@ -1764,7 +1834,7 @@ static gdb_action_t semu_cont(void *args) /* Clear the interrupt if it's pending */ __atomic_store_n(&emu->is_interrupted, false, __ATOMIC_RELAXED); -#if SEMU_HAS(VIRTIOINPUT) +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) /* Tell gdbstub_run() to exit cleanly when the window is closed. */ if (g_window.window_is_closed()) return ACT_SHUTDOWN; @@ -1839,8 +1909,10 @@ static void semu_run_debug(emu_state_t *emu) emu->exit_code = ok ? 0 : 1; } -#if SEMU_HAS(VIRTIOINPUT) -/* Thread wrapper for running emulator in background thread */ +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) +/* Thread wrapper for backends that reserve the main thread for + * 'window_main_loop()'. + */ static void *emu_thread_func(void *arg) { emu_state_t *emu = (emu_state_t *) arg; @@ -1850,7 +1922,7 @@ static void *emu_thread_func(void *arg) else semu_run(emu); - /* Unblock window_main_loop() on the main thread so it can return */ + /* Unblock 'window_main_loop()' on the main thread so it can return. */ if (g_window.window_shutdown) g_window.window_shutdown(); @@ -1885,15 +1957,15 @@ int main(int argc, char **argv) sigaction(SIGTERM, &sa, NULL); } -#if SEMU_HAS(VIRTIOINPUT) +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) /* Publish the wake pipe to the signal handler so SIGINT/SIGTERM can * unblock the emulator thread's poll() in the threaded window path. */ if (emu.wake_fd[1] >= 0) signal_wake_fd = emu.wake_fd[1]; - /* If window backend has a main loop function, run emulator in background - * thread and use main thread for window events (required for macOS SDL2). + /* If the window backend provides 'window_main_loop()', run the emulator in + * a background thread and use the main thread for window events. 
*/ if (g_window.window_main_loop) { pthread_t emu_thread; @@ -1903,18 +1975,18 @@ int main(int argc, char **argv) if (pthread_create(&emu_thread, NULL, emu_thread_func, &emu) != 0) { fprintf(stderr, "Failed to create emulator thread\n"); + semu_close_wake_pipe(&emu); + g_window.window_cleanup(); return 1; } - /* Main thread runs window event loop (required for macOS) */ - g_window.window_main_loop(); - - /* window_main_loop() returns either because the user closed the window - * (SDL_QUIT) or because the emulator called window_shutdown(). - * emu_tick_peripherals() picks up g_window.window_is_closed() and - * sets emu->stopped, so no direct write to emu.stopped is needed - * here. + /* The main thread runs the window event loop, which returns either because + * the user closed the window ('SDL_QUIT') or because the emulator called + * 'window_shutdown()'. 'emu_tick_peripherals()' picks up the window + * backend's closed state and sets 'emu->stopped', so no direct write to + * 'emu.stopped' is needed here. */ + g_window.window_main_loop(); /* Wait for emulator thread to finish.
*/ pthread_join(emu_thread, NULL); @@ -1927,6 +1999,11 @@ int main(int argc, char **argv) semu_run(&emu); } +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) + semu_close_wake_pipe(&emu); + g_window.window_cleanup(); +#endif + #ifdef MMU_CACHE_STATS print_mmu_cache_stats(&emu.vm); #endif diff --git a/minimal.dts b/minimal.dts index 709d1872..2b604236 100644 --- a/minimal.dts +++ b/minimal.dts @@ -115,5 +115,13 @@ interrupts = <8>; }; #endif + +#if SEMU_FEATURE_VIRTIOGPU + gpu0: virtio@4b00000 { + compatible = "virtio,mmio"; + reg = <0x4b00000 0x200>; + interrupts = <9>; + }; +#endif }; }; diff --git a/mk/external.mk b/mk/external.mk index ec7e9139..1c86ec7a 100644 --- a/mk/external.mk +++ b/mk/external.mk @@ -39,6 +39,10 @@ $(PREBUILT_MANIFEST): FORCE fi; \ fi +# optional test tools disk +TEST_TOOLS_DATA_URL = $(COMMON_URL)/test-tools.img.bz2 +TEST_TOOLS_DATA = test-tools.img + define download # Download to a .part file so an interrupted curl never lands a # corrupt or incomplete .bz2 that a later run mistakes for valid input. @@ -46,14 +50,16 @@ define download # left over from a previous run, e.g. interrupted before sha1 verify, # would make curl request a byte range past EOF, the server replies # HTTP 416, and curl exits non-zero, a permanent self-inflicted -# deadlock. These files are 5 to 7 MiB; a fresh GET is cheap. +# deadlock. These files are small enough that a fresh GET is cheap. # # Look up the expected SHA-1 by archive basename in the release -# manifest, then verify the .part against it. Decompress to a .tmp -# file and rename only on success, so an interrupted bunzip2 cannot -# leave a half-decompressed Image or rootfs.cpio that make would treat -# as a valid up-to-date target on the next invocation. -$($(T)_DATA): $(PREBUILT_MANIFEST) | prebuilt-check +# manifest, then verify the .part against it. Keep the manifest +# order-only so an existing PR-built artifact is not considered stale +# just because the manifest was refreshed. 
Decompress to a .tmp file +# and rename only on success, so an interrupted bunzip2 cannot leave a +# half-decompressed artifact that make would treat as a valid +# up-to-date target on the next invocation. +$($(T)_DATA): | $(PREBUILT_MANIFEST) prebuilt-check $(VECHO) " GET\t$$@\n" $(Q)curl --fail --retry 3 --retry-delay 1 --progress-bar \ -L -o "$$@.bz2.part" "$(strip $($(T)_DATA_URL))" \ @@ -69,18 +75,18 @@ $($(T)_DATA): $(PREBUILT_MANIFEST) | prebuilt-check $(Q)rm -f "$$@.bz2" endef -EXTERNAL_DATA = KERNEL INITRD +EXTERNAL_DATA = KERNEL INITRD TEST_TOOLS $(foreach T,$(EXTERNAL_DATA),$(eval $(download))) # --- Stale-prebuilt detection ------------------------------------------- # -# The prebuilt Image and rootfs.cpio above are baked from a fixed set of -# input files (kernel/buildroot/busybox configs, the build script, and -# the init stub). When any of those change locally the prebuilt may no +# The prebuilt Image, rootfs.cpio, and test-tools.img above are baked from a +# fixed set of input files, the PREBUILT_INPUTS list below (kernel, busybox, buildroot and X11 configs, +# the Meson cross file, the build scripts, the init stub, and the guest environment script). When any of those change locally the prebuilt may no # longer reflect the user's intent, so we compute the SHA1 of those # inputs and compare against the publisher's recorded inputs hash -- -# the third line of prebuilt.sha1, written by .ci/publish-prebuilt.sh -# under the virtual name 'inputs'. +# the line of prebuilt.sha1 written by .ci/publish-prebuilt.sh under +# the virtual name 'inputs'. # # Mismatch -> warn but do not auto-rebuild: a buildroot run takes the # better part of an hour, so we let the user opt in via make build-image.
@@ -90,9 +96,12 @@ PREBUILT_INPUTS := \ configs/linux.config \ configs/busybox.config \ configs/buildroot.config \ + configs/x11.config \ + configs/riscv-cross-file \ scripts/build-image.sh \ scripts/rootfs_ext4.sh \ - target/init + target/init \ + target/local-env.sh # Read the publisher's inputs hash from the downloaded manifest at # recipe time, after the manifest refresh above has had a chance to run. @@ -106,10 +115,10 @@ prebuilt-check: $(PREBUILT_MANIFEST) if [ "$$found" -eq "$$expected" ]; then \ live_sha1=$$(cat $(PREBUILT_INPUTS) | $(SHA1SUM) | awk '{print $$1}'); \ if [ "$$live_sha1" != "$$manifest_sha1" ]; then \ - echo "warning: Local kernel/rootfs inputs ($$live_sha1) differ from" >&2; \ + echo "warning: Local prebuilt guest inputs ($$live_sha1) differ from" >&2; \ echo "warning: the prebuilt's recorded inputs ($$manifest_sha1)." >&2; \ - echo "warning: The downloaded Image/rootfs.cpio do not reflect your local" >&2; \ - echo "warning: configs. Run \`make build-image\` to rebuild from source." >&2; \ + echo "warning: The downloaded guest artifacts do not reflect your local configs." >&2; \ + echo "warning: Run \`make build-image\` to rebuild from source." >&2; \ fi; \ fi; \ fi diff --git a/scripts/build-image.sh b/scripts/build-image.sh index a805443d..f3543358 100755 --- a/scripts/build-image.sh +++ b/scripts/build-image.sh @@ -19,6 +19,12 @@ function OK PARALLEL="-j$(nproc)" +DIRECTFB2_REPO="https://github.com/directfb2/DirectFB2" +DIRECTFB2_REV="7d4682d0cc092ed2f28c903175d1a0c104e9e9a8" +DIRECTFB_EXAMPLES_REPO="https://github.com/directfb2/DirectFB-examples" +DIRECTFB_EXAMPLES_REV="eecf1019b29933a45578e62aea5f08a884d30fbc" +TEST_TOOLS_SIZE_MB=192 + function safe_copy { local src="$1" local dst="$2" @@ -31,16 +37,64 @@ function safe_copy { fi } -function do_buildroot +function checkout_repo_rev { - if [ ! -d buildroot ]; then - echo "Cloning Buildroot..." 
- ASSERT git clone https://github.com/buildroot/buildroot -b 2025.02.x --depth=1 + local dir="$1" + local repo="$2" + local rev="$3" + + if [ ! -d "$dir/.git" ]; then + echo "Cloning $dir..." + ASSERT git clone "$repo" "$dir" else - echo "buildroot/ already exists, skipping clone" + echo "$dir already exists, reusing clone..." fi - safe_copy configs/buildroot.config buildroot/.config + pushd "$dir" + if ! git cat-file -e "$rev^{commit}" 2>/dev/null; then + ASSERT git fetch origin + fi + ASSERT git checkout --detach "$rev" + popd +} + +function meson_setup_or_reconfigure +{ + local build_dir="$1" + shift + + if [ -f "$build_dir/build.ninja" ]; then + if ! meson setup --reconfigure "$@" "$build_dir"; then + echo "Recreating stale Meson build directory: $build_dir" + rm -rf "$build_dir" + ASSERT meson setup "$@" "$build_dir" + fi + else + ASSERT meson setup "$@" "$build_dir" + fi +} + +function configure_buildroot +{ + local mode="${1:-default}" + local buildroot_config="configs/buildroot.config" + local x11_config="configs/x11.config" + local merge_tool="buildroot/support/kconfig/merge_config.sh" + + if [[ "$mode" == "x11" ]]; then + echo "Preparing Buildroot config with X11 fragment..." + ASSERT "$merge_tool" -m -r -O buildroot "$buildroot_config" "$x11_config" + else + echo "Preparing default Buildroot config..." + cp -f "$buildroot_config" buildroot/.config + fi +} + +function build_buildroot_rootfs +{ + local mode="${1:-default}" + + configure_buildroot "$mode" safe_copy configs/busybox.config buildroot/busybox.config cp -f target/init buildroot/fs/cpio/init @@ -50,8 +104,25 @@ function do_buildroot unset LD_LIBRARY_PATH pushd buildroot ASSERT make olddefconfig + if [[ "$mode" == "x11" && \ + ! -x output/host/bin/riscv32-buildroot-linux-gnu-g++ ]]; then + echo "Rebuilding Buildroot final GCC with C++ support..." + ASSERT make host-gcc-final-dirclean + fi ASSERT make $PARALLEL popd +} + +function do_buildroot +{ + if [ ! 
-d buildroot ]; then + echo "Cloning Buildroot..." + ASSERT git clone https://github.com/buildroot/buildroot -b 2025.02.x --depth=1 + else + echo "buildroot/ already exists, skipping clone" + fi + + build_buildroot_rootfs default # Always publish the cpio. It is the canonical buildroot output and # serves both as the source for the ext4 image and as the legacy @@ -67,6 +138,27 @@ function do_buildroot echo "Skipping ext4.img build (--no-ext4)" else ASSERT ./scripts/rootfs_ext4.sh ./rootfs.cpio ./ext4.img + + local test_tools_rootfs=./rootfs.cpio + if [[ $BUILD_X11 -eq 1 ]]; then + build_buildroot_rootfs x11 + test_tools_rootfs=./buildroot/output/images/rootfs.cpio + fi + + if [[ $BUILD_DIRECTFB_TEST -eq 1 ]]; then + do_extra_packages + if [[ $BUILD_X11 -eq 1 ]]; then + stage_cxx_runtime + fi + ASSERT ./scripts/rootfs_ext4.sh "$test_tools_rootfs" ./test-tools.img \ + "$TEST_TOOLS_SIZE_MB" ./extra_packages + elif [[ $BUILD_X11 -eq 1 ]]; then + rm -rf extra_packages + mkdir -p extra_packages + stage_cxx_runtime + ASSERT ./scripts/rootfs_ext4.sh "$test_tools_rootfs" ./test-tools.img \ + "$TEST_TOOLS_SIZE_MB" ./extra_packages + fi fi } @@ -91,24 +183,99 @@ function do_linux popd } +function do_directfb +{ + export PATH="$PWD/buildroot/output/host/bin:$PATH" + export BUILDROOT_OUT=$PWD/buildroot/output/ + export DIRECTFB_STAGE=$PWD/directfb + mkdir -p directfb + + # Build DirectFB2 + checkout_repo_rev DirectFB2 "$DIRECTFB2_REPO" "$DIRECTFB2_REV" + pushd DirectFB2 + cp ../configs/riscv-cross-file . 
+ meson_setup_or_reconfigure build/riscv -Ddrmkms=true --cross-file \ + riscv-cross-file + ASSERT meson compile -C build/riscv + ASSERT env DESTDIR=$BUILDROOT_OUT/host/riscv32-buildroot-linux-gnu/sysroot meson install -C build/riscv + ASSERT env DESTDIR=$DIRECTFB_STAGE meson install -C build/riscv + popd + + # Build DirectFB2 examples + checkout_repo_rev DirectFB-examples "$DIRECTFB_EXAMPLES_REPO" \ + "$DIRECTFB_EXAMPLES_REV" + pushd DirectFB-examples/ + cp ../configs/riscv-cross-file . + meson_setup_or_reconfigure build/riscv --cross-file riscv-cross-file + ASSERT meson compile -C build/riscv + ASSERT env DESTDIR=$DIRECTFB_STAGE meson install -C build/riscv + popd +} + +function do_extra_packages +{ + export PATH="$PWD/buildroot/output/host/bin:$PATH" + export CROSS_COMPILE=riscv32-buildroot-linux-gnu- + + rm -rf directfb extra_packages + mkdir -p directfb + mkdir -p extra_packages + mkdir -p extra_packages/root + + do_directfb && OK + + if ! find directfb -mindepth 1 -print -quit | grep -q .; then + echo "Error: DirectFB staging tree is empty." + exit 1 + fi + + ASSERT cp -r directfb/. extra_packages/ + ASSERT cp target/local-env.sh extra_packages/root/ +} + +function stage_cxx_runtime +{ + local toolchain_lib="buildroot/output/host/riscv32-buildroot-linux-gnu/lib" + local libstdcpp="$toolchain_lib/libstdc++.so.6" + local libstdcpp_real + + if [ ! 
-e "$libstdcpp" ]; then + echo "Error: libstdc++.so.6 not found in $toolchain_lib" + exit 1 + fi + + libstdcpp_real="$(readlink "$libstdcpp" || basename "$libstdcpp")" + if [[ "$libstdcpp_real" != /* ]]; then + libstdcpp_real="$toolchain_lib/$libstdcpp_real" + fi + mkdir -p extra_packages/lib + ASSERT cp -a "$toolchain_lib/libstdc++.so" "$libstdcpp" \ + "$libstdcpp_real" extra_packages/lib/ +} + function show_help { cat << EOF -Usage: $0 [--buildroot] [--linux] [--all] [--no-ext4] [--clean-build] [--help] +Usage: $0 [--buildroot] [--x11] [--linux] [--directfb2-test] [--all] [--no-ext4] [--clean-build] [--help] Options: --buildroot Build Buildroot userland (produces rootfs.cpio and, unless --no-ext4 is given, ext4.img for vda boot) + --x11 Build test-tools.img from an X11-enabled rootfs + --directfb2-test Overlay the DirectFB2 test payload into test-tools.img --linux Build the Linux kernel --all Build both Buildroot and Linux --no-ext4 Skip ext4.img generation; produce only rootfs.cpio (matches the legacy ENABLE_EXTERNAL_ROOT=0 path) - --clean-build Remove buildroot/ and/or linux/ before building + --clean-build Remove buildroot/ and/or linux/ before building; + with --directfb2-test, also remove DirectFB2 sources --help Show this message EOF exit 1 } BUILD_BUILDROOT=0 +BUILD_X11=0 +BUILD_DIRECTFB_TEST=0 BUILD_LINUX=0 NO_EXT4=0 CLEAN_BUILD=0 @@ -118,6 +285,14 @@ while [[ $# -gt 0 ]]; do --buildroot) BUILD_BUILDROOT=1 ;; + --x11) + BUILD_BUILDROOT=1 + BUILD_X11=1 + ;; + --directfb2-test) + BUILD_BUILDROOT=1 + BUILD_DIRECTFB_TEST=1 + ;; --linux) BUILD_LINUX=1 ;; @@ -147,6 +322,11 @@ if [[ $BUILD_BUILDROOT -eq 0 && $BUILD_LINUX -eq 0 ]]; then show_help fi +if [[ ( $BUILD_DIRECTFB_TEST -eq 1 || $BUILD_X11 -eq 1 ) && $NO_EXT4 -eq 1 ]]; then + echo "Error: --x11/--directfb2-test requires an ext4 image; remove --no-ext4." + show_help +fi + if [[ $CLEAN_BUILD -eq 1 && $BUILD_BUILDROOT -eq 1 && -d buildroot ]]; then echo "Removing buildroot/ for clean build..." 
rm -rf buildroot @@ -157,6 +337,11 @@ if [[ $CLEAN_BUILD -eq 1 && $BUILD_LINUX -eq 1 && -d linux ]]; then rm -rf linux fi +if [[ $CLEAN_BUILD -eq 1 && $BUILD_DIRECTFB_TEST -eq 1 ]]; then + echo "Removing DirectFB2 sources for clean build..." + rm -rf DirectFB2 DirectFB-examples directfb extra_packages +fi + if [[ $BUILD_BUILDROOT -eq 1 ]]; then do_buildroot && OK fi diff --git a/scripts/rootfs_ext4.sh b/scripts/rootfs_ext4.sh index 9cb03725..c8d783a0 100755 --- a/scripts/rootfs_ext4.sh +++ b/scripts/rootfs_ext4.sh @@ -2,17 +2,20 @@ # # Build an ext4 rootfs image from an existing cpio archive. # -# Usage: rootfs_ext4.sh [SOURCE_CPIO] [OUT_IMG] [SIZE_MB] +# Usage: rootfs_ext4.sh [SOURCE_CPIO] [OUT_IMG] [SIZE_MB] [EXTRA_DIR] # # Default values match the EXTROOT make path: read rootfs.cpio, produce # ext4.img sized at 32 MiB. The 32 MiB default fits the buildroot userland -# with headroom; bump SIZE_MB for larger rootfs payloads. +# with headroom; bump SIZE_MB for larger rootfs payloads. EXTRA_DIR, when +# given, is copied into the ext4 image after SOURCE_CPIO is extracted without +# changing SOURCE_CPIO itself. set -euo pipefail SRC_CPIO="${1:-rootfs.cpio}" OUT_IMG="${2:-ext4.img}" SIZE_MB="${3:-32}" +EXTRA_DIR="${4:-}" MKFS_EXT4="${MKFS_EXT4:-mkfs.ext4}" if [ ! -f "$SRC_CPIO" ]; then @@ -32,26 +35,54 @@ fi SRC_DIR="$(cd "$(dirname "$SRC_CPIO")" && pwd -P)" SRC_ABS="$SRC_DIR/$(basename "$SRC_CPIO")" +OUT_DIR="$(cd "$(dirname "$OUT_IMG")" && pwd -P)" +OUT_ABS="$OUT_DIR/$(basename "$OUT_IMG")" +EXTRA_ABS="" +if [ -n "$EXTRA_DIR" ]; then + if [ ! -d "$EXTRA_DIR" ]; then + echo "[!] Extra directory not found: $EXTRA_DIR" >&2 + exit 1 + fi + EXTRA_ABS="$(cd "$EXTRA_DIR" && pwd -P)" +fi # `mktemp -d -t PREFIX` differs between GNU (PREFIX is a name) and BSD (PREFIX # is a template) -- spell out the full template instead. 
STAGE="$(mktemp -d "${TMPDIR:-/tmp}/semu-rootfs.XXXXXX")" -trap 'rm -rf "$STAGE"' EXIT - -echo "[*] Extracting $SRC_CPIO -> $STAGE" -( cd "$STAGE" && fakeroot bash -c "cpio -idm < '$SRC_ABS'" ) +OUT_TMP="$(mktemp "$OUT_DIR/.$(basename "$OUT_IMG").XXXXXX")" +trap 'rm -rf "$STAGE" "$OUT_TMP"' EXIT echo "[*] Creating empty image: $OUT_IMG (${SIZE_MB} MiB)" # bs=1024k works on both GNU and BSD dd; bs=1M is GNU-only and bs=1m is # BSD-only. -dd if=/dev/zero of="$OUT_IMG" bs=1024k count="$SIZE_MB" >/dev/null 2>&1 +dd if=/dev/zero of="$OUT_TMP" bs=1024k count="$SIZE_MB" >/dev/null 2>&1 echo "[*] Building ext4 filesystem" +echo "[*] Extracting $SRC_CPIO -> $STAGE" +if [ -n "$EXTRA_ABS" ]; then + echo "[*] Applying extra files: $EXTRA_DIR" +fi # -E lazy_*_init=0: do all init at mkfs time so the first guest mount does # not pay the lazy-init cost. Stripping the journal (-O ^has_journal) # would also speed mount, but the prebuilt Linux Image is built with # CONFIG_EXT4_USE_FOR_EXT2=n and refuses to mount a no-journal image. -fakeroot "$MKFS_EXT4" -q -F \ - -E lazy_itable_init=0,lazy_journal_init=0 \ - -d "$STAGE" "$OUT_IMG" +fakeroot bash -c ' + set -e + stage="$1" + src_cpio="$2" + extra_dir="$3" + mkfs_ext4="$4" + out_img="$5" + + cd "$stage" + cpio -idm < "$src_cpio" + if [ -n "$extra_dir" ]; then + cp -a "$extra_dir"/. . + fi + chown -R 0:0 . + "$mkfs_ext4" -q -F \ + -E lazy_itable_init=0,lazy_journal_init=0 \ + -d . "$out_img" + ' sh "$STAGE" "$SRC_ABS" "$EXTRA_ABS" "$MKFS_EXT4" "$OUT_TMP" -du -h "$OUT_IMG" +mv -f "$OUT_TMP" "$OUT_ABS" +du -h "$OUT_ABS" diff --git a/target/local-env.sh b/target/local-env.sh new file mode 100755 index 00000000..afda50d0 --- /dev/null +++ b/target/local-env.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +# Guest-side environment for manually overlaid user payloads. Buildroot +# packages normally live under /usr, while Meson/autotools payloads staged +# through DESTDIR commonly keep their default /usr/local prefix. 
+ +add_path() +{ + [ -d "$1" ] || return 0 + + case ":$PATH:" in + *":$1:"*) ;; + *) PATH="${PATH:+$PATH:}$1" ;; + esac +} + +add_library_path() +{ + [ -d "$1" ] || return 0 + + case ":${LD_LIBRARY_PATH:-}:" in + *":$1:"*) ;; + *) LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$1" ;; + esac +} + +add_path /usr/local/bin +add_library_path /usr/local/lib + +export PATH +export LD_LIBRARY_PATH diff --git a/utils.h b/utils.h index b6c872e2..8aa1cc58 100644 --- a/utils.h +++ b/utils.h @@ -115,3 +115,6 @@ static inline void list_del_init(struct list_head *node) safe = list_entry(entry->member.next, __typeof__(*entry), member); \ &entry->member != (head); entry = safe, \ safe = list_entry(safe->member.next, __typeof__(*entry), member)) + +#define LIST_HEAD_INIT(name) {.prev = (&name), .next = (&name)} +#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name) diff --git a/vgpu-display.c b/vgpu-display.c new file mode 100644 index 00000000..e185f37a --- /dev/null +++ b/vgpu-display.c @@ -0,0 +1,289 @@ +#include <stdlib.h> + +#include "vgpu-display.h" + +/* 'PRIMARY_SET'/'CURSOR_SET' own CPU-frame snapshots, so each queued command + * can retain significantly more memory than an input event. Keep this backlog + * deliberately small: display updates are lossy and quickly become stale, and + * the emulator thread must be able to drop them rather than accumulate a large + * queue of old frames. + */ +#define VGPU_DISPLAY_CMD_QUEUE_SIZE 64U +#define VGPU_DISPLAY_CMD_QUEUE_MASK (VGPU_DISPLAY_CMD_QUEUE_SIZE - 1U) + +/* Reliable state for plane clear/removal events. The producer advances + * 'generation' when the guest detaches a plane. The SDL consumer mirrors the + * last applied value in 'consumed_generation'. Frame payloads remain in the + * lossy SPSC queue below.
+ */ +struct vgpu_display_plane_clear_state { + uint32_t generation; + uint32_t consumed_generation; +}; + +static struct vgpu_display_plane_clear_state + vgpu_display_primary_clear[VIRTIO_GPU_MAX_SCANOUTS]; +static struct vgpu_display_plane_clear_state + vgpu_display_cursor_clear[VIRTIO_GPU_MAX_SCANOUTS]; +static uint32_t vgpu_display_scanout_count = 1U; + +/* The SPSC queue carries lossy frame/move commands. It's process-wide and + * currently assumes one 'virtio-gpu' producer. The GPU backend is the only + * producer and the window backend is the only consumer. Commands entering this + * bridge carry 'scanout_id' values already validated by the guest-facing + * backend; the SDL consumer relies on that internal contract. + */ +static struct vgpu_display_cmd + vgpu_display_cmd_queue[VGPU_DISPLAY_CMD_QUEUE_SIZE]; +static uint32_t vgpu_display_cmd_head; +static uint32_t vgpu_display_cmd_tail; + +static bool vgpu_display_unavailable; + +static bool vgpu_display_is_cmd_stale(const struct vgpu_display_cmd *cmd) +{ + switch (cmd->type) { + case VGPU_DISPLAY_CMD_PRIMARY_SET: + return cmd->generation != + __atomic_load_n( + &vgpu_display_primary_clear[cmd->scanout_id].generation, + __ATOMIC_ACQUIRE); + case VGPU_DISPLAY_CMD_CURSOR_SET: + case VGPU_DISPLAY_CMD_CURSOR_MOVE: + return cmd->generation != + __atomic_load_n( + &vgpu_display_cursor_clear[cmd->scanout_id].generation, + __ATOMIC_ACQUIRE); + default: + return false; + } +} + +static bool vgpu_display_pop_pending_clear_cmd( + struct vgpu_display_plane_clear_state *states, + enum vgpu_display_cmd_type type, + struct vgpu_display_cmd *cmd) +{ + uint32_t scanout_count = + __atomic_load_n(&vgpu_display_scanout_count, __ATOMIC_ACQUIRE); + + for (uint32_t i = 0; i < scanout_count; i++) { + struct vgpu_display_plane_clear_state *state = &states[i]; + uint32_t generation = + __atomic_load_n(&state->generation, __ATOMIC_ACQUIRE); + + if (state->consumed_generation == generation) + continue; + + state->consumed_generation = 
generation; + + *cmd = (struct vgpu_display_cmd) { + .type = type, + .scanout_id = i, + .generation = generation, + }; + return true; + } + + return false; +} + +void vgpu_display_set_scanout_count(uint32_t scanout_count) +{ + if (scanout_count > VIRTIO_GPU_MAX_SCANOUTS) + scanout_count = VIRTIO_GPU_MAX_SCANOUTS; + + __atomic_store_n(&vgpu_display_scanout_count, scanout_count, + __ATOMIC_RELEASE); +} + +void vgpu_display_publish_primary_clear(uint32_t scanout_id) +{ + if (__atomic_load_n(&vgpu_display_unavailable, __ATOMIC_ACQUIRE)) + return; + + __atomic_add_fetch(&vgpu_display_primary_clear[scanout_id].generation, 1U, + __ATOMIC_ACQ_REL); +} + +void vgpu_display_publish_cursor_clear(uint32_t scanout_id) +{ + if (__atomic_load_n(&vgpu_display_unavailable, __ATOMIC_ACQUIRE)) + return; + + __atomic_add_fetch(&vgpu_display_cursor_clear[scanout_id].generation, 1U, + __ATOMIC_ACQ_REL); +} + +static bool vgpu_display_is_cmd_queue_full(void) +{ + uint32_t head = __atomic_load_n(&vgpu_display_cmd_head, __ATOMIC_RELAXED); + uint32_t tail = __atomic_load_n(&vgpu_display_cmd_tail, __ATOMIC_ACQUIRE); + uint32_t next = (head + 1U) & VGPU_DISPLAY_CMD_QUEUE_MASK; + return next == tail; +} + +static void vgpu_display_push_cmd(struct vgpu_display_cmd *cmd) +{ + uint32_t head = __atomic_load_n(&vgpu_display_cmd_head, __ATOMIC_RELAXED); + uint32_t tail = __atomic_load_n(&vgpu_display_cmd_tail, __ATOMIC_ACQUIRE); + uint32_t next = (head + 1U) & VGPU_DISPLAY_CMD_QUEUE_MASK; + + /* Keep the producer non-blocking. If the window backend falls behind, + * prefer dropping lossy display updates over stalling guest/device + * execution on the emulator thread. Clear commands do not use this queue. 
+ */ + if (next == tail) { + vgpu_display_release_cmd(cmd); + return; + } + + vgpu_display_cmd_queue[head] = *cmd; + __atomic_store_n(&vgpu_display_cmd_head, next, __ATOMIC_RELEASE); +} + +static bool vgpu_display_pop_queued_cmd(struct vgpu_display_cmd *cmd) +{ + uint32_t tail = __atomic_load_n(&vgpu_display_cmd_tail, __ATOMIC_RELAXED); + uint32_t head = __atomic_load_n(&vgpu_display_cmd_head, __ATOMIC_ACQUIRE); + + if (tail == head) + return false; + + *cmd = vgpu_display_cmd_queue[tail]; + __atomic_store_n(&vgpu_display_cmd_tail, + (tail + 1U) & VGPU_DISPLAY_CMD_QUEUE_MASK, + __ATOMIC_RELEASE); + return true; +} + +void vgpu_display_release_cmd(struct vgpu_display_cmd *cmd) +{ + switch (cmd->type) { + case VGPU_DISPLAY_CMD_PRIMARY_SET: + free(cmd->u.primary_set.payload); + break; + case VGPU_DISPLAY_CMD_CURSOR_SET: + free(cmd->u.cursor_set.payload); + break; + default: + break; + } +} + +bool vgpu_display_pop_cmd(struct vgpu_display_cmd *cmd) +{ + /* Return true when '*cmd' is filled with a clear command or a valid queued + * frame/move command. Stale queued commands are released and skipped; + * return false only when no command remains. + */ + for (;;) { + /* Check clear command for primary and cursor plane. */ + if (vgpu_display_pop_pending_clear_cmd(vgpu_display_primary_clear, + VGPU_DISPLAY_CMD_PRIMARY_CLEAR, + cmd)) + return true; + if (vgpu_display_pop_pending_clear_cmd( + vgpu_display_cursor_clear, VGPU_DISPLAY_CMD_CURSOR_CLEAR, cmd)) + return true; + + /* Pop the command and check if it is still valid. */ + if (!vgpu_display_pop_queued_cmd(cmd)) + return false; + if (!vgpu_display_is_cmd_stale(cmd)) + return true; + + /* Drop invalid command and continue. */ + vgpu_display_release_cmd(cmd); + } +} + +void vgpu_display_set_unavailable(void) +{ + struct vgpu_display_cmd cmd; + + /* This is an init-only fallback path for 'window-sw' initialization + * failure, before the emulator thread starts publishing display commands. 
+ * It is not a concurrent shutdown primitive: a producer could otherwise + * observe 'vgpu_display_unavailable == false', race with this drain, and + * enqueue a payload after the queue was already drained. + * + * Still publish the latch atomically so later call sites keep the same + * one-way handoff rule. + */ + __atomic_store_n(&vgpu_display_unavailable, true, __ATOMIC_RELEASE); + + while (vgpu_display_pop_cmd(&cmd)) + vgpu_display_release_cmd(&cmd); +} + +bool vgpu_display_can_publish(void) +{ + return !__atomic_load_n(&vgpu_display_unavailable, __ATOMIC_ACQUIRE) && + !vgpu_display_is_cmd_queue_full(); +} + +void vgpu_display_publish_primary_set(uint32_t scanout_id, + struct vgpu_display_payload *payload) +{ + if (__atomic_load_n(&vgpu_display_unavailable, __ATOMIC_ACQUIRE)) { + free(payload); + return; + } + + struct vgpu_display_cmd cmd = { + .type = VGPU_DISPLAY_CMD_PRIMARY_SET, + .scanout_id = scanout_id, + .generation = + __atomic_load_n(&vgpu_display_primary_clear[scanout_id].generation, + __ATOMIC_ACQUIRE), + .u.primary_set = {.payload = payload}, + }; + vgpu_display_push_cmd(&cmd); +} + +void vgpu_display_publish_cursor_set(uint32_t scanout_id, + struct vgpu_display_payload *payload, + int32_t x, + int32_t y, + uint32_t hot_x, + uint32_t hot_y) +{ + if (__atomic_load_n(&vgpu_display_unavailable, __ATOMIC_ACQUIRE)) { + free(payload); + return; + } + + struct vgpu_display_cmd cmd = { + .type = VGPU_DISPLAY_CMD_CURSOR_SET, + .scanout_id = scanout_id, + .generation = + __atomic_load_n(&vgpu_display_cursor_clear[scanout_id].generation, + __ATOMIC_ACQUIRE), + .u.cursor_set = + { + .payload = payload, + .x = x, + .y = y, + .hot_x = hot_x, + .hot_y = hot_y, + }, + }; + vgpu_display_push_cmd(&cmd); +} + +void vgpu_display_publish_cursor_move(uint32_t scanout_id, int32_t x, int32_t y) +{ + if (__atomic_load_n(&vgpu_display_unavailable, __ATOMIC_ACQUIRE)) + return; + + struct vgpu_display_cmd cmd = { + .type = VGPU_DISPLAY_CMD_CURSOR_MOVE, + .scanout_id = 
scanout_id, + .generation = + __atomic_load_n(&vgpu_display_cursor_clear[scanout_id].generation, + __ATOMIC_ACQUIRE), + .u.cursor_move = {.x = x, .y = y}, + }; + vgpu_display_push_cmd(&cmd); +} diff --git a/vgpu-display.h b/vgpu-display.h new file mode 100644 index 00000000..6eb73a25 --- /dev/null +++ b/vgpu-display.h @@ -0,0 +1,93 @@ +#pragma once + +#if !SEMU_HAS(VIRTIOGPU) +#error Only valid when Virtio-GPU is enabled. +#endif + +#include +#include + +#include "virtio-gpu.h" + +/* Immutable CPU-frame payload published by the VirtIO GPU backend and later + * consumed by the window backend when it uploads pixels into its own textures. + */ +struct vgpu_display_cpu_payload { + enum virtio_gpu_formats format; + uint32_t width, height; + uint32_t stride; + uint32_t bits_per_pixel; + uint8_t *pixels; +}; + +/* Owning payload object passed through the display queue. The bridge queues + * and disposes this object, while GPU and window backends only fill or + * consume the payload it carries. + */ +struct vgpu_display_payload { + struct vgpu_display_cpu_payload cpu; + /* TODO: Add a GL/virgl payload when 3D scanout is implemented. The display + * bridge currently transports CPU-owned 2D frames only. + */ +}; + +/* Runtime display commands published by the GPU backend and consumed by the + * window backend. 'PRIMARY_*' updates the main scanout image, while 'CURSOR_*' + * updates or moves the separate cursor plane. + * + * Clear commands are reliable generation changes, frame/move commands are lossy + * SPSC queue entries. + */ +enum vgpu_display_cmd_type { + VGPU_DISPLAY_CMD_PRIMARY_SET = 0, + VGPU_DISPLAY_CMD_PRIMARY_CLEAR, + VGPU_DISPLAY_CMD_CURSOR_SET, + VGPU_DISPLAY_CMD_CURSOR_CLEAR, + VGPU_DISPLAY_CMD_CURSOR_MOVE, +}; + +/* One synthesized display bridge command. 'scanout_id' selects which scanout + * to update, and the union carries the payload or coordinates required by the + * specific command type above. 
+ */ +struct vgpu_display_cmd { + enum vgpu_display_cmd_type type; + uint32_t scanout_id; + uint32_t generation; + union { + struct { + struct vgpu_display_payload *payload; + } primary_set; + struct { + struct vgpu_display_payload *payload; + int32_t x; + int32_t y; + uint32_t hot_x; + uint32_t hot_y; + } cursor_set; + struct { + int32_t x; + int32_t y; + } cursor_move; + } u; +}; + +void vgpu_display_set_scanout_count(uint32_t scanout_count); +void vgpu_display_publish_primary_clear(uint32_t scanout_id); +void vgpu_display_publish_cursor_clear(uint32_t scanout_id); + +void vgpu_display_release_cmd(struct vgpu_display_cmd *cmd); +bool vgpu_display_pop_cmd(struct vgpu_display_cmd *cmd); +void vgpu_display_set_unavailable(void); +bool vgpu_display_can_publish(void); +void vgpu_display_publish_primary_set(uint32_t scanout_id, + struct vgpu_display_payload *payload); +void vgpu_display_publish_cursor_set(uint32_t scanout_id, + struct vgpu_display_payload *payload, + int32_t x, + int32_t y, + uint32_t hot_x, + uint32_t hot_y); +void vgpu_display_publish_cursor_move(uint32_t scanout_id, + int32_t x, + int32_t y); diff --git a/virtio-gpu-sw.c b/virtio-gpu-sw.c new file mode 100644 index 00000000..8096539a --- /dev/null +++ b/virtio-gpu-sw.c @@ -0,0 +1,1243 @@ +#include +#include +#include +#include +#include +#include + +#include "device.h" +#include "utils.h" +#include "vgpu-display.h" +#include "virtio-gpu.h" +#include "virtio.h" + +#define PRIV(x) ((virtio_gpu_data_t *) x->priv) + +/* Host-side images are allocated per resource with 'calloc()'. Track their + * aggregate size and cap it at 256 MiB. + * + * Backing entries describe guest RAM ranges. Use 4 KiB as the expected minimum + * page granularity, so 512 MiB guest RAM needs at most 'RAM_SIZE / 4096' + * entries plus one extra entry for an unaligned tail. 
+ */ +#define VGPU_SW_MAX_HOSTMEM (256U * 1024U * 1024U) +#define VGPU_SW_BACKING_ENTRY_PAGE_SIZE 4096U +#define VGPU_SW_MAX_BACKING_ENTRIES \ + (RAM_SIZE / VGPU_SW_BACKING_ENTRY_PAGE_SIZE + 1U) + +/* Host-side 2D resource owned by the software backend. It keeps the copied + * 'image' plus any attached guest backing metadata needed by transfers. + */ +struct vgpu_sw_resource_2d { + uint32_t resource_id; + uint32_t format; + uint32_t width, height; + uint32_t stride; + uint32_t bits_per_pixel; + uint32_t *image; + size_t image_size; + size_t page_cnt; + struct iovec *iovec; + struct list_head list; +}; + +/* Process-wide singleton: semu currently assumes at most one software + * virtio-gpu backend instance per process. + */ +static LIST_HEAD(g_vgpu_sw_res_2d_list); +static size_t g_vgpu_sw_hostmem; + +static size_t vgpu_sw_iov_to_buf(const struct iovec *iov, + unsigned int iov_cnt, + size_t offset, + void *buf, + size_t bytes) +{ + size_t done = 0; + + if (bytes == 0) + return 0; + + /* Each non-empty 'iovec' entry is validated by 'RESOURCE_ATTACH_BACKING' + * before it is stored here. Treat the array as one long byte stream: skip + * whole entries until reaching the starting offset, then copy chunks into + * 'buf'. + */ + for (unsigned int i = 0; i < iov_cnt; i++) { + if (iov[i].iov_len == 0) + continue; + /* Treat a malformed backing entry as an incomplete copy. */ + if (!iov[i].iov_base) + return done; + + if (offset < iov[i].iov_len) { + size_t remained = bytes - done; + size_t page_avail = iov[i].iov_len - offset; + size_t len = (remained < page_avail) ? 
remained : page_avail; + void *src = (void *) ((uintptr_t) iov[i].iov_base + offset); + void *dest = (void *) ((uintptr_t) buf + done); + + memcpy(dest, src, len); + offset = 0; + done += len; + + if (done >= bytes) + break; + } else { + offset -= iov[i].iov_len; + } + } + + return done; +} + +static bool vgpu_sw_u64_add_overflow(uint64_t a, uint64_t b, uint64_t *out) +{ + *out = a + b; + return *out < a; +} + +static bool vgpu_sw_u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *out) +{ + if (a != 0 && b > UINT64_MAX / a) + return true; + + *out = a * b; + return false; +} + +static bool vgpu_sw_rect_fits(uint32_t width, + uint32_t height, + const struct virtio_gpu_rect *rect) +{ + if (rect->width == 0 || rect->height == 0) + return false; + if (rect->x >= width || rect->y >= height) + return false; + + return rect->width <= width - rect->x && rect->height <= height - rect->y; +} + +static bool vgpu_sw_transfer_source_fits( + const struct virtio_gpu_trans_to_host_2d *req, + const struct vgpu_sw_resource_2d *res_2d) +{ + uint64_t bpp = res_2d->bits_per_pixel / 8; + uint64_t row_bytes, row_stride, last_row, last_row_offset, end_offset; + uint64_t required_bytes, backing_size = 0, backing_end; + + if (req->r.height == 0 || req->offset > SIZE_MAX) + return false; + if (vgpu_sw_u64_mul_overflow(req->r.width, bpp, &row_bytes) || + row_bytes == 0) + return false; + if (vgpu_sw_u64_mul_overflow((uint64_t) res_2d->stride, req->r.height - 1, + &last_row)) + return false; + if (vgpu_sw_u64_add_overflow(req->offset, last_row, &last_row_offset)) + return false; + if (vgpu_sw_u64_add_overflow(last_row_offset, row_bytes, &end_offset)) + return false; + if (vgpu_sw_u64_mul_overflow((uint64_t) res_2d->stride, req->r.height, + &row_stride)) + return false; + + required_bytes = + row_bytes == res_2d->stride ? 
row_stride : end_offset - req->offset; + for (size_t i = 0; i < res_2d->page_cnt; i++) { + if (vgpu_sw_u64_add_overflow(backing_size, res_2d->iovec[i].iov_len, + &backing_size)) + return false; + } + + return !vgpu_sw_u64_add_overflow(req->offset, required_bytes, + &backing_end) && + backing_end <= backing_size; +} + +static bool vgpu_sw_copy_image_from_pages( + struct virtio_gpu_trans_to_host_2d *req, + struct vgpu_sw_resource_2d *res_2d) +{ + uint32_t stride = res_2d->stride; + uint32_t bpp = res_2d->bits_per_pixel / 8; /* Bytes per pixel */ + uint32_t width = req->r.width; + uint32_t height = req->r.height; + + /* When the transfer spans full-width rows with no padding, both source + * ('iovec' at 'req->offset') and destination ('image' at 'r.y') are + * contiguous, so the entire rectangle can be copied in a single helper + * call. This covers all cursor transfers, full-frame updates, and + * full-width dirty bands. + */ + if (req->r.x == 0 && (size_t) width * bpp == stride) { + void *dest = + (void *) ((uintptr_t) res_2d->image + (size_t) req->r.y * stride); + size_t bytes = (size_t) stride * height; + return vgpu_sw_iov_to_buf(res_2d->iovec, res_2d->page_cnt, + (size_t) req->offset, dest, bytes) == bytes; + } + + /* Partial-width sub-rect: copy row by row */ + for (uint32_t h = 0; h < height; h++) { + /* Source offset is in the image coordinate. The address to copy from + * is the page base address plus the offset. 
+ */ + size_t src_offset = req->offset + (size_t) stride * h; + size_t dest_offset = + ((size_t) req->r.y + h) * stride + (size_t) req->r.x * bpp; + void *dest = (void *) ((uintptr_t) res_2d->image + dest_offset); + size_t total = (size_t) width * bpp; + + if (vgpu_sw_iov_to_buf(res_2d->iovec, res_2d->page_cnt, src_offset, + dest, total) != total) + return false; + } + + return true; +} + +static void vgpu_sw_destroy_resource_2d(struct vgpu_sw_resource_2d *res_2d) +{ + list_del(&res_2d->list); + g_vgpu_sw_hostmem -= res_2d->image_size; + free(res_2d->image); + free(res_2d->iovec); + free(res_2d); +} + +static struct vgpu_sw_resource_2d *vgpu_sw_get_resource_2d(uint32_t resource_id) +{ + struct vgpu_sw_resource_2d *res_2d; + list_for_each_entry (res_2d, &g_vgpu_sw_res_2d_list, list) { + if (res_2d->resource_id == resource_id) + return res_2d; + } + + return NULL; +} + +static const struct virtq_desc *vgpu_sw_get_response_desc( + struct virtq_desc *vq_desc, + size_t response_size, + uint32_t *plen) +{ + int resp_idx = virtio_gpu_get_response_desc(vq_desc, VIRTIO_GPU_MAX_DESC, + response_size); + if (resp_idx >= 0) + return &vq_desc[resp_idx]; + + *plen = 0; + return NULL; +} + +static struct virtio_gpu_scanout_info *vgpu_sw_get_scanout( + virtio_gpu_state_t *vgpu, + uint32_t scanout_id) +{ + if (scanout_id >= PRIV(vgpu)->num_scanouts) + return NULL; + + struct virtio_gpu_scanout_info *scanout = &PRIV(vgpu)->scanouts[scanout_id]; + return scanout->enabled ? 
scanout : NULL; +} + +static struct vgpu_display_payload *vgpu_sw_create_window_payload( + const struct vgpu_sw_resource_2d *res_2d, + const struct virtio_gpu_scanout_info *scanout, + const char *plane_name) +{ + if (!res_2d || !res_2d->image) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): missing %s image\n", + __func__, plane_name); + return NULL; + } + + if (res_2d->bits_per_pixel == 0 || (res_2d->bits_per_pixel % 8) != 0) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid %s bpp %u\n", + __func__, plane_name, res_2d->bits_per_pixel); + return NULL; + } + + size_t bytes_per_pixel = res_2d->bits_per_pixel / 8; + uint32_t src_x = 0; + uint32_t src_y = 0; + uint32_t width = res_2d->width; + uint32_t height = res_2d->height; + if (scanout) { + /* Primary scanouts can expose only a sub-rectangle of the resource. + * Record that view before snapshotting it. + */ + src_x = scanout->src_x; + src_y = scanout->src_y; + width = scanout->src_w; + height = scanout->src_h; + } + + if (width == 0 || height == 0) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid %s size %ux%u\n", + __func__, plane_name, width, height); + return NULL; + } + + size_t row_bytes = (size_t) width * bytes_per_pixel; + if (row_bytes / width != bytes_per_pixel) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): %s row size overflow\n", + __func__, plane_name); + return NULL; + } + if (row_bytes > UINT32_MAX) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): %s row size exceeds uint32_t\n", + __func__, plane_name); + return NULL; + } + if (res_2d->stride < row_bytes) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): invalid %s stride %u for row size %zu\n", + __func__, plane_name, res_2d->stride, row_bytes); + return NULL; + } + + size_t pixels_size = row_bytes * height; + if (pixels_size / height != row_bytes) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): %s image size overflow\n", + __func__, plane_name); + return NULL; + } + + size_t alloc_size = sizeof(struct 
vgpu_display_payload) + pixels_size; + if (alloc_size < pixels_size) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): %s allocation overflow\n", + __func__, plane_name); + return NULL; + } + + struct vgpu_display_payload *payload = malloc(alloc_size); + if (!payload) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): failed to allocate %s snapshot\n", + __func__, plane_name); + return NULL; + } + + payload->cpu.format = res_2d->format; + payload->cpu.width = width; + payload->cpu.height = height; + payload->cpu.stride = (uint32_t) row_bytes; + payload->cpu.bits_per_pixel = res_2d->bits_per_pixel; + payload->cpu.pixels = (uint8_t *) (payload + 1); + + /* The cropped view is contiguous only when the source stride matches this + * snapshot's row size. Otherwise each source row still carries padding or + * untouched pixels outside the requested view, so the snapshot must be + * packed row by row. + */ + const uint8_t *src_pixels = (const uint8_t *) res_2d->image + + (size_t) src_y * res_2d->stride + + (size_t) src_x * bytes_per_pixel; + if (res_2d->stride == row_bytes) { + memcpy(payload->cpu.pixels, src_pixels, pixels_size); + } else { + for (uint32_t y = 0; y < height; y++) { + memcpy(payload->cpu.pixels + (size_t) y * row_bytes, + src_pixels + (size_t) y * res_2d->stride, row_bytes); + } + } + + return payload; +} + +/* Backend Implementation */ +static void vgpu_sw_reset(virtio_gpu_state_t *vgpu) +{ + /* The display queue may still hold older 'PRIMARY_SET' / 'CURSOR_SET' + * frames published before this reset. Publishing 'CLEAR' advances the + * per-plane generation; 'vgpu_display_pop_cmd()' consumes those clears + * first, then drops older queued frame commands as stale. + * + * Queued frame payloads are deep copies, so destroying resources after the + * clear publication cannot dangle any display payload still in the bridge. + * The display queue is SPSC and consumer-owned, so reset does not drain it + * from the producer side. 
The bounded queue releases stale payloads when + * the SDL consumer pops them. + */ + for (uint32_t i = 0; i < PRIV(vgpu)->num_scanouts; i++) { + PRIV(vgpu)->scanouts[i].primary_resource_id = 0; + PRIV(vgpu)->scanouts[i].cursor_resource_id = 0; + PRIV(vgpu)->scanouts[i].src_x = 0; + PRIV(vgpu)->scanouts[i].src_y = 0; + PRIV(vgpu)->scanouts[i].src_w = 0; + PRIV(vgpu)->scanouts[i].src_h = 0; + vgpu_display_publish_primary_clear(i); + vgpu_display_publish_cursor_clear(i); + } + + struct list_head *curr, *next; + list_for_each_safe (curr, next, &g_vgpu_sw_res_2d_list) { + struct vgpu_sw_resource_2d *res_2d = + list_entry(curr, struct vgpu_sw_resource_2d, list); + + vgpu_sw_destroy_resource_2d(res_2d); + } +} + +static void vgpu_sw_resource_create_2d_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen) +{ + const struct virtq_desc *response_desc = vgpu_sw_get_response_desc( + vq_desc, sizeof(struct virtio_gpu_ctrl_hdr), plen); + if (!response_desc) + return; + + struct virtio_gpu_res_create_2d *request = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_res_create_2d)); + if (!request) { + virtio_gpu_set_fail(vgpu); + *plen = 0; + return; + } + + /* Keep 'resource_id' 0 unavailable for real resources. The virtio spec + * explicitly documents 'resource_id = 0' as the 'SET_SCANOUT' disable + * sentinel. + * The Linux virtio-gpu driver also allocates guest-generated resource IDs + * as 'handle + 1', so they are always greater than 0. See + * 'virtgpu_object.c' for details. 
+ */ + if (request->resource_id == 0) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): resource id should not be 0\n", + __func__); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID); + return; + } + + if (request->width == 0 || request->height == 0) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): invalid resource size %ux%u\n", + __func__, request->width, request->height); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER); + return; + } + + /* Reject re-use of an already-live resource id. Without this check the + * guest could orphan the previous resource (its 'image' and 'iovec' would + * leak because 'vgpu_sw_get_resource_2d()' returns the first match) and + * confuse later 'TRANSFER' / 'FLUSH' / 'UNREF' requests that target the + * same id. Spec explicitly allows the device to fail this. + */ + if (vgpu_sw_get_resource_2d(request->resource_id)) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): resource id %u already in use\n", + __func__, request->resource_id); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID); + return; + } + + /* Create 2D resource */ + struct vgpu_sw_resource_2d *res_2d = calloc(1, sizeof(*res_2d)); + if (!res_2d) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): failed to allocate new resource\n", + __func__); + virtio_gpu_set_fail(vgpu); + *plen = 0; + return; + } + res_2d->resource_id = request->resource_id; + + /* The software backend currently supports only 32bpp packed formats. 
*/ + uint32_t bits_per_pixel; + switch (request->format) { + case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM: + case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM: + case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM: + case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM: + case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM: + case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM: + case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM: + case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM: + bits_per_pixel = 32; + break; + default: + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): unsupported format %u\n", + __func__, request->format); + free(res_2d); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER); + return; + } + + /* Set 2D resource */ + res_2d->width = request->width; + res_2d->height = request->height; + res_2d->format = request->format; + res_2d->bits_per_pixel = bits_per_pixel; + + /* Compute the row stride in a wider type first, then narrow it only after + * checking the final byte count still fits in 'uint32_t'. Otherwise a large + * guest width could wrap during the intermediate multiplication and leave + * a truncated stride in the resource. + */ + size_t stride = + (((size_t) res_2d->width * res_2d->bits_per_pixel + 0x1f) >> 5) * + sizeof(uint32_t); + if (stride > UINT32_MAX) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): stride overflow (%u x %u bpp)\n", + __func__, res_2d->width, res_2d->bits_per_pixel); + free(res_2d); + *plen = + virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY); + return; + } + res_2d->stride = (uint32_t) stride; + + /* Guard against integer overflow in image buffer allocation. + * Both 'stride' and 'height' are guest-controlled 'uint32_t' values whose + * product can silently wrap around in 32-bit arithmetic, resulting in + * an undersized 'malloc()' while later transfers write to the full extent. 
+ */ + size_t image_size = (size_t) res_2d->stride * res_2d->height; + if (res_2d->height && image_size / res_2d->height != res_2d->stride) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): image size overflow (%u x %u)\n", + __func__, res_2d->width, res_2d->height); + free(res_2d); + *plen = + virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY); + return; + } + + if (image_size > VGPU_SW_MAX_HOSTMEM || + g_vgpu_sw_hostmem > VGPU_SW_MAX_HOSTMEM - image_size) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): image memory limit exceeded (%zu bytes)\n", + __func__, image_size); + free(res_2d); + *plen = + virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY); + return; + } + + res_2d->image = calloc(1, image_size); + + /* Failed to create image buffer */ + if (!res_2d->image) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): failed to allocate image buffer (%zu bytes)\n", + __func__, image_size); + free(res_2d); + virtio_gpu_set_fail(vgpu); + *plen = 0; + return; + } + res_2d->image_size = image_size; + g_vgpu_sw_hostmem += image_size; + list_push(&res_2d->list, &g_vgpu_sw_res_2d_list); + + *plen = virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_OK_NODATA); +} + +static void vgpu_sw_cmd_resource_unref_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen) +{ + const struct virtq_desc *response_desc = vgpu_sw_get_response_desc( + vq_desc, sizeof(struct virtio_gpu_ctrl_hdr), plen); + if (!response_desc) + return; + + struct virtio_gpu_res_unref *request = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_res_unref)); + if (!request) { + virtio_gpu_set_fail(vgpu); + *plen = 0; + return; + } + + struct vgpu_sw_resource_2d *res_2d = + vgpu_sw_get_resource_2d(request->resource_id); + if (!res_2d) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): resource unref references invalid 
resource id %u\n", + __func__, request->resource_id); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID); + return; + } + + /* Clear any visible plane using this resource before it is freed. */ + for (uint32_t i = 0; i < PRIV(vgpu)->num_scanouts; i++) { + struct virtio_gpu_scanout_info *scanout = &PRIV(vgpu)->scanouts[i]; + + if (!scanout->enabled) + continue; + + if (scanout->primary_resource_id == request->resource_id) { + scanout->primary_resource_id = 0; + scanout->src_x = scanout->src_y = 0; + scanout->src_w = scanout->src_h = 0; + vgpu_display_publish_primary_clear(i); + } + + if (scanout->cursor_resource_id == request->resource_id) { + scanout->cursor_resource_id = 0; + vgpu_display_publish_cursor_clear(i); + } + } + + vgpu_sw_destroy_resource_2d(res_2d); + + *plen = virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_OK_NODATA); +} + +static void vgpu_sw_cmd_set_scanout_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen) +{ + const struct virtq_desc *response_desc = vgpu_sw_get_response_desc( + vq_desc, sizeof(struct virtio_gpu_ctrl_hdr), plen); + if (!response_desc) + return; + + struct virtio_gpu_set_scanout *request = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_set_scanout)); + if (!request) { + virtio_gpu_set_fail(vgpu); + *plen = 0; + return; + } + + struct virtio_gpu_scanout_info *scanout = + vgpu_sw_get_scanout(vgpu, request->scanout_id); + if (!scanout) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid scanout id %u\n", + __func__, request->scanout_id); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID); + return; + } + + /* Keep 'resource_id' 0 unavailable for real resources. The virtio spec + * explicitly documents 'resource_id = 0' as the 'SET_SCANOUT' disable + * sentinel. 
+ * The Linux virtio-gpu driver also allocates guest-generated resource IDs + * as 'handle + 1', so they are always greater than 0. See + * 'virtgpu_object.c' for details. + */ + if (request->resource_id == 0) { + scanout->primary_resource_id = 0; + scanout->src_x = scanout->src_y = 0; + scanout->src_w = scanout->src_h = 0; + vgpu_display_publish_primary_clear(request->scanout_id); + goto leave; + } + + /* Retrieve 2D resource */ + struct vgpu_sw_resource_2d *res_2d = + vgpu_sw_get_resource_2d(request->resource_id); + if (!res_2d) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid resource id %u\n", + __func__, request->resource_id); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID); + return; + } + + /* Validate that the source rectangle fits within the resource without + * relying on wrapping 32-bit additions. + */ + if (!vgpu_sw_rect_fits(res_2d->width, res_2d->height, &request->r)) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): source rect %u,%u %ux%u exceeds resource %ux%u\n", + __func__, request->r.x, request->r.y, request->r.width, + request->r.height, res_2d->width, res_2d->height); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER); + return; + } + + /* The source rectangle is displayed into this scanout, view size is bounded + * by the advertised scanout size. 
+     */
+    if (request->r.width > scanout->width ||
+        request->r.height > scanout->height) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): source rect %ux%u exceeds scanout %ux%u\n",
+                __func__, request->r.width, request->r.height, scanout->width,
+                scanout->height);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &request->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+        return;
+    }
+
+    /* Bind scanout with resource and record the source rectangle */
+    scanout->primary_resource_id = res_2d->resource_id;
+    scanout->src_x = request->r.x;
+    scanout->src_y = request->r.y;
+    scanout->src_w = request->r.width;
+    scanout->src_h = request->r.height;
+
+leave:
+    *plen = virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc,
+                                           VIRTIO_GPU_RESP_OK_NODATA);
+}
+
+/* Handle 'RESOURCE_FLUSH'.
+ *
+ * Validates the request header, the resource ID, and the flush rectangle,
+ * then publishes a new primary-plane payload for every enabled scanout whose
+ * 'primary_resource_id' equals the flushed resource. A malformed request marks
+ * the device failed and reports '*plen = 0'; every other path stores the
+ * length returned by 'virtio_gpu_write_ctrl_response()' in '*plen'.
+ */
+static void vgpu_sw_cmd_resource_flush_handler(virtio_gpu_state_t *vgpu,
+                                               struct virtq_desc *vq_desc,
+                                               uint32_t *plen)
+{
+    /* Without a response descriptor nothing can be reported back. */
+    const struct virtq_desc *response_desc = vgpu_sw_get_response_desc(
+        vq_desc, sizeof(struct virtio_gpu_ctrl_hdr), plen);
+    if (!response_desc)
+        return;
+
+    struct virtio_gpu_res_flush *request = virtio_gpu_get_request(
+        vgpu, vq_desc, sizeof(struct virtio_gpu_res_flush));
+    if (!request) {
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+
+    /* Retrieve 2D resource */
+    struct vgpu_sw_resource_2d *res_2d =
+        vgpu_sw_get_resource_2d(request->resource_id);
+    if (!res_2d) {
+        fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid resource id %u\n",
+                __func__, request->resource_id);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &request->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID);
+        return;
+    }
+
+    /* The flush rectangle must lie inside the resource. */
+    if (!vgpu_sw_rect_fits(res_2d->width, res_2d->height, &request->r)) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): invalid flush rect %u,%u %ux%u for resource %u size "
+                "%ux%u\n",
+                __func__, request->r.x, request->r.y, request->r.width,
+                request->r.height, request->resource_id, res_2d->width,
+                res_2d->height);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &request->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+        return;
+    }
+
+    /* Flush the resource to every scanout currently bound to it, using the
+     * source rectangle recorded by 'SET_SCANOUT' to display only the requested
+     * sub-region of the resource.
+     */
+    for (uint32_t i = 0; i < PRIV(vgpu)->num_scanouts; i++) {
+        struct virtio_gpu_scanout_info *scanout = &PRIV(vgpu)->scanouts[i];
+
+        if (!scanout->enabled ||
+            scanout->primary_resource_id != request->resource_id)
+            continue;
+
+        /* Keep the producer non-blocking: if the display queue is full or
+         * snapshot allocation fails below, this flush frame for scanout 'i' is
+         * dropped and the frontend keeps showing its previous published frame.
+         */
+        if (!vgpu_display_can_publish())
+            continue;
+
+        struct vgpu_display_payload *payload =
+            vgpu_sw_create_window_payload(res_2d, scanout, "primary");
+        if (!payload)
+            continue;
+
+        /* The publish path snapshots the whole 'SET_SCANOUT' view for this
+         * scanout. 'request->r' is not used here to further trim the payload
+         * for now.
+         */
+        vgpu_display_publish_primary_set(i, payload);
+    }
+
+    /* Dropped frames are not errors: the flush itself still succeeds. */
+    *plen = virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc,
+                                           VIRTIO_GPU_RESP_OK_NODATA);
+}
+
+/* Handle 'TRANSFER_TO_HOST_2D': copy guest backing pages into the host-side
+ * 2D resource after validating the resource ID, that backing is attached, and
+ * that both the destination rectangle and the source range are in bounds.
+ */
+static void vgpu_sw_cmd_transfer_to_host_2d_handler(virtio_gpu_state_t *vgpu,
+                                                    struct virtq_desc *vq_desc,
+                                                    uint32_t *plen)
+{
+    const struct virtq_desc *response_desc = vgpu_sw_get_response_desc(
+        vq_desc, sizeof(struct virtio_gpu_ctrl_hdr), plen);
+    if (!response_desc)
+        return;
+
+    struct virtio_gpu_trans_to_host_2d *req = virtio_gpu_get_request(
+        vgpu, vq_desc, sizeof(struct virtio_gpu_trans_to_host_2d));
+    if (!req) {
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+
+    /* Retrieve 2D resource */
+    struct vgpu_sw_resource_2d *res_2d =
+        vgpu_sw_get_resource_2d(req->resource_id);
+    if (!res_2d) {
+        fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid resource id %u\n",
+                __func__, req->resource_id);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &req->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID);
+        return;
+    }
+
+    /* Check if backing has been attached */
+    if (!res_2d->iovec) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): backing not attached for resource %u\n",
+                __func__, req->resource_id);
+        *plen = virtio_gpu_write_ctrl_response(vgpu, &req->hdr, response_desc,
+                                               VIRTIO_GPU_RESP_ERR_UNSPEC);
+        return;
+    }
+
+    /* Validate that the destination rectangle fits within the resource
+     * without relying on wrapping 32-bit additions. Mirrors the check in
+     * 'vgpu_sw_cmd_set_scanout_handler()'.
+     */
+    if (!vgpu_sw_rect_fits(res_2d->width, res_2d->height, &req->r)) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): invalid transfer rect %u,%u %ux%u for resource %u size "
+                "%ux%u\n",
+                __func__, req->r.x, req->r.y, req->r.width, req->r.height,
+                req->resource_id, res_2d->width, res_2d->height);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &req->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+        return;
+    }
+
+    if (!vgpu_sw_transfer_source_fits(req, res_2d)) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX "%s(): transfer source exceeds backing\n",
+                __func__);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &req->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+        return;
+    }
+
+    /* Transfer frame data from guest to host */
+    if (!vgpu_sw_copy_image_from_pages(req, res_2d)) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): incomplete transfer from backing\n",
+                __func__);
+        *plen = virtio_gpu_write_ctrl_response(vgpu, &req->hdr, response_desc,
+                                               VIRTIO_GPU_RESP_ERR_UNSPEC);
+        return;
+    }
+
+    *plen = virtio_gpu_write_ctrl_response(vgpu, &req->hdr, response_desc,
+                                           VIRTIO_GPU_RESP_OK_NODATA);
+}
+
+/* Handle 'RESOURCE_ATTACH_BACKING'.
+ *
+ * 'vq_desc[0]' carries the request header and 'vq_desc[1]' the array of
+ * 'struct virtio_gpu_mem_entry' items. Each entry is translated into one
+ * host-side 'iovec' stored on the 2D resource.
+ *
+ * Invariant kept on every exit path: 'res_2d->iovec' and 'res_2d->page_cnt'
+ * are either both set (success) or NULL/0 (no backing attached).
+ *
+ * Malformed descriptors, an unmappable entry table, and allocation failure
+ * mark the device failed with '*plen = 0'; all other errors are reported to
+ * the guest through a control response.
+ */
+static void vgpu_sw_cmd_resource_attach_backing_handler(
+    virtio_gpu_state_t *vgpu,
+    struct virtq_desc *vq_desc,
+    uint32_t *plen)
+{
+    const struct virtq_desc *response_desc = vgpu_sw_get_response_desc(
+        vq_desc, sizeof(struct virtio_gpu_ctrl_hdr), plen);
+    if (!response_desc)
+        return;
+
+    struct virtio_gpu_res_attach_backing *backing_info = virtio_gpu_get_request(
+        vgpu, vq_desc, sizeof(struct virtio_gpu_res_attach_backing));
+    if (!backing_info) {
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+
+    /* The entry table is input data, so it must be device-readable. */
+    if (vq_desc[1].flags & VIRTIO_DESC_F_WRITE) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): backing entries descriptor is writable\n",
+                __func__);
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+
+    if (backing_info->nr_entries == 0 ||
+        backing_info->nr_entries > VGPU_SW_MAX_BACKING_ENTRIES) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX "%s(): invalid backing entry count %u\n",
+                __func__, backing_info->nr_entries);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &backing_info->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+        return;
+    }
+
+    /* The entry cap above keeps 'entries_size' small. semu currently targets
+     * 64-bit hosts, so this path does not guard for 32-bit host overflow yet.
+     */
+    size_t entries_size =
+        sizeof(struct virtio_gpu_mem_entry) * backing_info->nr_entries;
+
+    if (vq_desc[1].len < entries_size) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): backing entries descriptor too small\n",
+                __func__);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &backing_info->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+        return;
+    }
+
+    struct virtio_gpu_mem_entry *pages = virtio_gpu_mem_guest_to_host(
+        vgpu, vq_desc[1].addr, (uint32_t) entries_size);
+    if (!pages) {
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+
+    /* Retrieve 2D resource */
+    struct vgpu_sw_resource_2d *res_2d =
+        vgpu_sw_get_resource_2d(backing_info->resource_id);
+    if (!res_2d) {
+        fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid resource id %u\n",
+                __func__, backing_info->resource_id);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &backing_info->hdr, response_desc,
+            VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID);
+        return;
+    }
+
+    /* Check if backing is already attached */
+    if (res_2d->iovec) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX
+                "%s(): backing already attached for resource %u\n",
+                __func__, backing_info->resource_id);
+        *plen = virtio_gpu_write_ctrl_response(vgpu, &backing_info->hdr,
+                                               response_desc,
+                                               VIRTIO_GPU_RESP_ERR_UNSPEC);
+        return;
+    }
+
+    /* Dispatch page memories to the 2D resource. 'page_cnt' is recorded only
+     * after the allocation succeeds so a failed 'malloc()' cannot leave a
+     * non-zero page count paired with a NULL 'iovec'.
+     */
+    res_2d->iovec = malloc(sizeof(struct iovec) * backing_info->nr_entries);
+    if (!res_2d->iovec) {
+        fprintf(stderr,
+                VIRTIO_GPU_LOG_PREFIX "%s(): failed to allocate io vector\n",
+                __func__);
+        res_2d->page_cnt = 0;
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+    res_2d->page_cnt = backing_info->nr_entries;
+
+    /* Convert each guest-provided backing entry into one host-side 'iovec'. */
+    for (size_t i = 0; i < backing_info->nr_entries; i++) {
+        /* Reject addresses that would be truncated by the 32-bit cast below. */
+        if (pages[i].addr > UINT32_MAX) {
+            fprintf(stderr,
+                    VIRTIO_GPU_LOG_PREFIX "%s(): page %zu addr_high non-zero\n",
+                    __func__, i);
+            free(res_2d->iovec);
+            res_2d->iovec = NULL;
+            res_2d->page_cnt = 0;
+            *plen = virtio_gpu_write_ctrl_response(
+                vgpu, &backing_info->hdr, response_desc,
+                VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+            return;
+        }
+
+        /* Attach address and length of i-th page to the 2D resource. */
+        res_2d->iovec[i].iov_base = virtio_gpu_mem_guest_to_host(
+            vgpu, (uint32_t) pages[i].addr, pages[i].length);
+        res_2d->iovec[i].iov_len = pages[i].length;
+
+        /* Corrupted page address */
+        if (!res_2d->iovec[i].iov_base) {
+            fprintf(stderr,
+                    VIRTIO_GPU_LOG_PREFIX
+                    "%s(): backing entry %zu guest address 0x%llx length %u "
+                    "is out of guest RAM\n",
+                    __func__, i, (unsigned long long) pages[i].addr,
+                    pages[i].length);
+            free(res_2d->iovec);
+            res_2d->iovec = NULL;
+            res_2d->page_cnt = 0;
+            *plen = virtio_gpu_write_ctrl_response(
+                vgpu, &backing_info->hdr, response_desc,
+                VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER);
+            return;
+        }
+    }
+
+    *plen = virtio_gpu_write_ctrl_response(
+        vgpu, &backing_info->hdr, response_desc, VIRTIO_GPU_RESP_OK_NODATA);
+}
+
+/* Handle 'RESOURCE_DETACH_BACKING': drop the 'iovec' array previously
+ * attached to the 2D resource.
+ */
+static void vgpu_sw_cmd_resource_detach_backing_handler(
+    virtio_gpu_state_t *vgpu,
+    struct virtq_desc *vq_desc,
+    uint32_t *plen)
+{
+    const struct virtq_desc *response_desc = vgpu_sw_get_response_desc(
+        vq_desc, sizeof(struct virtio_gpu_ctrl_hdr), plen);
+    if (!response_desc)
+        return;
+
+    struct virtio_gpu_res_detach_backing *request = virtio_gpu_get_request(
+        vgpu, vq_desc, sizeof(struct virtio_gpu_res_detach_backing));
+    if (!request) {
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+
+    /* Retrieve 2D resource */
+    struct vgpu_sw_resource_2d *res_2d =
+
vgpu_sw_get_resource_2d(request->resource_id); + + if (!res_2d) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid resource id %u\n", + __func__, request->resource_id); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID); + return; + } + + /* Check if backing exists */ + if (!res_2d->iovec) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): no backing for resource %u\n", + __func__, request->resource_id); + *plen = virtio_gpu_write_ctrl_response( + vgpu, &request->hdr, response_desc, VIRTIO_GPU_RESP_ERR_UNSPEC); + return; + } + + /* Detach backing and free the 'iovec' array. */ + free(res_2d->iovec); + res_2d->iovec = NULL; + res_2d->page_cnt = 0; + + *plen = virtio_gpu_write_ctrl_response(vgpu, &request->hdr, response_desc, + VIRTIO_GPU_RESP_OK_NODATA); +} + +static int32_t vgpu_sw_decode_cursor_coord(uint32_t coord) +{ + /* Linux passes signed cursor plane 'crtc_x'/'crtc_y' through virtio-gpu's + * unsigned 32-bit wire fields. Decode that two's-complement value + * explicitly instead of relying on implementation-defined signed casts: + * values above 'INT32_MAX' represent negative coordinates, so subtract + * 2^32 to recover them, e.g. '0xffffffff' -> -1 and '0xfffffffe' -> -2. + */ + if (coord <= (uint32_t) INT32_MAX) + return (int32_t) coord; + return (int32_t) ((int64_t) coord - ((int64_t) UINT32_MAX + 1)); +} + +static void vgpu_sw_cmd_update_cursor_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen) +{ + struct virtio_gpu_update_cursor *cursor = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_update_cursor)); + if (!cursor) { + *plen = 0; + return; + } + + /* Normal cursorq commands have no response descriptor. Current Linux sends + * cursor buffers through 'virtio_gpu_queue_cursor()' without fencing and + * with a single out descriptor, so keep this path unfenced-only for now. 
+ * + * TODO: Support fenced cursor commands by handling a response descriptor, + * echoing the fence id, and auditing every cursor success/error path to + * emit a proper control response instead of len=0. + */ + if (cursor->hdr.flags & VIRTIO_GPU_FLAG_FENCE) + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): fenced cursor command is unsupported\n", + __func__); + + struct virtio_gpu_scanout_info *scanout = + vgpu_sw_get_scanout(vgpu, cursor->pos.scanout_id); + if (!scanout) { + *plen = 0; + return; + } + + /* Keep 'resource_id' 0 unavailable for real resources. The virtio spec + * explicitly documents 'resource_id = 0' as the 'SET_SCANOUT' disable + * sentinel. + * The Linux virtio-gpu driver also allocates guest-generated resource IDs + * as 'handle + 1', so they are always greater than 0. See + * 'virtgpu_object.c' for details. + */ + if (cursor->resource_id == 0) { + scanout->cursor_resource_id = 0; + vgpu_display_publish_cursor_clear(cursor->pos.scanout_id); + *plen = 0; + return; + } + + /* Update cursor image */ + struct vgpu_sw_resource_2d *res_2d = + vgpu_sw_get_resource_2d(cursor->resource_id); + if (!res_2d) { + fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid resource id %u\n", + __func__, cursor->resource_id); + *plen = 0; + return; + } + + if (res_2d->width == 0 || res_2d->height == 0 || + res_2d->width > scanout->width || res_2d->height > scanout->height) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): invalid cursor size %ux%u\n", + __func__, res_2d->width, res_2d->height); + *plen = 0; + return; + } + + /* Cursor commands have no response. If publication would drop this frame, + * keep 'cursor_resource_id' unchanged because it tracks the cursor that is + * still visible and is used by RESOURCE_UNREF to decide whether to publish + * a clear. 
+ */ + if (!vgpu_display_can_publish()) { + *plen = 0; + return; + } + + struct vgpu_display_payload *payload = + vgpu_sw_create_window_payload(res_2d, NULL, "cursor"); + if (!payload) { + /* Allocation failure has the same visible result as a dropped + * publication: keep the old cursor binding. + */ + *plen = 0; + return; + } + scanout->cursor_resource_id = cursor->resource_id; + vgpu_display_publish_cursor_set(cursor->pos.scanout_id, payload, + vgpu_sw_decode_cursor_coord(cursor->pos.x), + vgpu_sw_decode_cursor_coord(cursor->pos.y), + cursor->hot_x, cursor->hot_y); + + *plen = 0; +} + +static void vgpu_sw_cmd_move_cursor_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen) +{ + struct virtio_gpu_update_cursor *cursor = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_update_cursor)); + if (!cursor) { + *plen = 0; + return; + } + + /* Normal cursorq commands have no response descriptor. Current Linux sends + * cursor buffers through 'virtio_gpu_queue_cursor()' without fencing and + * with a single out descriptor, so keep this path unfenced-only for now. + * + * TODO: Support fenced cursor commands by handling a response descriptor, + * echoing the fence id, and auditing every cursor success/error path to + * emit a proper control response instead of len=0. + */ + if (cursor->hdr.flags & VIRTIO_GPU_FLAG_FENCE) + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): fenced cursor command is unsupported\n", + __func__); + + if (!vgpu_sw_get_scanout(vgpu, cursor->pos.scanout_id)) { + *plen = 0; + return; + } + + /* Move cursor to new position */ + vgpu_display_publish_cursor_move( + cursor->pos.scanout_id, vgpu_sw_decode_cursor_coord(cursor->pos.x), + vgpu_sw_decode_cursor_coord(cursor->pos.y)); + + *plen = 0; +} + +/* The software backend supports only CPU-backed 2D scanout resources today. 
+ * Optional virtio-gpu features for capsets, resource UUIDs, blob resources,
+ * virgl/3D contexts, and blob mappings intentionally stay routed to
+ * 'VIRTIO_GPU_CMD_UNDEF' so unsupported guest paths fail explicitly.
+ *
+ * TODO: Implement these handlers after the feature bits, backend resource
+ * model, and display payload path grow matching virgl/blob support.
+ */
+const struct virtio_gpu_cmd_backend g_virtio_gpu_backend = {
+    /* Device lifecycle and display discovery. */
+    .reset = vgpu_sw_reset,
+    .get_display_info = virtio_gpu_get_display_info_handler,
+    /* 2D resource management implemented by this backend. */
+    .resource_create_2d = vgpu_sw_resource_create_2d_handler,
+    .resource_unref = vgpu_sw_cmd_resource_unref_handler,
+    .set_scanout = vgpu_sw_cmd_set_scanout_handler,
+    .resource_flush = vgpu_sw_cmd_resource_flush_handler,
+    .transfer_to_host_2d = vgpu_sw_cmd_transfer_to_host_2d_handler,
+    .resource_attach_backing = vgpu_sw_cmd_resource_attach_backing_handler,
+    .resource_detach_backing = vgpu_sw_cmd_resource_detach_backing_handler,
+    /* Capsets are not supported. */
+    .get_capset_info = VIRTIO_GPU_CMD_UNDEF,
+    .get_capset = VIRTIO_GPU_CMD_UNDEF,
+    .get_edid = virtio_gpu_get_edid_handler,
+    /* Resource UUIDs, blob resources, and virgl/3D contexts are not
+     * supported.
+     */
+    .resource_assign_uuid = VIRTIO_GPU_CMD_UNDEF,
+    .resource_create_blob = VIRTIO_GPU_CMD_UNDEF,
+    .set_scanout_blob = VIRTIO_GPU_CMD_UNDEF,
+    .ctx_create = VIRTIO_GPU_CMD_UNDEF,
+    .ctx_destroy = VIRTIO_GPU_CMD_UNDEF,
+    .ctx_attach_resource = VIRTIO_GPU_CMD_UNDEF,
+    .ctx_detach_resource = VIRTIO_GPU_CMD_UNDEF,
+    .resource_create_3d = VIRTIO_GPU_CMD_UNDEF,
+    .transfer_to_host_3d = VIRTIO_GPU_CMD_UNDEF,
+    .transfer_from_host_3d = VIRTIO_GPU_CMD_UNDEF,
+    .submit_3d = VIRTIO_GPU_CMD_UNDEF,
+    .resource_map_blob = VIRTIO_GPU_CMD_UNDEF,
+    .resource_unmap_blob = VIRTIO_GPU_CMD_UNDEF,
+    /* Cursor queue commands. */
+    .update_cursor = vgpu_sw_cmd_update_cursor_handler,
+    .move_cursor = vgpu_sw_cmd_move_cursor_handler,
+};
diff --git a/virtio-gpu.c b/virtio-gpu.c
new file mode 100644
index 00000000..35c23d2f
--- /dev/null
+++ b/virtio-gpu.c
@@ -0,0 +1,1184 @@
+/* NOTE(review): the header names of the eight '#include' lines below are
+ * missing from this copy of the patch (presumably angle-bracket system
+ * headers lost in extraction) -- restore them from the upstream file.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "device.h" +#include "riscv.h" +#include "riscv_private.h" +#include "utils.h" +#include "virtio-gpu.h" +#include "virtio.h" + +#define VIRTIO_GPU_CMD_TRACE_ENABLED 0 + +#define VIRTIO_F_VERSION_1 1 + +#define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) +#define VIRTIO_GPU_F_EDID (1 << 1) +#define VIRTIO_GPU_F_CONTEXT_INIT (1 << 4) + +#define VIRTIO_GPU_QUEUE_NUM_MAX 1024 +#define VIRTIO_GPU_QUEUE (vgpu->queues[vgpu->QueueSel]) +#define VIRTIO_GPU_CONTROLQ 0 +#define VIRTIO_GPU_CURSORQ 1 + +/* DMT usage macro */ +#define EDID_BLOCK_SIZE 128U +#define DMT_BASE_WIDTH 1024U +#define DMT_BASE_HEIGHT 768U +#define DMT_BASE_PIXEL_CLOCK_10KHZ 6500U +#define DMT_BASE_H_BLANK 320U +#define DMT_BASE_H_FRONT 24U +#define DMT_BASE_H_SYNC 136U +#define DMT_BASE_V_BLANK 38U +#define DMT_BASE_V_FRONT 3U +#define DMT_BASE_V_SYNC 6U +#define DMT_BOUND_FIELD(field, max) \ + do { \ + if ((field) > (max)) \ + (field) = (max); \ + } while (0) + +#define PRIV(x) ((virtio_gpu_data_t *) x->priv) + +#if VIRTIO_GPU_CMD_TRACE_ENABLED +#define VIRTIO_GPU_CMD_CASE(cmd, fn) \ + case VIRTIO_GPU_CMD_##cmd: \ + printf("(*) semu/virtio-gpu: %s\n", "VIRTIO_GPU_CMD_" #cmd); \ + g_virtio_gpu_backend.fn(vgpu, vq_desc, plen); \ + break; +#else +#define VIRTIO_GPU_CMD_CASE(cmd, fn) \ + case VIRTIO_GPU_CMD_##cmd: \ + g_virtio_gpu_backend.fn(vgpu, vq_desc, plen); \ + break; +#endif + +extern const struct virtio_gpu_cmd_backend g_virtio_gpu_backend; +static virtio_gpu_data_t virtio_gpu_data; + +void *virtio_gpu_mem_guest_to_host(virtio_gpu_state_t *vgpu, + uint32_t addr, + uint32_t size) +{ + if (addr >= RAM_SIZE || size > RAM_SIZE || addr + size > RAM_SIZE) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): guest address 0x%x size 0x%x out of bounds\n", + __func__, addr, size); + return NULL; + } + return (void *) ((uintptr_t) vgpu->ram + addr); +} + +void virtio_gpu_set_fail(virtio_gpu_state_t *vgpu) +{ + vgpu->Status |= VIRTIO_STATUS__DEVICE_NEEDS_RESET; + if (vgpu->Status & VIRTIO_STATUS__DRIVER_OK) + 
vgpu->InterruptStatus |= VIRTIO_INT__CONF_CHANGE; +} + +void *virtio_gpu_get_request(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + size_t request_size) +{ + if ((vq_desc[0].flags & VIRTIO_DESC_F_WRITE) || + vq_desc[0].len < request_size || request_size > UINT32_MAX) + return NULL; + + return virtio_gpu_mem_guest_to_host(vgpu, vq_desc[0].addr, + (uint32_t) request_size); +} + +int virtio_gpu_get_response_desc(struct virtq_desc *vq_desc, + int max_desc, + size_t response_size) +{ + if (response_size > UINT32_MAX) + return -1; + + /* This helper works with the current fixed-shape descriptor parser: + * 'vq_desc[0]' is the request, optional command data follows, and the + * first writable descriptor is the response buffer. A writable descriptor + * that is too small therefore means the expected response buffer is + * malformed; this helper does not skip it and search for a later writable + * descriptor. + * + * TODO: Support generic descriptor-chain parsing. + */ + for (int i = 1; i < max_desc; i++) { + if (!(vq_desc[i].flags & VIRTIO_DESC_F_WRITE)) + continue; + + if (vq_desc[i].len < response_size) + return -1; + + return i; + } + + return -1; +} + +uint32_t virtio_gpu_write_ctrl_response( + virtio_gpu_state_t *vgpu, + const struct virtio_gpu_ctrl_hdr *request, + const struct virtq_desc *response_desc, + uint32_t type) +{ + if (response_desc->len < sizeof(struct virtio_gpu_ctrl_hdr)) + return 0; + + struct virtio_gpu_ctrl_hdr *response = virtio_gpu_mem_guest_to_host( + vgpu, response_desc->addr, sizeof(struct virtio_gpu_ctrl_hdr)); + if (!response) + return 0; + + memset(response, 0, sizeof(*response)); + response->type = type; + + if (request->flags & VIRTIO_GPU_FLAG_FENCE) { + response->flags = VIRTIO_GPU_FLAG_FENCE; + response->fence_id = request->fence_id; + } + + return sizeof(*response); +} + +/* 'virtio_gpu' protocol handlers */ +void virtio_gpu_get_display_info_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen) +{ + 
struct virtio_gpu_ctrl_hdr *request = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_ctrl_hdr)); + if (!request) { + virtio_gpu_set_fail(vgpu); + *plen = 0; + return; + } + + int resp_idx = virtio_gpu_get_response_desc( + vq_desc, VIRTIO_GPU_MAX_DESC, sizeof(struct virtio_gpu_resp_disp_info)); + if (resp_idx < 0) { + *plen = 0; + return; + } + + struct virtio_gpu_resp_disp_info *response = virtio_gpu_mem_guest_to_host( + vgpu, vq_desc[resp_idx].addr, sizeof(struct virtio_gpu_resp_disp_info)); + if (!response) { + *plen = 0; + return; + } + + memset(response, 0, sizeof(*response)); + response->hdr.type = VIRTIO_GPU_RESP_OK_DISPLAY_INFO; + + /* 'GET_DISPLAY_INFO' exposes scanouts as the 'pmodes[]' array, so the array + * index is the guest-visible 'scanout_id' used by later requests such as + * 'SET_SCANOUT' and 'GET_EDID'. + * + * The spec describes 'pmodes[]' as per-scanout information but does not + * spell out this mapping as a separate rule. semu follows the implicit + * model where 'pmodes[i]' describes scanout ID 'i' because later requests + * only carry a 'scanout_id', and Linux does the same when it copies + * 'resp->pmodes[i]' into 'outputs[i]' and later sends 'output->index' in + * 'SET_SCANOUT'. See 'virtgpu_vq.c' and 'virtgpu_display.c' for more + * details. 
+ */ + int scanout_num = PRIV(vgpu)->num_scanouts; + for (int i = 0; i < scanout_num; i++) { + response->pmodes[i].r.width = PRIV(vgpu)->scanouts[i].width; + response->pmodes[i].r.height = PRIV(vgpu)->scanouts[i].height; + response->pmodes[i].enabled = PRIV(vgpu)->scanouts[i].enabled; + } + + *plen = sizeof(*response); + if (request->flags & VIRTIO_GPU_FLAG_FENCE) { + response->hdr.flags = VIRTIO_GPU_FLAG_FENCE; + response->hdr.fence_id = request->fence_id; + } +} + +static uint8_t virtio_gpu_generate_edid_checksum(uint8_t *edid, size_t size) +{ + /* Check EDID 1.4 Section 3.11, Table 3.40 notes 2 and 3: byte 7Fh must + * make the modulo-256 sum of all 128 base EDID bytes equal 00h. + */ + uint8_t sum = 0; + + for (size_t i = 0; i < size; i++) + sum += edid[i]; + + return 0x100 - sum; +} + +static uint16_t virtio_gpu_edid_pixels_to_mm(uint32_t pixels) +{ + /* Check EDID 1.4 Sections 3.6.2 and 3.10.2: base screen size is stored in + * centimeters, while detailed timing image size is stored in millimeters. + * Estimate virtual display size at 100 DPI. + */ + uint32_t mm = ((uint64_t) pixels * 254U + 500U) / 1000U; + + if (mm == 0) + mm = 1; + if (mm > 4095) + mm = 4095; + + return mm; +} + +static uint8_t virtio_gpu_edid_mm_to_cm(uint16_t mm) +{ + /* Check EDID 1.4 Section 3.6.2: base screen size fields are centimeters. */ + uint32_t cm = (mm + 5U) / 10U; + + if (cm == 0) + cm = 1; + if (cm > 255) + cm = 255; + + return cm; +} + +static void virtio_gpu_edid_set_srgb_chromaticity(uint8_t *edid) +{ + /* Check EDID 1.4 Section 3.7: sRGB chromaticity coordinates in EDID + * 10-bit fixed-point form, value = round(coordinate * 1024). The white + * point is D65. 
+     */
+    const uint16_t red_x = 655;   /* round(0.640 * 1024) */
+    const uint16_t red_y = 338;   /* round(0.330 * 1024) */
+    const uint16_t green_x = 307; /* round(0.300 * 1024) */
+    const uint16_t green_y = 614; /* round(0.600 * 1024) */
+    const uint16_t blue_x = 154;  /* round(0.150 * 1024) */
+    const uint16_t blue_y = 61;   /* round(0.060 * 1024) */
+    const uint16_t white_x = 320; /* round(0.313 * 1024) */
+    const uint16_t white_y = 337; /* round(0.329 * 1024) */
+
+    /* Bytes 25-26 hold the low 2 bits of each coordinate; bytes 27-34 hold
+     * the high 8 bits.
+     */
+    edid[25] = ((red_x & 0x3) << 6) | ((red_y & 0x3) << 4) |
+               ((green_x & 0x3) << 2) | (green_y & 0x3);
+    edid[26] = ((blue_x & 0x3) << 6) | ((blue_y & 0x3) << 4) |
+               ((white_x & 0x3) << 2) | (white_y & 0x3);
+    edid[27] = red_x >> 2;
+    edid[28] = red_y >> 2;
+    edid[29] = green_x >> 2;
+    edid[30] = green_y >> 2;
+    edid[31] = blue_x >> 2;
+    edid[32] = blue_y >> 2;
+    edid[33] = white_x >> 2;
+    edid[34] = white_y >> 2;
+}
+
+/* Fill one 18-byte detailed timing descriptor at 'desc'.
+ *
+ * 'width'/'height' are the active pixels/lines and 'width_mm'/'height_mm' the
+ * displayed image size. 1024x768 uses the fixed VESA DMT 60 Hz timing; any
+ * other size gets blanking, porch, and sync values scaled from that timing
+ * and a 60 Hz pixel clock derived from the resulting totals. All values are
+ * clamped to their descriptor field widths before being packed.
+ */
+static void virtio_gpu_edid_set_detailed_timing(uint8_t *desc,
+                                                uint32_t width,
+                                                uint32_t height,
+                                                uint16_t width_mm,
+                                                uint16_t height_mm)
+{
+    /* Check EDID 1.4 Section 3.10.2: detailed timing descriptor layout. */
+    uint32_t h_blank;           /* Horizontal blanking pixels. */
+    uint32_t h_front;           /* Horizontal front porch pixels. */
+    uint32_t h_sync;            /* Horizontal sync pulse width. */
+    uint32_t v_blank;           /* Vertical blanking lines. */
+    uint32_t v_front;           /* Vertical front porch lines. */
+    uint32_t v_sync;            /* Vertical sync pulse width. */
+    uint32_t pixel_clock_10khz; /* Pixel clock in 10 kHz units. */
+
+    if (width == DMT_BASE_WIDTH && height == DMT_BASE_HEIGHT) {
+        /* VESA DMT 1024x768@60Hz, also advertised in the base EDID established
+         * timings field. EDID stores pixel clock in 10 kHz units, so 6500
+         * means 65.00 MHz.
+         */
+        pixel_clock_10khz = DMT_BASE_PIXEL_CLOCK_10KHZ;
+        h_blank = DMT_BASE_H_BLANK;
+        h_front = DMT_BASE_H_FRONT;
+        h_sync = DMT_BASE_H_SYNC;
+        v_blank = DMT_BASE_V_BLANK;
+        v_front = DMT_BASE_V_FRONT;
+        v_sync = DMT_BASE_V_SYNC;
+    } else {
+        /* Fallback only for future multi-mode or non-default scanouts. The
+         * current machine registers one 1024x768 scanout, so this path is not
+         * reachable in the default build. Scale porch/sync proportions from
+         * the VESA DMT 1024x768@60Hz timing instead of inventing ad hoc
+         * ratios.
+         */
+        /* Each scaled value is rounded to nearest by adding half the divisor. */
+        h_blank = ((uint64_t) width * DMT_BASE_H_BLANK + DMT_BASE_WIDTH / 2U) /
+                  DMT_BASE_WIDTH;
+        h_front = ((uint64_t) width * DMT_BASE_H_FRONT + DMT_BASE_WIDTH / 2U) /
+                  DMT_BASE_WIDTH;
+        h_sync = ((uint64_t) width * DMT_BASE_H_SYNC + DMT_BASE_WIDTH / 2U) /
+                 DMT_BASE_WIDTH;
+        if (h_front == 0)
+            h_front = 1;
+        if (h_sync == 0)
+            h_sync = 1;
+        if (h_blank <= h_front + h_sync) {
+            /* Keep front porch and sync pulse inside the blanking interval so
+             * the remaining pixels form the back porch.
+             */
+            h_blank = h_front + h_sync + 1U;
+        }
+
+        v_blank =
+            ((uint64_t) height * DMT_BASE_V_BLANK + DMT_BASE_HEIGHT / 2U) /
+            DMT_BASE_HEIGHT;
+        v_front =
+            ((uint64_t) height * DMT_BASE_V_FRONT + DMT_BASE_HEIGHT / 2U) /
+            DMT_BASE_HEIGHT;
+        v_sync = ((uint64_t) height * DMT_BASE_V_SYNC + DMT_BASE_HEIGHT / 2U) /
+                 DMT_BASE_HEIGHT;
+        if (v_front == 0)
+            v_front = 1;
+        if (v_sync == 0)
+            v_sync = 1;
+        if (v_blank <= v_front + v_sync)
+            v_blank = v_front + v_sync + 1U;
+
+        /* Pixel clock = refresh rate * horizontal total * vertical total.
+         * Divide by 10000 because the descriptor stores the clock in 10 kHz
+         * units. The +5000 rounds to the nearest 10 kHz.
+         */
+        pixel_clock_10khz = (60U * ((uint64_t) width + h_blank) *
+                                 ((uint64_t) height + v_blank) +
+                             5000U) /
+                            10000U;
+        if (pixel_clock_10khz > 0xffffU)
+            pixel_clock_10khz = 0xffffU;
+    }
+
+    /* Clamp fields to the bit widths defined by Table 3.21:
+     * active/blanking/image-size fields are 12-bit, horizontal sync fields are
+     * 10-bit, and vertical sync fields are 6-bit.
+     */
+    DMT_BOUND_FIELD(width, 4095U);
+    DMT_BOUND_FIELD(height, 4095U);
+    DMT_BOUND_FIELD(h_blank, 4095U);
+    DMT_BOUND_FIELD(h_front, 1023U);
+    DMT_BOUND_FIELD(h_sync, 1023U);
+    DMT_BOUND_FIELD(v_blank, 4095U);
+    DMT_BOUND_FIELD(v_front, 63U);
+    DMT_BOUND_FIELD(v_sync, 63U);
+
+    /* Bytes 0-1: pixel clock, little-endian, in 10 kHz units. */
+    desc[0] = pixel_clock_10khz & 0xff;
+    desc[1] = (pixel_clock_10khz >> 8) & 0xff;
+
+    /* Bytes 2-4: horizontal active and blanking, each split as low 8 bits plus
+     * high 4 bits packed into byte 4.
+     */
+    desc[2] = width & 0xff;
+    desc[3] = h_blank & 0xff;
+    desc[4] = ((width >> 8) << 4) | (h_blank >> 8);
+
+    /* Bytes 5-7: vertical active and blanking, using the same 12-bit packing
+     * pattern as the horizontal fields.
+     */
+    desc[5] = height & 0xff;
+    desc[6] = v_blank & 0xff;
+    desc[7] = ((height >> 8) << 4) | (v_blank >> 8);
+
+    /* Bytes 8-11: sync offsets and pulse widths. Horizontal fields are 10-bit;
+     * vertical fields are 6-bit and share byte 10 for their low nibbles.
+     */
+    desc[8] = h_front & 0xff;
+    desc[9] = h_sync & 0xff;
+    desc[10] = ((v_front & 0xf) << 4) | (v_sync & 0xf);
+    desc[11] = ((h_front >> 8) << 6) | ((h_sync >> 8) << 4) |
+               ((v_front >> 4) << 2) | (v_sync >> 4);
+
+    /* Bytes 12-14: displayed image size in millimeters, again as two 12-bit
+     * fields packed as low 8 bits plus high 4 bits.
+     */
+    desc[12] = width_mm & 0xff;
+    desc[13] = height_mm & 0xff;
+    desc[14] = ((width_mm >> 8) << 4) | (height_mm >> 8);
+
+    /* Bytes 15-16: horizontal and vertical border, unused for this display. */
+    desc[15] = 0;
+    desc[16] = 0;
+
+    /* Byte 17: non-interlaced, no stereo, digital separate sync, negative H/V
+     * sync polarity.
+     */
+    desc[17] = 0x18;
+}
+
+/* EDID data follows "VESA ENHANCED EXTENDED DISPLAY IDENTIFICATION DATA
+ * STANDARD" (defines EDID Structure Version 1, Revision 4).
+ *
+ * Writes one 128-byte base EDID block for a 'width' x 'height' display into
+ * 'edid'. A zero dimension falls back to 'SCREEN_WIDTH'/'SCREEN_HEIGHT'.
+ */
+static void virtio_gpu_generate_edid(uint8_t *edid,
+                                     uint32_t width,
+                                     uint32_t height)
+{
+    /* Check EDID 1.4 Section 3.1: base EDID block layout. */
+    if (width == 0)
+        width = SCREEN_WIDTH;
+    if (height == 0)
+        height = SCREEN_HEIGHT;
+
+    uint16_t width_mm = virtio_gpu_edid_pixels_to_mm(width);
+    uint16_t height_mm = virtio_gpu_edid_pixels_to_mm(height);
+
+    memset(edid, 0, EDID_BLOCK_SIZE);
+
+    /* Check EDID 1.4 Section 3.3: EDID header. */
+    edid[0] = 0x00;
+    edid[1] = 0xff;
+    edid[2] = 0xff;
+    edid[3] = 0xff;
+    edid[4] = 0xff;
+    edid[5] = 0xff;
+    edid[6] = 0xff;
+    edid[7] = 0x00;
+
+    /* Check EDID 1.4 Section 3.4.1: ID Manufacturer Name, stored as a
+     * 3-character PNPID in 5-bit compressed ASCII.
+     */
+    char manufacture[3] = {'T', 'W', 'N'};
+
+    /* Vendor ID uses 2 bytes to store 3 characters, where 'A' starts as 1 */
+    uint16_t vendor_id = ((((manufacture[0] - '@') & 0b11111) << 10) |
+                          (((manufacture[1] - '@') & 0b11111) << 5) |
+                          (((manufacture[2] - '@') & 0b11111) << 0));
+    /* Convert vendor ID to big-endian order */
+    edid[8] = vendor_id >> 8;
+    edid[9] = vendor_id & 0xff;
+
+    /* Check EDID 1.4 Sections 3.4.2 and 3.4.3: product code and serial
+     * number, all zeros if unused.
+     */
+    memset(&edid[10], 0, sizeof(uint16_t) + sizeof(uint32_t));
+
+    /* Check EDID 1.4 Section 3.4.4: week of manufacture, 0 if unused. */
+    edid[16] = 0;
+    /* Check EDID 1.4 Section 3.4.4: year of manufacture starts from 1990. */
+    edid[17] = 2023 - 1990;
+
+    /* Check EDID 1.4 Section 3.5: version 1, revision 4. */
+    edid[18] = 1; /* Version number */
+    edid[19] = 4; /* Revision number */
+
+    /* Check EDID 1.4 Section 3.6.1: video input definition.
 */
+    uint8_t signal_interface = 0b1 << 7;  /* digital */
+    uint8_t color_bit_depth = 0b010 << 4; /* 8 bits per primary color */
+    uint8_t interface_type = 0b101;       /* DisplayPort is supported */
+    edid[20] = signal_interface | color_bit_depth | interface_type;
+
+    /* Check EDID 1.4 Section 3.6.2: screen size or aspect ratio. */
+    edid[21] = virtio_gpu_edid_mm_to_cm(width_mm);
+    edid[22] = virtio_gpu_edid_mm_to_cm(height_mm);
+
+    /* Check EDID 1.4 Section 3.6.3: gamma value. */
+    edid[23] = 120; /* 2.20 */
+
+    /* Check EDID 1.4 Section 3.6.4: feature support. */
+    uint8_t power_management = 0 << 4; /* standby, suspend and active-off
+                                        * modes are not supported
+                                        */
+    uint8_t color_type = 0 << 3;       /* RGB 4:4:4 */
+    uint8_t other_flags = 0b110;       /* [2]: sRGB as default color space
+                                        * [1]: Preferred timing mode with
+                                        *      native format
+                                        * [0]: Non-continuous frequency
+                                        */
+    edid[24] = power_management | color_type | other_flags;
+
+    virtio_gpu_edid_set_srgb_chromaticity(edid);
+
+    /* Check EDID 1.4 Section 3.8: established timings. These are the default
+     * timings defined by the VESA. Each bit represents 1 configuration. For
+     * now, we enable the timing configurations of 1024x768@60Hz only.
+     */
+    edid[35] = 0b00000000;
+    edid[36] = (width == DMT_BASE_WIDTH && height == DMT_BASE_HEIGHT)
+                   ? 0b00001000
+                   : 0b00000000;
+    edid[37] = 0b00000000;
+
+    /* Check EDID 1.4 Section 3.9: standard timings. The 16 bytes from
+     * edid[38] to edid[53] hold eight 2-byte timing identifiers. Mark every
+     * standard timing slot unused.
+     */
+    memset(&edid[38], 0x01, 16);
+
+    /* Check EDID 1.4 Sections 3.10.1 and 3.10.2: first detailed timing
+     * descriptor is the preferred timing mode, here the native scanout mode at
+     * 60Hz.
+     */
+    virtio_gpu_edid_set_detailed_timing(&edid[54], width, height, width_mm,
+                                        height_mm);
+
+    /* Check EDID 1.4 Sections 3.10 and 3.10.3.11: mark remaining 18-byte
+     * descriptor slots unused with Dummy Descriptor tag 10h.
+     */
+    for (size_t desc = 72; desc < 126; desc += 18)
+        edid[desc + 3] = 0x10;
+
+    /* Check EDID 1.4 Section 3.11: extension block count. */
+    edid[126] = 0; /* No other extension blocks are defined */
+
+    /* Check EDID 1.4 Section 3.11: checksum of the base EDID block. The sum
+     * covers bytes 0..126; the result is stored in the last byte.
+     */
+    edid[EDID_BLOCK_SIZE - 1U] =
+        virtio_gpu_generate_edid_checksum(edid, EDID_BLOCK_SIZE - 1U);
+}
+
+/* Handle 'GET_EDID': return one generated base EDID block for the requested
+ * scanout. An unknown or disabled scanout is answered with
+ * 'VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID'; a malformed request marks the
+ * device failed.
+ */
+void virtio_gpu_get_edid_handler(virtio_gpu_state_t *vgpu,
+                                 struct virtq_desc *vq_desc,
+                                 uint32_t *plen)
+{
+    struct virtio_gpu_cmd_get_edid *request = virtio_gpu_get_request(
+        vgpu, vq_desc, sizeof(struct virtio_gpu_cmd_get_edid));
+    if (!request) {
+        virtio_gpu_set_fail(vgpu);
+        *plen = 0;
+        return;
+    }
+
+    /* The response buffer must be able to hold the full EDID response. */
+    int resp_idx = virtio_gpu_get_response_desc(
+        vq_desc, VIRTIO_GPU_MAX_DESC, sizeof(struct virtio_gpu_resp_edid));
+    if (resp_idx < 0) {
+        *plen = 0;
+        return;
+    }
+
+    if (request->scanout >= PRIV(vgpu)->num_scanouts ||
+        !PRIV(vgpu)->scanouts[request->scanout].enabled) {
+        fprintf(stderr, VIRTIO_GPU_LOG_PREFIX "%s(): invalid scanout id %u\n",
+                __func__, request->scanout);
+        *plen = virtio_gpu_write_ctrl_response(
+            vgpu, &request->hdr, &vq_desc[resp_idx],
+            VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID);
+        return;
+    }
+
+    const struct virtio_gpu_scanout_info *scanout =
+        &PRIV(vgpu)->scanouts[request->scanout];
+
+    struct virtio_gpu_resp_edid *response = virtio_gpu_mem_guest_to_host(
+        vgpu, vq_desc[resp_idx].addr, sizeof(struct virtio_gpu_resp_edid));
+    if (!response) {
+        *plen = 0;
+        return;
+    }
+
+    memset(response, 0, sizeof(*response));
+    response->hdr.type = VIRTIO_GPU_RESP_OK_EDID;
+    response->size = EDID_BLOCK_SIZE; /* One base EDID block.
*/ + virtio_gpu_generate_edid((uint8_t *) response->edid, scanout->width, + scanout->height); + + /* Report the number of bytes written to the response. */ + *plen = sizeof(*response); + if (request->hdr.flags & VIRTIO_GPU_FLAG_FENCE) { + response->hdr.flags = VIRTIO_GPU_FLAG_FENCE; + response->hdr.fence_id = request->hdr.fence_id; + } +} + +void virtio_gpu_cmd_undefined_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen) +{ + struct virtio_gpu_ctrl_hdr *header = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_ctrl_hdr)); + if (!header) { + virtio_gpu_set_fail(vgpu); + *plen = 0; + return; + } + + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): unsupported VirtIO-GPU command type " + "%u\n", + __func__, header->type); + + virtio_gpu_set_fail(vgpu); + *plen = 0; +} + +static int virtio_gpu_desc_handler(virtio_gpu_state_t *vgpu, + const virtio_gpu_queue_t *queue, + int queue_index, + uint32_t desc_idx, + uint32_t *plen) +{ + struct virtq_desc vq_desc[VIRTIO_GPU_MAX_DESC] = {0}; + + /* Collect up to 'VIRTIO_GPU_MAX_DESC' chained descriptors. */ + for (int i = 0; i < VIRTIO_GPU_MAX_DESC; i++) { + if (desc_idx >= queue->QueueNum) { + virtio_gpu_set_fail(vgpu); + return -1; + } + + /* The size of 'struct virtq_desc' is 4 words. */ + uint32_t desc_offset = queue->QueueDesc + desc_idx * 4; + uint32_t *desc = &vgpu->ram[desc_offset]; + + /* The guest is riscv32, so the upper 32 bits of every descriptor + * address must be zero. Reject any descriptor whose 'addr_high' is set + * before later code truncates it via 'virtio_gpu_mem_guest_to_host()', + * which would otherwise silently mask a guest bug. + */ + if (desc[1] != 0) { + virtio_gpu_set_fail(vgpu); + return -1; + } + + /* Retrieve the fields of the current descriptor. */ + vq_desc[i].addr = desc[0]; + vq_desc[i].len = desc[2]; + vq_desc[i].flags = desc[3]; + desc_idx = desc[3] >> 16; /* 'vq_desc[i].next' */ + + /* Leave the loop if 'VIRTIO_DESC_F_NEXT' is not set. 
*/ + if (!(vq_desc[i].flags & VIRTIO_DESC_F_NEXT)) + break; + } + + struct virtio_gpu_ctrl_hdr *header = virtio_gpu_get_request( + vgpu, vq_desc, sizeof(struct virtio_gpu_ctrl_hdr)); + if (!header) { + virtio_gpu_set_fail(vgpu); + return -1; + } + + bool is_cursor_cmd = header->type == VIRTIO_GPU_CMD_UPDATE_CURSOR || + header->type == VIRTIO_GPU_CMD_MOVE_CURSOR; + if ((queue_index == VIRTIO_GPU_CONTROLQ && is_cursor_cmd) || + (queue_index == VIRTIO_GPU_CURSORQ && !is_cursor_cmd)) { + virtio_gpu_set_fail(vgpu); + return -1; + } + + /* Keep the fixed 3-descriptor contract explicit. Longer chains need + * multi-SG parsing, so reject them before command dispatch. + * + * TODO: Support generic descriptor-chain parsing. + */ + if (vq_desc[VIRTIO_GPU_MAX_DESC - 1].flags & VIRTIO_DESC_F_NEXT) { + int resp_idx = virtio_gpu_get_response_desc( + vq_desc, VIRTIO_GPU_MAX_DESC, sizeof(struct virtio_gpu_ctrl_hdr)); + if (resp_idx < 0) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): descriptor chain exceeds supported length and has " + "no usable response descriptor\n", + __func__); + virtio_gpu_set_fail(vgpu); + return -1; + } + + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): descriptor chain exceeds supported length\n", + __func__); + *plen = virtio_gpu_write_ctrl_response(vgpu, header, &vq_desc[resp_idx], + VIRTIO_GPU_RESP_ERR_UNSPEC); + if (!*plen) { + virtio_gpu_set_fail(vgpu); + return -1; + } + + return 0; + } + + /* Process the command */ + switch (header->type) { + /* 2D commands */ + VIRTIO_GPU_CMD_CASE(GET_DISPLAY_INFO, get_display_info) + VIRTIO_GPU_CMD_CASE(RESOURCE_CREATE_2D, resource_create_2d) + VIRTIO_GPU_CMD_CASE(RESOURCE_UNREF, resource_unref) + VIRTIO_GPU_CMD_CASE(SET_SCANOUT, set_scanout) + VIRTIO_GPU_CMD_CASE(RESOURCE_FLUSH, resource_flush) + VIRTIO_GPU_CMD_CASE(TRANSFER_TO_HOST_2D, transfer_to_host_2d) + VIRTIO_GPU_CMD_CASE(RESOURCE_ATTACH_BACKING, resource_attach_backing) + VIRTIO_GPU_CMD_CASE(RESOURCE_DETACH_BACKING, resource_detach_backing) + 
VIRTIO_GPU_CMD_CASE(GET_CAPSET_INFO, get_capset_info) + VIRTIO_GPU_CMD_CASE(GET_CAPSET, get_capset) + VIRTIO_GPU_CMD_CASE(GET_EDID, get_edid) + VIRTIO_GPU_CMD_CASE(RESOURCE_ASSIGN_UUID, resource_assign_uuid) + VIRTIO_GPU_CMD_CASE(RESOURCE_CREATE_BLOB, resource_create_blob) + VIRTIO_GPU_CMD_CASE(SET_SCANOUT_BLOB, set_scanout_blob) + /* 3D commands */ + VIRTIO_GPU_CMD_CASE(CTX_CREATE, ctx_create) + VIRTIO_GPU_CMD_CASE(CTX_DESTROY, ctx_destroy) + VIRTIO_GPU_CMD_CASE(CTX_ATTACH_RESOURCE, ctx_attach_resource) + VIRTIO_GPU_CMD_CASE(CTX_DETACH_RESOURCE, ctx_detach_resource) + VIRTIO_GPU_CMD_CASE(RESOURCE_CREATE_3D, resource_create_3d) + VIRTIO_GPU_CMD_CASE(TRANSFER_TO_HOST_3D, transfer_to_host_3d) + VIRTIO_GPU_CMD_CASE(TRANSFER_FROM_HOST_3D, transfer_from_host_3d) + VIRTIO_GPU_CMD_CASE(SUBMIT_3D, submit_3d) + VIRTIO_GPU_CMD_CASE(RESOURCE_MAP_BLOB, resource_map_blob) + VIRTIO_GPU_CMD_CASE(RESOURCE_UNMAP_BLOB, resource_unmap_blob) + VIRTIO_GPU_CMD_CASE(UPDATE_CURSOR, update_cursor) + VIRTIO_GPU_CMD_CASE(MOVE_CURSOR, move_cursor) + default: + virtio_gpu_cmd_undefined_handler(vgpu, vq_desc, plen); + return -1; + } + + return 0; +} + +static void virtio_gpu_queue_notify_handler(virtio_gpu_state_t *vgpu, int index) +{ + uint32_t *ram = vgpu->ram; + virtio_gpu_queue_t *queue = &vgpu->queues[index]; + if (vgpu->Status & VIRTIO_STATUS__DEVICE_NEEDS_RESET) + return; + + if (!((vgpu->Status & VIRTIO_STATUS__DRIVER_OK) && queue->ready)) + return virtio_gpu_set_fail(vgpu); + + /* Check for new buffers */ + uint16_t new_avail = ram[queue->QueueAvail] >> 16; + uint16_t avail_delta = (uint16_t) (new_avail - queue->last_avail); + if (avail_delta > (uint16_t) queue->QueueNum) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): queue %d avail index advanced by %u entries, exceeds " + "queue size %u\n", + __func__, index, (unsigned) avail_delta, + (unsigned) queue->QueueNum); + virtio_gpu_set_fail(vgpu); + return; + } + + if (queue->last_avail == new_avail) + return; + + /* Process them */ 
+ uint16_t new_used = + ram[queue->QueueUsed] >> 16; /* 'virtq_used.idx' (le16) */ + while (queue->last_avail != new_avail) { + /* Obtain the index in the ring buffer */ + uint16_t queue_idx = queue->last_avail % queue->QueueNum; + + /* Each avail-ring entry is 2 bytes, but 'ram' is indexed in 4-byte + * words and the first entry sits at 'ram[queue->QueueAvail + 1]', so + * two entries share one word: pick the word with 'queue_idx / 2' and + * the half with a 16-bit shift, then truncate to 'uint16_t'. + * Check also 'struct virtq_avail' in the spec. + */ + uint16_t buffer_idx = ram[queue->QueueAvail + 1 + queue_idx / 2] >> + (16 * (queue_idx % 2)); + + /* Consume request from the available queue and process the data in the + * descriptor list. + */ + uint32_t len = 0; + int result = + virtio_gpu_desc_handler(vgpu, queue, index, buffer_idx, &len); + if (result != 0) + return; + + /* Write used element information ('struct virtq_used_elem') to the used + * ring. + */ + uint32_t vq_used_addr = + queue->QueueUsed + 1 + (new_used % queue->QueueNum) * 2; + ram[vq_used_addr] = buffer_idx; /* 'virtq_used_elem.id' (le32) */ + ram[vq_used_addr + 1] = len; /* 'virtq_used_elem.len' (le32) */ + queue->last_avail++; + new_used++; + } + + /* Update 'virtq_used.idx' (keep 'virtq_used.flags' in low 16 bits). */ + ram[queue->QueueUsed] &= MASK(16); /* clear high 16 bits (idx) */ + ram[queue->QueueUsed] |= ((uint32_t) new_used) << 16; /* set idx */ + + /* Send interrupt, unless 'VIRTQ_AVAIL_F_NO_INTERRUPT' is set. 
*/ + if (!(ram[queue->QueueAvail] & 1)) + vgpu->InterruptStatus |= VIRTIO_INT__USED_RING; +} + +static inline uint32_t virtio_gpu_preprocess(virtio_gpu_state_t *vgpu, + uint32_t addr) +{ + if ((addr >= RAM_SIZE) || (addr & 0b11)) + return virtio_gpu_set_fail(vgpu), 0; + + return addr >> 2; +} + +static void virtio_gpu_update_status(virtio_gpu_state_t *vgpu, uint32_t status) +{ + vgpu->Status |= status; + if (status) + return; + + if (g_virtio_gpu_backend.reset) + g_virtio_gpu_backend.reset(vgpu); + + /* Reset VirtIO device state (feature negotiation, queue descriptors, + * avail/used rings, status and interrupt registers). 'ram' and 'priv' are + * infrastructure pointers provided by the host, not device state, so + * they are saved and restored across the 'memset()'. + * + * 'vgpu->priv' ('virtio_gpu_data_t') is intentionally NOT reset here. + * It holds host-configured scanout info (display dimensions / enabled + * flags) set up before the guest driver probes the device. The guest + * re-queries this via 'CMD_GET_DISPLAY_INFO' after each reset, so it must + * survive. Renderer-specific bindings and resources live behind the + * backend hook and are reset before the generic device state is cleared. + */ + uint32_t *ram = vgpu->ram; + void *priv = vgpu->priv; + memset(vgpu, 0, sizeof(*vgpu)); + vgpu->ram = ram; + vgpu->priv = priv; +} + +static bool virtio_gpu_reg_read(virtio_gpu_state_t *vgpu, + uint32_t addr, + uint32_t *value) +{ +#define _(reg) VIRTIO_##reg + switch (addr) { + case _(MagicValue): + *value = 0x74726976; + return true; + case _(Version): + *value = 2; + return true; + case _(DeviceID): + *value = 16; + return true; + case _(VendorID): + *value = VIRTIO_VENDOR_ID; + return true; + case _(DeviceFeatures): + /* TODO: Advertise virgl/3D and blob-resource feature bits after the + * backend supports their command and display paths. + */ + *value = vgpu->DeviceFeaturesSel == 0 + ? VIRTIO_GPU_F_EDID + : (vgpu->DeviceFeaturesSel == 1 ? 
VIRTIO_F_VERSION_1 : 0); + return true; + case _(QueueNumMax): + *value = VIRTIO_GPU_QUEUE_NUM_MAX; + return true; + case _(QueueReady): + *value = VIRTIO_GPU_QUEUE.ready ? 1 : 0; + return true; + case _(InterruptStatus): + *value = vgpu->InterruptStatus; + return true; + case _(Status): + *value = vgpu->Status; + return true; + case _(SHMLenLow): + case _(SHMLenHigh): + /* TODO: Implement shared-memory regions before advertising + * VIRTIO_GPU_F_RESOURCE_BLOB. + */ + *value = -1; + return true; + case _(SHMBaseLow): + case _(SHMBaseHigh): + *value = 0; + return true; + case _(ConfigGeneration): + *value = 0; + return true; + default: + /* Unimplemented common registers, including write-only 'SHMSel', + * intentionally fault instead of returning placeholder values. + * TODO: Implement 'QueueReset' when advertising VIRTIO_F_RING_RESET. + */ + if (!RANGE_CHECK(addr, _(Config), sizeof(struct virtio_gpu_config))) + return false; + + /* Read configuration from the corresponding register */ + uint32_t offset = (addr - _(Config)) << 2; + switch (offset) { + case offsetof(struct virtio_gpu_config, events_read): { + *value = 0; /* No event is implemented currently */ + return true; + } + case offsetof(struct virtio_gpu_config, num_scanouts): { + *value = PRIV(vgpu)->num_scanouts; + return true; + } + case offsetof(struct virtio_gpu_config, num_capsets): { + /* TODO: Return virgl capsets after implementing the corresponding + * 3D command backend. Zero capsets keeps guests on the 2D path. + */ + *value = 0; + return true; + } + default: + return false; + } + } +#undef _ +} + +void virtio_gpu_read(hart_t *vm, + virtio_gpu_state_t *vgpu, + uint32_t addr, + uint8_t width, + uint32_t *value) +{ + /* The VGPU device exposes its MMIO registers as aligned 32-bit words + * only. It rejects byte and halfword accesses instead of emulating + * partial register reads. 
+ */ + switch (width) { + case RV_MEM_LW: + if (!virtio_gpu_reg_read(vgpu, addr >> 2, value)) + vm_set_exception(vm, RV_EXC_LOAD_FAULT, vm->exc_val); + break; + case RV_MEM_LBU: + case RV_MEM_LB: + case RV_MEM_LHU: + case RV_MEM_LH: + vm_set_exception(vm, RV_EXC_LOAD_MISALIGN, vm->exc_val); + return; + default: + vm_set_exception(vm, RV_EXC_ILLEGAL_INSN, 0); + return; + } +} + +/* After 'QueueReady' is set, 'QueueNum' and the ring address registers have + * already been validated and may be consumed by the device. Reject later + * writes to that virtqueue configuration instead of letting the guest change + * it under the running queue. + */ +static bool virtio_gpu_vq_config_after_ready(virtio_gpu_state_t *vgpu, + uint32_t addr) +{ + if (!VIRTIO_GPU_QUEUE.ready) + return false; + +#define _(reg) VIRTIO_##reg + switch (addr) { + case _(QueueNum): + case _(QueueDescLow): + case _(QueueDescHigh): + case _(QueueDriverLow): + case _(QueueDriverHigh): + case _(QueueDeviceLow): + case _(QueueDeviceHigh): + return true; + default: + return false; + } +#undef _ +} + +static bool virtio_gpu_reg_write(virtio_gpu_state_t *vgpu, + uint32_t addr, + uint32_t value) +{ +#define _(reg) VIRTIO_##reg + if (virtio_gpu_vq_config_after_ready(vgpu, addr)) { + virtio_gpu_set_fail(vgpu); + return true; + } + + switch (addr) { + case _(DeviceFeaturesSel): + vgpu->DeviceFeaturesSel = value; + return true; + case _(DriverFeatures): + if (vgpu->DriverFeaturesSel == 0) + vgpu->DriverFeatures = value; + return true; + case _(DriverFeaturesSel): + vgpu->DriverFeaturesSel = value; + return true; + case _(QueueSel): + if (value < ARRAY_SIZE(vgpu->queues)) + vgpu->QueueSel = value; + else + virtio_gpu_set_fail(vgpu); + return true; + case _(QueueNum): + if (value > 0 && value <= VIRTIO_GPU_QUEUE_NUM_MAX) + VIRTIO_GPU_QUEUE.QueueNum = value; + else + virtio_gpu_set_fail(vgpu); + return true; + case _(QueueReady): + VIRTIO_GPU_QUEUE.ready = value & 1; + if (value & 1) { + /* Validate that the full 
rings fit in guest RAM before allowing + * the queue to go live. 'virtio_gpu_preprocess()' only checked the + * base addresses. Here we verify the end of each ring region. + * All addresses are word indices (byte address >> 2). + * + * These sizes assume 'VIRTIO_F_EVENT_IDX' is not negotiated. We + * never advertise it (see 'DeviceFeatures'), so neither + * 'avail.used_event' nor 'used.avail_event' exist. If that flag is + * ever added, both end calculations need an extra word for the + * trailing '*_event' field. + */ + uint32_t qnum = VIRTIO_GPU_QUEUE.QueueNum; + uint32_t ram_words = RAM_SIZE / sizeof(uint32_t); + + /* Desc table: 'QueueNum' entries * 4 words each. */ + uint32_t desc_end = VIRTIO_GPU_QUEUE.QueueDesc + qnum * 4; + /* Avail ring: one word for 'flags' + 'idx', then + * ceil('QueueNum' / 2) words for 16-bit descriptor indexes. + */ + uint32_t avail_end = + VIRTIO_GPU_QUEUE.QueueAvail + 1 + (qnum + 1) / 2; + /* Used ring: one word for 'flags' + 'idx', then 'QueueNum' + * entries of 'struct virtq_used_elem' (2 words each). 
+ */ + uint32_t used_end = VIRTIO_GPU_QUEUE.QueueUsed + 1 + qnum * 2; + + if (!qnum || desc_end > ram_words || avail_end > ram_words || + used_end > ram_words) { + VIRTIO_GPU_QUEUE.ready = false; + virtio_gpu_set_fail(vgpu); + return true; + } + VIRTIO_GPU_QUEUE.last_avail = + vgpu->ram[VIRTIO_GPU_QUEUE.QueueAvail] >> 16; + } + return true; + case _(QueueDescLow): + VIRTIO_GPU_QUEUE.QueueDesc = virtio_gpu_preprocess(vgpu, value); + return true; + case _(QueueDescHigh): + if (value) + virtio_gpu_set_fail(vgpu); + return true; + case _(QueueDriverLow): + VIRTIO_GPU_QUEUE.QueueAvail = virtio_gpu_preprocess(vgpu, value); + return true; + case _(QueueDriverHigh): + if (value) + virtio_gpu_set_fail(vgpu); + return true; + case _(QueueDeviceLow): + VIRTIO_GPU_QUEUE.QueueUsed = virtio_gpu_preprocess(vgpu, value); + return true; + case _(QueueDeviceHigh): + if (value) + virtio_gpu_set_fail(vgpu); + return true; + case _(QueueNotify): + if (value < ARRAY_SIZE(vgpu->queues)) + virtio_gpu_queue_notify_handler(vgpu, value); + else + virtio_gpu_set_fail(vgpu); + return true; + case _(InterruptACK): + vgpu->InterruptStatus &= ~value; + return true; + case _(Status): + virtio_gpu_update_status(vgpu, value); + return true; + case _(SHMSel): + /* No shared-memory regions are advertised, so the selector is accepted + * and ignored. + */ + return true; + default: + /* Unsupported writes fault instead of updating unknown state. + * TODO: Implement 'QueueReset' when advertising VIRTIO_F_RING_RESET. 
+ */ + if (!RANGE_CHECK(addr, _(Config), sizeof(struct virtio_gpu_config))) + return false; + + /* Write configuration to the corresponding register */ + uint32_t offset = (addr - _(Config)) << 2; + switch (offset) { + case offsetof(struct virtio_gpu_config, events_clear): { + /* Ignored, no event is implemented currently */ + return true; + } + default: + return false; + } + } +#undef _ +} + +void virtio_gpu_write(hart_t *vm, + virtio_gpu_state_t *vgpu, + uint32_t addr, + uint8_t width, + uint32_t value) +{ + /* The VGPU device applies the same rule to writes: only aligned 32-bit + * stores are accepted for the MMIO register block, and narrower accesses + * fault. + */ + switch (width) { + case RV_MEM_SW: + if (!virtio_gpu_reg_write(vgpu, addr >> 2, value)) + vm_set_exception(vm, RV_EXC_STORE_FAULT, vm->exc_val); + break; + case RV_MEM_SB: + case RV_MEM_SH: + vm_set_exception(vm, RV_EXC_STORE_MISALIGN, vm->exc_val); + return; + default: + vm_set_exception(vm, RV_EXC_ILLEGAL_INSN, 0); + return; + } +} + +void virtio_gpu_init(virtio_gpu_state_t *vgpu) +{ + static bool initialized = false; + + if (initialized) { + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX + "%s(): only one virtio-gpu instance is supported\n", + __func__); + exit(EXIT_FAILURE); + } + initialized = true; + + vgpu->priv = &virtio_gpu_data; +} + +uint32_t virtio_gpu_register_scanout(virtio_gpu_state_t *vgpu, + uint32_t width, + uint32_t height) +{ + int scanout_num = PRIV(vgpu)->num_scanouts; + if (scanout_num >= VIRTIO_GPU_MAX_SCANOUTS) { + /* Registration is init-only today. Return an error instead if scanout + * creation becomes dynamic or guest-triggered. 
+ */ + fprintf(stderr, + VIRTIO_GPU_LOG_PREFIX "%s(): exceeded scanout maximum number\n", + __func__); + exit(EXIT_FAILURE); + } + + PRIV(vgpu)->scanouts[scanout_num].width = width; + PRIV(vgpu)->scanouts[scanout_num].height = height; + PRIV(vgpu)->scanouts[scanout_num].enabled = 1; + PRIV(vgpu)->scanouts[scanout_num].primary_resource_id = 0; + PRIV(vgpu)->scanouts[scanout_num].cursor_resource_id = 0; + PRIV(vgpu)->scanouts[scanout_num].src_x = 0; + PRIV(vgpu)->scanouts[scanout_num].src_y = 0; + PRIV(vgpu)->scanouts[scanout_num].src_w = 0; + PRIV(vgpu)->scanouts[scanout_num].src_h = 0; + + /* 'scanout_num' will match the guest-visible 'scanout_id'. See + * 'virtio_gpu_get_display_info_handler()' above for how that index is + * exposed to the guest and later reused in 'SET_SCANOUT'/'GET_EDID'. + */ + PRIV(vgpu)->num_scanouts++; + + return (uint32_t) scanout_num; +} diff --git a/virtio-gpu.h b/virtio-gpu.h new file mode 100644 index 00000000..12cde627 --- /dev/null +++ b/virtio-gpu.h @@ -0,0 +1,388 @@ +#pragma once + +#if !SEMU_HAS(VIRTIOGPU) +#error Only valid when Virtio-GPU is enabled. +#endif + +#include +#include + +#include "device.h" +#include "virtio.h" + +#define VIRTIO_GPU_MAX_SCANOUTS 16 +#define VIRTIO_GPU_LOG_PREFIX "[SEMU VGPU] " +#define VIRTIO_GPU_CMD_UNDEF virtio_gpu_cmd_undefined_handler +#define VIRTIO_GPU_FLAG_FENCE (1 << 0) + +/* Maximum descriptor chain length accepted by 'virtio_gpu_desc_handler()'. + * + * semu follows the common Linux virtio-gpu control queue shape in + * 'virtio_gpu_queue_fenced_ctrl_buffer()': 'sgs[3]' holds 'vcmd' (request), + * optional 'vout' (command data, e.g. 'RESOURCE_ATTACH_BACKING' entries), and + * optional 'vresp' (response). The supported commands therefore fit in "request + * + one data segment + response". + * + * This is not a general virtio-gpu descriptor-chain limit. Linux allocates the + * backing-entry array in 'virtio_gpu_object_shmem_init()' with + * 'kvmalloc_objs()'. 
If that buffer falls back to 'vmalloc', + * 'virtio_gpu_queue_fenced_ctrl_buffer()' detects it with 'is_vmalloc_addr()' + * and 'vmalloc_to_sgt()' expands 'vout' into multiple scatter-gather entries. + * + * Supporting that path would require accepting a longer descriptor chain and + * auditing every handler that indexes 'vq_desc[]'. Longer chains are rejected. + * The current response-descriptor lookup is also part of this fixed-shape + * parser: it scans the zero-initialized 3-entry array, not an arbitrary + * guest-provided scatter-gather chain. + * + * TODO: Support generic descriptor-chain parsing. + */ +#define VIRTIO_GPU_MAX_DESC 3 + +/* Core per-scanout metadata keyed by the guest-visible 'scanout_id'. This + * combines guest-visible display info ('width'/'height'/'enabled') with the + * current primary/cursor resource bindings. + */ +struct virtio_gpu_scanout_info { + uint32_t width, height; + uint32_t enabled; + uint32_t primary_resource_id; + uint32_t cursor_resource_id; + uint32_t src_x, src_y, src_w, src_h; +}; + +typedef struct { + struct virtio_gpu_scanout_info scanouts[VIRTIO_GPU_MAX_SCANOUTS]; + uint32_t num_scanouts; +} virtio_gpu_data_t; + +PACKED(struct virtio_gpu_config { + uint32_t events_read; + uint32_t events_clear; + uint32_t num_scanouts; + uint32_t num_capsets; +}); + +PACKED(struct virtio_gpu_ctrl_hdr { + uint32_t type; + uint32_t flags; + uint64_t fence_id; + uint32_t ctx_id; + uint8_t ring_idx; + uint8_t padding[3]; +}); + +PACKED(struct virtio_gpu_rect { + uint32_t x; + uint32_t y; + uint32_t width; + uint32_t height; +}); + +PACKED(struct virtio_gpu_resp_disp_info { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_display_one { + struct virtio_gpu_rect r; + uint32_t enabled; + uint32_t flags; + } pmodes[VIRTIO_GPU_MAX_SCANOUTS]; +}); + +PACKED(struct virtio_gpu_res_create_2d { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t format; + uint32_t width; + uint32_t height; +}); + +PACKED(struct 
virtio_gpu_res_unref { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_set_scanout { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + uint32_t scanout_id; + uint32_t resource_id; +}); + +PACKED(struct virtio_gpu_res_flush { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + uint32_t resource_id; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_trans_to_host_2d { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + uint64_t offset; + uint32_t resource_id; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_res_attach_backing { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t nr_entries; +}); + +PACKED(struct virtio_gpu_res_detach_backing { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_mem_entry { + uint64_t addr; + uint32_t length; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_cmd_get_edid { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t scanout; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_resp_edid { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t size; + uint32_t padding; + char edid[1024]; +}); + +PACKED(struct virtio_gpu_get_capset_info { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t capset_index; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_resp_capset_info { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t capset_id; + uint32_t capset_max_version; + uint32_t capset_max_size; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_get_capset { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t capset_id; + uint32_t capset_version; +}); + +PACKED(struct virtio_gpu_resp_capset { + struct virtio_gpu_ctrl_hdr hdr; + uint8_t capset_data[]; +}); + +PACKED(struct virtio_gpu_ctx_create { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t nlen; + uint32_t context_init; + char debug_name[64]; +}); + +PACKED(struct virtio_gpu_cursor_pos { + uint32_t scanout_id; + uint32_t x; + 
uint32_t y; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_update_cursor { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_cursor_pos pos; + uint32_t resource_id; + uint32_t hot_x; + uint32_t hot_y; + uint32_t padding; +}); + +/* clang-format off */ +PACKED(struct virtio_gpu_ctx_destroy { + struct virtio_gpu_ctrl_hdr hdr; +}); +/* clang-format on */ + +PACKED(struct virtio_gpu_resource_create_3d { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t target; + uint32_t format; + uint32_t bind; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t array_size; + uint32_t last_level; + uint32_t nr_samples; + uint32_t flags; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_ctx_resource { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t resource_id; + uint32_t padding; +}); + +PACKED(struct virtio_gpu_box { + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t w; + uint32_t h; + uint32_t d; +}); + +PACKED(struct virtio_gpu_transfer_host_3d { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_box box; + uint64_t offset; + uint32_t resource_id; + uint32_t level; + uint32_t stride; + uint32_t layer_stride; +}); + +PACKED(struct virtio_gpu_cmd_submit { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t size; + uint32_t num_in_fences; +}); + +PACKED(struct virtio_gpu_resp_map_info { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t map_info; + uint32_t padding; +}); + +enum virtio_gpu_ctrl_type { + /* 2D commands */ + VIRTIO_GPU_CMD_GET_DISPLAY_INFO = 0x0100, + VIRTIO_GPU_CMD_RESOURCE_CREATE_2D, + VIRTIO_GPU_CMD_RESOURCE_UNREF, + VIRTIO_GPU_CMD_SET_SCANOUT, + VIRTIO_GPU_CMD_RESOURCE_FLUSH, + VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D, + VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING, + VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING, + VIRTIO_GPU_CMD_GET_CAPSET_INFO, + VIRTIO_GPU_CMD_GET_CAPSET, + VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, + VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB, + VIRTIO_GPU_CMD_SET_SCANOUT_BLOB, + + /* 3D commands */ + 
VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, + VIRTIO_GPU_CMD_CTX_DESTROY, + VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE, + VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE, + VIRTIO_GPU_CMD_RESOURCE_CREATE_3D, + VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D, + VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D, + VIRTIO_GPU_CMD_SUBMIT_3D, + VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB, + VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB, + + /* Cursor commands */ + VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300, + VIRTIO_GPU_CMD_MOVE_CURSOR, + + /* Success responses */ + VIRTIO_GPU_RESP_OK_NODATA = 0x1100, + VIRTIO_GPU_RESP_OK_DISPLAY_INFO, + VIRTIO_GPU_RESP_OK_CAPSET_INFO, + VIRTIO_GPU_RESP_OK_CAPSET, + VIRTIO_GPU_RESP_OK_EDID, + + /* Error responses */ + VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY, + VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID, + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, +}; + +enum virtio_gpu_formats { + VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM = 1, + VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM = 2, + VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM = 3, + VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM = 4, + VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM = 67, + VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM = 68, + VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM = 121, + VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134 +}; + +typedef void (*virtio_gpu_cmd_func)(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen); +typedef void (*virtio_gpu_backend_lifecycle_func)(virtio_gpu_state_t *vgpu); + +struct virtio_gpu_cmd_backend { + virtio_gpu_backend_lifecycle_func reset; + /* 2D commands */ + virtio_gpu_cmd_func get_display_info; + virtio_gpu_cmd_func resource_create_2d; + virtio_gpu_cmd_func resource_unref; + virtio_gpu_cmd_func set_scanout; + virtio_gpu_cmd_func resource_flush; + virtio_gpu_cmd_func transfer_to_host_2d; + virtio_gpu_cmd_func resource_attach_backing; + virtio_gpu_cmd_func resource_detach_backing; + virtio_gpu_cmd_func get_capset_info; + virtio_gpu_cmd_func get_capset; + virtio_gpu_cmd_func 
get_edid; + virtio_gpu_cmd_func resource_assign_uuid; + virtio_gpu_cmd_func resource_create_blob; + virtio_gpu_cmd_func set_scanout_blob; + /* 3D commands */ + virtio_gpu_cmd_func ctx_create; + virtio_gpu_cmd_func ctx_destroy; + virtio_gpu_cmd_func ctx_attach_resource; + virtio_gpu_cmd_func ctx_detach_resource; + virtio_gpu_cmd_func resource_create_3d; + virtio_gpu_cmd_func transfer_to_host_3d; + virtio_gpu_cmd_func transfer_from_host_3d; + virtio_gpu_cmd_func submit_3d; + virtio_gpu_cmd_func resource_map_blob; + virtio_gpu_cmd_func resource_unmap_blob; + /* Cursor commands */ + virtio_gpu_cmd_func update_cursor; + virtio_gpu_cmd_func move_cursor; +}; + +void *virtio_gpu_mem_guest_to_host(virtio_gpu_state_t *vgpu, + uint32_t addr, + uint32_t size); +void *virtio_gpu_get_request(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + size_t request_size); +int virtio_gpu_get_response_desc(struct virtq_desc *vq_desc, + int max_desc, + size_t response_size); +uint32_t virtio_gpu_write_ctrl_response( + virtio_gpu_state_t *vgpu, + const struct virtio_gpu_ctrl_hdr *request, + const struct virtq_desc *response_desc, + uint32_t type); + +void virtio_gpu_set_fail(virtio_gpu_state_t *vgpu); + +void virtio_gpu_get_display_info_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen); +void virtio_gpu_get_edid_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen); +void virtio_gpu_cmd_undefined_handler(virtio_gpu_state_t *vgpu, + struct virtq_desc *vq_desc, + uint32_t *plen); diff --git a/window-sw.c b/window-sw.c index 3d464034..7e9fdfdc 100644 --- a/window-sw.c +++ b/window-sw.c @@ -1,23 +1,68 @@ #include +#include +#include #include +#include #include -#include +#include #include -#include "device.h" -#include "feature.h" +#if SEMU_HAS(VIRTIOGPU) +#include "vgpu-display.h" +#include "virtio-gpu.h" +#endif +#if SEMU_HAS(VIRTIOINPUT) #include "virtio-input-event.h" +#endif #include "window.h" -static SDL_Window 
*sdl_window; +#define WINDOW_LOG_PREFIX "[SEMU WINDOW] " + static int wake_write_fd = -1; +static bool sdl_initialized = false; static bool headless_mode = false; -static bool mouse_grabbed = false; static bool should_exit = false; -/* The backend only needs the pipe's write end. The emulator owns the read end - * and drains it after poll() returns. +#if SEMU_HAS(VIRTIOINPUT) +static bool mouse_grabbed = false; +static SDL_Window *sdl_input_window; +#else +#define SDL_EVENT_WAIT_TIMEOUT_MS 1 /* ms */ +#define SDL_EVENT_BURST_LIMIT 64U +#endif + +#if SEMU_HAS(VIRTIOGPU) +/* SDL-owned retained state for a single plane. Textures live only on the SDL + * thread and are updated from immutable CPU-frame display resources. */ +struct sdl_plane_info { + uint32_t width; + uint32_t height; + uint32_t sdl_format; + bool alpha_blend; + SDL_Texture *texture; +}; + +/* SDL-owned retained state for one scanout. 'window_init_sw()' creates the + * window/renderer, then 'window_drain_display_queue()' updates the primary and + * cursor planes from queued display payloads before rendering them. + */ +struct sdl_scanout_info { + struct sdl_plane_info primary_plane; + struct sdl_plane_info cursor_plane; + SDL_Rect cursor_rect; + uint32_t cursor_hot_x; + uint32_t cursor_hot_y; + uint32_t window_width; + uint32_t window_height; + + SDL_Window *window; + SDL_Renderer *renderer; +}; + +static struct sdl_scanout_info sdl_scanouts[VIRTIO_GPU_MAX_SCANOUTS]; +#endif + static void window_set_wake_fd_sw(int fd) { wake_write_fd = fd; @@ -35,13 +80,13 @@ static void window_wake_backend_sw(void) } } -static inline void window_shutdown_sw(void) +static void window_shutdown_sw(void) { /* Both user-driven close and emulator-driven shutdown funnel through the * same flag so the main thread and emulator thread observe one exit state. */ __atomic_store_n(&should_exit, true, __ATOMIC_RELAXED); - /* Unblock any poll(-1) in the SMP emulator loop immediately. 
*/ + /* Unblock any 'poll(-1)' in the SMP emulator loop immediately. */ window_wake_backend_sw(); } @@ -50,13 +95,14 @@ static bool window_is_closed_sw(void) return __atomic_load_n(&should_exit, __ATOMIC_RELAXED); } +#if SEMU_HAS(VIRTIOINPUT) /* Main-thread-only helper for relative-pointer devices. SDL's grab and * relative mouse APIs are part of the windowing backend, so callers use this * to switch between normal host-pointer mode and guest-directed mouse mode. */ static void window_set_mouse_grab_sw(bool grabbed) { - if (headless_mode || !sdl_window) { + if (headless_mode || !sdl_input_window) { mouse_grabbed = false; return; } @@ -72,10 +118,10 @@ static void window_set_mouse_grab_sw(bool grabbed) SDL_GetError()); return; } - SDL_SetWindowGrab(sdl_window, SDL_TRUE); + SDL_SetWindowGrab(sdl_input_window, SDL_TRUE); SDL_ShowCursor(SDL_DISABLE); } else { - SDL_SetWindowGrab(sdl_window, SDL_FALSE); + SDL_SetWindowGrab(sdl_input_window, SDL_FALSE); SDL_SetRelativeMouseMode(SDL_FALSE); SDL_ShowCursor(SDL_ENABLE); } @@ -87,13 +133,335 @@ static bool window_is_mouse_grabbed_sw(void) { return mouse_grabbed; } +#endif + +#if SEMU_HAS(VIRTIOGPU) +static bool vgpu_format_to_sdl_format(enum virtio_gpu_formats virtio_gpu_format, + uint32_t *sdl_format) +{ + switch (virtio_gpu_format) { + case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM: + *sdl_format = SDL_PIXELFORMAT_ARGB8888; + return true; + case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM: + *sdl_format = SDL_PIXELFORMAT_XRGB8888; + return true; + case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM: + *sdl_format = SDL_PIXELFORMAT_BGRA8888; + return true; + case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM: + *sdl_format = SDL_PIXELFORMAT_BGRX8888; + return true; + case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM: + *sdl_format = SDL_PIXELFORMAT_ABGR8888; + return true; + case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM: + *sdl_format = SDL_PIXELFORMAT_RGBX8888; + return true; + case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM: + *sdl_format = SDL_PIXELFORMAT_RGBA8888; + return true; + case 
VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM: + *sdl_format = SDL_PIXELFORMAT_XBGR8888; + return true; + default: + return false; + } +} + +static void sdl_plane_info_reset(struct sdl_plane_info *plane) +{ + bool alpha_blend = plane->alpha_blend; + if (plane->texture) + SDL_DestroyTexture(plane->texture); + memset(plane, 0, sizeof(*plane)); + plane->alpha_blend = alpha_blend; +} + +static void sdl_plane_info_cleanup(struct sdl_plane_info *plane) +{ + if (plane->texture) + SDL_DestroyTexture(plane->texture); + memset(plane, 0, sizeof(*plane)); +} + +static void sdl_scanout_info_cleanup(struct sdl_scanout_info *scanout) +{ + sdl_plane_info_cleanup(&scanout->primary_plane); + sdl_plane_info_cleanup(&scanout->cursor_plane); + + if (scanout->renderer) + SDL_DestroyRenderer(scanout->renderer); + if (scanout->window) + SDL_DestroyWindow(scanout->window); + + memset(scanout, 0, sizeof(*scanout)); +} + +static bool sdl_plane_info_get_sdl_format( + const struct sdl_plane_info *plane, + const struct vgpu_display_payload *payload, + uint32_t *sdl_format) +{ + /* The plane keeps its SDL objects across frames, but the payload format is + * still per-update data. Resolve the incoming VirtIO-GPU format first, + * then adjust it below if this plane requires alpha. + */ + const struct vgpu_display_cpu_payload *frame = &payload->cpu; + if (!vgpu_format_to_sdl_format(frame->format, sdl_format)) { + fprintf(stderr, "%s(): invalid resource format %u\n", __func__, + (uint32_t) frame->format); + return false; + } + + /* Cursor textures need an alpha-capable SDL format. If the incoming format + * is an XRGB/XBGR/BGRX/RGBX variant, switch to the matching alpha version + * so the high byte is preserved as transparency instead of being ignored. 
+ */ + if (plane->alpha_blend) { + switch (*sdl_format) { + case SDL_PIXELFORMAT_XRGB8888: + *sdl_format = SDL_PIXELFORMAT_ARGB8888; + break; + case SDL_PIXELFORMAT_BGRX8888: + *sdl_format = SDL_PIXELFORMAT_BGRA8888; + break; + case SDL_PIXELFORMAT_RGBX8888: + *sdl_format = SDL_PIXELFORMAT_RGBA8888; + break; + case SDL_PIXELFORMAT_XBGR8888: + *sdl_format = SDL_PIXELFORMAT_ABGR8888; + break; + default: + break; + } + } + + return true; +} + +static SDL_Texture *sdl_plane_info_create_texture( + SDL_Renderer *renderer, + const struct sdl_plane_info *plane, + const struct vgpu_display_cpu_payload *frame, + uint32_t sdl_format) +{ + SDL_Texture *texture = + SDL_CreateTexture(renderer, sdl_format, SDL_TEXTUREACCESS_STREAMING, + frame->width, frame->height); + if (!texture) { + fprintf(stderr, "%s(): failed to create texture: %s\n", __func__, + SDL_GetError()); + return NULL; + } + + if (plane->alpha_blend) { + if (SDL_SetTextureBlendMode(texture, SDL_BLENDMODE_BLEND) < 0) { + fprintf(stderr, "%s(): failed to enable texture blending: %s\n", + __func__, SDL_GetError()); + } + } + + return texture; +} + +static bool sdl_plane_info_update_texture( + SDL_Renderer *renderer, + struct sdl_plane_info *plane, + const struct vgpu_display_payload *payload, + const char *plane_name) +{ + const struct vgpu_display_cpu_payload *frame = &payload->cpu; + uint32_t sdl_format; + if (!sdl_plane_info_get_sdl_format(plane, payload, &sdl_format)) + return false; + + bool reuse_texture = plane->texture && plane->width == frame->width && + plane->height == frame->height && + plane->sdl_format == sdl_format; + SDL_Texture *texture = plane->texture; + + if (!reuse_texture) { + texture = + sdl_plane_info_create_texture(renderer, plane, frame, sdl_format); + if (!texture) + return false; + } + + /* Keep the retained plane state unchanged until the new pixels are known + * to be uploaded successfully. 
+ */ + if (SDL_UpdateTexture(texture, NULL, frame->pixels, frame->stride) != 0) { + fprintf(stderr, "%s(): failed to update %s texture: %s\n", __func__, + plane_name, SDL_GetError()); + if (!reuse_texture) + SDL_DestroyTexture(texture); + return false; + } + + if (!reuse_texture) { + if (plane->texture) + SDL_DestroyTexture(plane->texture); + plane->texture = texture; + } + plane->width = frame->width; + plane->height = frame->height; + plane->sdl_format = sdl_format; + return true; +} + +static bool sdl_cursor_rect_update_position(SDL_Rect *rect, + int32_t x, + int32_t y, + uint32_t hot_x, + uint32_t hot_y) +{ + int64_t rect_x = (int64_t) x - (int64_t) hot_x; + int64_t rect_y = (int64_t) y - (int64_t) hot_y; + + if (rect_x < INT_MIN || rect_x > INT_MAX || rect_y < INT_MIN || + rect_y > INT_MAX) { + fprintf(stderr, + WINDOW_LOG_PREFIX + "%s(): cursor position out of SDL range " + "(x=%" PRId32 " y=%" PRId32 " hot_x=%u hot_y=%u)\n", + __func__, x, y, (unsigned) hot_x, (unsigned) hot_y); + return false; + } + + rect->x = (int) rect_x; + rect->y = (int) rect_y; + return true; +} + +static bool sdl_scanout_apply_cursor_frame( + struct sdl_scanout_info *scanout, + const struct vgpu_display_payload *payload, + int32_t x, + int32_t y, + uint32_t hot_x, + uint32_t hot_y) +{ + const struct vgpu_display_cpu_payload *frame = &payload->cpu; + struct sdl_plane_info *plane = &scanout->cursor_plane; + SDL_Rect new_cursor_rect = scanout->cursor_rect; + + if (frame->width > INT_MAX || frame->height > INT_MAX) { + fprintf(stderr, + WINDOW_LOG_PREFIX + "%s(): cursor size out of SDL range (%ux%u)\n", + __func__, frame->width, frame->height); + return false; + } + + if (!sdl_cursor_rect_update_position(&new_cursor_rect, x, y, hot_x, hot_y)) + return false; + + if (!sdl_plane_info_update_texture(scanout->renderer, plane, payload, + "cursor")) + return false; + + scanout->cursor_hot_x = hot_x; + scanout->cursor_hot_y = hot_y; + new_cursor_rect.w = (int) frame->width; + new_cursor_rect.h 
= (int) frame->height; + scanout->cursor_rect = new_cursor_rect; + return true; +} + +static void sdl_scanout_render(const struct sdl_scanout_info *scanout) +{ + SDL_RenderClear(scanout->renderer); + + if (scanout->primary_plane.texture) + SDL_RenderCopy(scanout->renderer, scanout->primary_plane.texture, NULL, + NULL); + + if (scanout->cursor_plane.texture) + SDL_RenderCopy(scanout->renderer, scanout->cursor_plane.texture, NULL, + &scanout->cursor_rect); + + SDL_RenderPresent(scanout->renderer); +} + +static void window_drain_display_queue(void) +{ + bool dirty_scanouts[VIRTIO_GPU_MAX_SCANOUTS] = {0}; + struct vgpu_display_cmd cmd; + + /* Drain display bridge commands, update only SDL-owned state, then render + * each affected scanout once. The bridge publishes reliable clear + * generations and filters stale lossy frame/move queue entries. + */ + while (vgpu_display_pop_cmd(&cmd)) { + /* 'scanout_id' was validated by the guest-facing backend before the + * command entered the display bridge. + */ + struct sdl_scanout_info *scanout = &sdl_scanouts[cmd.scanout_id]; + if (!scanout->window || !scanout->renderer) { + vgpu_display_release_cmd(&cmd); + continue; + } + + switch (cmd.type) { + case VGPU_DISPLAY_CMD_PRIMARY_CLEAR: + sdl_plane_info_reset(&scanout->primary_plane); + dirty_scanouts[cmd.scanout_id] = true; + break; + case VGPU_DISPLAY_CMD_CURSOR_CLEAR: + memset(&scanout->cursor_rect, 0, sizeof(scanout->cursor_rect)); + scanout->cursor_hot_x = 0; + scanout->cursor_hot_y = 0; + sdl_plane_info_reset(&scanout->cursor_plane); + dirty_scanouts[cmd.scanout_id] = true; + break; + case VGPU_DISPLAY_CMD_PRIMARY_SET: + /* Use '|=' to keep earlier dirty state for this scanout. A failed + * upload leaves the old texture visible and does not dirty the + * scanout by itself. 
+ */ + dirty_scanouts[cmd.scanout_id] |= sdl_plane_info_update_texture( + scanout->renderer, &scanout->primary_plane, + cmd.u.primary_set.payload, "primary"); + break; + case VGPU_DISPLAY_CMD_CURSOR_SET: + /* Use '|=' to keep earlier dirty state for this scanout. A failed + * upload leaves the old cursor visible and does not dirty the + * scanout by itself. + */ + dirty_scanouts[cmd.scanout_id] |= sdl_scanout_apply_cursor_frame( + scanout, cmd.u.cursor_set.payload, cmd.u.cursor_set.x, + cmd.u.cursor_set.y, cmd.u.cursor_set.hot_x, + cmd.u.cursor_set.hot_y); + break; + case VGPU_DISPLAY_CMD_CURSOR_MOVE: + if (!sdl_cursor_rect_update_position( + &scanout->cursor_rect, cmd.u.cursor_move.x, + cmd.u.cursor_move.y, scanout->cursor_hot_x, + scanout->cursor_hot_y)) + break; + dirty_scanouts[cmd.scanout_id] = true; + break; + } + + vgpu_display_release_cmd(&cmd); + } + + for (uint32_t i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { + if (!dirty_scanouts[i] || !sdl_scanouts[i].window || + !sdl_scanouts[i].renderer) + continue; + sdl_scanout_render(&sdl_scanouts[i]); + } +} +#endif /* Main loop runs on the main thread */ static void window_main_loop_sw(void) { if (headless_mode) { - /* Block until the emulator calls window_shutdown_sw(), so main() can - * proceed to pthread_join() rather than stopping the emulator + /* Block until the emulator calls 'window_shutdown_sw()', so 'main()' + * can proceed to 'pthread_join()' rather than stopping the emulator * immediately. There is no SDL event loop in this mode, so the main * thread just polls the shared close flag. */ @@ -103,27 +471,52 @@ static void window_main_loop_sw(void) } /* relaxed ordering is sufficient: the only consequence of reading a stale - * false is a few extra loop iterations (each blocked up to 1 ms inside - * SDL_WaitEventTimeout). Ordering with the emulator thread is provided by - * pthread_join(), not by this flag. + * false is a few extra loop iterations. 
Ordering with the emulator thread + * is provided by 'pthread_join()', not by this flag. */ while (!window_is_closed_sw()) { +#if SEMU_HAS(VIRTIOINPUT) if (vinput_handle_events()) { - /* User closed the window. Set the flag so window_shutdown_sw() + /* User closed the window. Set the flag so 'window_shutdown_sw()' * (called from the emulator thread) does not race with us, then - * return normally so main() can pthread_join the emulator thread - * and collect its exit code. + * return normally so 'main()' can 'pthread_join()' the emulator + * thread and collect its exit code. */ window_shutdown_sw(); return; } +#else + SDL_Event e; + /* Without 'virtio-input', there is no SDL event pump to wake on display + * commands. Use a short timeout so 'VIRTIOGPU'-only builds periodically + * drain the display bridge; a future SDL user-event bridge could make + * this fully event-driven. + */ + if (SDL_WaitEventTimeout(&e, SDL_EVENT_WAIT_TIMEOUT_MS)) { + uint32_t processed = 0; + do { + if (e.type == SDL_QUIT) { + window_shutdown_sw(); + return; + } + processed++; + } while (processed < SDL_EVENT_BURST_LIMIT && SDL_PollEvent(&e)); + } +#endif + +#if SEMU_HAS(VIRTIOGPU) + window_drain_display_queue(); +#endif } } -static void window_init_sw(bool headless) +static void window_init_sw(bool headless, uint32_t width, uint32_t height) { if (headless) { headless_mode = true; +#if SEMU_HAS(VIRTIOGPU) + vgpu_display_set_unavailable(); +#endif return; } @@ -133,33 +526,141 @@ static void window_init_sw(bool headless) "Running in headless mode.\n", SDL_GetError()); headless_mode = true; +#if SEMU_HAS(VIRTIOGPU) + vgpu_display_set_unavailable(); +#endif return; } + sdl_initialized = true; - sdl_window = SDL_CreateWindow("semu", SDL_WINDOWPOS_UNDEFINED, - SDL_WINDOWPOS_UNDEFINED, SCREEN_WIDTH, - SCREEN_HEIGHT, SDL_WINDOW_SHOWN); - if (!sdl_window) { +#if SEMU_HAS(VIRTIOGPU) + /* The current machine setup registers exactly one scanout before calling + * 'window_init_sw()', so materialize 
scanout 0 directly here. If semu grows + * multiple scanouts later, this can be extended to iterate all registered + * scanouts or restored to an explicit per-scanout setup path. + */ + struct sdl_scanout_info *scanout = &sdl_scanouts[0]; + scanout->window = SDL_CreateWindow("semu", SDL_WINDOWPOS_UNDEFINED, + SDL_WINDOWPOS_UNDEFINED, width, height, + SDL_WINDOW_SHOWN); + if (!scanout->window) { + fprintf(stderr, + "window_init_sw(): failed to create SDL window for display " + "0: %s\n" + "Running in headless mode.\n", + SDL_GetError()); + headless_mode = true; + SDL_Quit(); + sdl_initialized = false; + vgpu_display_set_unavailable(); + return; + } + + scanout->renderer = + SDL_CreateRenderer(scanout->window, -1, SDL_RENDERER_ACCELERATED); + if (!scanout->renderer) { + fprintf(stderr, + "window_init_sw(): accelerated renderer not available, " + "trying software renderer: %s\n", + SDL_GetError()); + scanout->renderer = + SDL_CreateRenderer(scanout->window, -1, SDL_RENDERER_SOFTWARE); + } + if (!scanout->renderer) { + fprintf(stderr, + "window_init_sw(): failed to create renderer for display " + "0: %s\n" + "Running in headless mode.\n", + SDL_GetError()); + SDL_DestroyWindow(scanout->window); + scanout->window = NULL; + headless_mode = true; + SDL_Quit(); + sdl_initialized = false; + vgpu_display_set_unavailable(); + return; + } + + scanout->window_width = width; + scanout->window_height = height; + scanout->cursor_plane.alpha_blend = true; + +#if SEMU_HAS(VIRTIOINPUT) + if (!sdl_input_window) + sdl_input_window = scanout->window; +#endif + + SDL_SetRenderDrawColor(scanout->renderer, 0, 0, 0, 255); + SDL_RenderClear(scanout->renderer); + SDL_RenderPresent(scanout->renderer); +#else /* !SEMU_HAS(VIRTIOGPU) */ + sdl_input_window = SDL_CreateWindow("semu", SDL_WINDOWPOS_UNDEFINED, + SDL_WINDOWPOS_UNDEFINED, width, height, + SDL_WINDOW_SHOWN); + if (!sdl_input_window) { fprintf(stderr, "window_init_sw(): failed to create SDL window: %s\n" "Running in headless mode.\n", 
SDL_GetError()); headless_mode = true; + SDL_Quit(); + sdl_initialized = false; return; } +#endif +} + +static void window_cleanup_sw(void) +{ +#if SEMU_HAS(VIRTIOINPUT) + if (sdl_initialized) + window_set_mouse_grab_sw(false); + /* Keep cleanup idempotent when SDL was never initialized or grab release + * returned early. + */ + mouse_grabbed = false; +#endif - fprintf(stderr, - "semu: click window to capture mouse, Ctrl+Alt+G to " - "release\n"); + wake_write_fd = -1; + +#if SEMU_HAS(VIRTIOGPU) + for (uint32_t i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) + sdl_scanout_info_cleanup(&sdl_scanouts[i]); + + struct vgpu_display_cmd cmd; + while (vgpu_display_pop_cmd(&cmd)) + vgpu_display_release_cmd(&cmd); +#elif SEMU_HAS(VIRTIOINPUT) + if (sdl_input_window) + SDL_DestroyWindow(sdl_input_window); +#endif + +#if SEMU_HAS(VIRTIOINPUT) + sdl_input_window = NULL; +#endif + + if (sdl_initialized) { + SDL_Quit(); + sdl_initialized = false; + } + + /* Cleanup normally runs before process exit. Reset frontend flags anyway + * so a future re-init path cannot inherit stale headless/shutdown state. 
+ */ + headless_mode = false; + should_exit = false; } const struct window_backend g_window = { .window_init = window_init_sw, .window_main_loop = window_main_loop_sw, .window_shutdown = window_shutdown_sw, + .window_cleanup = window_cleanup_sw, .window_is_closed = window_is_closed_sw, .window_set_wake_fd = window_set_wake_fd_sw, .window_wake_backend = window_wake_backend_sw, +#if SEMU_HAS(VIRTIOINPUT) .window_set_mouse_grab = window_set_mouse_grab_sw, .window_is_mouse_grabbed = window_is_mouse_grabbed_sw, +#endif }; diff --git a/window.h b/window.h index 5f2e51e0..16064a39 100644 --- a/window.h +++ b/window.h @@ -1,47 +1,55 @@ #pragma once #include +#include -#include "feature.h" +#if SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) -#if SEMU_HAS(VIRTIOINPUT) struct window_backend { /* When headless is true, the backend skips SDL_Init / window creation and * behaves as if SDL had failed -- useful for batch runs (CI, 'make check') * that have no display attached. + * The caller also passes the default SDL window size. VirtIO-GPU builds + * use it as the initial scanout size; input-only builds use it for the + * grab target window because they do not have a display mode of their own. */ - void (*window_init)(bool headless); - /* Main loop function that runs on the main thread (for macOS SDL2). - * If non-NULL, the emulator runs in a background thread while this - * function handles window events on the main thread. + void (*window_init)(bool headless, uint32_t width, uint32_t height); + /* Main loop function that runs on the main thread. If non-NULL, the + * emulator runs in a background thread while this function handles window + * events on the main thread. * Returns when the emulator should exit. */ void (*window_main_loop)(void); - /* Called from the emulator thread when semu_run() returns, to unblock - * window_main_loop() so the main thread can proceed to pthread_join. 
+ /* Called from the emulator thread when 'semu_run()' returns, to unblock + * 'window_main_loop()' so the main thread can proceed to 'pthread_join()'. */ void (*window_shutdown)(void); - /* Returns true once the window has been closed (or SDL failed to - * initialize). Safe to call from any thread. + /* Release frontend resources after the emulator producer has stopped, or + * after initialization fails before the producer starts. + */ + void (*window_cleanup)(void); + /* Returns true once the window has been closed (or initialization fell + * back to headless mode). Safe to call from any thread. */ bool (*window_is_closed)(void); - /* Register the write end of a pipe to be written when the window shuts - * down. Must be called before window_main_loop(). + /* Register the write end of the wake pipe used to break the emulator + * thread out of 'poll(-1)' when the backend queues work for it. */ void (*window_set_wake_fd)(int fd); - /* Best-effort wakeup hook for the backend self-pipe. */ + /* Best-effort wakeup hook for the emulator thread. The backend uses this + * after queuing work such as input events or shutdown requests. + */ void (*window_wake_backend)(void); - /* Enable or disable SDL's relative mouse mode for the frontend window. - * When this returns with grab enabled, pointer motion is reported as - * relative deltas, the host cursor is hidden, and SDL confines the - * pointer to the semu window until the grab is released again. +#if SEMU_HAS(VIRTIOINPUT) + /* Switch the backend between normal host-pointer mode and grabbed + * relative-pointer mode. Must be called from the main thread because it + * touches window-system state directly. */ void (*window_set_mouse_grab)(bool grabbed); - /* Returns true once the frontend window currently owns the host mouse - * grab. Safe to call from the main thread while translating SDL events. - */ + /* Returns true once the backend currently owns the host mouse grab. 
*/ bool (*window_is_mouse_grabbed)(void); +#endif /* SEMU_HAS(VIRTIOINPUT) */ }; extern const struct window_backend g_window; -#endif +#endif /* SEMU_HAS(VIRTIOINPUT) || SEMU_HAS(VIRTIOGPU) */