Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/arch/arm64/desc.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
#pragma once

/* Base address used for guest RAM on arm64: 1UL << 31 == 0x80000000. */
#define RAM_BASE (1UL << 31)

/* GIC SPIs (offset by ARM_GIC_SPI_BASE inside vm_irq_line). Distinct lines per
 * device so a level-triggered ARM GIC can de-assert per source rather than
 * sharing a vector across virtio paths.
 */
#define SERIAL_IRQ 0
#define VIRTIO_BLK_IRQ 1
#define VIRTIO_NET_IRQ 2

/* Default guest kernel command line. KERNEL_OPTS is defined exactly once:
 * a second definition with a different replacement list is an incompatible
 * macro redefinition.
 *
 * panic=-1 reboots immediately on guest panic. arm64 has no keyboard reset
 * path; the kernel issues a PSCI SYSTEM_RESET / SYSTEM_OFF, which KVM
 * surfaces as KVM_EXIT_SYSTEM_EVENT and vm_run() handles as a clean exit.
 */
#define KERNEL_OPTS "console=ttyS0 panic=-1"
24 changes: 21 additions & 3 deletions src/arch/arm64/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ int vm_arch_cpu_init(vm_t *v)
if (ioctl(v->vm_fd, KVM_ARM_PREFERRED_TARGET, &vcpu_init) < 0)
return throw_err("Failed to find perferred CPU type\n");

/* Enable in-kernel PSCI 0.2 emulation. Without this, a guest panic with
* panic=-1 issues a PSCI SYSTEM_RESET and KVM either ignores the SMC/HVC (so
* the guest spins) or signals an undefined-instruction trap. With PSCI on,
* the call surfaces as KVM_EXIT_SYSTEM_EVENT to the host loop.
*/
vcpu_init.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;

if (ioctl(v->vcpu_fd, KVM_ARM_VCPU_INIT, &vcpu_init))
return throw_err("Failed to initialize vCPU\n");

Expand Down Expand Up @@ -142,9 +149,9 @@ int vm_arch_init_platform_device(vm_t *v)
if (serial_init(&v->serial, &v->io_bus))
return throw_err("Failed to init UART device");

/* Zero virtio_blk_dev so pci_dev_is_registered() observes a clean
* state when the user boots without -d. virtio_net_init memsets
* inside vm_enable_net, so virtio_net_dev is covered by that path.
/* Zero virtio_blk_dev so pci_dev_is_registered() observes a clean state
* when the user boots without -d. virtio_net_init memsets inside
* vm_enable_net, so virtio_net_dev is covered by that path.
* x86 already does the same call in its vm_arch_init_platform_device.
*/
virtio_blk_init(&v->virtio_blk_dev);
Expand Down Expand Up @@ -347,6 +354,17 @@ static int generate_fdt(vm_t *v)
__FDT(property_cell, "phandle", FDT_PHANDLE_GIC);
__FDT(end_node);

/* /psci node: lets the guest discover the in-kernel PSCI 0.2 emulator
* we requested in vm_arch_cpu_init via KVM_ARM_VCPU_PSCI_0_2. KVM uses
* HVC as the conduit on the virtual CPU. Without this node the kernel
* doesn't know the firmware interface exists and falls back to a
* spinloop on panic.
*/
__FDT(begin_node, "psci");
__FDT(property_string, "compatible", "arm,psci-0.2");
__FDT(property_string, "method", "hvc");
__FDT(end_node);

/* /uart node: serial device */
/* The node name of the serial device is different from kvmtool. */
__FDT(begin_node, "uart");
Expand Down
12 changes: 11 additions & 1 deletion src/arch/x86/desc.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
#pragma once

/* Base address used for guest RAM on x86. */
#define RAM_BASE 0

/* IO-APIC GSIs. Each device gets its own line so we never share a vector
 * between virtio devices, which keeps level-triggered ISA legacy IRQs (the
 * 16550 on IRQ4) out of the way of edge-triggered virtio MSI-less paths.
 */
#define SERIAL_IRQ 4
#define VIRTIO_NET_IRQ 14
#define VIRTIO_BLK_IRQ 15

/* Default guest kernel command line. KERNEL_OPTS is defined exactly once:
 * a second definition with a different replacement list is an incompatible
 * macro redefinition.
 *
 * panic=-1 reboots immediately on guest panic; reboot=k uses the keyboard
 * controller path which on KVM ends in a triple-fault, surfacing cleanly as
 * KVM_EXIT_SHUTDOWN to the host loop in vm_run().
 */
#define KERNEL_OPTS "console=ttyS0 pci=conf1 panic=-1 reboot=k"
15 changes: 10 additions & 5 deletions src/diskimg.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,18 @@ ssize_t diskimg_read(struct diskimg *diskimg,
off_t offset,
size_t size)
{
lseek(diskimg->fd, offset, SEEK_SET);
return read(diskimg->fd, data, size);
/* pread/pwrite carry the offset in the syscall, so concurrent virtq
* workers cannot race on a shared file pointer the way lseek+read does.
*/
return pread(diskimg->fd, data, size, offset);
}

/* Write `size` bytes from `data` into the disk image at byte `offset`.
 *
 * A single pwrite() is used instead of lseek() followed by write(): the
 * offset travels with the syscall, so the descriptor's shared file position
 * is neither consulted nor moved and concurrent callers cannot race on it.
 *
 * Returns pwrite()'s result unchanged: the number of bytes written (which
 * may be fewer than `size`), or -1 with errno set on failure. Handling a
 * short write is left to the caller.
 */
ssize_t diskimg_write(struct diskimg *diskimg,
                      void *data,
                      off_t offset,
                      size_t size)
{
    return pwrite(diskimg->fd, data, size, offset);
}

int diskimg_flush(struct diskimg *diskimg)
Expand All @@ -33,7 +34,11 @@ int diskimg_init(struct diskimg *diskimg, const char *file_path)
if (diskimg->fd < 0)
return -1;
struct stat st;
fstat(diskimg->fd, &st);
if (fstat(diskimg->fd, &st) < 0) {
close(diskimg->fd);
diskimg->fd = -1;
return -1;
}
diskimg->size = st.st_size;
return 0;
}
Expand Down
7 changes: 5 additions & 2 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ int main(int argc, char *argv[])
}
}

set_input_mode();

vm_t vm;
if (vm_init(&vm) < 0)
return throw_err("Failed to initialize guest vm");
Expand All @@ -99,6 +97,11 @@ int main(int argc, char *argv[])
if (vm_late_init(&vm) < 0)
return -1;

/* Switch the terminal to raw mode only once setup has succeeded so that
* any error from the load/init paths above is rendered on a normal tty.
*/
set_input_mode();

vm_run(&vm);
vm_exit(&vm);

Expand Down
1 change: 0 additions & 1 deletion src/virtio-blk.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ static int virtio_blk_setup(struct virtio_blk_dev *dev, struct diskimg *diskimg)
}

dev->enable = true;
/* FIXME: irq_num should be different to other devs */
dev->irq_num = VIRTIO_BLK_IRQ;
dev->diskimg = diskimg;
dev->config.capacity = diskimg->size >> 9;
Expand Down
39 changes: 36 additions & 3 deletions src/virtio-pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,39 @@ static void virtio_pci_write_guest_feature(struct virtio_pci_dev *dev)

/* Partial device reset, performed when the guest writes 0 to device_status
 * (Virtio 1.x §2.4: that write returns the device to its initial
 * post-power-on state).
 *
 * Only state the guest is about to renegotiate is cleared: the acked
 * features, the ISR status, the common-cfg selector fields, and the ring
 * addresses / packed-ring indices of every virtqueue that is NOT enabled.
 * That is enough for a driver re-probe (reload, kexec) in which no queue
 * has been enabled yet.
 *
 * Enabled queues are skipped on purpose, so info.enable, desc_ring and the
 * device/driver event pointers are left alone: the virtio-blk / virtio-net
 * emulator workers poll them without locking. Tearing down a live queue
 * would require signalling the per-device stopfd and joining the worker
 * thread, which this generic layer has no handle on; that is deferred to a
 * follow-up adding a virtio_pci_ops::reset hook.
 */
static void virtio_pci_reset(struct virtio_pci_dev *dev)
{
    /* Forget the negotiated features and rewind the common-cfg selectors. */
    dev->guest_feature = 0;
    dev->config.common_cfg.device_feature_select = 0;
    dev->config.common_cfg.guest_feature_select = 0;
    dev->config.common_cfg.guest_feature = 0;
    dev->config.common_cfg.queue_select = 0;

    /* Clear the ISR status with release ordering. */
    __atomic_store_n(&dev->config.isr_cap.isr_status, 0, __ATOMIC_RELEASE);

    for (uint16_t qi = 0; qi < dev->num_queues; qi++) {
        struct virtq *q = &dev->vq[qi];

        /* Only queues that are not enabled are returned to defaults. */
        if (!q->info.enable) {
            q->info.size = VIRTQ_SIZE;
            q->info.desc_addr = 0;
            q->info.device_addr = 0;
            q->info.driver_addr = 0;
            q->next_avail_idx = 0;
            q->used_wrap_count = 1;
        }
    }
}

static void virtio_pci_write_status(struct virtio_pci_dev *dev)
Expand All @@ -62,7 +94,7 @@ static void virtio_pci_select_virtq(struct virtio_pci_dev *dev)
uint16_t select = dev->config.common_cfg.queue_select;
struct virtio_pci_common_cfg *config = &dev->config.common_cfg;

if (select < config->num_queues) {
if (select < dev->num_queues) {
uint64_t offset = offsetof(struct virtio_pci_common_cfg, queue_size);
memcpy((void *) ((uintptr_t) config + offset), &dev->vq[select].info,
sizeof(struct virtq_info));
Expand Down Expand Up @@ -114,7 +146,7 @@ static void virtio_pci_space_write(struct virtio_pci_dev *dev,
offset <= VIRTIO_PCI_COMMON_Q_USEDHI) {
uint16_t select = dev->config.common_cfg.queue_select;
uint64_t info_offset = offset - VIRTIO_PCI_COMMON_Q_SIZE;
if (select < dev->config.common_cfg.num_queues) {
if (select < dev->num_queues) {
memcpy((void *) ((uintptr_t) &dev->vq[select].info +
info_offset),
data, size);
Expand Down Expand Up @@ -251,6 +283,7 @@ void virtio_pci_set_virtq(struct virtio_pci_dev *dev,
struct virtq *vq,
uint16_t num_queues)
{
dev->num_queues = num_queues;
dev->config.common_cfg.num_queues = num_queues;
dev->vq = vq;
}
Expand Down
7 changes: 7 additions & 0 deletions src/virtio-pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ struct virtio_pci_dev {
struct virtio_pci_notify_cap *notify_cap;
struct virtio_pci_cap *dev_cfg_cap;
struct virtq *vq;
/* Host-side mirror of the queue count. config.common_cfg.num_queues
* lives in guest-writable BAR memory (the unconditional memcpy in
* virtio_pci_space_write lets a guest overwrite it), so trusting it
* to bound vq[] indexing is an OOB-write primitive. Bounds checks
* use this field instead.
*/
uint16_t num_queues;
};

uint64_t virtio_pci_get_notify_addr(struct virtio_pci_dev *dev,
Expand Down
14 changes: 14 additions & 0 deletions src/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,20 @@ int vm_run(vm_t *v)
printf("shutdown\n");
munmap(run, run_size);
return 0;
case KVM_EXIT_SYSTEM_EVENT: {
/* arm64 PSCI SYSTEM_OFF / SYSTEM_RESET land here. SHUTDOWN and
* RESET are clean exits from our POV — kvm-host has no reboot
* loop, and a guest panic with panic=-1 reaches us as RESET
* (indistinguishable from a userspace `reboot`), matching the
* x86 reboot=k path that comes back as KVM_EXIT_SHUTDOWN.
* CRASH is the one type that signals host-relevant failure
* (NMI watchdog, kdump trigger), so propagate it as -1.
*/
uint32_t type = run->system_event.type;
printf("system event %u\n", type);
munmap(run, run_size);
return type == KVM_SYSTEM_EVENT_CRASH ? -1 : 0;
}
default:
printf("reason: %d\n", run->exit_reason);
munmap(run, run_size);
Expand Down