diff --git a/src/firecracker/src/api_server/request/actions.rs b/src/firecracker/src/api_server/request/actions.rs index a3b3f3f3a88..2db9dc7c682 100644 --- a/src/firecracker/src/api_server/request/actions.rs +++ b/src/firecracker/src/api_server/request/actions.rs @@ -7,7 +7,7 @@ use vmm::rpc_interface::VmmAction; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; -#[cfg(target_arch = "aarch64")] +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use super::StatusCode; // The names of the members from this enum must precisely correspond (as a string) to the possible @@ -45,6 +45,13 @@ pub(crate) fn parse_put_actions(body: &Body) -> Result); pub enum TargetArch { X86_64, Aarch64, + Riscv64, } impl TargetArch { @@ -176,6 +177,7 @@ impl TargetArch { match self { TargetArch::X86_64 => SCMP_ARCH_X86_64, TargetArch::Aarch64 => SCMP_ARCH_AARCH64, + TargetArch::Riscv64 => SCMP_ARCH_RISCV64, } } } @@ -186,6 +188,7 @@ impl FromStr for TargetArch { match s.to_lowercase().as_str() { "x86_64" => Ok(TargetArch::X86_64), "aarch64" => Ok(TargetArch::Aarch64), + "riscv64" => Ok(TargetArch::Riscv64), _ => Err(s.to_string()), } } diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 6ecdb75e5fe..8e7f69c9da1 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -45,7 +45,7 @@ vm-superio = "0.8.0" vmm-sys-util = { version = "0.12.1", features = ["with-serde"] } zerocopy = { version = "0.8.25" } -[target.'cfg(target_arch = "aarch64")'.dependencies] +[target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] vm-fdt = "0.3.0" [dev-dependencies] diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index 61d65fea1a5..1ff279981ef 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -26,6 +26,24 @@ pub use aarch64::{ load_kernel, }; +/// Module for riscv64 related functionality. +#[cfg(target_arch = "riscv64")] +pub mod riscv64; + +#[cfg(target_arch = "riscv64")] +pub use riscv64::kvm::{Kvm, KvmArchError}; +#[cfg(target_arch = "riscv64")] +pub use riscv64::vcpu::*; +#[cfg(target_arch = "riscv64")] +pub use riscv64::vm::{ArchVm, ArchVmError, VmState}; +#[cfg(target_arch = "riscv64")] +pub use riscv64::{ + ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions, + configure_system_for_boot, get_kernel_start, initrd_load_addr, layout::CMDLINE_MAX_SIZE, + layout::IRQ_BASE, layout::IRQ_MAX, layout::SYSTEM_MEM_SIZE, layout::SYSTEM_MEM_START, + load_kernel, +}; + /// Module for x86_64 related functionality. #[cfg(target_arch = "x86_64")] pub mod x86_64; @@ -51,12 +69,13 @@ pub enum DeviceType { /// Device Type: Virtio. Virtio(u32), /// Device Type: Serial. - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] Serial, /// Device Type: RTC. #[cfg(target_arch = "aarch64")] Rtc, /// Device Type: BootTimer. + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] BootTimer, } diff --git a/src/vmm/src/arch/riscv64/aia/mod.rs b/src/vmm/src/arch/riscv64/aia/mod.rs new file mode 100644 index 00000000000..b0a8b333025 --- /dev/null +++ b/src/vmm/src/arch/riscv64/aia/mod.rs @@ -0,0 +1,213 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +mod regs; + +use kvm_ioctls::{DeviceFd, VmFd}; +pub use regs::AiaState; + +/// Represent an AIA device +#[derive(Debug)] +pub struct AIADevice { + fd: DeviceFd, + vcpu_count: u64, +} + +impl AIADevice { + const VERSION: u32 = kvm_bindings::kvm_device_type_KVM_DEV_TYPE_RISCV_AIA; + + /// Return whether the device is MSI compatible. + pub fn msi_compatible(&self) -> bool { + true + } + + /// Return the FDT `compatible` property string for IMSIC. + pub fn imsic_compatibility(&self) -> &str { + "riscv,imsics" + } + + /// Return IMSIC properties. + pub fn imsic_properties(&self) -> [u32; 4] { + [ + 0, + AIADevice::get_imsic_addr() as u32, + 0, + super::layout::IMSIC_SZ_PH * self.vcpu_count as u32, + ] + } + + /// Return the FDT `compatible` property string for APLIC. + pub fn aplic_compatibility(&self) -> &str { + "riscv,aplic" + } + + /// Return APLIC properties. + pub fn aplic_properties(&self) -> [u32; 4] { + [ + 0, + AIADevice::get_aplic_addr() as u32, + 0, + ::kvm_bindings::KVM_DEV_RISCV_APLIC_SIZE, + ] + } + + /// Return the file descriptor of the AIA device. + pub fn device_fd(&self) -> &DeviceFd { + &self.fd + } + + /// Returns the number vCPUs this AIA device handles. + pub fn vcpu_count(&self) -> u64 { + self.vcpu_count + } + + fn get_aplic_addr() -> u64 { + super::layout::APLIC_START + } + + fn get_imsic_addr() -> u64 { + super::layout::IMSIC_START + } + + /// Create the AIA device object. + pub fn create_device(fd: DeviceFd, vcpu_count: u64) -> Self { + Self { fd, vcpu_count } + } + + /// Initialize an AIA device. + pub fn init_device(vm: &VmFd) -> Result { + let mut aia_device = kvm_bindings::kvm_create_device { + type_: Self::VERSION, + fd: 0, + flags: 0, + }; + + vm.create_device(&mut aia_device) + .map_err(AiaError::CreateAIA) + } + + fn init_device_attributes(aia_device: &Self) -> Result<(), AiaError> { + // Set attributes. + let nr_irqs: u32 = super::layout::IRQ_MAX; + let aia_nr_sources: u32 = nr_irqs; + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CONFIG, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_CONFIG_SRCS), + &aia_nr_sources as *const u32 as u64, + 0, + )?; + + let aia_hart_bits = u64::from(aia_device.vcpu_count) - 1; + let aia_hart_bits = ::std::cmp::max(64 - aia_hart_bits.leading_zeros(), 1); + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CONFIG, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_CONFIG_HART_BITS), + &aia_hart_bits as *const u32 as u64, + 0, + )?; + + // Set APLIC address. + let aia_addr_aplic: u64 = AIADevice::get_aplic_addr(); + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_ADDR, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_ADDR_APLIC), + &aia_addr_aplic as *const u64 as u64, + 0, + )?; + + let aia_imsic_addr = |hart| -> u64 { + AIADevice::get_imsic_addr() + u64::from(hart) * u64::from(super::layout::IMSIC_SZ_PH) + }; + for i in 0..aia_device.vcpu_count { + let aia_addr_imsic = aia_imsic_addr(i); + let aia_addr_imsic_attr = 1 + u64::from(i); + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_ADDR, + u64::from(aia_addr_imsic_attr), + &aia_addr_imsic as *const u64 as u64, + 0, + )?; + } + + Ok(()) + } + + /// Create an AIA device. 
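    // Annotation (not part of the patch): `create_aia` below chains the steps
    // defined above. `init_device` asks KVM for a KVM_DEV_TYPE_RISCV_AIA device
    // fd, `create_device` pairs it with the vCPU count, `init_device_attributes`
    // programs the number of wired sources (IRQ_MAX), the hart-index width and
    // the APLIC/IMSIC guest-physical addresses, and `finalize_device` issues
    // KVM_DEV_RISCV_AIA_CTRL_INIT. The hart-index width is the number of bits
    // needed to address every hart, e.g. for 4 vCPUs:
    // (4 - 1) = 3, 64 - leading_zeros(3) = 2, max(2, 1) = 2 bits.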
+ pub fn create_aia(vm: &VmFd, vcpu_count: u64) -> Result { + let aia_fd = Self::init_device(vm)?; + + let device = Self::create_device(aia_fd, vcpu_count); + + Self::init_device_attributes(&device)?; + + Self::finalize_device(&device)?; + + Ok(device) + } + + /// Finalize the setup of an AIA device. + pub fn finalize_device(aia_device: &Self) -> Result<(), AiaError> { + // Finalize the AIA. + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_CTRL_INIT), + 0, + 0, + )?; + + Ok(()) + } + + /// Set an AIA device attribute. + pub fn set_device_attribute( + fd: &DeviceFd, + group: u32, + attr: u64, + addr: u64, + flags: u32, + ) -> Result<(), AiaError> { + let attr = kvm_bindings::kvm_device_attr { + flags, + group, + attr, + addr, + }; + fd.set_device_attr(&attr) + .map_err(|err| AiaError::DeviceAttribute(err, true, group))?; + + Ok(()) + } + + /// A safe wrapper over unsafe kvm_ioctl::get_device_attr() + pub fn get_device_attribute( + &self, + attr: &mut ::kvm_bindings::kvm_device_attr, + ) -> Result<(), AiaError> { + // SAFETY: attr.addr is safe to write to. + unsafe { + self.fd + .get_device_attr(attr) + .map_err(|err| AiaError::DeviceAttribute(err, true, attr.group))? + }; + + Ok(()) + } +} + +/// Errors thrown while setting up the AIA. +#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] +pub enum AiaError { + /// Error while calling KVM ioctl for setting up the global interrupt controller: {0} + CreateAIA(kvm_ioctls::Error), + /// Error while setting or getting device attributes for the AIA: {0}, {1}, {2} + DeviceAttribute(kvm_ioctls::Error, bool, u32), +} diff --git a/src/vmm/src/arch/riscv64/aia/regs.rs b/src/vmm/src/arch/riscv64/aia/regs.rs new file mode 100644 index 00000000000..63aa9cd3808 --- /dev/null +++ b/src/vmm/src/arch/riscv64/aia/regs.rs @@ -0,0 +1,11 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +/// Structure used for serializing the state of the AIA registers. +/// For now, it is just a placeholder. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct AiaState; diff --git a/src/vmm/src/arch/riscv64/fdt.rs b/src/vmm/src/arch/riscv64/fdt.rs new file mode 100644 index 00000000000..4acaf529efc --- /dev/null +++ b/src/vmm/src/arch/riscv64/fdt.rs @@ -0,0 +1,553 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. 
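// Illustrative sketch (not part of the patch): how the FDT produced by this
// module is meant to be driven from `configure_system_for_boot` in
// arch/riscv64/mod.rs. `sketch_build_and_place_fdt` is a hypothetical helper;
// the 10 MHz timebase and the empty device map are placeholder values.
#[allow(dead_code)]
fn sketch_build_and_place_fdt(
    vcpus: &[crate::vstate::vcpu::Vcpu],
    guest_mem: &crate::vstate::memory::GuestMemoryMmap,
    aia: &super::aia::AIADevice,
    cmdline: std::ffi::CString,
) -> Result<(), FdtError> {
    use crate::vstate::memory::{Bytes, GuestAddress};

    // Assemble the blob: /cpus, /memory, /chosen, the AIA (IMSIC + APLIC) and
    // the MMIO devices, in that order.
    let blob = create_fdt(
        vcpus,
        guest_mem,
        cmdline,
        10_000_000,
        &std::collections::HashMap::new(),
        aia,
    )?;
    // The blob is written at DRAM_MEM_START (0x8000_0000); setup_boot_regs()
    // later hands the same address to the guest in register a1.
    guest_mem
        .write_slice(&blob, GuestAddress(super::layout::DRAM_MEM_START))
        .map_err(FdtError::WriteFdtToMemory)?;
    Ok(())
}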
+ + +use std::collections::HashMap; +use std::ffi::CString; + +use kvm_bindings::*; +use vm_fdt::{Error as VmFdtError, FdtWriter}; +use vm_memory::GuestMemoryError; + +use super::super::DeviceType; +use super::aia::AIADevice; +use super::regs::*; +use crate::device_manager::mmio::MMIODeviceInfo; +use crate::logger::error; +use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap}; +use crate::vstate::vcpu::Vcpu; + +const ADDRESS_CELLS: u32 = 0x2; +const SIZE_CELLS: u32 = 0x2; +const CPU_INTC_BASE_PHANDLE: u32 = 3; +const AIA_APLIC_PHANDLE: u32 = 1; +const AIA_IMSIC_PHANDLE: u32 = 2; +const S_MODE_EXT_IRQ: u32 = 9; +const IRQ_TYPE_LEVEL_HIGH: u32 = 4; +const IRQ_TYPE_EDGE_RISING: u32 = 0x00000001; + +struct IsaExtInfo<'a> { + name: &'a [u8], + ext_id: KVM_RISCV_ISA_EXT_ID, +} + +// Sorted alphabetically +const ISA_INFO_ARRAY: [IsaExtInfo; 46] = [ + IsaExtInfo { + name: b"smstateen", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SMSTATEEN, + }, + IsaExtInfo { + name: b"ssaia", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SSAIA, + }, + IsaExtInfo { + name: b"sstc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SSTC, + }, + IsaExtInfo { + name: b"svinval", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SVINVAL, + }, + IsaExtInfo { + name: b"svnapot", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SVNAPOT, + }, + IsaExtInfo { + name: b"svpbmt", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SVPBMT, + }, + IsaExtInfo { + name: b"zacas", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZACAS, + }, + IsaExtInfo { + name: b"zba", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBA, + }, + IsaExtInfo { + name: b"zbb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBB, + }, + IsaExtInfo { + name: b"zbc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBC, + }, + IsaExtInfo { + name: b"zbkb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBKB, + }, + IsaExtInfo { + name: b"zbkc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBKC, + }, + IsaExtInfo { + name: b"zbkx", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBKX, + }, + IsaExtInfo { + name: b"zbs", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBS, + }, + IsaExtInfo { + name: b"zfa", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZFA, + }, + IsaExtInfo { + name: b"zfh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZFH, + }, + IsaExtInfo { + name: b"zfhmin", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZFHMIN, + }, + IsaExtInfo { + name: b"zicbom", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOM, + }, + IsaExtInfo { + name: b"zicboz", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOZ, + }, + IsaExtInfo { + name: b"zicntr", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICNTR, + }, + IsaExtInfo { + name: b"zicond", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICOND, + }, + IsaExtInfo { + name: b"zicsr", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICSR, + }, + IsaExtInfo { + name: b"zifencei", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIFENCEI, + }, + IsaExtInfo { + name: b"zihintntl", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIHINTNTL, + }, + IsaExtInfo { + name: b"zihintpause", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIHINTPAUSE, + }, + IsaExtInfo { + name: b"zihpm", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIHPM, + }, + IsaExtInfo { + name: b"zknd", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKND, + }, + IsaExtInfo { + name: b"zkne", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKNE, + }, + IsaExtInfo { + name: b"zknh", + 
ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKNH, + }, + IsaExtInfo { + name: b"zkr", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKR, + }, + IsaExtInfo { + name: b"zksed", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKSED, + }, + IsaExtInfo { + name: b"zksh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKSH, + }, + IsaExtInfo { + name: b"zkt", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKT, + }, + IsaExtInfo { + name: b"ztso", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZTSO, + }, + IsaExtInfo { + name: b"zvbb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVBB, + }, + IsaExtInfo { + name: b"zvbc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVBC, + }, + IsaExtInfo { + name: b"zvfh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVFH, + }, + IsaExtInfo { + name: b"zvfhmin", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVFHMIN, + }, + IsaExtInfo { + name: b"zvkb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKB, + }, + IsaExtInfo { + name: b"zvkg", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKG, + }, + IsaExtInfo { + name: b"zvkned", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKNED, + }, + IsaExtInfo { + name: b"zvknha", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKNHA, + }, + IsaExtInfo { + name: b"zvknhb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKNHB, + }, + IsaExtInfo { + name: b"zvksed", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKSED, + }, + IsaExtInfo { + name: b"zvksh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKSH, + }, + IsaExtInfo { + name: b"zvkt", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKT, + }, +]; + +/// Errors thrown while configuring the Flattened Device Tree for riscv64. +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum FdtError { + /// Create FDT error: {0} + CreateFdt(#[from] VmFdtError), + /// Read cache info error: {0} + ReadCacheInfo(String), + /// Failure in writing FDT in memory. + WriteFdtToMemory(#[from] GuestMemoryError), + /// Get device attribute error. + GetDeviceAttr, + /// Get one register error. + GetOneReg(u64, kvm_ioctls::Error), +} + +pub fn create_fdt( + vcpus: &[Vcpu], + guest_mem: &GuestMemoryMmap, + cmdline: CString, + timer_freq: u32, + device_info: &HashMap<(DeviceType, String), MMIODeviceInfo>, + aia_device: &AIADevice, +) -> Result, FdtError> { + let mut fdt_writer = FdtWriter::new()?; + + let root = fdt_writer.begin_node("")?; + + fdt_writer.property_string("compatible", "linux,dummy-virt")?; + fdt_writer.property_u32("#address-cells", ADDRESS_CELLS)?; + fdt_writer.property_u32("#size-cells", SIZE_CELLS)?; + create_cpu_nodes(&mut fdt_writer, vcpus, timer_freq)?; + create_memory_node(&mut fdt_writer, guest_mem)?; + create_chosen_node(&mut fdt_writer, cmdline)?; + create_aia_node(&mut fdt_writer, aia_device)?; + create_devices_node(&mut fdt_writer, device_info)?; + + fdt_writer.end_node(root)?; + + let fdt_final = fdt_writer.finish()?; + + Ok(fdt_final) +} + +const CPU_ISA_MAX_LEN: usize = ISA_INFO_ARRAY.len() * 16; + +// Create FDT cpu nodes the way kvmtool does. 
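// Illustrative sketch (not part of the patch): how create_cpu_nodes() below
// derives the base "riscv,isa" string from the `kvm_riscv_config.isa` bitmap.
// Bit N of the bitmap stands for letter ('A' + N) and letters are emitted in
// kvmtool's canonical order, so a bitmap with I, M, A, F, D and C set yields
// "rv64imafdc"; the multi-letter extensions probed via ISA_INFO_ARRAY are then
// appended as "_ssaia", "_sstc", "_zicbom", and so on.
#[allow(dead_code)]
fn sketch_base_isa_string(isa_bitmap: u64) -> String {
    let mut isa = String::from("rv64");
    for &letter in b"IEMAFDQCLBJTPVNSUHKORWXYZG" {
        let bit = letter - b'A';
        if isa_bitmap & (1u64 << bit) != 0 {
            isa.push((b'a' + bit) as char);
        }
    }
    isa
}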
+fn create_cpu_nodes(fdt: &mut FdtWriter, vcpus: &[Vcpu], timer_freq: u32) -> Result<(), FdtError> { + let valid_isa_order = b"IEMAFDQCLBJTPVNSUHKORWXYZG"; + let mut cbom = false; + let cbom_blksz = &mut [0u8; 8]; + let mut cboz = false; + let cboz_blksz = &mut [0u8; 8]; + + let cpus = fdt.begin_node("cpus")?; + + fdt.property_u32("#address-cells", 0x1)?; + fdt.property_u32("#size-cells", 0x0)?; + fdt.property_u32("timebase-frequency", timer_freq)?; + + for (cpu_index, vcpu) in vcpus.iter().enumerate() { + let vcpu_fd = &vcpu.kvm_vcpu.fd; + let cpu_index = u32::try_from(cpu_index).unwrap(); + + let cpu_isa = &mut [0; CPU_ISA_MAX_LEN]; + let mut pos = "rv64".len(); + cpu_isa[0..pos].copy_from_slice(b"rv64"); + + let mut bytes = [0u8; 8]; + let off_isa = std::mem::offset_of!(kvm_riscv_config, isa); + let id_isa = riscv64_reg_config_id!(off_isa); + + vcpu_fd + .get_one_reg(id_isa, &mut bytes) + .map_err(|err| FdtError::GetOneReg(id_isa, err))?; + + let isa = u64::from_le_bytes(bytes); + + for i in valid_isa_order { + let index = *i - 'A' as u8; + if isa & (1 << index) != 0 { + cpu_isa[pos] = 'a' as u8 + index; + pos += 1; + } + } + + for isa_ext_info in ISA_INFO_ARRAY { + let ext_id = isa_ext_info.ext_id; + let id_isa_ext = riscv64_reg_isa_ext!(ext_id); + let isa_ext_out = &mut [0u8; 8]; + if vcpu_fd.get_one_reg(id_isa_ext, isa_ext_out).is_err() { + continue; + } + + if u64::from_le_bytes(*isa_ext_out) == 0u64 { + // This extension is not available + continue; + } + + if ext_id == KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOM && !cbom { + let off_zicbom_blk_size = std::mem::offset_of!(kvm_riscv_config, zicbom_block_size); + let id_zicbom_blk_size = riscv64_reg_config_id!(off_zicbom_blk_size); + vcpu_fd + .get_one_reg(id_zicbom_blk_size, cbom_blksz) + .map_err(|err| { + error!("get_one_reg() failed: {err:?}"); + FdtError::GetDeviceAttr + })?; + cbom = true; + } + + if ext_id == KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOZ && !cboz { + let off_zicboz_blk_size = std::mem::offset_of!(kvm_riscv_config, zicboz_block_size); + let id_zicboz_blk_size = riscv64_reg_config_id!(off_zicboz_blk_size); + vcpu_fd + .get_one_reg(id_zicboz_blk_size, cboz_blksz) + .map_err(|err| { + error!("get_one_reg() failed: {err:?}"); + FdtError::GetDeviceAttr + })?; + cboz = true; + } + + cpu_isa[pos] = '_' as u8; + pos += 1; + let name_len = isa_ext_info.name.len(); + cpu_isa[pos..pos + name_len].copy_from_slice(isa_ext_info.name); + pos += name_len; + } + + let off_satp = std::mem::offset_of!(kvm_riscv_config, satp_mode); + let id_satp = riscv64_reg_config_id!(off_satp); + let b = &mut [0u8; 8]; + let satp_mode = if vcpu_fd.get_one_reg(id_satp, b).is_ok() { + u64::from_le_bytes(*b) + } else { + 8 + }; + + let cpu = fdt.begin_node(&format!("cpu@{:x}", cpu_index))?; + fdt.property_string("device_type", "cpu")?; + fdt.property_string("compatible", "riscv")?; + + match satp_mode { + 10 => fdt.property_string("mmu-type", "riscv,sv57")?, + 9 => fdt.property_string("mmu-type", "riscv,sv48")?, + 8 => fdt.property_string("mmu-type", "riscv,sv39")?, + _ => fdt.property_string("mmu-type", "riscv,none")?, + } + + fdt.property_string( + "riscv,isa", + ::std::str::from_utf8(&cpu_isa[0..pos]).expect("cpu_isa unexpected error"), + )?; + + if cbom { + fdt.property_u32( + "riscv,cbom-block-size", + u32::try_from(u64::from_le_bytes(*cbom_blksz)).unwrap(), + )?; + } + + if cboz { + fdt.property_u32( + "riscv,cboz-block-size", + u32::try_from(u64::from_le_bytes(*cboz_blksz)).unwrap(), + )?; + } + + fdt.property_u32("reg", cpu_index)?; + 
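        // Annotation (not part of the patch): "reg" carries the hart ID, which
        // is the same value setup_boot_regs() later loads into register a0 for
        // this vCPU, and the "interrupt-controller" phandle written a few lines
        // below (CPU_INTC_BASE_PHANDLE + cpu_index) is what the IMSIC node
        // references from its "interrupts-extended" property.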
fdt.property_string("status", "okay")?; + + // interrupt controller node + let intc_node = fdt.begin_node("interrupt-controller")?; + fdt.property_string("compatible", "riscv,cpu-intc")?; + fdt.property_u32("#interrupt-cells", 1u32)?; + fdt.property_null("interrupt-controller")?; + fdt.property_u32("phandle", CPU_INTC_BASE_PHANDLE + cpu_index)?; + fdt.end_node(intc_node)?; + + fdt.end_node(cpu)?; + } + + fdt.end_node(cpus)?; + + Ok(()) +} + +fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &GuestMemoryMmap) -> Result<(), FdtError> { + let mem_size = guest_mem.last_addr().raw_value() + - super::layout::DRAM_MEM_START + - super::layout::SYSTEM_MEM_SIZE + + 1; + let mem_reg_prop = &[ + super::layout::DRAM_MEM_START + super::layout::SYSTEM_MEM_SIZE, + mem_size, + ]; + let mem = fdt.begin_node("memory@ram")?; + fdt.property_string("device_type", "memory")?; + fdt.property_array_u64("reg", mem_reg_prop)?; + fdt.end_node(mem)?; + + Ok(()) +} + +fn create_chosen_node(fdt: &mut FdtWriter, cmdline: CString) -> Result<(), FdtError> { + let chosen = fdt.begin_node("chosen")?; + + let cmdline_string = cmdline + .into_string() + .map_err(|_| vm_fdt::Error::InvalidString)?; + fdt.property_string("bootargs", cmdline_string.as_str())?; + + fdt.end_node(chosen)?; + + Ok(()) +} + +fn create_aia_node(fdt: &mut FdtWriter, aia: &AIADevice) -> Result<(), FdtError> { + if aia.msi_compatible() { + let imsic_name = format!("imsics@{:08x}", super::layout::IMSIC_START); + let imsic_node = fdt.begin_node(&imsic_name)?; + + fdt.property_string("compatible", aia.imsic_compatibility())?; + let imsic_reg_prop = aia.imsic_properties(); + fdt.property_array_u32("reg", &imsic_reg_prop)?; + fdt.property_u32("#interrupt-cells", 0u32)?; + fdt.property_null("interrupt-controller")?; + fdt.property_null("msi-controller")?; + + let mut aia_nr_ids: u32 = 0; + let mut nr_ids_attr = ::kvm_bindings::kvm_device_attr::default(); + nr_ids_attr.group = ::kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CONFIG; + nr_ids_attr.attr = ::kvm_bindings::KVM_DEV_RISCV_AIA_CONFIG_IDS as u64; + nr_ids_attr.addr = &mut aia_nr_ids as *mut u32 as u64; + + aia.get_device_attribute(&mut nr_ids_attr) + .map_err(|_| FdtError::GetDeviceAttr)?; + + fdt.property_u32("riscv,num-ids", aia_nr_ids)?; + fdt.property_u32("phandle", AIA_IMSIC_PHANDLE)?; + + let mut irq_cells = vec![]; + let num_cpus = aia.vcpu_count() as u32; + for i in 0..num_cpus { + irq_cells.push(CPU_INTC_BASE_PHANDLE + i); + irq_cells.push(S_MODE_EXT_IRQ); + } + fdt.property_array_u32("interrupts-extended", &irq_cells)?; + + fdt.end_node(imsic_node)?; + } + + let aplic_name = format!("aplic@{:x}", super::layout::APLIC_START); + let aplic_node = fdt.begin_node(&aplic_name)?; + + fdt.property_string("compatible", aia.aplic_compatibility())?; + let reg_cells = aia.aplic_properties(); + fdt.property_array_u32("reg", ®_cells)?; + fdt.property_u32("#interrupt-cells", 2u32)?; + fdt.property_null("interrupt-controller")?; + + // TODO num-sources should be equal to the IRQ allocated lines, and not randomly hardcoded. 
+ fdt.property_u32("riscv,num-sources", 10u32)?; + fdt.property_u32("phandle", AIA_APLIC_PHANDLE)?; + fdt.property_u32("msi-parent", AIA_IMSIC_PHANDLE)?; + + fdt.end_node(aplic_node)?; + + Ok(()) +} + +fn create_devices_node( + fdt: &mut FdtWriter, + devices_info: &HashMap<(DeviceType, String), MMIODeviceInfo>, +) -> Result<(), FdtError> { + // Create one temp Vec to store all virtio devices + let mut ordered_virtio_device: Vec<&MMIODeviceInfo> = Vec::new(); + + for ((device_type, _device_id), info) in devices_info { + match device_type { + DeviceType::Serial => create_serial_node(fdt, info)?, + DeviceType::Virtio(_) => { + ordered_virtio_device.push(info); + } + } + } + + // Sort out virtio devices by address from low to high and insert them into fdt table. + ordered_virtio_device.sort_by_key(|a| a.addr); + for ordered_device_info in ordered_virtio_device.drain(..) { + create_virtio_node(fdt, ordered_device_info)?; + } + + Ok(()) +} + +fn create_virtio_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { + let virtio_mmio = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr))?; + let irq = [dev_info.irq.unwrap().into(), IRQ_TYPE_EDGE_RISING]; + + fdt.property_string("compatible", "virtio,mmio")?; + fdt.property_array_u64("reg", &[dev_info.addr, dev_info.len])?; + fdt.property_array_u32("interrupts", &irq)?; + fdt.property_u32("interrupt-parent", AIA_APLIC_PHANDLE)?; + fdt.end_node(virtio_mmio)?; + + Ok(()) +} + +fn create_serial_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { + let serial_reg_prop = [dev_info.addr, dev_info.len]; + let irq = [dev_info.irq.unwrap().into(), IRQ_TYPE_LEVEL_HIGH]; + + let serial_name = format!("serial@{:x}", dev_info.addr); + let serial_node = fdt.begin_node(&serial_name)?; + fdt.property_string("compatible", "ns16550a")?; + fdt.property_array_u64("reg", &serial_reg_prop)?; + fdt.property_u32("clock-frequency", 3686400)?; + fdt.property_u32("interrupt-parent", AIA_APLIC_PHANDLE)?; + fdt.property_array_u32("interrupts", &irq)?; + fdt.end_node(serial_node)?; + + Ok(()) +} diff --git a/src/vmm/src/arch/riscv64/kvm.rs b/src/vmm/src/arch/riscv64/kvm.rs new file mode 100644 index 00000000000..3df61aba241 --- /dev/null +++ b/src/vmm/src/arch/riscv64/kvm.rs @@ -0,0 +1,43 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::Infallible; + +use kvm_ioctls::Kvm as KvmFd; + +use crate::cpu_config::templates::KvmCapability; + +/// ['Kvm'] initialization can't fail for Riscv64 +pub type KvmArchError = Infallible; + +/// Struct with kvm fd and kvm associated parameters. +#[derive(Debug)] +pub struct Kvm { + /// KVM fd. + pub fd: KvmFd, + /// Additional capabilities that were specified in cpu template. + pub kvm_cap_modifiers: Vec, +} + +impl Kvm { + pub(crate) const DEFAULT_CAPABILITIES: [u32; 5] = [ + kvm_bindings::KVM_CAP_IOEVENTFD, + kvm_bindings::KVM_CAP_USER_MEMORY, + kvm_bindings::KVM_CAP_DEVICE_CTRL, + kvm_bindings::KVM_CAP_MP_STATE, + kvm_bindings::KVM_CAP_ONE_REG, + ]; + + /// Initialize [`Kvm`] type for riscv64 architecture. 
pub fn init_arch(
+        fd: KvmFd,
+        kvm_cap_modifiers: Vec<KvmCapability>,
+    ) -> Result<Self, KvmArchError> {
+        Ok(Self {
+            fd,
+            kvm_cap_modifiers,
+        })
+    }
+}
diff --git a/src/vmm/src/arch/riscv64/layout.rs b/src/vmm/src/arch/riscv64/layout.rs
new file mode 100644
index 00000000000..cab21910aa0
--- /dev/null
+++ b/src/vmm/src/arch/riscv64/layout.rs
@@ -0,0 +1,93 @@
+// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved.
+//
+// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//      1024GB +------------------------------------------------------+
+//             |                          .                           |
+//             |                          .                           |
+//             |                          .                           |
+//             |                          .                           | DRAM_MEM_MAX_SIZE (1022GB)
+//             |                          .                           |
+//             |                          .                           |
+//             |                          .                           |
+//   2MB + 2GB +------------------------------------------------------+ RISCV_KERNEL_START
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//         2GB +------------------------------------------------------+ DRAM_MEM_START
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//         1GB +------------------------------------------------------+ MMIO_MEM_START
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+// 128MB + 4KB +------------------------------------------------------+ IMSIC_START + IMSIC_SZ_PH
+//             |                                                      |
+//       128MB +------------------------------------------------------+ IMSIC_START
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//             |                                                      |
+//           0 +------------------------------------------------------+ APLIC_START
+
+/// Start of RAM on 64 bit RISCV.
+pub const DRAM_MEM_START: u64 = 0x8000_0000; // 2GB.
+/// The maximum RAM size.
+pub const DRAM_MEM_MAX_SIZE: usize = 0x00FF_8000_0000; // 1024 - 2 = 1022G.
+
+/// Start of system's RAM, which is actually the start of the FDT blob.
+pub const SYSTEM_MEM_START: u64 = DRAM_MEM_START;
+
+/// Size of memory region for FDT placement. The kernel image starts after it,
+/// since the kernel needs to be 2MB-aligned for riscv64.
+pub const SYSTEM_MEM_SIZE: u64 = 0x20_0000; // 2MB.
+
+/// Kernel command line maximum size.
+/// As per `arch/riscv/include/uapi/asm/setup.h`.
+pub const CMDLINE_MAX_SIZE: usize = 1024;
+
+/// Maximum size of the device tree blob.
+pub const FDT_MAX_SIZE: usize = 0x1_0000;
+
+// From the RISC-V Privileged Spec v1.10:
+//
+// Global interrupt sources are assigned small unsigned integer identifiers,
+// beginning at the value 1. An interrupt ID of 0 is reserved to mean no
+// interrupt. Interrupt identifiers are also used to break ties when two or
+// more interrupt sources have the same assigned priority. Smaller values of
+// interrupt ID take precedence over larger values of interrupt ID.
+//
+// While the RISC-V supervisor spec doesn't define the maximum number of
+// devices supported by the PLIC, the largest number supported by devices
+// marked as 'riscv,plic0' (which is the only device type this driver supports,
+// and is the only extant PLIC as of now) is 1024. As mentioned above, device
+// 0 is defined to be non-existent so this device really only supports 1023
+// devices.
+/// The highest usable interrupt on riscv64.
+pub const IRQ_MAX: u32 = 1023;
+/// First usable interrupt on riscv64.
+pub const IRQ_BASE: u32 = 1;
+
+/// Below this address will reside the AIA, above this address will reside the MMIO devices.
+pub const MAPPED_IO_START: u64 = 1 << 30; // 1 GB
+
+/// The start of IMSIC(s).
+pub const IMSIC_START: u64 = 0x0800_0000;
+/// IMSIC size per hart.
+pub const IMSIC_SZ_PH: u32 = ::kvm_bindings::KVM_DEV_RISCV_IMSIC_SIZE;
+
+/// The start of APLIC.
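// Annotation (not part of the patch): putting the constants above together, the
// guest-physical layout used by this port works out to:
//   APLIC_START     = 0x0
//   IMSIC_START     = 0x0800_0000 (128MB), one IMSIC page per hart above it
//   MAPPED_IO_START = 0x4000_0000 (1GB), start of the virtio/serial MMIO window
//   DRAM_MEM_START  = 0x8000_0000 (2GB), where the FDT blob is placed
//   kernel start    = DRAM_MEM_START + SYSTEM_MEM_SIZE
//                   = 0x8000_0000 + 0x20_0000 = 0x8020_0000 (2GB + 2MB)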
+pub const APLIC_START: u64 = 0x00; diff --git a/src/vmm/src/arch/riscv64/mod.rs b/src/vmm/src/arch/riscv64/mod.rs new file mode 100644 index 00000000000..80d14549740 --- /dev/null +++ b/src/vmm/src/arch/riscv64/mod.rs @@ -0,0 +1,174 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// Module for the global interrupt controller configuration. +pub mod aia; +mod fdt; +/// Architecture specific KVM-related code. +pub mod kvm; +/// Layout for this riscv64 system. +pub mod layout; +/// Logic for configuring riscv64 registers. +pub mod regs; +/// Architecture specific vCPU code. +pub mod vcpu; +/// Architecture specific VM state code. +pub mod vm; + +use std::cmp::min; +use std::fs::File; + +use linux_loader::loader::pe::PE as Loader; +use linux_loader::loader::{Cmdline, KernelLoader}; +use vm_memory::GuestMemoryError; + +use crate::arch::{BootProtocol, EntryPoint}; +use crate::cpu_config::riscv64::CpuConfiguration; +use crate::cpu_config::riscv64::custom_cpu_template::CustomCpuTemplate; +use crate::initrd::InitrdConfig; +use crate::vmm_config::machine_config::MachineConfig; +use crate::vstate::memory::{Bytes, GuestAddress, GuestMemoryMmap}; +use crate::vstate::vcpu::KvmVcpuError; +use crate::{Vcpu, VcpuConfig, Vmm, logger}; + +/// Errors thrown while configuring riscv64 system. +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum ConfigurationError { + /// Failed to create a Flattened Device Tree for this riscv64 microVM: {0} + SetupFDT(#[from] fdt::FdtError), + /// Failed to write to guest memory. + MemoryError(GuestMemoryError), + /// Cannot copy kernel file fd + KernelFile, + /// Cannot load kernel due to invalid memory configuration or invalid kernel image: {0} + KernelLoader(#[from] linux_loader::loader::Error), + /// Error configuring the vcpu: {0} + VcpuConfigure(#[from] KvmVcpuError), +} + +/// The start of the memory area reserved for MMIO devices. +pub const MMIO_MEM_START: u64 = layout::MAPPED_IO_START; +/// The size of the memory area reserved for MMIO devices. +pub const MMIO_MEM_SIZE: u64 = layout::DRAM_MEM_START - layout::MAPPED_IO_START; //>> 1GB + +/// Returns a Vec of the valid memory addresses for riscv64. +/// See [`layout`](layout) module for a drawing of the specific memory model for this platform. +/// +/// The `offset` parameter specified the offset from [`layout::DRAM_MEM_START`]. +pub fn arch_memory_regions(offset: usize, size: usize) -> Vec<(GuestAddress, usize)> { + assert!(size > 0, "Attempt to allocate guest memory of length 0"); + assert!( + offset.checked_add(size).is_some(), + "Attempt to allocate guest memory such that the address space would wrap around" + ); + assert!( + offset < layout::DRAM_MEM_MAX_SIZE, + "offset outside allowed DRAM range" + ); + + let dram_size = min(size, layout::DRAM_MEM_MAX_SIZE - offset); + + if dram_size != size { + logger::warn!( + "Requested offset/memory size {}/{} exceeds architectural maximum (1022GiB). Size has \ + been truncated to {}", + offset, + size, + dram_size + ); + } + + vec![( + GuestAddress(layout::DRAM_MEM_START + offset as u64), + dram_size, + )] +} + +/// Configures the system for booting Linux. 
+pub fn configure_system_for_boot( + vmm: &mut Vmm, + vcpus: &mut [Vcpu], + machine_config: &MachineConfig, + cpu_template: &CustomCpuTemplate, + entry_point: EntryPoint, + _initrd: &Option, + boot_cmdline: Cmdline, +) -> Result<(), ConfigurationError> { + let cpu_config = { CpuConfiguration }; + + // Apply CPU template to the base CpuConfiguration. + let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template); + + let vcpu_config = VcpuConfig { + vcpu_count: machine_config.vcpu_count, + smt: machine_config.smt, + cpu_config, + }; + + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .configure(vmm.vm.guest_memory(), entry_point, &vcpu_config)?; + } + let cmdline = boot_cmdline + .as_cstring() + .expect("Cannot create cstring from cmdline string"); + + let guest_mem = &vmm.vm.guest_memory(); + // TODO: get timer frequency appropriately. + let fdt = fdt::create_fdt( + vcpus, + guest_mem, + cmdline, + 0x989680u32, + vmm.mmio_device_manager.get_device_info(), + vmm.vm.get_irqchip(), + )?; + let fdt_address = GuestAddress(get_fdt_addr()); + guest_mem + .write_slice(fdt.as_slice(), fdt_address) + .map_err(ConfigurationError::MemoryError)?; + + Ok(()) +} + +/// Load linux kernel into guest memory. +pub fn load_kernel( + kernel: &File, + guest_memory: &GuestMemoryMmap, +) -> Result { + // Need to clone the File because reading from it + // mutates it. + let mut kernel_file = kernel + .try_clone() + .map_err(|_| ConfigurationError::KernelFile)?; + + let entry_addr = Loader::load( + guest_memory, + Some(GuestAddress(get_kernel_start())), + &mut kernel_file, + None, + )?; + + Ok(EntryPoint { + entry_addr: entry_addr.kernel_load, + protocol: BootProtocol::LinuxBoot, + }) +} + +/// Returns the memory address where the kernel could be loaded. +pub fn get_kernel_start() -> u64 { + layout::SYSTEM_MEM_START + layout::SYSTEM_MEM_SIZE +} + +/// Returns the memory address where the initrd could be loaded. Unimplemented for now. +pub fn initrd_load_addr(_guest_mem: &GuestMemoryMmap, _initrd_size: usize) -> Option { + unimplemented!() +} + +/// Auxiliary function to get the address where the device tree blob is loaded. +fn get_fdt_addr() -> u64 { + layout::DRAM_MEM_START +} diff --git a/src/vmm/src/arch/riscv64/regs.rs b/src/vmm/src/arch/riscv64/regs.rs new file mode 100644 index 00000000000..ee0f159cb18 --- /dev/null +++ b/src/vmm/src/arch/riscv64/regs.rs @@ -0,0 +1,156 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +/// Storage for riscv64 registers with different sizes. 
+#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Riscv64RegisterVec { + ids: Vec, + data: Vec, +} + +impl Serialize for Riscv64RegisterVec { + fn serialize(&self, _: S) -> Result + where + S: Serializer, + { + unimplemented!(); + } +} + +impl<'de> Deserialize<'de> for Riscv64RegisterVec { + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + unimplemented!(); + } +} + +// #[repr(C)] +// #[derive(Debug, Default, Copy, Clone, PartialEq)] +// pub struct kvm_riscv_config { +// pub isa: u64, +// pub zicbom_block_size: u64, +// pub mvendorid: u64, +// pub marchid: u64, +// pub mimpid: u64, +// pub zicboz_block_size: u64, +// pub satp_mode: u64, +// } + +// Helper macro from Cloud Hypervisor. +/// Get the ID of a register. +#[macro_export] +macro_rules! riscv64_reg_id { + ($reg_type: tt, $offset: tt) => { + // The core registers of an riscv64 machine are represented + // in kernel by the `kvm_riscv_core` structure: + // + // struct kvm_riscv_core { + // struct user_regs_struct regs; + // unsigned long mode; + // }; + // + // struct user_regs_struct { + // unsigned long pc; + // unsigned long ra; + // unsigned long sp; + // unsigned long gp; + // unsigned long tp; + // unsigned long t0; + // unsigned long t1; + // unsigned long t2; + // unsigned long s0; + // unsigned long s1; + // unsigned long a0; + // unsigned long a1; + // unsigned long a2; + // unsigned long a3; + // unsigned long a4; + // unsigned long a5; + // unsigned long a6; + // unsigned long a7; + // unsigned long s2; + // unsigned long s3; + // unsigned long s4; + // unsigned long s5; + // unsigned long s6; + // unsigned long s7; + // unsigned long s8; + // unsigned long s9; + // unsigned long s10; + // unsigned long s11; + // unsigned long t3; + // unsigned long t4; + // unsigned long t5; + // unsigned long t6; + // }; + // The id of a core register can be obtained like this: offset = id & + // ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CORE). Thus, + // id = KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_CORE | offset + // + // To generalize, the id of a register can be obtained by: + // id = KVM_REG_RISCV | KVM_REG_SIZE_U64 | + // KVM_REG_RISCV_CORE/KVM_REG_RISCV_CONFIG/KVM_REG_RISCV_TIMER | + // offset + KVM_REG_RISCV as u64 + | u64::from($reg_type) + | u64::from(KVM_REG_SIZE_U64) + | ($offset as u64 / std::mem::size_of::() as u64) + }; +} +pub(crate) use riscv64_reg_id; + +/// Return the ID of an ISA register. +#[macro_export] +macro_rules! riscv64_isa_id { + ($reg_type: tt, $id: tt) => { + KVM_REG_RISCV as u64 | u64::from($reg_type) | KVM_REG_SIZE_U64 as u64 | u64::from($id) + }; +} +pub(crate) use riscv64_isa_id; + +/// Return the ID of a core register. +#[macro_export] +macro_rules! riscv64_reg_core_id { + ($offset: tt) => { + riscv64_reg_id!(KVM_REG_RISCV_CORE, $offset) + }; +} +pub(crate) use riscv64_reg_core_id; + +/// Return the ID of a config register. +#[macro_export] +macro_rules! riscv64_reg_config_id { + ($offset: tt) => { + riscv64_reg_id!(KVM_REG_RISCV_CONFIG, $offset) + }; +} +pub(crate) use riscv64_reg_config_id; + +/// Return the ID of timer register. +#[macro_export] +macro_rules! riscv64_reg_timer_id { + ($offset: tt) => { + riscv64_reg_id!(KVM_REG_RISCV_TIMER, $offset) + }; +} + +/// Return the ID of an ISA extension. +#[macro_export] +macro_rules! 
riscv64_reg_isa_ext { + ($ext_id: tt) => { + riscv64_isa_id!(KVM_REG_RISCV_ISA_EXT, $ext_id) + }; +} +pub(crate) use riscv64_reg_isa_ext; diff --git a/src/vmm/src/arch/riscv64/vcpu.rs b/src/vmm/src/arch/riscv64/vcpu.rs new file mode 100644 index 00000000000..05103ee15a8 --- /dev/null +++ b/src/vmm/src/arch/riscv64/vcpu.rs @@ -0,0 +1,197 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + + +use std::fmt::Debug; + +use kvm_bindings::*; +use kvm_ioctls::{VcpuExit, VcpuFd}; +use serde::{Deserialize, Serialize}; + +use super::get_fdt_addr; +use super::regs::*; +use crate::arch::EntryPoint; +use crate::cpu_config::templates::CpuConfiguration; +use crate::logger::{IncMetric, METRICS, error}; +use crate::vcpu::{VcpuConfig, VcpuError}; +use crate::vstate::memory::{Address, GuestMemoryMmap}; +use crate::vstate::vcpu::VcpuEmulation; +use crate::vstate::vm::Vm; + +/// Errors thrown while setting riscv64 registers. +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum VcpuArchError { + /// Failed to get register {0}: {1} + GetOneReg(u64, kvm_ioctls::Error), + /// Failed to set register {0}: {1} + SetOneReg(u64, kvm_ioctls::Error), + /// Failed to retrieve list of registers: {0} + GetRegList(kvm_ioctls::Error), + /// Failed to get multiprocessor state: {0} + GetMp(kvm_ioctls::Error), + /// Failed to set multiprocessor state: {0} + SetMp(kvm_ioctls::Error), +} + +/// Mandatory registers to set before booting a riscv64 vCPU: +/// a0: hart/core ID +/// a1: FDT address +/// pc: kernel entry point +pub fn setup_boot_regs( + vcpufd: &VcpuFd, + cpu_id: u8, + kernel_entry_addr: u64, + _mem: &GuestMemoryMmap, +) -> Result<(), VcpuArchError> { + let off_a0 = std::mem::offset_of!(user_regs_struct, a0); + let id_a0 = riscv64_reg_core_id!(off_a0); + vcpufd + .set_one_reg(id_a0, &u64::from(cpu_id).to_le_bytes()) + .map_err(|err| VcpuArchError::SetOneReg(id_a0, err))?; + + let off_pc = std::mem::offset_of!(user_regs_struct, pc); + let id_pc = riscv64_reg_core_id!(off_pc); + vcpufd + .set_one_reg(id_pc, &kernel_entry_addr.to_le_bytes()) + .map_err(|err| VcpuArchError::SetOneReg(id_pc, err))?; + + let fdt_start: u64 = get_fdt_addr(); + let off_a1 = std::mem::offset_of!(user_regs_struct, a1); + let id_a1 = riscv64_reg_core_id!(off_a1); + vcpufd + .set_one_reg(id_a1, &fdt_start.to_le_bytes()) + .map_err(|err| VcpuArchError::SetOneReg(id_a1, err))?; + + Ok(()) +} + +/// Errors associated with the wrappers over KVM ioctls. 
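// Annotation (not part of the patch): concretely, for vCPU 0 setup_boot_regs()
// above programs
//   a0 = 0            (hart ID)
//   a1 = 0x8000_0000  (DRAM_MEM_START, where the FDT blob was written)
//   pc = the entry address returned by load_kernel(), nominally
//        DRAM_MEM_START + SYSTEM_MEM_SIZE = 0x8020_0000
// Each register is addressed through riscv64_reg_core_id!(), i.e.
//   id = KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_CORE | (offset / 8),
// so a0, the eleventh u64 field of user_regs_struct (offset 80), gets index 10.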
+#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum KvmVcpuError { + /// Error configuring the vcpu registers: {0} + ConfigureRegisters(VcpuArchError), + /// Error creating vcpu: {0} + CreateVcpu(kvm_ioctls::Error), + /// Failed to dump CPU configuration: {0} + DumpCpuConfig(VcpuArchError), + /// Error getting the vcpu preferred target: {0} + GetPreferredTarget(kvm_ioctls::Error), + /// Error initializing the vcpu: {0} + Init(kvm_ioctls::Error), + /// Error applying template: {0} + ApplyCpuTemplate(VcpuArchError), + /// Failed to restore the state of the vcpu: {0} + RestoreState(VcpuArchError), + /// Failed to save the state of the vcpu: {0} + SaveState(VcpuArchError), +} + +/// Error type for [`KvmVcpu::configure`]. +pub type KvmVcpuConfigureError = KvmVcpuError; + +/// A wrapper around creating and using a kvm riscv64 vcpu. +#[derive(Debug)] +pub struct KvmVcpu { + /// Index of vcpu. + pub index: u8, + /// KVM vcpu fd. + pub fd: VcpuFd, + /// Vcpu peripherals, such as buses. + pub peripherals: Peripherals, +} + +/// Vcpu peripherals. +#[derive(Default, Debug)] +pub struct Peripherals { + /// mmio bus. + pub mmio_bus: Option, +} + +impl KvmVcpu { + /// Constructs a new kvm vcpu with arch specific functionality. + /// + /// # Arguments + /// + /// * `index` - Represents the 0-based CPU index between [0, max vcpus). + /// * `vm` - The vm to which this vcpu will get attached. + pub fn new(index: u8, vm: &Vm) -> Result { + let kvm_vcpu = vm + .fd() + .create_vcpu(index.into()) + .map_err(KvmVcpuError::CreateVcpu)?; + + Ok(KvmVcpu { + index, + fd: kvm_vcpu, + peripherals: Default::default(), + }) + } + + /// Configures an riscv64 specific vcpu for booting Linux. + /// + /// # Arguments + /// + /// * `guest_mem` - The guest memory used by this microvm. + /// * `kernel_entry_point` - Specifies the boot protocol and offset from `guest_mem` at which + /// the kernel starts. + /// * `_vcpu_config` - The vCPU configuration. Not used in RISC-V. + pub fn configure( + &mut self, + guest_mem: &GuestMemoryMmap, + kernel_entry_point: EntryPoint, + _vcpu_config: &VcpuConfig, + ) -> Result<(), KvmVcpuError> { + setup_boot_regs( + &self.fd, + self.index, + kernel_entry_point.entry_addr.raw_value(), + guest_mem, + ) + .map_err(KvmVcpuError::ConfigureRegisters)?; + + Ok(()) + } + + /// Save the KVM internal state. Unimplemented. + pub fn save_state(&self) -> Result { + unimplemented!(); + } + + /// Use provided state to populate KVM internal state. Unimplemented. + pub fn restore_state(&mut self, _state: &VcpuState) -> Result<(), KvmVcpuError> { + unimplemented!(); + } + + /// Dumps CPU configuration. Unimplemented. + pub fn dump_cpu_config(&self) -> Result { + unimplemented!(); + } +} + +impl Peripherals { + /// Runs the vCPU in KVM context and handles the kvm exit reason. + /// + /// Returns error or enum specifying whether emulation was handled or interrupted. + pub fn run_arch_emulation(&self, exit: VcpuExit) -> Result { + METRICS.vcpu.failures.inc(); + // TODO: Are we sure we want to finish running a vcpu upon + // receiving a vm exit that is not necessarily an error? + error!("Unexpected exit reason on vcpu run: {:?}", exit); + Err(VcpuError::UnhandledKvmExit(format!("{:?}", exit))) + } +} + +/// Structure holding vCPU kvm state. +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct VcpuState { + /// Multiprocessing state. + pub mp_state: kvm_mp_state, + /// vCPU registers. 
+    pub regs: Riscv64RegisterVec,
+}
diff --git a/src/vmm/src/arch/riscv64/vm.rs b/src/vmm/src/arch/riscv64/vm.rs
new file mode 100644
index 00000000000..39897488309
--- /dev/null
+++ b/src/vmm/src/arch/riscv64/vm.rs
@@ -0,0 +1,85 @@
+// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved.
+//
+// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use serde::{Deserialize, Serialize};
+
+use crate::Kvm;
+use crate::arch::riscv64::aia::AiaState;
+use crate::vstate::memory::GuestMemoryState;
+use crate::vstate::vm::{VmCommon, VmError};
+
+/// Structure representing the current architecture's understanding of what a "virtual machine" is.
+#[derive(Debug)]
+pub struct ArchVm {
+    /// Architecture independent parts of a vm.
+    pub common: VmCommon,
+    /// On riscv64 we need to keep around the fd obtained by creating the AIA device.
+    irqchip_handle: Option<crate::arch::riscv64::aia::AIADevice>,
+}
+
+/// Errors associated with the riscv64 [`ArchVm`].
+#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
+pub enum ArchVmError {
+    /// Error creating the global interrupt controller: {0}
+    VmCreateAIA(crate::arch::riscv64::aia::AiaError),
+    /// Failed to save the VM's AIA state: {0}
+    SaveAia(crate::arch::riscv64::aia::AiaError),
+    /// Failed to restore the VM's AIA state: {0}
+    RestoreAia(crate::arch::riscv64::aia::AiaError),
+}
+
+impl ArchVm {
+    /// Create a new `Vm` struct.
+    pub fn new(kvm: &Kvm) -> Result<ArchVm, VmError> {
+        let common = Self::create_common(kvm)?;
+        Ok(ArchVm {
+            common,
+            irqchip_handle: None,
+        })
+    }
+
+    /// Pre-vCPU creation setup.
+    pub fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> {
+        Ok(())
+    }
+
+    /// Post-vCPU creation setup.
+    pub fn arch_post_create_vcpus(&mut self, nr_vcpus: u8) -> Result<(), ArchVmError> {
+        self.setup_irqchip(nr_vcpus)
+    }
+
+    /// Creates the AIA (Advanced Interrupt Architecture) IRQchip.
+    pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), ArchVmError> {
+        self.irqchip_handle = Some(
+            crate::arch::riscv64::aia::AIADevice::create_aia(&self.fd(), vcpu_count.into())
+                .map_err(ArchVmError::VmCreateAIA)?,
+        );
+        Ok(())
+    }
+
+    /// Gets a reference to the irqchip of the VM.
+    pub fn get_irqchip(&self) -> &crate::arch::riscv64::aia::AIADevice {
+        self.irqchip_handle.as_ref().expect("IRQ chip not set")
+    }
+
+    /// Saves and returns the KVM VM state. Unimplemented for now.
+    pub fn save_state(&self) -> Result<VmState, ArchVmError> {
+        unimplemented!()
+    }
+
+    /// Restores the KVM VM state. Unimplemented for now.
+    pub fn restore_state(&mut self) -> Result<(), ArchVmError> {
+        unimplemented!()
+    }
+}
+
+/// Structure holding the riscv64-specific VM state.
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct VmState {
+    /// Guest memory state.
+    pub memory: GuestMemoryState,
+    /// AIA state.
+ pub aia: AiaState, +} diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 4a810ee083a..2251633be3b 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -5,6 +5,8 @@ use std::fmt::Debug; use std::io; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; #[cfg(feature = "gdb")] use std::sync::mpsc; use std::sync::{Arc, Mutex}; @@ -13,6 +15,7 @@ use event_manager::{MutEventSubscriber, SubscriberOps}; use libc::EFD_NONBLOCK; use linux_loader::cmdline::Cmdline as LoaderKernelCmdline; use userfaultfd::Uffd; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use utils::time::TimestampUs; #[cfg(target_arch = "aarch64")] use vm_memory::GuestAddress; @@ -30,23 +33,31 @@ use crate::cpu_config::templates::{ use crate::device_manager::acpi::ACPIDeviceManager; #[cfg(target_arch = "x86_64")] use crate::device_manager::legacy::PortIODeviceManager; +#[cfg(target_arch = "riscv64")] +use crate::device_manager::mmio::MMIODeviceInfo; use crate::device_manager::mmio::{MMIODeviceManager, MmioError}; use crate::device_manager::persist::{ ACPIDeviceManagerConstructorArgs, ACPIDeviceManagerRestoreError, MMIODevManagerConstructorArgs, }; use crate::device_manager::resources::ResourceAllocator; use crate::devices::BusDevice; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::acpi::vmgenid::{VmGenId, VmGenIdError}; +#[cfg(target_arch = "riscv64")] +use crate::devices::legacy::IrqLineTrigger; #[cfg(target_arch = "aarch64")] use crate::devices::legacy::RTCDevice; use crate::devices::legacy::serial::SerialOut; use crate::devices::legacy::{EventFdTrigger, SerialEventsWrapper, SerialWrapper}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::mmio::MmioTransport; use crate::devices::virtio::net::Net; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::rng::Entropy; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; #[cfg(feature = "gdb")] use crate::gdb; @@ -83,6 +94,7 @@ pub enum StartMicrovmError { #[cfg(target_arch = "x86_64")] CreateLegacyDevice(device_manager::legacy::LegacyDeviceError), /// Error creating VMGenID device: {0} + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] CreateVMGenID(VmGenIdError), /// Error enabling pvtime on vcpu: {0} #[cfg(target_arch = "aarch64")] @@ -135,7 +147,7 @@ impl std::convert::From for StartMicrovmError { } } -#[cfg_attr(target_arch = "aarch64", allow(unused))] +#[cfg_attr(any(target_arch = "aarch64", target_arch = "riscv64"), allow(unused))] fn create_vmm_and_vcpus( instance_info: &InstanceInfo, event_manager: &mut EventManager, @@ -209,6 +221,7 @@ pub fn build_microvm_for_boot( ) -> Result>, StartMicrovmError> { use self::StartMicrovmError::*; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] // Timestamp for measuring microVM boot duration. let request_ts = TimestampUs::default(); @@ -260,10 +273,12 @@ pub fn build_microvm_for_boot( // The boot timer device needs to be the first device attached in order // to maintain the same MMIO address referenced in the documentation // and tests. 
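    // Annotation (not part of the patch): on riscv64 the boot-timer, balloon,
    // vsock, entropy and VMGenID devices are not wired up yet, which is why the
    // blocks below and their helpers further down are now gated on
    // cfg(any(target_arch = "x86_64", target_arch = "aarch64")).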
+ #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if vm_resources.boot_timer { attach_boot_timer_device(&mut vmm, request_ts)?; } + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if let Some(balloon) = vm_resources.balloon.get() { attach_balloon_device(&mut vmm, &mut boot_cmdline, balloon, event_manager)?; } @@ -274,6 +289,7 @@ pub fn build_microvm_for_boot( vm_resources.block.devices.iter(), event_manager, )?; + attach_net_devices( &mut vmm, &mut boot_cmdline, @@ -281,10 +297,12 @@ pub fn build_microvm_for_boot( event_manager, )?; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if let Some(unix_vsock) = vm_resources.vsock.get() { attach_unixsock_vsock_device(&mut vmm, &mut boot_cmdline, unix_vsock, event_manager)?; } + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if let Some(entropy) = vm_resources.entropy.get() { attach_entropy_device(&mut vmm, &mut boot_cmdline, entropy, event_manager)?; } @@ -292,6 +310,10 @@ pub fn build_microvm_for_boot( #[cfg(target_arch = "aarch64")] attach_legacy_devices_aarch64(event_manager, &mut vmm, &mut boot_cmdline)?; + #[cfg(target_arch = "riscv64")] + attach_legacy_devices_riscv64(event_manager, &mut vmm, &mut boot_cmdline)?; + + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] attach_vmgenid_device(&mut vmm)?; #[cfg(target_arch = "aarch64")] @@ -551,6 +573,7 @@ pub fn build_microvm_from_snapshot( Ok(vmm) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] /// Sets up the serial device. pub fn setup_serial_device( event_manager: &mut EventManager, @@ -574,6 +597,35 @@ pub fn setup_serial_device( Ok(serial) } +#[cfg(target_arch = "riscv64")] +/// Sets up the serial device. +pub fn setup_serial_device( + event_manager: &mut EventManager, + vmfd: &kvm_ioctls::VmFd, + input: std::io::Stdin, + out: std::io::Stdout, + device_info: &Option, +) -> Result>, VmmError> { + let interrupt_evt = IrqLineTrigger::new( + vmfd.as_raw_fd(), + device_info.as_ref().unwrap().irq.unwrap().get(), + ); + let kick_stdin_read_evt = + EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).map_err(VmmError::EventFd)?); + let serial = Arc::new(Mutex::new(BusDevice::Serial(SerialWrapper { + serial: Serial::with_events( + interrupt_evt, + SerialEventsWrapper { + buffer_ready_event_fd: Some(kick_stdin_read_evt), + }, + SerialOut::Stdout(out), + ), + input: Some(input), + }))); + event_manager.add_subscriber(serial.clone()); + Ok(serial) +} + /// 64 bytes due to alignment requirement in 3.1 of https://www.kernel.org/doc/html/v5.8/virt/kvm/devices/vcpu.html#attribute-kvm-arm-vcpu-pvtime-ipa #[cfg(target_arch = "aarch64")] const STEALTIME_STRUCT_MEM_SIZE: u64 = 64; @@ -646,6 +698,47 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } +#[cfg(target_arch = "riscv64")] +fn attach_legacy_devices_riscv64( + event_manager: &mut EventManager, + vmm: &mut Vmm, + cmdline: &mut LoaderKernelCmdline, +) -> Result<(), VmmError> { + // Serial device setup. + let cmdline_contains_console = cmdline + .as_cstring() + .map_err(|_| VmmError::Cmdline)? + .into_string() + .map_err(|_| VmmError::Cmdline)? + .contains("console="); + + if cmdline_contains_console { + // Make stdout non-blocking. 
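        // Annotation (not part of the patch): the MMIO resources (base address
        // and IRQ line) are allocated before the serial device is constructed
        // because the riscv64 serial uses an IrqLineTrigger, which needs the raw
        // VM fd and the GSI number up front, unlike the eventfd-based trigger
        // used on the other architectures.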
+ set_stdout_nonblocking(); + let device_info = vmm + .mmio_device_manager + .allocate_mmio_resources(&mut vmm.resource_allocator, 1) + .map_err(|err| VmmError::DeviceManager(err))?; + + let serial = setup_serial_device( + event_manager, + vmm.vm.fd(), + std::io::stdin(), + std::io::stdout(), + &Some(device_info.clone()), + )?; + + vmm.mmio_device_manager + .register_mmio_serial(&mut vmm.resource_allocator, serial, Some(device_info)) + .map_err(VmmError::RegisterMMIODevice)?; + vmm.mmio_device_manager + .add_mmio_serial_to_cmdline(cmdline) + .map_err(VmmError::RegisterMMIODevice)?; + } + + Ok(()) +} + /// Attaches a VirtioDevice device to the device manager and event manager. fn attach_virtio_device( event_manager: &mut EventManager, @@ -670,6 +763,7 @@ fn attach_virtio_device( .map(|_| ()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub(crate) fn attach_boot_timer_device( vmm: &mut Vmm, request_ts: TimestampUs, @@ -682,6 +776,7 @@ pub(crate) fn attach_boot_timer_device( Ok(()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_vmgenid_device(vmm: &mut Vmm) -> Result<(), StartMicrovmError> { let vmgenid = VmGenId::new(vmm.vm.guest_memory(), &mut vmm.resource_allocator) .map_err(StartMicrovmError::CreateVMGenID)?; @@ -693,6 +788,7 @@ fn attach_vmgenid_device(vmm: &mut Vmm) -> Result<(), StartMicrovmError> { Ok(()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_entropy_device( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, @@ -763,6 +859,7 @@ fn attach_net_devices<'a, I: Iterator>> + Debug>( Ok(()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_unixsock_vsock_device( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, @@ -774,6 +871,7 @@ fn attach_unixsock_vsock_device( attach_virtio_device(event_manager, vmm, id, unix_vsock.clone(), cmdline, false) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_balloon_device( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, diff --git a/src/vmm/src/cpu_config/mod.rs b/src/vmm/src/cpu_config/mod.rs index 4c7404a14d3..ba7a93e446a 100644 --- a/src/vmm/src/cpu_config/mod.rs +++ b/src/vmm/src/cpu_config/mod.rs @@ -14,5 +14,9 @@ pub mod x86_64; #[cfg(target_arch = "aarch64")] pub mod aarch64; +/// Module containing type implementations needed for riscv64 CPU configuration +#[cfg(target_arch = "riscv64")] +pub mod riscv64; + #[cfg(test)] pub(crate) mod test_utils; diff --git a/src/vmm/src/cpu_config/riscv64/custom_cpu_template.rs b/src/vmm/src/cpu_config/riscv64/custom_cpu_template.rs new file mode 100644 index 00000000000..9829c88b0d0 --- /dev/null +++ b/src/vmm/src/cpu_config/riscv64/custom_cpu_template.rs @@ -0,0 +1,64 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::borrow::Cow; + +use serde::{Deserialize, Serialize}; + +use crate::cpu_config::templates::{ + CpuTemplateType, GetCpuTemplate, GetCpuTemplateError, KvmCapability, +}; + +impl GetCpuTemplate for Option { + // We only support the default template for now. 
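    // Annotation (not part of the patch): supplying any explicit template
    // (custom or static) currently lands in the unimplemented!() arms below, so
    // on riscv64 only the absence of a template is accepted and the default
    // (empty) CustomCpuTemplate is returned.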
+ fn get_cpu_template(&self) -> Result<Cow<CustomCpuTemplate>, GetCpuTemplateError> { + match self { + Some(template_type) => match template_type { + CpuTemplateType::Custom(_) => unimplemented!(), + CpuTemplateType::Static(_) => unimplemented!(), + }, + None => Ok(Cow::Owned(CustomCpuTemplate::default())), + } + } +} + +/// Wrapper type containing riscv64 CPU config modifiers. +#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct CustomCpuTemplate { + /// Additional kvm capabilities to check before + /// configuring vcpus. + #[serde(default)] + pub kvm_capabilities: Vec<KvmCapability>, + /// Modifiers of enabled vcpu features for a vcpu. + #[serde(default)] + pub vcpu_features: Vec<VcpuFeatures>, + /// Modifiers for registers on Riscv64 CPUs. + #[serde(default)] + pub reg_modifiers: Vec<RegisterModifier>, +} + +impl CustomCpuTemplate { + /// Get a list of register IDs that are modified by the CPU template. We don't use CPU + /// templates for RISC-V, thus just return an empty array. + pub fn reg_list(&self) -> Vec<u64> { + vec![] + } + + /// Validate the correctness of the template. We don't use CPU templates on RISC-V, thus just + /// always return successfully. + pub fn validate(&self) -> Result<(), serde_json::Error> { + Ok(()) + } +} + +/// Struct for defining enabled vcpu features. For now, it is just used as a placeholder. +#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] +pub struct VcpuFeatures; + +/// Wrapper of a mask defined as a bitmap to apply changes to a given register's value. For now, it +/// is used just as a placeholder. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Hash)] +pub struct RegisterModifier; diff --git a/src/vmm/src/cpu_config/riscv64/mod.rs b/src/vmm/src/cpu_config/riscv64/mod.rs new file mode 100644 index 00000000000..096d25f3855 --- /dev/null +++ b/src/vmm/src/cpu_config/riscv64/mod.rs @@ -0,0 +1,38 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// Module for custom CPU templates +pub mod custom_cpu_template; +/// Module for static CPU templates +pub mod static_cpu_templates; + +use super::templates::CustomCpuTemplate; +use crate::arch::riscv64::vcpu::VcpuArchError; +use crate::vstate::vcpu::KvmVcpuError; + +/// Errors thrown while configuring templates. +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum CpuConfigurationError { + /// Error initializing the vcpu: {0} + VcpuInit(#[from] KvmVcpuError), + /// Error reading vcpu registers: {0} + VcpuGetRegs(#[from] VcpuArchError), +} + +/// CPU configuration for riscv64. Just a placeholder. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct CpuConfiguration; + +impl CpuConfiguration { + /// Creates new guest CPU config based on the provided template. Not actually implemented yet. + pub fn apply_template(self, _: &CustomCpuTemplate) -> Self { + self + } + + /// Returns ids of registers that are changed by this template. + pub fn register_ids(&self) -> Vec<u64> { + unimplemented!(); + } +} diff --git a/src/vmm/src/cpu_config/riscv64/static_cpu_templates.rs b/src/vmm/src/cpu_config/riscv64/static_cpu_templates.rs new file mode 100644 index 00000000000..a3950dc8ce0 --- /dev/null +++ b/src/vmm/src/cpu_config/riscv64/static_cpu_templates.rs @@ -0,0 +1,29 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved.
+// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +/// Templates available for configuring the supported RISCV CPU types. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum StaticCpuTemplate { + /// No CPU template is used. + #[default] + None, +} + +impl StaticCpuTemplate { + /// Check if no template specified. + pub fn is_none(&self) -> bool { + self == &StaticCpuTemplate::None + } +} + +impl std::fmt::Display for StaticCpuTemplate { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + StaticCpuTemplate::None => write!(f, "None"), + } + } +} diff --git a/src/vmm/src/cpu_config/templates.rs b/src/vmm/src/cpu_config/templates.rs index 559da632cc4..cb322e747c3 100644 --- a/src/vmm/src/cpu_config/templates.rs +++ b/src/vmm/src/cpu_config/templates.rs @@ -19,6 +19,15 @@ mod common_types { }; } +#[cfg(target_arch = "riscv64")] +mod common_types { + pub use crate::cpu_config::riscv64::custom_cpu_template::CustomCpuTemplate; + pub use crate::cpu_config::riscv64::static_cpu_templates::StaticCpuTemplate; + pub use crate::cpu_config::riscv64::{ + CpuConfiguration, CpuConfigurationError as GuestConfigError, + }; +} + use std::borrow::Cow; use std::fmt::Debug; diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 99bde6e2e78..48c1dbbf81a 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -8,6 +8,8 @@ use std::collections::HashMap; use std::fmt::Debug; use std::num::NonZeroU32; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; use std::sync::{Arc, Mutex}; #[cfg(target_arch = "x86_64")] @@ -26,6 +28,7 @@ use crate::arch::DeviceType::Virtio; use crate::devices::BusDevice; #[cfg(target_arch = "aarch64")] use crate::devices::legacy::RTCDevice; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::pseudo::BootTimer; use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; @@ -144,7 +147,7 @@ impl MMIODeviceManager { } /// Allocates resources for a new device to be added. 
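The cfg-gated `common_types` hunk above is what lets arch-neutral template code keep using a single set of names. A minimal sketch of that alias pattern, with illustrative module names rather than the crate's real layout:

    // Each architecture module exposes the same type names...
    mod riscv64_types {
        #[derive(Debug, Default)]
        pub struct CustomCpuTemplate;
    }

    mod other_arch_types {
        #[derive(Debug, Default)]
        pub struct CustomCpuTemplate;
    }

    // ...and one cfg-gated alias picks the right module, so downstream code
    // never has to mention the architecture.
    #[cfg(target_arch = "riscv64")]
    use riscv64_types as common_types;
    #[cfg(not(target_arch = "riscv64"))]
    use other_arch_types as common_types;

    fn main() {
        let _template = common_types::CustomCpuTemplate::default();
    }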
- fn allocate_mmio_resources( + pub fn allocate_mmio_resources( &mut self, resource_allocator: &mut ResourceAllocator, irq_count: u32, @@ -196,7 +199,11 @@ impl MMIODeviceManager { }; let identifier; { + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] let locked_device = mmio_device.locked_device(); + #[cfg(target_arch = "riscv64")] + let mut locked_device = mmio_device.locked_device(); + identifier = (DeviceType::Virtio(locked_device.device_type()), device_id); for (i, queue_evt) in locked_device.queue_events().iter().enumerate() { let io_addr = IoEventAddress::Mmio( @@ -205,8 +212,14 @@ impl MMIODeviceManager { vm.register_ioevent(queue_evt, &io_addr, u32::try_from(i).unwrap()) .map_err(MmioError::RegisterIoEvent)?; } + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] vm.register_irqfd(&locked_device.interrupt_trigger().irq_evt, irq.get()) .map_err(MmioError::RegisterIrqFd)?; + + #[cfg(target_arch = "riscv64")] + locked_device + .interrupt_trigger_mut() + .set_vmfd_and_gsi(vm.as_raw_fd(), irq.get()); } self.register_mmio_device( @@ -299,7 +312,29 @@ impl MMIODeviceManager { self.register_mmio_device(identifier, device_info, serial) } - #[cfg(target_arch = "aarch64")] + #[cfg(target_arch = "riscv64")] + /// Register an early console at the specified MMIO configuration if given as parameter, + /// otherwise allocate a new MMIO resources for it. + pub fn register_mmio_serial( + &mut self, + resource_allocator: &mut ResourceAllocator, + serial: Arc>, + device_info_opt: Option, + ) -> Result<(), MmioError> { + // Create a new MMIODeviceInfo object on boot path or unwrap the + // existing object on restore path. + let device_info = if let Some(device_info) = device_info_opt { + device_info + } else { + self.allocate_mmio_resources(resource_allocator, 1)? + }; + + let identifier = (DeviceType::Serial, DeviceType::Serial.to_string()); + // Register the newly created Serial object. + self.register_mmio_device(identifier, device_info, serial) + } + + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] /// Append the registered early console to the kernel cmdline. pub fn add_mmio_serial_to_cmdline( &self, @@ -342,6 +377,7 @@ impl MMIODeviceManager { } /// Register a boot timer device. 
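Registering the early console above also appends it to the kernel command line through `add_mmio_serial_to_cmdline()`. The exact parameter that helper emits is owned by the device manager, not shown here; purely as an illustration of the shape of such an argument, the `earlycon=uart,mmio,...` form and the address below are assumptions, not taken from the patch:

    // Hypothetical helper: format an earlycon-style kernel argument for a UART
    // that was placed at `mmio_addr` by the MMIO resource allocator.
    fn earlycon_arg(mmio_addr: u64) -> String {
        format!("earlycon=uart,mmio,0x{mmio_addr:08x}")
    }

    fn main() {
        // Address is made up for the example.
        assert_eq!(earlycon_arg(0x1000_0000), "earlycon=uart,mmio,0x10000000");
    }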
+ #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn register_mmio_boot_timer( &mut self, resource_allocator: &mut ResourceAllocator, diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 30a6387bc82..c9fa5ba64f5 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -8,6 +8,7 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use kvm_ioctls::VmFd; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use log::{error, warn}; use serde::{Deserialize, Serialize}; use vm_allocator::AllocPolicy; @@ -35,12 +36,13 @@ use crate::devices::virtio::rng::Entropy; use crate::devices::virtio::rng::persist::{ EntropyConstructorArgs, EntropyPersistError as EntropyError, EntropyState, }; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +use crate::devices::virtio::vsock::TYPE_VSOCK; use crate::devices::virtio::vsock::persist::{ VsockConstructorArgs, VsockState, VsockUdsConstructorArgs, }; -use crate::devices::virtio::vsock::{ - TYPE_VSOCK, Vsock, VsockError, VsockUnixBackend, VsockUnixBackendError, -}; +use crate::devices::virtio::vsock::{Vsock, VsockError, VsockUnixBackend, VsockUnixBackendError}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_NET, TYPE_RNG}; use crate::mmds::data_store::MmdsVersion; use crate::resources::{ResourcesError, VmResources}; @@ -283,6 +285,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { type ConstructorArgs = MMIODevManagerConstructorArgs<'a>; type Error = DevicePersistError; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn save(&self) -> Self::State { let mut states = DeviceStates::default(); let _: Result<(), ()> = self.for_each_device(|devtype, devid, device_info, bus_dev| { @@ -409,6 +412,11 @@ impl<'a> Persist<'a> for MMIODeviceManager { states } + #[cfg(target_arch = "riscv64")] + fn save(&self) -> Self::State { + unimplemented!(); + } + fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, diff --git a/src/vmm/src/devices/legacy/mod.rs b/src/vmm/src/devices/legacy/mod.rs index b28ae7082fe..5fea7574a4f 100644 --- a/src/vmm/src/devices/legacy/mod.rs +++ b/src/vmm/src/devices/legacy/mod.rs @@ -13,11 +13,15 @@ pub mod serial; use std::io; use std::ops::Deref; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; use serde::Serializer; use serde::ser::SerializeMap; use vm_superio::Trigger; use vmm_sys_util::eventfd::EventFd; +#[cfg(target_arch = "riscv64")] +use vmm_sys_util::{errno, ioctl::ioctl_with_ref, ioctl_ioc_nr, ioctl_iow_nr}; pub use self::i8042::{I8042Device, I8042Error as I8042DeviceError}; #[cfg(target_arch = "aarch64")] @@ -25,6 +29,8 @@ pub use self::rtc_pl031::RTCDevice; pub use self::serial::{ IER_RDA_BIT, IER_RDA_OFFSET, SerialDevice, SerialEventsWrapper, SerialWrapper, }; +#[cfg(target_arch = "riscv64")] +use crate::logger::error; /// Wrapper for implementing the trigger functionality for `EventFd`. /// @@ -64,6 +70,74 @@ impl EventFdTrigger { } } +// TODO: raw_vmfd and gsi are actually never None. 
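The `IrqLineTrigger` introduced below replaces the eventfd/irqfd pair used on the other architectures: riscv64 injects the interrupt by pulsing the GSI with `KVM_IRQ_LINE`. A hedged sketch of the same pulse written against the `kvm_ioctls::VmFd` wrapper, usable only where a `VmFd` is actually in scope, which is exactly the limitation the copied raw-fd variant below works around:

    use kvm_ioctls::VmFd;

    // Assert and immediately release the wired interrupt line `gsi`; this is
    // how a "trigger" is modelled when there is no irqfd to write to.
    fn pulse_irq(vm: &VmFd, gsi: u32) -> Result<(), kvm_ioctls::Error> {
        vm.set_irq_line(gsi, true)?;
        vm.set_irq_line(gsi, false)
    }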
+#[cfg(target_arch = "riscv64")] +#[derive(Debug)] +pub struct IrqLineTrigger { + raw_vmfd: Option, + gsi: Option, +} + +#[cfg(target_arch = "riscv64")] +impl IrqLineTrigger { + pub fn new(raw_vmfd: i32, gsi: u32) -> Self { + Self { + raw_vmfd: Some(raw_vmfd), + gsi: Some(gsi), + } + } + + // This function is taken from kvm-ioctls because it requires VmFd, which we don't + // have at this point. However, it only uses the raw file descriptor, which is just + // an i32. So, we copy it here and use it directly with the raw fd. + fn set_irq_line(fd: F, irq: u32, active: bool) -> Result<(), kvm_ioctls::Error> { + let mut irq_level = kvm_bindings::kvm_irq_level::default(); + irq_level.__bindgen_anon_1.irq = irq; + irq_level.level = u32::from(active); + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(&fd, IrqLineTrigger::KVM_IRQ_LINE(), &irq_level) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + ioctl_iow_nr!( + KVM_IRQ_LINE, + kvm_bindings::KVMIO, + 0x61, + kvm_bindings::kvm_irq_level + ); +} + +#[cfg(target_arch = "riscv64")] +impl Trigger for IrqLineTrigger { + type E = ::std::io::Error; + + fn trigger(&self) -> ::std::io::Result<()> { + // Safe to unwrap since `gsi` and `vmfd` have been set + let gsi = self.gsi.unwrap(); + + IrqLineTrigger::set_irq_line(self.raw_vmfd.unwrap().as_raw_fd(), gsi, true).map_err( + |err| { + error!("set_irq_line() failed: {err:?}"); + std::io::Error::last_os_error() + }, + )?; + IrqLineTrigger::set_irq_line(self.raw_vmfd.unwrap().as_raw_fd(), gsi, false).map_err( + |err| { + error!("set_irq_line() failed: {err:?}"); + std::io::Error::last_os_error() + }, + )?; + + Ok(()) + } +} + /// Called by METRICS.flush(), this function facilitates serialization of aggregated metrics. pub fn flush_metrics(serializer: S) -> Result { let mut seq = serializer.serialize_map(Some(1))?; diff --git a/src/vmm/src/devices/legacy/serial.rs b/src/vmm/src/devices/legacy/serial.rs index 278c15a4464..5c993a4be68 100644 --- a/src/vmm/src/devices/legacy/serial.rs +++ b/src/vmm/src/devices/legacy/serial.rs @@ -19,6 +19,8 @@ use vm_superio::{Serial, Trigger}; use vmm_sys_util::epoll::EventSet; use crate::devices::legacy::EventFdTrigger; +#[cfg(target_arch = "riscv64")] +use crate::devices::legacy::IrqLineTrigger; use crate::logger::{IncMetric, SharedIncMetric}; /// Received Data Available interrupt - for letting the driver know that @@ -71,6 +73,7 @@ pub trait RawIOHandler { fn raw_input(&mut self, _data: &[u8]) -> Result<(), RawIOError>; } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl RawIOHandler for Serial { // This is not used for anything and is basically just a dummy implementation for `raw_input`. fn raw_input(&mut self, data: &[u8]) -> Result<(), RawIOError> { @@ -88,6 +91,24 @@ impl RawIOHandler for Serial RawIOHandler for Serial { + // This is not used for anything and is basically just a dummy implementation for `raw_input`. + fn raw_input(&mut self, data: &[u8]) -> Result<(), RawIOError> { + // Fail fast if the serial is serviced with more data than it can buffer. + if data.len() > self.fifo_capacity() { + return Err(RawIOError::Serial(SerialError::FullFifo)); + } + + // Before enqueuing bytes we first check if there is enough free space + // in the FIFO. 
+ if self.fifo_capacity() >= data.len() { + self.enqueue_raw_bytes(data).map_err(RawIOError::Serial)?; + } + Ok(()) + } +} + /// Wrapper over available events (i.e metrics, buffer ready etc). #[derive(Debug)] pub struct SerialEventsWrapper { @@ -152,6 +173,7 @@ pub struct SerialWrapper pub input: Option, } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl SerialWrapper { fn handle_ewouldblock(&self, ops: &mut EventOps) { let buffer_ready_fd = self.buffer_ready_evt_fd(); @@ -219,9 +241,83 @@ impl SerialWrapper SerialWrapper { + fn handle_ewouldblock(&self, ops: &mut EventOps) { + let buffer_ready_fd = self.buffer_ready_evt_fd(); + let input_fd = self.serial_input_fd(); + if input_fd < 0 || buffer_ready_fd < 0 { + error!("Serial does not have a configured input source."); + return; + } + match ops.add(Events::new(&input_fd, EventSet::IN)) { + Err(event_manager::Error::FdAlreadyRegistered) => (), + Err(err) => { + error!( + "Could not register the serial input to the event manager: {:?}", + err + ); + } + Ok(()) => { + // Bytes might had come on the unregistered stdin. Try to consume any. + self.serial.events().in_buffer_empty() + } + }; + } + + fn recv_bytes(&mut self) -> io::Result { + let avail_cap = self.serial.fifo_capacity(); + if avail_cap == 0 { + return Err(io::Error::from_raw_os_error(libc::ENOBUFS)); + } + + if let Some(input) = self.input.as_mut() { + let mut out = vec![0u8; avail_cap]; + let count = input.read(&mut out)?; + if count > 0 { + self.serial + .raw_input(&out[..count]) + .map_err(|_| io::Error::from_raw_os_error(libc::ENOBUFS))?; + } + + return Ok(count); + } + + Err(io::Error::from_raw_os_error(libc::ENOTTY)) + } + + #[inline] + fn buffer_ready_evt_fd(&self) -> RawFd { + self.serial + .events() + .buffer_ready_event_fd + .as_ref() + .map_or(-1, |buf_ready| buf_ready.as_raw_fd()) + } + + #[inline] + fn serial_input_fd(&self) -> RawFd { + self.input.as_ref().map_or(-1, |input| input.as_raw_fd()) + } + + fn consume_buffer_ready_event(&self) -> io::Result { + self.serial + .events() + .buffer_ready_event_fd + .as_ref() + .map_or(Ok(0), |buf_ready| buf_ready.read()) + } +} + +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] /// Type for representing a serial device. pub type SerialDevice = SerialWrapper; +#[cfg(target_arch = "riscv64")] +/// Type for representing a serial device. +pub type SerialDevice = SerialWrapper; + +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl MutEventSubscriber for SerialWrapper { @@ -319,6 +415,104 @@ impl MutEventSubscriber } } +#[cfg(target_arch = "riscv64")] +impl MutEventSubscriber + for SerialWrapper +{ + /// Handle events on the serial input fd. 
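The riscv64 serial wrapper above keeps the same "buffer ready" kick mechanism as the other architectures: when FIFO space frees up, an eventfd wakes the event loop so host stdin is read again. A small standalone illustration of that eventfd handshake, assuming only `vmm_sys_util`:

    use vmm_sys_util::eventfd::{EFD_NONBLOCK, EventFd};

    fn main() -> std::io::Result<()> {
        // Counterpart of `buffer_ready_event_fd` above.
        let buffer_ready = EventFd::new(EFD_NONBLOCK)?;

        // Device side: the guest drained the UART FIFO, so wake the reader.
        buffer_ready.write(1)?;

        // Event-loop side: consume the kick before trying to read stdin again.
        let kicks = buffer_ready.read()?;
        assert_eq!(kicks, 1);
        Ok(())
    }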
+ fn process(&mut self, event: Events, ops: &mut EventOps) { + #[inline] + fn unregister_source(ops: &mut EventOps, source: &T) { + match ops.remove(Events::new(source, EventSet::IN)) { + Ok(_) => (), + Err(_) => error!("Could not unregister source fd: {}", source.as_raw_fd()), + } + } + + let input_fd = self.serial_input_fd(); + let buffer_ready_fd = self.buffer_ready_evt_fd(); + if input_fd < 0 || buffer_ready_fd < 0 { + error!("Serial does not have a configured input source."); + return; + } + + if buffer_ready_fd == event.fd() { + match self.consume_buffer_ready_event() { + Ok(_) => (), + Err(err) => { + error!( + "Detach serial device input source due to error in consuming the buffer \ + ready event: {:?}", + err + ); + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + return; + } + } + } + + // We expect to receive: `EventSet::IN`, `EventSet::HANG_UP` or + // `EventSet::ERROR`. To process all these events we just have to + // read from the serial input. + match self.recv_bytes() { + Ok(count) => { + // Handle EOF if the event came from the input source. + if input_fd == event.fd() && count == 0 { + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + warn!("Detached the serial input due to peer close/error."); + } + } + Err(err) => { + match err.raw_os_error() { + Some(errno) if errno == libc::ENOBUFS => { + unregister_source(ops, &input_fd); + } + Some(errno) if errno == libc::EWOULDBLOCK => { + self.handle_ewouldblock(ops); + } + Some(errno) if errno == libc::ENOTTY => { + error!("The serial device does not have the input source attached."); + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + } + Some(_) | None => { + // Unknown error, detach the serial input source. + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + warn!("Detached the serial input due to peer close/error."); + } + } + } + } + } + + /// Initial registration of pollable objects. + /// If serial input is present, register the serial input FD as readable. + fn init(&mut self, ops: &mut EventOps) { + if self.input.is_some() && self.serial.events().buffer_ready_event_fd.is_some() { + let serial_fd = self.serial_input_fd(); + let buf_ready_evt = self.buffer_ready_evt_fd(); + + // If the jailer is instructed to daemonize before exec-ing into firecracker, we set + // stdin, stdout and stderr to be open('/dev/null'). However, if stdin is redirected + // from /dev/null then trying to register FILENO_STDIN to epoll will fail with EPERM. + // Therefore, only try to register stdin to epoll if it is a terminal or a FIFO pipe. + // SAFETY: isatty has no invariants that need to be upheld. If serial_fd is an invalid + // argument, it will return 0 and set errno to EBADF. + if unsafe { libc::isatty(serial_fd) } == 1 || is_fifo(serial_fd) { + if let Err(err) = ops.add(Events::new(&serial_fd, EventSet::IN)) { + warn!("Failed to register serial input fd: {}", err); + } + } + if let Err(err) = ops.add(Events::new(&buf_ready_evt, EventSet::IN)) { + warn!("Failed to register serial buffer ready event: {}", err); + } + } + } +} + /// Checks whether the given file descriptor is a FIFO pipe. 
fn is_fifo(fd: RawFd) -> bool { let mut stat = std::mem::MaybeUninit::::uninit(); @@ -337,6 +531,7 @@ fn is_fifo(fd: RawFd) -> bool { (stat.st_mode & libc::S_IFIFO) != 0 } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl SerialWrapper { @@ -361,6 +556,31 @@ impl } } +#[cfg(target_arch = "riscv64")] +impl + SerialWrapper +{ + pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { + if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { + data[0] = self.serial.read(offset); + } else { + METRICS.missed_read_count.inc(); + } + } + + pub fn bus_write(&mut self, offset: u64, data: &[u8]) { + if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { + if let Err(err) = self.serial.write(offset, data[0]) { + // Counter incremented for any handle_write() error. + error!("Failed the write to serial: {:?}", err); + METRICS.error_count.inc(); + } + } else { + METRICS.missed_write_count.inc(); + } + } +} + #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] diff --git a/src/vmm/src/devices/virtio/balloon/device.rs b/src/vmm/src/devices/virtio/balloon/device.rs index 186f09275bc..56429153a3f 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -577,6 +577,11 @@ impl VirtioDevice for Balloon { &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) { let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/block/device.rs b/src/vmm/src/devices/virtio/block/device.rs index bf3043bcdd4..832879ce690 100644 --- a/src/vmm/src/devices/virtio/block/device.rs +++ b/src/vmm/src/devices/virtio/block/device.rs @@ -180,6 +180,14 @@ impl VirtioDevice for Block { } } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + match self { + Self::Virtio(b) => b.interrupt_trigger_mut(), + Self::VhostUser(_) => unimplemented!(), + } + } + fn read_config(&self, offset: u64, data: &mut [u8]) { match self { Self::Virtio(b) => b.read_config(offset, data), diff --git a/src/vmm/src/devices/virtio/block/vhost_user/device.rs b/src/vmm/src/devices/virtio/block/vhost_user/device.rs index b0bf5a31e3f..51e42bb7559 100644 --- a/src/vmm/src/devices/virtio/block/vhost_user/device.rs +++ b/src/vmm/src/devices/virtio/block/vhost_user/device.rs @@ -314,6 +314,11 @@ impl VirtioDevice for VhostUserBlock &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) { let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/block/virtio/device.rs b/src/vmm/src/devices/virtio/block/virtio/device.rs index b11c757d43c..124dcd24b4d 100644 --- a/src/vmm/src/devices/virtio/block/virtio/device.rs +++ b/src/vmm/src/devices/virtio/block/virtio/device.rs @@ -598,6 +598,11 @@ impl VirtioDevice for VirtioBlock { &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + &mut self.irq_trigger + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) 
{ let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/device.rs b/src/vmm/src/devices/virtio/device.rs index 62131e775f5..3796a650474 100644 --- a/src/vmm/src/devices/virtio/device.rs +++ b/src/vmm/src/devices/virtio/device.rs @@ -6,10 +6,14 @@ // found in the THIRD-PARTY file. use std::fmt; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; use std::sync::Arc; use std::sync::atomic::{AtomicU32, Ordering}; use vmm_sys_util::eventfd::EventFd; +#[cfg(target_arch = "riscv64")] +use vmm_sys_util::{errno, ioctl::ioctl_with_ref, ioctl_ioc_nr, ioctl_iow_nr}; use super::ActivateError; use super::mmio::{VIRTIO_MMIO_INT_CONFIG, VIRTIO_MMIO_INT_VRING}; @@ -57,9 +61,15 @@ pub enum IrqType { #[derive(Debug)] pub struct IrqTrigger { pub(crate) irq_status: Arc, + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub(crate) irq_evt: EventFd, + #[cfg(target_arch = "riscv64")] + pub(crate) raw_vmfd: Option, + #[cfg(target_arch = "riscv64")] + pub(crate) gsi: Option, } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl IrqTrigger { pub fn new() -> std::io::Result { Ok(Self { @@ -84,6 +94,68 @@ impl IrqTrigger { } } +#[cfg(target_arch = "riscv64")] +impl IrqTrigger { + pub fn new() -> std::io::Result { + Ok(Self { + irq_status: Arc::new(AtomicU32::new(0)), + raw_vmfd: None, + gsi: None, + }) + } + + pub fn trigger_irq(&self, irq_type: IrqType) -> Result<(), std::io::Error> { + let irq = match irq_type { + IrqType::Config => VIRTIO_MMIO_INT_CONFIG, + IrqType::Vring => VIRTIO_MMIO_INT_VRING, + }; + self.irq_status.fetch_or(irq, Ordering::SeqCst); + + // Safe to unwrap since `gsi` and `vmfd` have been set + let gsi = self.gsi.unwrap(); + IrqTrigger::set_irq_line(self.raw_vmfd.unwrap(), gsi, true).map_err(|err| { + error!("Failed to set IRQ line: {:?}", err); + std::io::Error::last_os_error() + })?; + IrqTrigger::set_irq_line(self.raw_vmfd.unwrap(), gsi, false).map_err(|err| { + error!("Failed to set IRQ line: {:?}", err); + std::io::Error::last_os_error() + })?; + + Ok(()) + } + + pub fn set_vmfd_and_gsi(&mut self, raw_vmfd: i32, gsi: u32) { + self.raw_vmfd = Some(raw_vmfd); + self.gsi = Some(gsi); + } + + // This function is taken from kvm-ioctls because it requires VmFd, which we don't + // have at this point. However, it only uses the raw file descriptor, which is just + // an i32. So, we copy it here and use it directly with the raw fd. + fn set_irq_line(fd: F, irq: u32, active: bool) -> Result<(), kvm_ioctls::Error> { + let mut irq_level = kvm_bindings::kvm_irq_level::default(); + irq_level.__bindgen_anon_1.irq = irq; + irq_level.level = u32::from(active); + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(&fd, IrqTrigger::KVM_IRQ_LINE(), &irq_level) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + ioctl_iow_nr!( + KVM_IRQ_LINE, + kvm_bindings::KVMIO, + 0x61, + kvm_bindings::kvm_irq_level + ); +} + /// Trait for virtio devices to be driven by a virtio transport. /// /// The lifecycle of a virtio device is to be moved to a virtio transport, which will then query the @@ -126,6 +198,9 @@ pub trait VirtioDevice: AsAny + Send { fn interrupt_trigger(&self) -> &IrqTrigger; + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger; + /// The set of feature bits shifted by `page * 32`. 
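As a worked example of the `page * 32` split documented just above and implemented by `avail_features_by_page()` right below: a 64-bit virtio feature word is presented to the guest as two 32-bit pages. A standalone sketch of the standard convention:

    // Standard virtio convention: page 0 holds the low 32 feature bits, page 1
    // the high 32 bits, and any other page has no features.
    fn features_by_page(avail_features: u64, page: u32) -> u32 {
        match page {
            0 => (avail_features & 0xffff_ffff) as u32,
            1 => (avail_features >> 32) as u32,
            _ => 0,
        }
    }

    fn main() {
        let features = (1u64 << 35) | 1; // one low bit (0) and one high bit (35)
        assert_eq!(features_by_page(features, 0), 1);
        assert_eq!(features_by_page(features, 1), 1 << 3); // bit 35 -> bit 3 of page 1
    }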
fn avail_features_by_page(&self, page: u32) -> u32 { let avail_features = self.avail_features(); diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index fff04d1da1a..29dd654e278 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -965,6 +965,12 @@ impl VirtioDevice for Net { fn interrupt_trigger(&self) -> &IrqTrigger { &self.irq_trigger } + + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + &mut self.irq_trigger + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) { let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/rng/device.rs b/src/vmm/src/devices/virtio/rng/device.rs index 97ac8676e0a..b09fc1ab6c6 100644 --- a/src/vmm/src/devices/virtio/rng/device.rs +++ b/src/vmm/src/devices/virtio/rng/device.rs @@ -270,6 +270,11 @@ impl VirtioDevice for Entropy { &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn avail_features(&self) -> u64 { self.avail_features } diff --git a/src/vmm/src/devices/virtio/vhost_user.rs b/src/vmm/src/devices/virtio/vhost_user.rs index 83174fbc4d3..c162b25a505 100644 --- a/src/vmm/src/devices/virtio/vhost_user.rs +++ b/src/vmm/src/devices/virtio/vhost_user.rs @@ -394,6 +394,7 @@ impl VhostUserHandleImpl { Ok(()) } + #[cfg_attr(target_arch = "riscv64", allow(unused_variables))] /// Set up vhost-user backend. This includes updating memory table, /// sending information about virtio rings and enabling them. pub fn setup_backend( @@ -439,6 +440,9 @@ impl VhostUserHandleImpl { .set_vring_base(*queue_index, queue.avail_ring_idx_get()) .map_err(VhostUserError::VhostUserSetVringBase)?; + // TODO: This is a temporary workaround to avoid `irq_trigger.irq_evt` unknown field + // error, since we don't implement vhost for RISC-V yet. + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] // No matter the queue, we set irq_evt for signaling the guest that buffers were // consumed. self.vu diff --git a/src/vmm/src/devices/virtio/vsock/device.rs b/src/vmm/src/devices/virtio/vsock/device.rs index aa114f6cccb..60d5e75182a 100644 --- a/src/vmm/src/devices/virtio/vsock/device.rs +++ b/src/vmm/src/devices/virtio/vsock/device.rs @@ -300,6 +300,11 @@ where &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn read_config(&self, offset: u64, data: &mut [u8]) { match offset { 0 if data.len() == 8 => byte_order::write_le_u64(data, self.cid()), diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 29f3b0148ac..34ad18198ab 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -207,7 +207,7 @@ pub const HTTP_MAX_PAYLOAD_SIZE: usize = 51200; pub enum VmmError { /// Failed to allocate guest resource: {0} AllocateResources(#[from] vm_allocator::Error), - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] /// Invalid command line error. Cmdline, /// Device manager error: {0} @@ -450,7 +450,7 @@ impl Vmm { // would be to save the whole serial device state when we do the vm // serialization. 
For now we set that bit manually - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] { let serial_bus_device = self.get_bus_device(DeviceType::Serial, "Serial"); if serial_bus_device.is_none() { @@ -515,6 +515,12 @@ impl Vmm { self.vm.save_state(&mpidrs).map_err(SaveVmState)? } + #[cfg(target_arch = "riscv64")] + { + // TODO: `save_state()` is unimplemented on riscv64. + // It is just a stub, to pass compilation. + self.vm.save_state().map_err(SaveVmState)? + } }; let device_states = self.mmio_device_manager.save(); diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 4111d8d6c34..dda07d6a0c1 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -26,6 +26,7 @@ use crate::cpu_config::x86_64::cpuid::CpuidTrait; #[cfg(target_arch = "x86_64")] use crate::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host; use crate::device_manager::persist::{ACPIDeviceManagerState, DevicePersistError, DeviceStates}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::logger::{info, warn}; use crate::resources::VmResources; use crate::seccomp::BpfThreadMap; diff --git a/src/vmm/src/snapshot/mod.rs b/src/vmm/src/snapshot/mod.rs index 57ad3980215..40bcabf7485 100644 --- a/src/vmm/src/snapshot/mod.rs +++ b/src/vmm/src/snapshot/mod.rs @@ -53,6 +53,10 @@ const BINCODE_CONFIG: Configuration) -> String { let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index 7cb16a2a213..2a3bf29b2d1 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -3,20 +3,28 @@ #![allow(missing_docs)] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use std::sync::{Arc, Mutex}; use vm_memory::GuestAddress; use vmm_sys_util::tempdir::TempDir; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::builder::build_microvm_for_boot; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::resources::VmResources; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::seccomp::get_empty_filters; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::test_utils::mock_resources::{MockBootSourceConfig, MockVmConfig, MockVmResources}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::vmm_config::boot_source::BootSourceConfig; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::HugePageConfig; use crate::vstate::memory; use crate::vstate::memory::{GuestMemoryMmap, GuestRegionMmap}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::{EventManager, Vmm}; pub mod mock_resources; @@ -65,6 +73,7 @@ pub fn arch_mem_raw(mem_size_bytes: usize) -> Vec { multi_region_mem_raw(&crate::arch::arch_memory_regions(0, mem_size_bytes)) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn create_vmm( _kernel_image: Option<&str>, is_diff: bool, @@ -105,10 +114,12 @@ pub fn create_vmm( (vmm, event_manager) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn default_vmm(kernel_image: Option<&str>) -> (Arc>, EventManager) { create_vmm(kernel_image, false, true) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn default_vmm_no_boot(kernel_image: Option<&str>) -> (Arc>, EventManager) { create_vmm(kernel_image, false, false) } diff --git a/src/vmm/src/vmm_config/machine_config.rs 
b/src/vmm/src/vmm_config/machine_config.rs index cfe7105fdf8..ebe5607cb05 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -270,7 +270,11 @@ impl MachineConfig { let cpu_template = match update.cpu_template { None => self.cpu_template.clone(), + #[cfg(target_arch = "riscv64")] + Some(_) => unreachable!(), + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] Some(StaticCpuTemplate::None) => None, + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] Some(other) => Some(CpuTemplateType::Static(other)), };
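Finally, the `machine_config.rs` hunk above leaves a riscv64 machine-config update unable to carry a CPU template: the `Some(_)` arm is treated as unreachable, presumably because nothing upstream can produce a template on that architecture. A simplified, self-contained sketch of the resulting match, with stand-in types:

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum StaticCpuTemplate {
        None,
    }

    // riscv64 flavour of the template-update logic above: keep the current value
    // when the field is absent, and treat any supplied template as unreachable.
    fn updated_template(
        current: Option<StaticCpuTemplate>,
        update: Option<StaticCpuTemplate>,
    ) -> Option<StaticCpuTemplate> {
        match update {
            None => current,
            Some(_) => unreachable!("no CPU templates are defined for riscv64"),
        }
    }

    fn main() {
        // Only the "field omitted" path is expected to be taken on riscv64.
        assert_eq!(updated_template(None, None), None);
    }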