From 28848b7b03bb784e5c4859cd3e1b5b722a63f048 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Thu, 22 May 2025 12:58:56 +0300 Subject: [PATCH 01/17] seccompiler: support riscv64 target Firecracker creates seccomp filters at build time, and fails if there is no support for the targeted architecture. This commit adds support for the RISC-V architecture in the seccompiler crate. For now, we do not supply any riscv64-specific JSON file in `resources/seccomp`, adhering to the default one, since RISC-V support is experimental. Signed-off-by: Dimitris Charisis --- src/seccompiler/src/bindings.rs | 1 + src/seccompiler/src/types.rs | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/seccompiler/src/bindings.rs b/src/seccompiler/src/bindings.rs index 969ea91cd1c..6e5198fac42 100644 --- a/src/seccompiler/src/bindings.rs +++ b/src/seccompiler/src/bindings.rs @@ -56,6 +56,7 @@ pub struct scmp_arg_cmp { pub const SCMP_ARCH_X86_64: u32 = 0xc000003e; pub const SCMP_ARCH_AARCH64: u32 = 0xc00000b7; +pub const SCMP_ARCH_RISCV64: u32 = 0xc00000f3; /// Kill the process pub const SCMP_ACT_KILL_PROCESS: u32 = 0x80000000; /// Kill the thread diff --git a/src/seccompiler/src/types.rs b/src/seccompiler/src/types.rs index 2035f8b8ea4..3206532b9bf 100644 --- a/src/seccompiler/src/types.rs +++ b/src/seccompiler/src/types.rs @@ -169,6 +169,7 @@ pub struct BpfJson(pub BTreeMap); pub enum TargetArch { X86_64, Aarch64, + Riscv64, } impl TargetArch { @@ -176,6 +177,7 @@ impl TargetArch { match self { TargetArch::X86_64 => SCMP_ARCH_X86_64, TargetArch::Aarch64 => SCMP_ARCH_AARCH64, + TargetArch::Riscv64 => SCMP_ARCH_RISCV64, } } } @@ -186,6 +188,7 @@ impl FromStr for TargetArch { match s.to_lowercase().as_str() { "x86_64" => Ok(TargetArch::X86_64), "aarch64" => Ok(TargetArch::Aarch64), + "riscv64" => Ok(TargetArch::Riscv64), _ => Err(s.to_string()), } } From d3d7a71c6dbd49d33272534c472c026e920286fc Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Thu, 22 May 2025 14:07:48 +0300 
Subject: [PATCH 02/17] vmm: introduce riscv64 module Introduce a scaffold for the riscv64 module under `vmm/src/arch`. For now, this mostly provides unimplemented stubs to build the basic components needed. We will gradually fill in the essential functions and structures for riscv64 support in follow-up commits. Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/mod.rs | 6 +++++ src/vmm/src/arch/riscv64/mod.rs | 37 +++++++++++++++++++++++++++ src/vmm/src/cpu_config/mod.rs | 4 +++ src/vmm/src/cpu_config/riscv64/mod.rs | 8 ++++++ 4 files changed, 55 insertions(+) create mode 100644 src/vmm/src/arch/riscv64/mod.rs create mode 100644 src/vmm/src/cpu_config/riscv64/mod.rs diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index 61d65fea1a5..70f7053dcd6 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -26,6 +26,12 @@ pub use aarch64::{ load_kernel, }; +/// Module for riscv64 related functionality. +#[cfg(target_arch = "riscv64")] +pub mod riscv64; +#[cfg(target_arch = "riscv64")] +pub use riscv64::{ConfigurationError, configure_system_for_boot, load_kernel}; + /// Module for x86_64 related functionality. #[cfg(target_arch = "x86_64")] pub mod x86_64; diff --git a/src/vmm/src/arch/riscv64/mod.rs b/src/vmm/src/arch/riscv64/mod.rs new file mode 100644 index 00000000000..da747255c9e --- /dev/null +++ b/src/vmm/src/arch/riscv64/mod.rs @@ -0,0 +1,37 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use linux_loader::loader::Cmdline; + +use crate::arch::EntryPoint; +use crate::cpu_config::riscv64::CpuConfigurationError; +use crate::cpu_config::templates::CustomCpuTemplate; +use crate::initrd::InitrdConfig; +use crate::vmm_config::machine_config::MachineConfig; +use crate::vstate::memory::GuestMemoryMmap; + +/// Errors thrown while configuring riscv64 system. 
+#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum ConfigurationError {} + +/// Configures the system for booting Linux. +pub fn configure_system_for_boot( + vmm: &mut Vmm, + vcpus: &mut [Vcpu], + machine_config: &MachineConfig, + cpu_template: &CustomCpuTemplate, + entry_point: EntryPoint, + initrd: &Option, + boot_cmdline: Cmdline, +) -> Result<(), ConfigurationError> { + todo!() +} + +/// Load linux kernel into guest memory. +pub fn load_kernel( + kernel: &File, + guest_memory: &GuestMemoryMmap, +) -> Result { + todo!() +} diff --git a/src/vmm/src/cpu_config/mod.rs b/src/vmm/src/cpu_config/mod.rs index 4c7404a14d3..ba7a93e446a 100644 --- a/src/vmm/src/cpu_config/mod.rs +++ b/src/vmm/src/cpu_config/mod.rs @@ -14,5 +14,9 @@ pub mod x86_64; #[cfg(target_arch = "aarch64")] pub mod aarch64; +/// Module containing type implementations needed for riscv64 CPU configuration +#[cfg(target_arch = "riscv64")] +pub mod riscv64; + #[cfg(test)] pub(crate) mod test_utils; diff --git a/src/vmm/src/cpu_config/riscv64/mod.rs b/src/vmm/src/cpu_config/riscv64/mod.rs new file mode 100644 index 00000000000..1bbe3bf04f8 --- /dev/null +++ b/src/vmm/src/cpu_config/riscv64/mod.rs @@ -0,0 +1,8 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// Errors thrown while configuring templates. +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum CpuConfigurationError {} From aa4df287d7f986df25493d01a7e617e06dd155a5 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Thu, 22 May 2025 14:47:26 +0300 Subject: [PATCH 03/17] vmm(riscv64): introduce KVM module Introduce the `kvm` module under `arch/riscv64`. This module will hold riscv64-specific KVM code and follows the existing `aarch64` implementation pattern. 
Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/mod.rs | 2 ++ src/vmm/src/arch/riscv64/kvm.rs | 43 +++++++++++++++++++++++++++++++++ src/vmm/src/arch/riscv64/mod.rs | 3 +++ 3 files changed, 48 insertions(+) create mode 100644 src/vmm/src/arch/riscv64/kvm.rs diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index 70f7053dcd6..60e5272e68b 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -30,6 +30,8 @@ pub use aarch64::{ #[cfg(target_arch = "riscv64")] pub mod riscv64; #[cfg(target_arch = "riscv64")] +pub use riscv64::kvm::Kvm; +#[cfg(target_arch = "riscv64")] pub use riscv64::{ConfigurationError, configure_system_for_boot, load_kernel}; /// Module for x86_64 related functionality. diff --git a/src/vmm/src/arch/riscv64/kvm.rs b/src/vmm/src/arch/riscv64/kvm.rs new file mode 100644 index 00000000000..3df61aba241 --- /dev/null +++ b/src/vmm/src/arch/riscv64/kvm.rs @@ -0,0 +1,43 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::Infallible; + +use kvm_ioctls::Kvm as KvmFd; + +use crate::cpu_config::templates::KvmCapability; + +/// [`Kvm`] initialization can't fail for Riscv64 +pub type KvmArchError = Infallible; + +/// Struct with kvm fd and kvm associated parameters. +#[derive(Debug)] +pub struct Kvm { + /// KVM fd. + pub fd: KvmFd, + /// Additional capabilities that were specified in cpu template. + pub kvm_cap_modifiers: Vec, +} + +impl Kvm { + pub(crate) const DEFAULT_CAPABILITIES: [u32; 5] = [ + kvm_bindings::KVM_CAP_IOEVENTFD, + kvm_bindings::KVM_CAP_USER_MEMORY, + kvm_bindings::KVM_CAP_DEVICE_CTRL, + kvm_bindings::KVM_CAP_MP_STATE, + kvm_bindings::KVM_CAP_ONE_REG, + ]; + + /// Initialize [`Kvm`] type for riscv64 architecture. 
+ pub fn init_arch( + fd: KvmFd, + kvm_cap_modifiers: Vec, + ) -> Result { + Ok(Self { + fd, + kvm_cap_modifiers, + }) + } +} diff --git a/src/vmm/src/arch/riscv64/mod.rs b/src/vmm/src/arch/riscv64/mod.rs index da747255c9e..de0a7420728 100644 --- a/src/vmm/src/arch/riscv64/mod.rs +++ b/src/vmm/src/arch/riscv64/mod.rs @@ -2,6 +2,9 @@ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +/// Architecture specific KVM-related code. +pub mod kvm; + use linux_loader::loader::Cmdline; use crate::arch::EntryPoint; From cd7a6a9ffbb1c8a8040465cbd3dbc4d6afeeebcb Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Thu, 22 May 2025 15:31:54 +0300 Subject: [PATCH 04/17] vmm(riscv64): introduce layout Introduce the VM layout for riscv64, following the aarch64 logic as much as possible. Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/riscv64/layout.rs | 93 ++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 src/vmm/src/arch/riscv64/layout.rs diff --git a/src/vmm/src/arch/riscv64/layout.rs b/src/vmm/src/arch/riscv64/layout.rs new file mode 100644 index 00000000000..cab21910aa0 --- /dev/null +++ b/src/vmm/src/arch/riscv64/layout.rs @@ -0,0 +1,93 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// 1024GB +------------------------------------------------------+ +// | . | +// | . | +// | . | +// | . | DRAM_MEM_MAX_SIZE (1022GB) +// | . | +// | . | +// | . 
| +// 2MB + 2GB +------------------------------------------------------+ RISCV_KERNEL_START +// | | +// | | +// | | +// | | +// | | +// 2GB +------------------------------------------------------+ DRAM_MEM_START +// | | +// | | +// | | +// | | +// | | +// | | +// | | +// | | +// | | +// 1GB +------------------------------------------------------+ MMIO_MEM_START +// | | +// | | +// | | +// | | +// | | +// | | +// 128MB + 4KB +------------------------------------------------------+ IMSIC_START + IMSIC_SZ_PH +// | | +// 128MB +------------------------------------------------------+ IMSIC_START +// | | +// | | +// | | +// | | +// 0 +------------------------------------------------------+ APLIC_START + +/// Start of RAM on 64 bit RISCV. +pub const DRAM_MEM_START: u64 = 0x8000_0000; // 2GB. +/// The maximum RAM size. +pub const DRAM_MEM_MAX_SIZE: usize = 0x00FF_8000_0000; // 1024 - 2 = 1022G. + +/// Start of system's RAM, which is actually the start of the FDT blob. +pub const SYSTEM_MEM_START: u64 = DRAM_MEM_START; + +/// Size of memory region for FDT placement. The kernel image starts after it, +/// since the kernel needs to be 2MB-aligned for riscv64. +pub const SYSTEM_MEM_SIZE: u64 = 0x20_0000; // 2MB. + +/// Kernel command line maximum size. +/// As per `arch/riscv/include/uapi/asm/setup.h`. +pub const CMDLINE_MAX_SIZE: usize = 1024; + +/// Maximum size of the device tree blob. +pub const FDT_MAX_SIZE: usize = 0x1_0000; + +// From the RISC-V Privileged Spec v1.10: +// +// Global interrupt sources are assigned small unsigned integer identifiers, +// beginning at the value 1. An interrupt ID of 0 is reserved to mean no +// interrupt. Interrupt identifiers are also used to break ties when two or +// more interrupt sources have the same assigned priority. Smaller values of +// interrupt ID take precedence over larger values of interrupt ID. 
+// +// While the RISC-V supervisor spec doesn't define the maximum number of +// devices supported by the PLIC, the largest number supported by devices +// marked as 'riscv,plic0' (which is the only device type this driver supports, +// and is the only extant PLIC as of now) is 1024. As mentioned above, device +// 0 is defined to be non-existent so this device really only supports 1023 +// devices. +/// The highest usable interrupt on riscv64. +pub const IRQ_MAX: u32 = 1023; +/// First usable interrupt on riscv64. +pub const IRQ_BASE: u32 = 1; + +/// Below this address will reside the AIA, above this address will reside the MMIO devices. +pub const MAPPED_IO_START: u64 = 1 << 30; // 1 GB + +/// The start of IMSIC(s). +pub const IMSIC_START: u64 = 0x0800_0000; +/// IMSIC size per hart. +pub const IMSIC_SZ_PH: u32 = ::kvm_bindings::KVM_DEV_RISCV_IMSIC_SIZE; + +/// The start of APLIC. +pub const APLIC_START: u64 = 0x00; From 245a9513b476a9dc57e16cea2fc063ceef1870ab Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Thu, 22 May 2025 16:27:51 +0300 Subject: [PATCH 05/17] vmm(riscv64): introduce register macros for register ID access Introduce helper macros to easily retrieve the ID of riscv64 registers. Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/riscv64/regs.rs | 157 +++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 src/vmm/src/arch/riscv64/regs.rs diff --git a/src/vmm/src/arch/riscv64/regs.rs b/src/vmm/src/arch/riscv64/regs.rs new file mode 100644 index 00000000000..46f088de067 --- /dev/null +++ b/src/vmm/src/arch/riscv64/regs.rs @@ -0,0 +1,157 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +/// Storage for riscv64 registers with different sizes. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Riscv64RegisterVec { + ids: Vec, + data: Vec, +} + +impl Serialize for Riscv64RegisterVec { + fn serialize(&self, _: S) -> Result + where + S: Serializer, + { + unimplemented!(); + } +} + +impl<'de> Deserialize<'de> for Riscv64RegisterVec { + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + unimplemented!(); + } +} + +// #[repr(C)] +// #[derive(Debug, Default, Copy, Clone, PartialEq)] +// pub struct kvm_riscv_config { +// pub isa: u64, +// pub zicbom_block_size: u64, +// pub mvendorid: u64, +// pub marchid: u64, +// pub mimpid: u64, +// pub zicboz_block_size: u64, +// pub satp_mode: u64, +// } + +// Helper macro from Cloud Hypervisor. +/// Get the ID of a register. +#[macro_export] +macro_rules! 
riscv64_reg_id { + ($reg_type: tt, $offset: tt) => { + // The core registers of an riscv64 machine are represented + // in kernel by the `kvm_riscv_core` structure: + // + // struct kvm_riscv_core { + // struct user_regs_struct regs; + // unsigned long mode; + // }; + // + // struct user_regs_struct { + // unsigned long pc; + // unsigned long ra; + // unsigned long sp; + // unsigned long gp; + // unsigned long tp; + // unsigned long t0; + // unsigned long t1; + // unsigned long t2; + // unsigned long s0; + // unsigned long s1; + // unsigned long a0; + // unsigned long a1; + // unsigned long a2; + // unsigned long a3; + // unsigned long a4; + // unsigned long a5; + // unsigned long a6; + // unsigned long a7; + // unsigned long s2; + // unsigned long s3; + // unsigned long s4; + // unsigned long s5; + // unsigned long s6; + // unsigned long s7; + // unsigned long s8; + // unsigned long s9; + // unsigned long s10; + // unsigned long s11; + // unsigned long t3; + // unsigned long t4; + // unsigned long t5; + // unsigned long t6; + // }; + // The id of a core register can be obtained like this: offset = id & + // ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_RISCV_CORE). Thus, + // id = KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_CORE | offset + // + // To generalize, the id of a register can be obtained by: + // id = KVM_REG_RISCV | KVM_REG_SIZE_U64 | + // KVM_REG_RISCV_CORE/KVM_REG_RISCV_CONFIG/KVM_REG_RISCV_TIMER | + // offset + KVM_REG_RISCV as u64 + | u64::from($reg_type) + | u64::from(KVM_REG_SIZE_U64) + | ($offset as u64 / std::mem::size_of::() as u64) + }; +} +pub(crate) use riscv64_reg_id; + +/// Return the ID of an ISA register. +#[macro_export] +macro_rules! riscv64_isa_id { + ($reg_type: tt, $id: tt) => { + KVM_REG_RISCV as u64 | u64::from($reg_type) | KVM_REG_SIZE_U64 as u64 | u64::from($id) + }; +} +pub(crate) use riscv64_isa_id; + +/// Return the ID of a core register. +#[macro_export] +macro_rules! 
riscv64_reg_core_id { + ($offset: tt) => { + riscv64_reg_id!(KVM_REG_RISCV_CORE, $offset) + }; +} +pub(crate) use riscv64_reg_core_id; + +/// Return the ID of a config register. +#[macro_export] +macro_rules! riscv64_reg_config_id { + ($offset: tt) => { + riscv64_reg_id!(KVM_REG_RISCV_CONFIG, $offset) + }; +} +pub(crate) use riscv64_reg_config_id; + +/// Return the ID of a timer register. +#[macro_export] +macro_rules! riscv64_reg_timer_id { + ($offset: tt) => { + riscv64_reg_id!(KVM_REG_RISCV_TIMER, $offset) + }; +} +pub(crate) use riscv64_reg_timer_id; + +/// Return the ID of an ISA extension. +#[macro_export] +macro_rules! riscv64_reg_isa_ext { + ($ext_id: tt) => { + riscv64_isa_id!(KVM_REG_RISCV_ISA_EXT, $ext_id) + }; +} +pub(crate) use riscv64_reg_isa_ext; From 7839704b73a09fb18d0b86b80281babb716001f2 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Thu, 22 May 2025 16:54:38 +0300 Subject: [PATCH 06/17] vmm(riscv64): add CPU template stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add placeholder CPU template implementations for riscv64. These are not intended to be used as RISC-V support is still experimental and we don’t plan to configure vCPUs via templates for now. All functions either return defaults or are marked unimplemented. 
Signed-off-by: Dimitris Charisis --- .../cpu_config/riscv64/custom_cpu_template.rs | 64 +++++++++++++++++++ src/vmm/src/cpu_config/riscv64/mod.rs | 24 +++++++ .../riscv64/static_cpu_templates.rs | 29 +++++++++ src/vmm/src/cpu_config/templates.rs | 9 +++ 4 files changed, 126 insertions(+) create mode 100644 src/vmm/src/cpu_config/riscv64/custom_cpu_template.rs create mode 100644 src/vmm/src/cpu_config/riscv64/static_cpu_templates.rs diff --git a/src/vmm/src/cpu_config/riscv64/custom_cpu_template.rs b/src/vmm/src/cpu_config/riscv64/custom_cpu_template.rs new file mode 100644 index 00000000000..9829c88b0d0 --- /dev/null +++ b/src/vmm/src/cpu_config/riscv64/custom_cpu_template.rs @@ -0,0 +1,64 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::borrow::Cow; + +use serde::{Deserialize, Serialize}; + +use crate::cpu_config::templates::{ + CpuTemplateType, GetCpuTemplate, GetCpuTemplateError, KvmCapability, +}; + +impl GetCpuTemplate for Option { + // We only support the default template for now. + fn get_cpu_template(&self) -> Result, GetCpuTemplateError> { + match self { + Some(template_type) => match template_type { + CpuTemplateType::Custom(_) => unimplemented!(), + CpuTemplateType::Static(_) => unimplemented!(), + }, + None => Ok(Cow::Owned(CustomCpuTemplate::default())), + } + } +} + +/// Wrapper type containing riscv64 CPU config modifiers. +#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct CustomCpuTemplate { + /// Additional kvm capabilities to check before + /// configuring vcpus. + #[serde(default)] + pub kvm_capabilities: Vec, + /// Modifiers of enabled vcpu features for vcpu. + #[serde(default)] + pub vcpu_features: Vec, + /// Modifiers for registers on Riscv64 CPUs. 
+ #[serde(default)] + pub reg_modifiers: Vec, +} + +impl CustomCpuTemplate { + /// Get a list of register IDs that are modified by the CPU template. We don't use CPU + /// templates for RISC-V, thus just return an empty vector. + pub fn reg_list(&self) -> Vec { + vec![] + } + + /// Validate the correctness of the template. We don't use CPU templates on RISC-V, thus just + /// always return successfully. + pub fn validate(&self) -> Result<(), serde_json::Error> { + Ok(()) + } +} + +/// Struct for defining enabled vcpu features. For now, it is just used as a placeholder. +#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] +pub struct VcpuFeatures; + +/// Wrapper of a mask defined as a bitmap to apply changes to a given register's value. For now, it +/// is used just as a placeholder. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Hash)] +pub struct RegisterModifier; diff --git a/src/vmm/src/cpu_config/riscv64/mod.rs b/src/vmm/src/cpu_config/riscv64/mod.rs index 1bbe3bf04f8..30bdc17bbdf 100644 --- a/src/vmm/src/cpu_config/riscv64/mod.rs +++ b/src/vmm/src/cpu_config/riscv64/mod.rs @@ -3,6 +3,30 @@ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +/// Module for custom CPU templates. +pub mod custom_cpu_template; +/// Module for static CPU templates. +pub mod static_cpu_templates; + +use super::templates::CustomCpuTemplate; + /// Errors thrown while configuring templates. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum CpuConfigurationError {} + +/// CPU configuration for riscv64. Just a placeholder. +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct CpuConfiguration; + +impl CpuConfiguration { + /// Creates new guest CPU config based on the provided template. + /// Not actually implemented yet. 
+ pub fn apply_template(self, _: &CustomCpuTemplate) -> Self { + self + } + + /// Returns ids of registers that are changed by this template. + pub fn register_ids(&self) -> Vec { + unimplemented!(); + } +} diff --git a/src/vmm/src/cpu_config/riscv64/static_cpu_templates.rs b/src/vmm/src/cpu_config/riscv64/static_cpu_templates.rs new file mode 100644 index 00000000000..a3950dc8ce0 --- /dev/null +++ b/src/vmm/src/cpu_config/riscv64/static_cpu_templates.rs @@ -0,0 +1,29 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +/// Templates available for configuring the supported RISCV CPU types. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum StaticCpuTemplate { + /// No CPU template is used. + #[default] + None, +} + +impl StaticCpuTemplate { + /// Check if no template specified. 
+ pub fn is_none(&self) -> bool { + self == &StaticCpuTemplate::None + } +} + +impl std::fmt::Display for StaticCpuTemplate { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + StaticCpuTemplate::None => write!(f, "None"), + } + } +} diff --git a/src/vmm/src/cpu_config/templates.rs b/src/vmm/src/cpu_config/templates.rs index 559da632cc4..cb322e747c3 100644 --- a/src/vmm/src/cpu_config/templates.rs +++ b/src/vmm/src/cpu_config/templates.rs @@ -19,6 +19,15 @@ mod common_types { }; } +#[cfg(target_arch = "riscv64")] +mod common_types { + pub use crate::cpu_config::riscv64::custom_cpu_template::CustomCpuTemplate; + pub use crate::cpu_config::riscv64::static_cpu_templates::StaticCpuTemplate; + pub use crate::cpu_config::riscv64::{ + CpuConfiguration, CpuConfigurationError as GuestConfigError, + }; +} + use std::borrow::Cow; use std::fmt::Debug; From 57f8bf1591574badc24b54dce138949813707c07 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Fri, 23 May 2025 10:20:47 +0300 Subject: [PATCH 07/17] vmm(riscv64): add vCPU support Introduce vCPU-specific structures based on the aarch64 implementation. Implement the arch-specific `setup_boot_regs` function to properly initialize the required registers before vCPU boot. Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/riscv64/mod.rs | 5 + src/vmm/src/arch/riscv64/vcpu.rs | 197 +++++++++++++++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 src/vmm/src/arch/riscv64/vcpu.rs diff --git a/src/vmm/src/arch/riscv64/mod.rs b/src/vmm/src/arch/riscv64/mod.rs index de0a7420728..677876d86dd 100644 --- a/src/vmm/src/arch/riscv64/mod.rs +++ b/src/vmm/src/arch/riscv64/mod.rs @@ -38,3 +38,8 @@ pub fn load_kernel( ) -> Result { todo!() } + +/// Auxiliary function to get the address where the device tree blob is loaded. 
+fn get_fdt_addr() -> u64 { + layout::DRAM_MEM_START +} diff --git a/src/vmm/src/arch/riscv64/vcpu.rs b/src/vmm/src/arch/riscv64/vcpu.rs new file mode 100644 index 00000000000..05103ee15a8 --- /dev/null +++ b/src/vmm/src/arch/riscv64/vcpu.rs @@ -0,0 +1,197 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + + +use std::fmt::Debug; + +use kvm_bindings::*; +use kvm_ioctls::{VcpuExit, VcpuFd}; +use serde::{Deserialize, Serialize}; + +use super::get_fdt_addr; +use super::regs::*; +use crate::arch::EntryPoint; +use crate::cpu_config::templates::CpuConfiguration; +use crate::logger::{IncMetric, METRICS, error}; +use crate::vcpu::{VcpuConfig, VcpuError}; +use crate::vstate::memory::{Address, GuestMemoryMmap}; +use crate::vstate::vcpu::VcpuEmulation; +use crate::vstate::vm::Vm; + +/// Errors thrown while setting riscv64 registers. 
+#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum VcpuArchError { + /// Failed to get register {0}: {1} + GetOneReg(u64, kvm_ioctls::Error), + /// Failed to set register {0}: {1} + SetOneReg(u64, kvm_ioctls::Error), + /// Failed to retrieve list of registers: {0} + GetRegList(kvm_ioctls::Error), + /// Failed to get multiprocessor state: {0} + GetMp(kvm_ioctls::Error), + /// Failed to set multiprocessor state: {0} + SetMp(kvm_ioctls::Error), +} + +/// Mandatory registers to set before booting a riscv64 vCPU: +/// a0: hart/core ID +/// a1: FDT address +/// pc: kernel entry point +pub fn setup_boot_regs( + vcpufd: &VcpuFd, + cpu_id: u8, + kernel_entry_addr: u64, + _mem: &GuestMemoryMmap, +) -> Result<(), VcpuArchError> { + let off_a0 = std::mem::offset_of!(user_regs_struct, a0); + let id_a0 = riscv64_reg_core_id!(off_a0); + vcpufd + .set_one_reg(id_a0, &u64::from(cpu_id).to_le_bytes()) + .map_err(|err| VcpuArchError::SetOneReg(id_a0, err))?; + + let off_pc = std::mem::offset_of!(user_regs_struct, pc); + let id_pc = riscv64_reg_core_id!(off_pc); + vcpufd + .set_one_reg(id_pc, &kernel_entry_addr.to_le_bytes()) + .map_err(|err| VcpuArchError::SetOneReg(id_pc, err))?; + + let fdt_start: u64 = get_fdt_addr(); + let off_a1 = std::mem::offset_of!(user_regs_struct, a1); + let id_a1 = riscv64_reg_core_id!(off_a1); + vcpufd + .set_one_reg(id_a1, &fdt_start.to_le_bytes()) + .map_err(|err| VcpuArchError::SetOneReg(id_a1, err))?; + + Ok(()) +} + +/// Errors associated with the wrappers over KVM ioctls. 
+#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum KvmVcpuError { + /// Error configuring the vcpu registers: {0} + ConfigureRegisters(VcpuArchError), + /// Error creating vcpu: {0} + CreateVcpu(kvm_ioctls::Error), + /// Failed to dump CPU configuration: {0} + DumpCpuConfig(VcpuArchError), + /// Error getting the vcpu preferred target: {0} + GetPreferredTarget(kvm_ioctls::Error), + /// Error initializing the vcpu: {0} + Init(kvm_ioctls::Error), + /// Error applying template: {0} + ApplyCpuTemplate(VcpuArchError), + /// Failed to restore the state of the vcpu: {0} + RestoreState(VcpuArchError), + /// Failed to save the state of the vcpu: {0} + SaveState(VcpuArchError), +} + +/// Error type for [`KvmVcpu::configure`]. +pub type KvmVcpuConfigureError = KvmVcpuError; + +/// A wrapper around creating and using a kvm riscv64 vcpu. +#[derive(Debug)] +pub struct KvmVcpu { + /// Index of vcpu. + pub index: u8, + /// KVM vcpu fd. + pub fd: VcpuFd, + /// Vcpu peripherals, such as buses. + pub peripherals: Peripherals, +} + +/// Vcpu peripherals. +#[derive(Default, Debug)] +pub struct Peripherals { + /// mmio bus. + pub mmio_bus: Option, +} + +impl KvmVcpu { + /// Constructs a new kvm vcpu with arch specific functionality. + /// + /// # Arguments + /// + /// * `index` - Represents the 0-based CPU index between [0, max vcpus). + /// * `vm` - The vm to which this vcpu will get attached. + pub fn new(index: u8, vm: &Vm) -> Result { + let kvm_vcpu = vm + .fd() + .create_vcpu(index.into()) + .map_err(KvmVcpuError::CreateVcpu)?; + + Ok(KvmVcpu { + index, + fd: kvm_vcpu, + peripherals: Default::default(), + }) + } + + /// Configures an riscv64 specific vcpu for booting Linux. + /// + /// # Arguments + /// + /// * `guest_mem` - The guest memory used by this microvm. + /// * `kernel_entry_point` - Specifies the boot protocol and offset from `guest_mem` at which + /// the kernel starts. + /// * `_vcpu_config` - The vCPU configuration. Not used in RISC-V. 
+ pub fn configure( + &mut self, + guest_mem: &GuestMemoryMmap, + kernel_entry_point: EntryPoint, + _vcpu_config: &VcpuConfig, + ) -> Result<(), KvmVcpuError> { + setup_boot_regs( + &self.fd, + self.index, + kernel_entry_point.entry_addr.raw_value(), + guest_mem, + ) + .map_err(KvmVcpuError::ConfigureRegisters)?; + + Ok(()) + } + + /// Save the KVM internal state. Unimplemented. + pub fn save_state(&self) -> Result { + unimplemented!(); + } + + /// Use provided state to populate KVM internal state. Unimplemented. + pub fn restore_state(&mut self, _state: &VcpuState) -> Result<(), KvmVcpuError> { + unimplemented!(); + } + + /// Dumps CPU configuration. Unimplemented. + pub fn dump_cpu_config(&self) -> Result { + unimplemented!(); + } +} + +impl Peripherals { + /// Runs the vCPU in KVM context and handles the kvm exit reason. + /// + /// Returns error or enum specifying whether emulation was handled or interrupted. + pub fn run_arch_emulation(&self, exit: VcpuExit) -> Result { + METRICS.vcpu.failures.inc(); + // TODO: Are we sure we want to finish running a vcpu upon + // receiving a vm exit that is not necessarily an error? + error!("Unexpected exit reason on vcpu run: {:?}", exit); + Err(VcpuError::UnhandledKvmExit(format!("{:?}", exit))) + } +} + +/// Structure holding vCPU kvm state. +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +pub struct VcpuState { + /// Multiprocessing state. + pub mp_state: kvm_mp_state, + /// vCPU registers. + pub regs: Riscv64RegisterVec, +} From b998eb2a8093d8629341886bb1ebdbbe0b1d1df0 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Fri, 23 May 2025 13:28:44 +0300 Subject: [PATCH 08/17] vmm(riscv64): introduce `vm`, `fdt`, and `aia` modules Add `vm` architecture-specific state module, following the aarch64 paradigm. Add support for the in-kernel KVM Advanced Interrupt Architecture (AIA) irqchip, which emulates the AIA APLIC and per-hart IMSIC. Introduce a new `fdt` module to describe the provided hardware platform. 
Update `Cargo.toml` to include `vm-fdt` rust-vmm crate for riscv64 target. Signed-off-by: Dimitris Charisis --- src/vmm/Cargo.toml | 2 +- src/vmm/src/arch/riscv64/aia/mod.rs | 213 +++++++++++ src/vmm/src/arch/riscv64/aia/regs.rs | 11 + src/vmm/src/arch/riscv64/fdt.rs | 553 +++++++++++++++++++++++++++ src/vmm/src/arch/riscv64/vm.rs | 85 ++++ 5 files changed, 863 insertions(+), 1 deletion(-) create mode 100644 src/vmm/src/arch/riscv64/aia/mod.rs create mode 100644 src/vmm/src/arch/riscv64/aia/regs.rs create mode 100644 src/vmm/src/arch/riscv64/fdt.rs create mode 100644 src/vmm/src/arch/riscv64/vm.rs diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 6ecdb75e5fe..8e7f69c9da1 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -45,7 +45,7 @@ vm-superio = "0.8.0" vmm-sys-util = { version = "0.12.1", features = ["with-serde"] } zerocopy = { version = "0.8.25" } -[target.'cfg(target_arch = "aarch64")'.dependencies] +[target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] vm-fdt = "0.3.0" [dev-dependencies] diff --git a/src/vmm/src/arch/riscv64/aia/mod.rs b/src/vmm/src/arch/riscv64/aia/mod.rs new file mode 100644 index 00000000000..b0a8b333025 --- /dev/null +++ b/src/vmm/src/arch/riscv64/aia/mod.rs @@ -0,0 +1,213 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +mod regs; + +use kvm_ioctls::{DeviceFd, VmFd}; +pub use regs::AiaState; + +/// Represent an AIA device +#[derive(Debug)] +pub struct AIADevice { + fd: DeviceFd, + vcpu_count: u64, +} + +impl AIADevice { + const VERSION: u32 = kvm_bindings::kvm_device_type_KVM_DEV_TYPE_RISCV_AIA; + + /// Return whether the device is MSI compatible. 
+ pub fn msi_compatible(&self) -> bool { + true + } + + /// Return the FDT `compatible` property string for IMSIC. + pub fn imsic_compatibility(&self) -> &str { + "riscv,imsics" + } + + /// Return IMSIC properties. + pub fn imsic_properties(&self) -> [u32; 4] { + [ + 0, + AIADevice::get_imsic_addr() as u32, + 0, + super::layout::IMSIC_SZ_PH * self.vcpu_count as u32, + ] + } + + /// Return the FDT `compatible` property string for APLIC. + pub fn aplic_compatibility(&self) -> &str { + "riscv,aplic" + } + + /// Return APLIC properties. + pub fn aplic_properties(&self) -> [u32; 4] { + [ + 0, + AIADevice::get_aplic_addr() as u32, + 0, + ::kvm_bindings::KVM_DEV_RISCV_APLIC_SIZE, + ] + } + + /// Return the file descriptor of the AIA device. + pub fn device_fd(&self) -> &DeviceFd { + &self.fd + } + + /// Returns the number of vCPUs this AIA device handles. + pub fn vcpu_count(&self) -> u64 { + self.vcpu_count + } + + fn get_aplic_addr() -> u64 { + super::layout::APLIC_START + } + + fn get_imsic_addr() -> u64 { + super::layout::IMSIC_START + } + + /// Create the AIA device object. + pub fn create_device(fd: DeviceFd, vcpu_count: u64) -> Self { + Self { fd, vcpu_count } + } + + /// Initialize an AIA device. + pub fn init_device(vm: &VmFd) -> Result<DeviceFd, AiaError> { + let mut aia_device = kvm_bindings::kvm_create_device { + type_: Self::VERSION, + fd: 0, + flags: 0, + }; + + vm.create_device(&mut aia_device) + .map_err(AiaError::CreateAIA) + } + + fn init_device_attributes(aia_device: &Self) -> Result<(), AiaError> { + // Set attributes.
+ let nr_irqs: u32 = super::layout::IRQ_MAX; + let aia_nr_sources: u32 = nr_irqs; + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CONFIG, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_CONFIG_SRCS), + &aia_nr_sources as *const u32 as u64, + 0, + )?; + + let aia_hart_bits = u64::from(aia_device.vcpu_count) - 1; + let aia_hart_bits = ::std::cmp::max(64 - aia_hart_bits.leading_zeros(), 1); + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CONFIG, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_CONFIG_HART_BITS), + &aia_hart_bits as *const u32 as u64, + 0, + )?; + + // Set APLIC address. + let aia_addr_aplic: u64 = AIADevice::get_aplic_addr(); + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_ADDR, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_ADDR_APLIC), + &aia_addr_aplic as *const u64 as u64, + 0, + )?; + + let aia_imsic_addr = |hart| -> u64 { + AIADevice::get_imsic_addr() + u64::from(hart) * u64::from(super::layout::IMSIC_SZ_PH) + }; + for i in 0..aia_device.vcpu_count { + let aia_addr_imsic = aia_imsic_addr(i); + let aia_addr_imsic_attr = 1 + u64::from(i); + Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_ADDR, + u64::from(aia_addr_imsic_attr), + &aia_addr_imsic as *const u64 as u64, + 0, + )?; + } + + Ok(()) + } + + /// Create an AIA device. + pub fn create_aia(vm: &VmFd, vcpu_count: u64) -> Result<Self, AiaError> { + let aia_fd = Self::init_device(vm)?; + + let device = Self::create_device(aia_fd, vcpu_count); + + Self::init_device_attributes(&device)?; + + Self::finalize_device(&device)?; + + Ok(device) + } + + /// Finalize the setup of an AIA device. + pub fn finalize_device(aia_device: &Self) -> Result<(), AiaError> { + // Finalize the AIA.
+ Self::set_device_attribute( + aia_device.device_fd(), + kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CTRL, + u64::from(kvm_bindings::KVM_DEV_RISCV_AIA_CTRL_INIT), + 0, + 0, + )?; + + Ok(()) + } + + /// Set an AIA device attribute. + pub fn set_device_attribute( + fd: &DeviceFd, + group: u32, + attr: u64, + addr: u64, + flags: u32, + ) -> Result<(), AiaError> { + let attr = kvm_bindings::kvm_device_attr { + flags, + group, + attr, + addr, + }; + fd.set_device_attr(&attr) + .map_err(|err| AiaError::DeviceAttribute(err, true, group))?; + + Ok(()) + } + + /// A safe wrapper over unsafe kvm_ioctl::get_device_attr() + pub fn get_device_attribute( + &self, + attr: &mut ::kvm_bindings::kvm_device_attr, + ) -> Result<(), AiaError> { + // SAFETY: attr.addr is safe to write to. + unsafe { + self.fd + .get_device_attr(attr) + .map_err(|err| AiaError::DeviceAttribute(err, true, attr.group))? + }; + + Ok(()) + } +} + +/// Errors thrown while setting up the AIA. +#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] +pub enum AiaError { + /// Error while calling KVM ioctl for setting up the global interrupt controller: {0} + CreateAIA(kvm_ioctls::Error), + /// Error while setting or getting device attributes for the AIA: {0}, {1}, {2} + DeviceAttribute(kvm_ioctls::Error, bool, u32), +} diff --git a/src/vmm/src/arch/riscv64/aia/regs.rs b/src/vmm/src/arch/riscv64/aia/regs.rs new file mode 100644 index 00000000000..63aa9cd3808 --- /dev/null +++ b/src/vmm/src/arch/riscv64/aia/regs.rs @@ -0,0 +1,11 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +/// Structure used for serializing the state of the AIA registers. +/// For now, it is just a placeholder. 
+#[derive(Debug, Default, Serialize, Deserialize)] +pub struct AiaState; diff --git a/src/vmm/src/arch/riscv64/fdt.rs b/src/vmm/src/arch/riscv64/fdt.rs new file mode 100644 index 00000000000..4acaf529efc --- /dev/null +++ b/src/vmm/src/arch/riscv64/fdt.rs @@ -0,0 +1,553 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright © 2024 Institute of Software, CAS. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + + +use std::collections::HashMap; +use std::ffi::CString; + +use kvm_bindings::*; +use vm_fdt::{Error as VmFdtError, FdtWriter}; +use vm_memory::GuestMemoryError; + +use super::super::DeviceType; +use super::aia::AIADevice; +use super::regs::*; +use crate::device_manager::mmio::MMIODeviceInfo; +use crate::logger::error; +use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap}; +use crate::vstate::vcpu::Vcpu; + +const ADDRESS_CELLS: u32 = 0x2; +const SIZE_CELLS: u32 = 0x2; +const CPU_INTC_BASE_PHANDLE: u32 = 3; +const AIA_APLIC_PHANDLE: u32 = 1; +const AIA_IMSIC_PHANDLE: u32 = 2; +const S_MODE_EXT_IRQ: u32 = 9; +const IRQ_TYPE_LEVEL_HIGH: u32 = 4; +const IRQ_TYPE_EDGE_RISING: u32 = 0x00000001; + +struct IsaExtInfo<'a> { + name: &'a [u8], + ext_id: KVM_RISCV_ISA_EXT_ID, +} + +// Sorted alphabetically +const ISA_INFO_ARRAY: [IsaExtInfo; 46] = [ + IsaExtInfo { + name: b"smstateen", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SMSTATEEN, + }, + IsaExtInfo { + name: b"ssaia", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SSAIA, + }, + IsaExtInfo { + name: b"sstc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SSTC, + }, + IsaExtInfo { + name: b"svinval", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SVINVAL, + }, 
+ IsaExtInfo { + name: b"svnapot", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SVNAPOT, + }, + IsaExtInfo { + name: b"svpbmt", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_SVPBMT, + }, + IsaExtInfo { + name: b"zacas", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZACAS, + }, + IsaExtInfo { + name: b"zba", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBA, + }, + IsaExtInfo { + name: b"zbb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBB, + }, + IsaExtInfo { + name: b"zbc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBC, + }, + IsaExtInfo { + name: b"zbkb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBKB, + }, + IsaExtInfo { + name: b"zbkc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBKC, + }, + IsaExtInfo { + name: b"zbkx", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBKX, + }, + IsaExtInfo { + name: b"zbs", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZBS, + }, + IsaExtInfo { + name: b"zfa", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZFA, + }, + IsaExtInfo { + name: b"zfh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZFH, + }, + IsaExtInfo { + name: b"zfhmin", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZFHMIN, + }, + IsaExtInfo { + name: b"zicbom", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOM, + }, + IsaExtInfo { + name: b"zicboz", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOZ, + }, + IsaExtInfo { + name: b"zicntr", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICNTR, + }, + IsaExtInfo { + name: b"zicond", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICOND, + }, + IsaExtInfo { + name: b"zicsr", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICSR, + }, + IsaExtInfo { + name: b"zifencei", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIFENCEI, + }, + IsaExtInfo { + name: b"zihintntl", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIHINTNTL, + }, + IsaExtInfo { + name: b"zihintpause", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIHINTPAUSE, + }, + IsaExtInfo 
{ + name: b"zihpm", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZIHPM, + }, + IsaExtInfo { + name: b"zknd", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKND, + }, + IsaExtInfo { + name: b"zkne", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKNE, + }, + IsaExtInfo { + name: b"zknh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKNH, + }, + IsaExtInfo { + name: b"zkr", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKR, + }, + IsaExtInfo { + name: b"zksed", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKSED, + }, + IsaExtInfo { + name: b"zksh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKSH, + }, + IsaExtInfo { + name: b"zkt", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZKT, + }, + IsaExtInfo { + name: b"ztso", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZTSO, + }, + IsaExtInfo { + name: b"zvbb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVBB, + }, + IsaExtInfo { + name: b"zvbc", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVBC, + }, + IsaExtInfo { + name: b"zvfh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVFH, + }, + IsaExtInfo { + name: b"zvfhmin", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVFHMIN, + }, + IsaExtInfo { + name: b"zvkb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKB, + }, + IsaExtInfo { + name: b"zvkg", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKG, + }, + IsaExtInfo { + name: b"zvkned", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKNED, + }, + IsaExtInfo { + name: b"zvknha", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKNHA, + }, + IsaExtInfo { + name: b"zvknhb", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKNHB, + }, + IsaExtInfo { + name: b"zvksed", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKSED, + }, + IsaExtInfo { + name: b"zvksh", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKSH, + }, + IsaExtInfo { + name: b"zvkt", + ext_id: KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZVKT, + }, +]; + +/// Errors thrown while configuring the Flattened 
Device Tree for riscv64. +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum FdtError { + /// Create FDT error: {0} + CreateFdt(#[from] VmFdtError), + /// Read cache info error: {0} + ReadCacheInfo(String), + /// Failure in writing FDT in memory. + WriteFdtToMemory(#[from] GuestMemoryError), + /// Get device attribute error. + GetDeviceAttr, + /// Get one register error. + GetOneReg(u64, kvm_ioctls::Error), +} + +pub fn create_fdt( + vcpus: &[Vcpu], + guest_mem: &GuestMemoryMmap, + cmdline: CString, + timer_freq: u32, + device_info: &HashMap<(DeviceType, String), MMIODeviceInfo>, + aia_device: &AIADevice, +) -> Result<Vec<u8>, FdtError> { + let mut fdt_writer = FdtWriter::new()?; + + let root = fdt_writer.begin_node("")?; + + fdt_writer.property_string("compatible", "linux,dummy-virt")?; + fdt_writer.property_u32("#address-cells", ADDRESS_CELLS)?; + fdt_writer.property_u32("#size-cells", SIZE_CELLS)?; + create_cpu_nodes(&mut fdt_writer, vcpus, timer_freq)?; + create_memory_node(&mut fdt_writer, guest_mem)?; + create_chosen_node(&mut fdt_writer, cmdline)?; + create_aia_node(&mut fdt_writer, aia_device)?; + create_devices_node(&mut fdt_writer, device_info)?; + + fdt_writer.end_node(root)?; + + let fdt_final = fdt_writer.finish()?; + + Ok(fdt_final) +} + +const CPU_ISA_MAX_LEN: usize = ISA_INFO_ARRAY.len() * 16; + +// Create FDT cpu nodes the way kvmtool does.
+fn create_cpu_nodes(fdt: &mut FdtWriter, vcpus: &[Vcpu], timer_freq: u32) -> Result<(), FdtError> { + let valid_isa_order = b"IEMAFDQCLBJTPVNSUHKORWXYZG"; + let mut cbom = false; + let cbom_blksz = &mut [0u8; 8]; + let mut cboz = false; + let cboz_blksz = &mut [0u8; 8]; + + let cpus = fdt.begin_node("cpus")?; + + fdt.property_u32("#address-cells", 0x1)?; + fdt.property_u32("#size-cells", 0x0)?; + fdt.property_u32("timebase-frequency", timer_freq)?; + + for (cpu_index, vcpu) in vcpus.iter().enumerate() { + let vcpu_fd = &vcpu.kvm_vcpu.fd; + let cpu_index = u32::try_from(cpu_index).unwrap(); + + let cpu_isa = &mut [0; CPU_ISA_MAX_LEN]; + let mut pos = "rv64".len(); + cpu_isa[0..pos].copy_from_slice(b"rv64"); + + let mut bytes = [0u8; 8]; + let off_isa = std::mem::offset_of!(kvm_riscv_config, isa); + let id_isa = riscv64_reg_config_id!(off_isa); + + vcpu_fd + .get_one_reg(id_isa, &mut bytes) + .map_err(|err| FdtError::GetOneReg(id_isa, err))?; + + let isa = u64::from_le_bytes(bytes); + + for i in valid_isa_order { + let index = *i - 'A' as u8; + if isa & (1 << index) != 0 { + cpu_isa[pos] = 'a' as u8 + index; + pos += 1; + } + } + + for isa_ext_info in ISA_INFO_ARRAY { + let ext_id = isa_ext_info.ext_id; + let id_isa_ext = riscv64_reg_isa_ext!(ext_id); + let isa_ext_out = &mut [0u8; 8]; + if vcpu_fd.get_one_reg(id_isa_ext, isa_ext_out).is_err() { + continue; + } + + if u64::from_le_bytes(*isa_ext_out) == 0u64 { + // This extension is not available + continue; + } + + if ext_id == KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOM && !cbom { + let off_zicbom_blk_size = std::mem::offset_of!(kvm_riscv_config, zicbom_block_size); + let id_zicbom_blk_size = riscv64_reg_config_id!(off_zicbom_blk_size); + vcpu_fd + .get_one_reg(id_zicbom_blk_size, cbom_blksz) + .map_err(|err| { + error!("get_one_reg() failed: {err:?}"); + FdtError::GetDeviceAttr + })?; + cbom = true; + } + + if ext_id == KVM_RISCV_ISA_EXT_ID_KVM_RISCV_ISA_EXT_ZICBOZ && !cboz { + let off_zicboz_blk_size = 
std::mem::offset_of!(kvm_riscv_config, zicboz_block_size); + let id_zicboz_blk_size = riscv64_reg_config_id!(off_zicboz_blk_size); + vcpu_fd + .get_one_reg(id_zicboz_blk_size, cboz_blksz) + .map_err(|err| { + error!("get_one_reg() failed: {err:?}"); + FdtError::GetDeviceAttr + })?; + cboz = true; + } + + cpu_isa[pos] = '_' as u8; + pos += 1; + let name_len = isa_ext_info.name.len(); + cpu_isa[pos..pos + name_len].copy_from_slice(isa_ext_info.name); + pos += name_len; + } + + let off_satp = std::mem::offset_of!(kvm_riscv_config, satp_mode); + let id_satp = riscv64_reg_config_id!(off_satp); + let b = &mut [0u8; 8]; + let satp_mode = if vcpu_fd.get_one_reg(id_satp, b).is_ok() { + u64::from_le_bytes(*b) + } else { + 8 + }; + + let cpu = fdt.begin_node(&format!("cpu@{:x}", cpu_index))?; + fdt.property_string("device_type", "cpu")?; + fdt.property_string("compatible", "riscv")?; + + match satp_mode { + 10 => fdt.property_string("mmu-type", "riscv,sv57")?, + 9 => fdt.property_string("mmu-type", "riscv,sv48")?, + 8 => fdt.property_string("mmu-type", "riscv,sv39")?, + _ => fdt.property_string("mmu-type", "riscv,none")?, + } + + fdt.property_string( + "riscv,isa", + ::std::str::from_utf8(&cpu_isa[0..pos]).expect("cpu_isa unexpected error"), + )?; + + if cbom { + fdt.property_u32( + "riscv,cbom-block-size", + u32::try_from(u64::from_le_bytes(*cbom_blksz)).unwrap(), + )?; + } + + if cboz { + fdt.property_u32( + "riscv,cboz-block-size", + u32::try_from(u64::from_le_bytes(*cboz_blksz)).unwrap(), + )?; + } + + fdt.property_u32("reg", cpu_index)?; + fdt.property_string("status", "okay")?; + + // interrupt controller node + let intc_node = fdt.begin_node("interrupt-controller")?; + fdt.property_string("compatible", "riscv,cpu-intc")?; + fdt.property_u32("#interrupt-cells", 1u32)?; + fdt.property_null("interrupt-controller")?; + fdt.property_u32("phandle", CPU_INTC_BASE_PHANDLE + cpu_index)?; + fdt.end_node(intc_node)?; + + fdt.end_node(cpu)?; + } + + fdt.end_node(cpus)?; + + Ok(()) 
+} + +fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &GuestMemoryMmap) -> Result<(), FdtError> { + let mem_size = guest_mem.last_addr().raw_value() + - super::layout::DRAM_MEM_START + - super::layout::SYSTEM_MEM_SIZE + + 1; + let mem_reg_prop = &[ + super::layout::DRAM_MEM_START + super::layout::SYSTEM_MEM_SIZE, + mem_size, + ]; + let mem = fdt.begin_node("memory@ram")?; + fdt.property_string("device_type", "memory")?; + fdt.property_array_u64("reg", mem_reg_prop)?; + fdt.end_node(mem)?; + + Ok(()) +} + +fn create_chosen_node(fdt: &mut FdtWriter, cmdline: CString) -> Result<(), FdtError> { + let chosen = fdt.begin_node("chosen")?; + + let cmdline_string = cmdline + .into_string() + .map_err(|_| vm_fdt::Error::InvalidString)?; + fdt.property_string("bootargs", cmdline_string.as_str())?; + + fdt.end_node(chosen)?; + + Ok(()) +} + +fn create_aia_node(fdt: &mut FdtWriter, aia: &AIADevice) -> Result<(), FdtError> { + if aia.msi_compatible() { + let imsic_name = format!("imsics@{:08x}", super::layout::IMSIC_START); + let imsic_node = fdt.begin_node(&imsic_name)?; + + fdt.property_string("compatible", aia.imsic_compatibility())?; + let imsic_reg_prop = aia.imsic_properties(); + fdt.property_array_u32("reg", &imsic_reg_prop)?; + fdt.property_u32("#interrupt-cells", 0u32)?; + fdt.property_null("interrupt-controller")?; + fdt.property_null("msi-controller")?; + + let mut aia_nr_ids: u32 = 0; + let mut nr_ids_attr = ::kvm_bindings::kvm_device_attr::default(); + nr_ids_attr.group = ::kvm_bindings::KVM_DEV_RISCV_AIA_GRP_CONFIG; + nr_ids_attr.attr = ::kvm_bindings::KVM_DEV_RISCV_AIA_CONFIG_IDS as u64; + nr_ids_attr.addr = &mut aia_nr_ids as *mut u32 as u64; + + aia.get_device_attribute(&mut nr_ids_attr) + .map_err(|_| FdtError::GetDeviceAttr)?; + + fdt.property_u32("riscv,num-ids", aia_nr_ids)?; + fdt.property_u32("phandle", AIA_IMSIC_PHANDLE)?; + + let mut irq_cells = vec![]; + let num_cpus = aia.vcpu_count() as u32; + for i in 0..num_cpus { + 
irq_cells.push(CPU_INTC_BASE_PHANDLE + i); + irq_cells.push(S_MODE_EXT_IRQ); + } + fdt.property_array_u32("interrupts-extended", &irq_cells)?; + + fdt.end_node(imsic_node)?; + } + + let aplic_name = format!("aplic@{:x}", super::layout::APLIC_START); + let aplic_node = fdt.begin_node(&aplic_name)?; + + fdt.property_string("compatible", aia.aplic_compatibility())?; + let reg_cells = aia.aplic_properties(); + fdt.property_array_u32("reg", &reg_cells)?; + fdt.property_u32("#interrupt-cells", 2u32)?; + fdt.property_null("interrupt-controller")?; + + // TODO num-sources should be equal to the IRQ allocated lines, and not randomly hardcoded. + fdt.property_u32("riscv,num-sources", 10u32)?; + fdt.property_u32("phandle", AIA_APLIC_PHANDLE)?; + fdt.property_u32("msi-parent", AIA_IMSIC_PHANDLE)?; + + fdt.end_node(aplic_node)?; + + Ok(()) +} + +fn create_devices_node( + fdt: &mut FdtWriter, + devices_info: &HashMap<(DeviceType, String), MMIODeviceInfo>, +) -> Result<(), FdtError> { + // Create one temp Vec to store all virtio devices + let mut ordered_virtio_device: Vec<&MMIODeviceInfo> = Vec::new(); + + for ((device_type, _device_id), info) in devices_info { + match device_type { + DeviceType::Serial => create_serial_node(fdt, info)?, + DeviceType::Virtio(_) => { + ordered_virtio_device.push(info); + } + } + } + + // Sort out virtio devices by address from low to high and insert them into fdt table. + ordered_virtio_device.sort_by_key(|a| a.addr); + for ordered_device_info in ordered_virtio_device.drain(..)
{ + create_virtio_node(fdt, ordered_device_info)?; + } + + Ok(()) +} + +fn create_virtio_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { + let virtio_mmio = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr))?; + let irq = [dev_info.irq.unwrap().into(), IRQ_TYPE_EDGE_RISING]; + + fdt.property_string("compatible", "virtio,mmio")?; + fdt.property_array_u64("reg", &[dev_info.addr, dev_info.len])?; + fdt.property_array_u32("interrupts", &irq)?; + fdt.property_u32("interrupt-parent", AIA_APLIC_PHANDLE)?; + fdt.end_node(virtio_mmio)?; + + Ok(()) +} + +fn create_serial_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { + let serial_reg_prop = [dev_info.addr, dev_info.len]; + let irq = [dev_info.irq.unwrap().into(), IRQ_TYPE_LEVEL_HIGH]; + + let serial_name = format!("serial@{:x}", dev_info.addr); + let serial_node = fdt.begin_node(&serial_name)?; + fdt.property_string("compatible", "ns16550a")?; + fdt.property_array_u64("reg", &serial_reg_prop)?; + fdt.property_u32("clock-frequency", 3686400)?; + fdt.property_u32("interrupt-parent", AIA_APLIC_PHANDLE)?; + fdt.property_array_u32("interrupts", &irq)?; + fdt.end_node(serial_node)?; + + Ok(()) +} diff --git a/src/vmm/src/arch/riscv64/vm.rs b/src/vmm/src/arch/riscv64/vm.rs new file mode 100644 index 00000000000..39897488309 --- /dev/null +++ b/src/vmm/src/arch/riscv64/vm.rs @@ -0,0 +1,85 @@ +// Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +use crate::Kvm; +use crate::arch::riscv64::aia::AiaState; +use crate::vstate::memory::GuestMemoryState; +use crate::vstate::vm::{VmCommon, VmError}; + +/// Structure representing the current architecture's understanding of what a "virtual machine" is.
+#[derive(Debug)] +pub struct ArchVm { + /// Architecture independent parts of a vm. + pub common: VmCommon, + /// On riscv64 we need to keep around the fd obtained by creating the AIA device. + irqchip_handle: Option<crate::arch::riscv64::aia::AIADevice>, +} + +/// Error type for [`Vm::restore_state`] +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum ArchVmError { + /// Error creating the global interrupt controller: {0} + VmCreateAIA(crate::arch::riscv64::aia::AiaError), + /// Failed to save the VM's AIA state: {0} + SaveAia(crate::arch::riscv64::aia::AiaError), + /// Failed to restore the VM's AIA state: {0} + RestoreAia(crate::arch::riscv64::aia::AiaError), +} + +impl ArchVm { + /// Create a new `Vm` struct. + pub fn new(kvm: &Kvm) -> Result<ArchVm, VmError> { + let common = Self::create_common(kvm)?; + Ok(ArchVm { + common, + irqchip_handle: None, + }) + } + + /// Pre-vCPU creation setup. + pub fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { + Ok(()) + } + + /// Post-vCPU creation setup. + pub fn arch_post_create_vcpus(&mut self, nr_vcpus: u8) -> Result<(), ArchVmError> { + self.setup_irqchip(nr_vcpus) + } + + /// Creates the AIA (Advanced Interrupt Architecture) IRQchip. + pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), ArchVmError> { + self.irqchip_handle = Some( + crate::arch::riscv64::aia::AIADevice::create_aia(&self.fd(), vcpu_count.into()) + .map_err(ArchVmError::VmCreateAIA)?, + ); + Ok(()) + } + + /// Gets a reference to the irqchip of the VM. + pub fn get_irqchip(&self) -> &crate::arch::riscv64::aia::AIADevice { + self.irqchip_handle.as_ref().expect("IRQ chip not set") + } + + /// Saves and returns the Kvm Vm state. + pub fn save_state(&self) -> Result<VmState, ArchVmError> { + unimplemented!() + } + + /// Restore the KVM VM state + pub fn restore_state(&mut self) -> Result<(), ArchVmError> { + unimplemented!() + } +} + +/// Structure holding the architecture-specific VM state.
+#[derive(Debug, Default, Serialize, Deserialize)] +pub struct VmState { + /// Guest memory state. + pub memory: GuestMemoryState, + /// AIA state. + pub aia: AiaState, +} From 52ac59c6ac881f8e635d9cff6d3dc25bbc27fd1b Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Fri, 23 May 2025 14:14:22 +0300 Subject: [PATCH 09/17] vmm(riscv64): set `SNAPSHOT_MAGIC_ID` to an arbitrary value We don't plan to support snapshots on RISC-V at this stage. As a workaround to bypass related errors, set `SNAPSHOT_MAGIC_ID` to a dummy value. Signed-off-by: Dimitris Charisis --- src/vmm/src/snapshot/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/vmm/src/snapshot/mod.rs b/src/vmm/src/snapshot/mod.rs index 57ad3980215..40bcabf7485 100644 --- a/src/vmm/src/snapshot/mod.rs +++ b/src/vmm/src/snapshot/mod.rs @@ -53,6 +53,10 @@ const BINCODE_CONFIG: Configuration Date: Fri, 23 May 2025 14:21:43 +0300 Subject: [PATCH 10/17] vmm(riscv64): add fake files to silence `test_utils` errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a temporary workaround. Since we lack proper testing support for riscv64 and don’t support the `test_utils` module, add two dummy file paths for `DEFAULT_KERNEL_IMAGE` and `NOISY_KERNEL_IMAGE` to silence compilation errors. We plan to either implement proper tests or remove `test_utils` usage from the riscv64 path in the future. 
Signed-off-by: Dimitris Charisis --- src/vmm/src/test_utils/mock_resources/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/vmm/src/test_utils/mock_resources/mod.rs b/src/vmm/src/test_utils/mock_resources/mod.rs index f8485bf9678..b389fe02748 100644 --- a/src/vmm/src/test_utils/mock_resources/mod.rs +++ b/src/vmm/src/test_utils/mock_resources/mod.rs @@ -14,10 +14,14 @@ pub const DEFAULT_BOOT_ARGS: &str = "reboot=k panic=1 pci=off"; pub const DEFAULT_KERNEL_IMAGE: &str = "test_elf.bin"; #[cfg(target_arch = "aarch64")] pub const DEFAULT_KERNEL_IMAGE: &str = "test_pe.bin"; +#[cfg(target_arch = "riscv64")] +pub const DEFAULT_KERNEL_IMAGE: &str = "test_pe_riscv.bin"; // Fake file, to pass compilation #[cfg(target_arch = "x86_64")] pub const NOISY_KERNEL_IMAGE: &str = "test_noisy_elf.bin"; #[cfg(target_arch = "aarch64")] pub const NOISY_KERNEL_IMAGE: &str = "test_pe.bin"; +#[cfg(target_arch = "riscv64")] +pub const NOISY_KERNEL_IMAGE: &str = "test_pe_noisy.bin"; // Fake file, to pass compilation pub fn kernel_image_path(kernel_image: Option<&str>) -> String { let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); From 6ed5f4fc0bd713c172a4d518d6b4a5f413dbe527 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Mon, 26 May 2025 11:13:00 +0300 Subject: [PATCH 11/17] vmm(riscv64): add boot system configuration and kernel loading support Implement `configure_system_for_boot` and `load_kernel` for riscv64, adding initial support for booting riscv64 microVMs. This gets the minimal pieces in place to start a Linux kernel inside a riscv64 VM. Populate `(Cpu)ConfigurationError` enums following the aarch64 logic, excluding the unreachable vCPU configuration error, as we don't support either custom or static vCPU configuration for riscv64 yet. Wire up module exports for riscv64-specific code. 
Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/mod.rs | 14 ++- src/vmm/src/arch/riscv64/mod.rs | 147 ++++++++++++++++++++++++-- src/vmm/src/cpu_config/riscv64/mod.rs | 17 +-- 3 files changed, 161 insertions(+), 17 deletions(-) diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index 60e5272e68b..7eb5bdab440 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -29,10 +29,20 @@ pub use aarch64::{ /// Module for riscv64 related functionality. #[cfg(target_arch = "riscv64")] pub mod riscv64; + +#[cfg(target_arch = "riscv64")] +pub use riscv64::kvm::{Kvm, KvmArchError}; #[cfg(target_arch = "riscv64")] -pub use riscv64::kvm::Kvm; +pub use riscv64::vcpu::*; #[cfg(target_arch = "riscv64")] -pub use riscv64::{ConfigurationError, configure_system_for_boot, load_kernel}; +pub use riscv64::vm::{ArchVm, ArchVmError, VmState}; +#[cfg(target_arch = "riscv64")] +pub use riscv64::{ + ConfigurationError, MMIO_MEM_SIZE, MMIO_MEM_START, arch_memory_regions, + configure_system_for_boot, get_kernel_start, initrd_load_addr, layout::CMDLINE_MAX_SIZE, + layout::IRQ_BASE, layout::IRQ_MAX, layout::SYSTEM_MEM_SIZE, layout::SYSTEM_MEM_START, + load_kernel, +}; /// Module for x86_64 related functionality. #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/arch/riscv64/mod.rs b/src/vmm/src/arch/riscv64/mod.rs index 677876d86dd..80d14549740 100644 --- a/src/vmm/src/arch/riscv64/mod.rs +++ b/src/vmm/src/arch/riscv64/mod.rs @@ -1,22 +1,90 @@ // Copyright © 2025 Computing Systems Laboratory (CSLab), ECE, NTUA. All rights reserved. +// // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +/// Module for the global interrupt controller configuration. +pub mod aia; +mod fdt; /// Architecture specific KVM-related code. pub mod kvm; +/// Layout for this riscv64 system. +pub mod layout; +/// Logic for configuring riscv64 registers. +pub mod regs; +/// Architecture specific vCPU code. 
+pub mod vcpu; +/// Architecture specific VM state code. +pub mod vm; -use linux_loader::loader::Cmdline; +use std::cmp::min; +use std::fs::File; -use crate::arch::EntryPoint; -use crate::cpu_config::riscv64::CpuConfigurationError; -use crate::cpu_config::templates::CustomCpuTemplate; +use linux_loader::loader::pe::PE as Loader; +use linux_loader::loader::{Cmdline, KernelLoader}; +use vm_memory::GuestMemoryError; + +use crate::arch::{BootProtocol, EntryPoint}; +use crate::cpu_config::riscv64::CpuConfiguration; +use crate::cpu_config::riscv64::custom_cpu_template::CustomCpuTemplate; use crate::initrd::InitrdConfig; use crate::vmm_config::machine_config::MachineConfig; -use crate::vstate::memory::GuestMemoryMmap; +use crate::vstate::memory::{Bytes, GuestAddress, GuestMemoryMmap}; +use crate::vstate::vcpu::KvmVcpuError; +use crate::{Vcpu, VcpuConfig, Vmm, logger}; /// Errors thrown while configuring riscv64 system. #[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum ConfigurationError {} +pub enum ConfigurationError { + /// Failed to create a Flattened Device Tree for this riscv64 microVM: {0} + SetupFDT(#[from] fdt::FdtError), + /// Failed to write to guest memory. + MemoryError(GuestMemoryError), + /// Cannot copy kernel file fd + KernelFile, + /// Cannot load kernel due to invalid memory configuration or invalid kernel image: {0} + KernelLoader(#[from] linux_loader::loader::Error), + /// Error configuring the vcpu: {0} + VcpuConfigure(#[from] KvmVcpuError), +} + +/// The start of the memory area reserved for MMIO devices. +pub const MMIO_MEM_START: u64 = layout::MAPPED_IO_START; +/// The size of the memory area reserved for MMIO devices. +pub const MMIO_MEM_SIZE: u64 = layout::DRAM_MEM_START - layout::MAPPED_IO_START; //>> 1GB + +/// Returns a Vec of the valid memory addresses for riscv64. +/// See [`layout`](layout) module for a drawing of the specific memory model for this platform. 
+/// +/// The `offset` parameter specifies the offset from [`layout::DRAM_MEM_START`]. +pub fn arch_memory_regions(offset: usize, size: usize) -> Vec<(GuestAddress, usize)> { + assert!(size > 0, "Attempt to allocate guest memory of length 0"); + assert!( + offset.checked_add(size).is_some(), + "Attempt to allocate guest memory such that the address space would wrap around" + ); + assert!( + offset < layout::DRAM_MEM_MAX_SIZE, + "offset outside allowed DRAM range" + ); + + let dram_size = min(size, layout::DRAM_MEM_MAX_SIZE - offset); + + if dram_size != size { + logger::warn!( + "Requested offset/memory size {}/{} exceeds architectural maximum (1022GiB). Size has \ + been truncated to {}", + offset, + size, + dram_size + ); + } + + vec![( + GuestAddress(layout::DRAM_MEM_START + offset as u64), + dram_size, + )] +} /// Configures the system for booting Linux. pub fn configure_system_for_boot( @@ -25,10 +93,45 @@ pub fn configure_system_for_boot( machine_config: &MachineConfig, cpu_template: &CustomCpuTemplate, entry_point: EntryPoint, - initrd: &Option<InitrdConfig>, + _initrd: &Option<InitrdConfig>, boot_cmdline: Cmdline, ) -> Result<(), ConfigurationError> { - todo!() + let cpu_config = { CpuConfiguration }; + + // Apply CPU template to the base CpuConfiguration. + let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template); + + let vcpu_config = VcpuConfig { + vcpu_count: machine_config.vcpu_count, + smt: machine_config.smt, + cpu_config, + }; + + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .configure(vmm.vm.guest_memory(), entry_point, &vcpu_config)?; + } + let cmdline = boot_cmdline + .as_cstring() + .expect("Cannot create cstring from cmdline string"); + + let guest_mem = &vmm.vm.guest_memory(); + // TODO: get timer frequency appropriately.
+ let fdt = fdt::create_fdt( + vcpus, + guest_mem, + cmdline, + 0x989680u32, + vmm.mmio_device_manager.get_device_info(), + vmm.vm.get_irqchip(), + )?; + let fdt_address = GuestAddress(get_fdt_addr()); + guest_mem + .write_slice(fdt.as_slice(), fdt_address) + .map_err(ConfigurationError::MemoryError)?; + + Ok(()) } /// Load linux kernel into guest memory. @@ -36,7 +139,33 @@ pub fn load_kernel( kernel: &File, guest_memory: &GuestMemoryMmap, ) -> Result { - todo!() + // Need to clone the File because reading from it + // mutates it. + let mut kernel_file = kernel + .try_clone() + .map_err(|_| ConfigurationError::KernelFile)?; + + let entry_addr = Loader::load( + guest_memory, + Some(GuestAddress(get_kernel_start())), + &mut kernel_file, + None, + )?; + + Ok(EntryPoint { + entry_addr: entry_addr.kernel_load, + protocol: BootProtocol::LinuxBoot, + }) +} + +/// Returns the memory address where the kernel could be loaded. +pub fn get_kernel_start() -> u64 { + layout::SYSTEM_MEM_START + layout::SYSTEM_MEM_SIZE +} + +/// Returns the memory address where the initrd could be loaded. Unimplemented for now. +pub fn initrd_load_addr(_guest_mem: &GuestMemoryMmap, _initrd_size: usize) -> Option { + unimplemented!() } /// Auxiliary function to get the address where the device tree blob is loaded. diff --git a/src/vmm/src/cpu_config/riscv64/mod.rs b/src/vmm/src/cpu_config/riscv64/mod.rs index 30bdc17bbdf..2c2ed57b7b4 100644 --- a/src/vmm/src/cpu_config/riscv64/mod.rs +++ b/src/vmm/src/cpu_config/riscv64/mod.rs @@ -3,24 +3,29 @@ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -/// Module for custom CPU templates. pub mod custom_cpu_template; -/// Module for static CPU templates. +/// Module for static CPU templates pub mod static_cpu_templates; use super::templates::CustomCpuTemplate; +use crate::arch::riscv64::vcpu::VcpuArchError; +use crate::vstate::vcpu::KvmVcpuError; /// Errors thrown while configuring templates. 
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] -pub enum CpuConfigurationError {} +pub enum CpuConfigurationError { + /// Error initializing the vcpu: {0} + VcpuInit(#[from] KvmVcpuError), + /// Error reading vcpu registers: {0} + VcpuGetRegs(#[from] VcpuArchError), +} -/// CPU configuration for riscv64. Just a nameholder. +/// CPU configuration for riscv64. Just a placeholder. #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct CpuConfiguration; impl CpuConfiguration { - /// Creates new guest CPU config based on the provided template. - /// Not actually implemented yet. + /// Creates new guest CPU config based on the provided template. Not actually implemented yet. pub fn apply_template(self, _: &CustomCpuTemplate) -> Self { self } From 96d9bb7352eb644a0b392f4026c65a1b0715b517 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Mon, 26 May 2025 11:58:13 +0300 Subject: [PATCH 12/17] vmm(riscv64): specify supported device types For now, we only plan to support Serial and Virtio devices for riscv64. Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index 7eb5bdab440..1ff279981ef 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -69,12 +69,13 @@ pub enum DeviceType { /// Device Type: Virtio. Virtio(u32), /// Device Type: Serial. - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] Serial, /// Device Type: RTC. #[cfg(target_arch = "aarch64")] Rtc, /// Device Type: BootTimer. 
+ #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] BootTimer, } From 3f525a8fbc6132181c572f8b687a5b2ebe2049c7 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Mon, 26 May 2025 13:33:18 +0300 Subject: [PATCH 13/17] vmm(riscv64): add conditional compilation guards This commit adds architecture-specific compilation guards to allow building the `vmm` crate for riscv64. Functionality not supported on riscv64 is excluded from the build. Signed-off-by: Dimitris Charisis --- src/vmm/src/arch/riscv64/regs.rs | 1 - src/vmm/src/builder.rs | 23 ++++++++++++++++++++++- src/vmm/src/cpu_config/riscv64/mod.rs | 1 + src/vmm/src/device_manager/mmio.rs | 2 ++ src/vmm/src/device_manager/persist.rs | 14 +++++++++++--- src/vmm/src/lib.rs | 8 +++++++- src/vmm/src/persist.rs | 1 + src/vmm/src/test_utils/mod.rs | 11 +++++++++++ src/vmm/src/vmm_config/machine_config.rs | 4 ++++ 9 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/vmm/src/arch/riscv64/regs.rs b/src/vmm/src/arch/riscv64/regs.rs index 46f088de067..ee0f159cb18 100644 --- a/src/vmm/src/arch/riscv64/regs.rs +++ b/src/vmm/src/arch/riscv64/regs.rs @@ -145,7 +145,6 @@ macro_rules! riscv64_reg_timer_id { riscv64_reg_id!(KVM_REG_RISCV_TIMER, $offset) }; } -pub(crate) use riscv64_reg_timer_id; /// Return the ID of an ISA extension. 
#[macro_export] diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 4a810ee083a..1622ce0b59d 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -13,6 +13,7 @@ use event_manager::{MutEventSubscriber, SubscriberOps}; use libc::EFD_NONBLOCK; use linux_loader::cmdline::Cmdline as LoaderKernelCmdline; use userfaultfd::Uffd; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use utils::time::TimestampUs; #[cfg(target_arch = "aarch64")] use vm_memory::GuestAddress; @@ -36,17 +37,22 @@ use crate::device_manager::persist::{ }; use crate::device_manager::resources::ResourceAllocator; use crate::devices::BusDevice; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::acpi::vmgenid::{VmGenId, VmGenIdError}; #[cfg(target_arch = "aarch64")] use crate::devices::legacy::RTCDevice; use crate::devices::legacy::serial::SerialOut; use crate::devices::legacy::{EventFdTrigger, SerialEventsWrapper, SerialWrapper}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::mmio::MmioTransport; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::net::Net; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::rng::Entropy; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; #[cfg(feature = "gdb")] use crate::gdb; @@ -83,6 +89,7 @@ pub enum StartMicrovmError { #[cfg(target_arch = "x86_64")] CreateLegacyDevice(device_manager::legacy::LegacyDeviceError), /// Error creating VMGenID device: {0} + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] CreateVMGenID(VmGenIdError), /// Error enabling pvtime on vcpu: {0} #[cfg(target_arch = "aarch64")] @@ -135,7 +142,7 @@ impl std::convert::From for 
StartMicrovmError { } } -#[cfg_attr(target_arch = "aarch64", allow(unused))] +#[cfg_attr(any(target_arch = "aarch64", target_arch = "riscv64"), allow(unused))] fn create_vmm_and_vcpus( instance_info: &InstanceInfo, event_manager: &mut EventManager, @@ -209,6 +216,7 @@ pub fn build_microvm_for_boot( ) -> Result>, StartMicrovmError> { use self::StartMicrovmError::*; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] // Timestamp for measuring microVM boot duration. let request_ts = TimestampUs::default(); @@ -260,10 +268,12 @@ pub fn build_microvm_for_boot( // The boot timer device needs to be the first device attached in order // to maintain the same MMIO address referenced in the documentation // and tests. + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if vm_resources.boot_timer { attach_boot_timer_device(&mut vmm, request_ts)?; } + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if let Some(balloon) = vm_resources.balloon.get() { attach_balloon_device(&mut vmm, &mut boot_cmdline, balloon, event_manager)?; } @@ -274,6 +284,8 @@ pub fn build_microvm_for_boot( vm_resources.block.devices.iter(), event_manager, )?; + + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] attach_net_devices( &mut vmm, &mut boot_cmdline, @@ -281,10 +293,12 @@ pub fn build_microvm_for_boot( event_manager, )?; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if let Some(unix_vsock) = vm_resources.vsock.get() { attach_unixsock_vsock_device(&mut vmm, &mut boot_cmdline, unix_vsock, event_manager)?; } + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] if let Some(entropy) = vm_resources.entropy.get() { attach_entropy_device(&mut vmm, &mut boot_cmdline, entropy, event_manager)?; } @@ -292,6 +306,7 @@ pub fn build_microvm_for_boot( #[cfg(target_arch = "aarch64")] attach_legacy_devices_aarch64(event_manager, &mut vmm, &mut boot_cmdline)?; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] 
attach_vmgenid_device(&mut vmm)?; #[cfg(target_arch = "aarch64")] @@ -670,6 +685,7 @@ fn attach_virtio_device( .map(|_| ()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub(crate) fn attach_boot_timer_device( vmm: &mut Vmm, request_ts: TimestampUs, @@ -682,6 +698,7 @@ pub(crate) fn attach_boot_timer_device( Ok(()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_vmgenid_device(vmm: &mut Vmm) -> Result<(), StartMicrovmError> { let vmgenid = VmGenId::new(vmm.vm.guest_memory(), &mut vmm.resource_allocator) .map_err(StartMicrovmError::CreateVMGenID)?; @@ -693,6 +710,7 @@ fn attach_vmgenid_device(vmm: &mut Vmm) -> Result<(), StartMicrovmError> { Ok(()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_entropy_device( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, @@ -749,6 +767,7 @@ fn attach_block_devices<'a, I: Iterator>> + Debug>( Ok(()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_net_devices<'a, I: Iterator>> + Debug>( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, @@ -763,6 +782,7 @@ fn attach_net_devices<'a, I: Iterator>> + Debug>( Ok(()) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_unixsock_vsock_device( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, @@ -774,6 +794,7 @@ fn attach_unixsock_vsock_device( attach_virtio_device(event_manager, vmm, id, unix_vsock.clone(), cmdline, false) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_balloon_device( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, diff --git a/src/vmm/src/cpu_config/riscv64/mod.rs b/src/vmm/src/cpu_config/riscv64/mod.rs index 2c2ed57b7b4..096d25f3855 100644 --- a/src/vmm/src/cpu_config/riscv64/mod.rs +++ b/src/vmm/src/cpu_config/riscv64/mod.rs @@ -3,6 +3,7 @@ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 +/// Module for custom CPU templates pub mod custom_cpu_template; /// Module for static CPU templates pub mod static_cpu_templates; diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 99bde6e2e78..03c2c233f95 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -26,6 +26,7 @@ use crate::arch::DeviceType::Virtio; use crate::devices::BusDevice; #[cfg(target_arch = "aarch64")] use crate::devices::legacy::RTCDevice; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::pseudo::BootTimer; use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; @@ -342,6 +343,7 @@ impl MMIODeviceManager { } /// Register a boot timer device. + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn register_mmio_boot_timer( &mut self, resource_allocator: &mut ResourceAllocator, diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 30a6387bc82..c9fa5ba64f5 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -8,6 +8,7 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use kvm_ioctls::VmFd; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use log::{error, warn}; use serde::{Deserialize, Serialize}; use vm_allocator::AllocPolicy; @@ -35,12 +36,13 @@ use crate::devices::virtio::rng::Entropy; use crate::devices::virtio::rng::persist::{ EntropyConstructorArgs, EntropyPersistError as EntropyError, EntropyState, }; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +use crate::devices::virtio::vsock::TYPE_VSOCK; use crate::devices::virtio::vsock::persist::{ VsockConstructorArgs, VsockState, VsockUdsConstructorArgs, }; -use crate::devices::virtio::vsock::{ - TYPE_VSOCK, Vsock, VsockError, VsockUnixBackend, VsockUnixBackendError, -}; +use 
crate::devices::virtio::vsock::{Vsock, VsockError, VsockUnixBackend, VsockUnixBackendError}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_NET, TYPE_RNG}; use crate::mmds::data_store::MmdsVersion; use crate::resources::{ResourcesError, VmResources}; @@ -283,6 +285,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { type ConstructorArgs = MMIODevManagerConstructorArgs<'a>; type Error = DevicePersistError; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn save(&self) -> Self::State { let mut states = DeviceStates::default(); let _: Result<(), ()> = self.for_each_device(|devtype, devid, device_info, bus_dev| { @@ -409,6 +412,11 @@ impl<'a> Persist<'a> for MMIODeviceManager { states } + #[cfg(target_arch = "riscv64")] + fn save(&self) -> Self::State { + unimplemented!(); + } + fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 29f3b0148ac..fe2ac30a429 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -450,7 +450,7 @@ impl Vmm { // would be to save the whole serial device state when we do the vm // serialization. For now we set that bit manually - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] { let serial_bus_device = self.get_bus_device(DeviceType::Serial, "Serial"); if serial_bus_device.is_none() { @@ -515,6 +515,12 @@ impl Vmm { self.vm.save_state(&mpidrs).map_err(SaveVmState)? } + #[cfg(target_arch = "riscv64")] + { + // TODO: `save_state()` is unimplemented on riscv64. + // It is just a stub, to pass compilation. + self.vm.save_state().map_err(SaveVmState)? 
+ } }; let device_states = self.mmio_device_manager.save(); diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 4111d8d6c34..dda07d6a0c1 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -26,6 +26,7 @@ use crate::cpu_config::x86_64::cpuid::CpuidTrait; #[cfg(target_arch = "x86_64")] use crate::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host; use crate::device_manager::persist::{ACPIDeviceManagerState, DevicePersistError, DeviceStates}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::logger::{info, warn}; use crate::resources::VmResources; use crate::seccomp::BpfThreadMap; diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index 7cb16a2a213..2a3bf29b2d1 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -3,20 +3,28 @@ #![allow(missing_docs)] +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use std::sync::{Arc, Mutex}; use vm_memory::GuestAddress; use vmm_sys_util::tempdir::TempDir; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::builder::build_microvm_for_boot; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::resources::VmResources; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::seccomp::get_empty_filters; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::test_utils::mock_resources::{MockBootSourceConfig, MockVmConfig, MockVmResources}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::vmm_config::boot_source::BootSourceConfig; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::HugePageConfig; use crate::vstate::memory; use crate::vstate::memory::{GuestMemoryMmap, GuestRegionMmap}; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::{EventManager, Vmm}; pub mod mock_resources; @@ -65,6 +73,7 
@@ pub fn arch_mem_raw(mem_size_bytes: usize) -> Vec { multi_region_mem_raw(&crate::arch::arch_memory_regions(0, mem_size_bytes)) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn create_vmm( _kernel_image: Option<&str>, is_diff: bool, @@ -105,10 +114,12 @@ pub fn create_vmm( (vmm, event_manager) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn default_vmm(kernel_image: Option<&str>) -> (Arc>, EventManager) { create_vmm(kernel_image, false, true) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub fn default_vmm_no_boot(kernel_image: Option<&str>) -> (Arc>, EventManager) { create_vmm(kernel_image, false, false) } diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs index cfe7105fdf8..ebe5607cb05 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -270,7 +270,11 @@ impl MachineConfig { let cpu_template = match update.cpu_template { None => self.cpu_template.clone(), + #[cfg(target_arch = "riscv64")] + Some(_) => unreachable!(), + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] Some(StaticCpuTemplate::None) => None, + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] Some(other) => Some(CpuTemplateType::Static(other)), }; From ca360423adf6ab3a1a57fb02099175560864c160 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Mon, 26 May 2025 13:49:37 +0300 Subject: [PATCH 14/17] api_server(riscv64): disable SendCtrlAltDel action The `SendCtrlAltDel` API action is only supported on x86, as it relies on legacy PC hardware interfaces (like the i8042 controller) which are not present on other architectures. This commit disables `SendCtrlAltDel` action on riscv64 mirroring the existing handling for aarch64. See related discussion in issue #1339. 
Signed-off-by: Dimitris Charisis --- src/firecracker/src/api_server/request/actions.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/firecracker/src/api_server/request/actions.rs b/src/firecracker/src/api_server/request/actions.rs index a3b3f3f3a88..2db9dc7c682 100644 --- a/src/firecracker/src/api_server/request/actions.rs +++ b/src/firecracker/src/api_server/request/actions.rs @@ -7,7 +7,7 @@ use vmm::rpc_interface::VmmAction; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; -#[cfg(target_arch = "aarch64")] +#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use super::StatusCode; // The names of the members from this enum must precisely correspond (as a string) to the possible @@ -45,6 +45,13 @@ pub(crate) fn parse_put_actions(body: &Body) -> Result Date: Mon, 26 May 2025 14:44:48 +0300 Subject: [PATCH 15/17] riscv64: add interrupt support for block devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds interrupt support for VirtIO block devices on riscv64, providing the necessary functionality to support interrupts for VirtIO devices in general. Interrupt delivery is implemented using KVM's in-kernel irqchip. Unlike x86_64 and aarch64, we couldn’t get the existing IRQFD-based approach to work reliably on riscv64. As a result, this implementation uses the `KVM_IRQ_LINE` ioctl to signal interrupts. To support this, it extends the `IrqTrigger` structure to hold both the raw VM file descriptor and the GSI number. Storing the raw file descriptor was necessary because `IrqTrigger` cannot hold a `VmFd` directly, as this type doesn't implement `Clone`. Having the raw VM file descriptor and the corresponding GSI allows `trigger_irq()` to perform the necessary ioctls to assert and de-assert interrupt lines when needed. 
Signed-off-by: Dimitris Charisis --- src/vmm/src/device_manager/mmio.rs | 12 +++ src/vmm/src/devices/virtio/balloon/device.rs | 5 ++ src/vmm/src/devices/virtio/block/device.rs | 8 ++ .../devices/virtio/block/vhost_user/device.rs | 5 ++ .../src/devices/virtio/block/virtio/device.rs | 5 ++ src/vmm/src/devices/virtio/device.rs | 75 +++++++++++++++++++ src/vmm/src/devices/virtio/net/device.rs | 6 ++ src/vmm/src/devices/virtio/rng/device.rs | 5 ++ src/vmm/src/devices/virtio/vhost_user.rs | 4 + src/vmm/src/devices/virtio/vsock/device.rs | 5 ++ 10 files changed, 130 insertions(+) diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 03c2c233f95..d5a4e1bd4a6 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -8,6 +8,8 @@ use std::collections::HashMap; use std::fmt::Debug; use std::num::NonZeroU32; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; use std::sync::{Arc, Mutex}; #[cfg(target_arch = "x86_64")] @@ -197,7 +199,11 @@ impl MMIODeviceManager { }; let identifier; { + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] let locked_device = mmio_device.locked_device(); + #[cfg(target_arch = "riscv64")] + let mut locked_device = mmio_device.locked_device(); + identifier = (DeviceType::Virtio(locked_device.device_type()), device_id); for (i, queue_evt) in locked_device.queue_events().iter().enumerate() { let io_addr = IoEventAddress::Mmio( @@ -206,8 +212,14 @@ impl MMIODeviceManager { vm.register_ioevent(queue_evt, &io_addr, u32::try_from(i).unwrap()) .map_err(MmioError::RegisterIoEvent)?; } + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] vm.register_irqfd(&locked_device.interrupt_trigger().irq_evt, irq.get()) .map_err(MmioError::RegisterIrqFd)?; + + #[cfg(target_arch = "riscv64")] + locked_device + .interrupt_trigger_mut() + .set_vmfd_and_gsi(vm.as_raw_fd(), irq.get()); } self.register_mmio_device( diff --git a/src/vmm/src/devices/virtio/balloon/device.rs 
b/src/vmm/src/devices/virtio/balloon/device.rs index 186f09275bc..56429153a3f 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -577,6 +577,11 @@ impl VirtioDevice for Balloon { &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) { let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/block/device.rs b/src/vmm/src/devices/virtio/block/device.rs index bf3043bcdd4..832879ce690 100644 --- a/src/vmm/src/devices/virtio/block/device.rs +++ b/src/vmm/src/devices/virtio/block/device.rs @@ -180,6 +180,14 @@ impl VirtioDevice for Block { } } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + match self { + Self::Virtio(b) => b.interrupt_trigger_mut(), + Self::VhostUser(_) => unimplemented!(), + } + } + fn read_config(&self, offset: u64, data: &mut [u8]) { match self { Self::Virtio(b) => b.read_config(offset, data), diff --git a/src/vmm/src/devices/virtio/block/vhost_user/device.rs b/src/vmm/src/devices/virtio/block/vhost_user/device.rs index b0bf5a31e3f..51e42bb7559 100644 --- a/src/vmm/src/devices/virtio/block/vhost_user/device.rs +++ b/src/vmm/src/devices/virtio/block/vhost_user/device.rs @@ -314,6 +314,11 @@ impl VirtioDevice for VhostUserBlock &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) 
{ let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/block/virtio/device.rs b/src/vmm/src/devices/virtio/block/virtio/device.rs index b11c757d43c..124dcd24b4d 100644 --- a/src/vmm/src/devices/virtio/block/virtio/device.rs +++ b/src/vmm/src/devices/virtio/block/virtio/device.rs @@ -598,6 +598,11 @@ impl VirtioDevice for VirtioBlock { &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + &mut self.irq_trigger + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) { let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/device.rs b/src/vmm/src/devices/virtio/device.rs index 62131e775f5..3796a650474 100644 --- a/src/vmm/src/devices/virtio/device.rs +++ b/src/vmm/src/devices/virtio/device.rs @@ -6,10 +6,14 @@ // found in the THIRD-PARTY file. use std::fmt; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; use std::sync::Arc; use std::sync::atomic::{AtomicU32, Ordering}; use vmm_sys_util::eventfd::EventFd; +#[cfg(target_arch = "riscv64")] +use vmm_sys_util::{errno, ioctl::ioctl_with_ref, ioctl_ioc_nr, ioctl_iow_nr}; use super::ActivateError; use super::mmio::{VIRTIO_MMIO_INT_CONFIG, VIRTIO_MMIO_INT_VRING}; @@ -57,9 +61,15 @@ pub enum IrqType { #[derive(Debug)] pub struct IrqTrigger { pub(crate) irq_status: Arc, + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] pub(crate) irq_evt: EventFd, + #[cfg(target_arch = "riscv64")] + pub(crate) raw_vmfd: Option, + #[cfg(target_arch = "riscv64")] + pub(crate) gsi: Option, } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl IrqTrigger { pub fn new() -> std::io::Result { Ok(Self { @@ -84,6 +94,68 @@ impl IrqTrigger { } } +#[cfg(target_arch = "riscv64")] +impl IrqTrigger { + pub fn new() -> std::io::Result { + Ok(Self { + irq_status: Arc::new(AtomicU32::new(0)), + 
raw_vmfd: None, + gsi: None, + }) + } + + pub fn trigger_irq(&self, irq_type: IrqType) -> Result<(), std::io::Error> { + let irq = match irq_type { + IrqType::Config => VIRTIO_MMIO_INT_CONFIG, + IrqType::Vring => VIRTIO_MMIO_INT_VRING, + }; + self.irq_status.fetch_or(irq, Ordering::SeqCst); + + // Safe to unwrap since `gsi` and `vmfd` have been set + let gsi = self.gsi.unwrap(); + IrqTrigger::set_irq_line(self.raw_vmfd.unwrap(), gsi, true).map_err(|err| { + error!("Failed to set IRQ line: {:?}", err); + std::io::Error::last_os_error() + })?; + IrqTrigger::set_irq_line(self.raw_vmfd.unwrap(), gsi, false).map_err(|err| { + error!("Failed to set IRQ line: {:?}", err); + std::io::Error::last_os_error() + })?; + + Ok(()) + } + + pub fn set_vmfd_and_gsi(&mut self, raw_vmfd: i32, gsi: u32) { + self.raw_vmfd = Some(raw_vmfd); + self.gsi = Some(gsi); + } + + // This function is taken from kvm-ioctls because it requires VmFd, which we don't + // have at this point. However, it only uses the raw file descriptor, which is just + // an i32. So, we copy it here and use it directly with the raw fd. + fn set_irq_line(fd: F, irq: u32, active: bool) -> Result<(), kvm_ioctls::Error> { + let mut irq_level = kvm_bindings::kvm_irq_level::default(); + irq_level.__bindgen_anon_1.irq = irq; + irq_level.level = u32::from(active); + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(&fd, IrqTrigger::KVM_IRQ_LINE(), &irq_level) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + ioctl_iow_nr!( + KVM_IRQ_LINE, + kvm_bindings::KVMIO, + 0x61, + kvm_bindings::kvm_irq_level + ); +} + /// Trait for virtio devices to be driven by a virtio transport. 
/// /// The lifecycle of a virtio device is to be moved to a virtio transport, which will then query the @@ -126,6 +198,9 @@ pub trait VirtioDevice: AsAny + Send { fn interrupt_trigger(&self) -> &IrqTrigger; + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger; + /// The set of feature bits shifted by `page * 32`. fn avail_features_by_page(&self, page: u32) -> u32 { let avail_features = self.avail_features(); diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index fff04d1da1a..eec439f1387 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -965,6 +965,12 @@ impl VirtioDevice for Net { fn interrupt_trigger(&self) -> &IrqTrigger { &self.irq_trigger } + + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) { let len = config_space_bytes.len().min(data.len()); diff --git a/src/vmm/src/devices/virtio/rng/device.rs b/src/vmm/src/devices/virtio/rng/device.rs index 97ac8676e0a..b09fc1ab6c6 100644 --- a/src/vmm/src/devices/virtio/rng/device.rs +++ b/src/vmm/src/devices/virtio/rng/device.rs @@ -270,6 +270,11 @@ impl VirtioDevice for Entropy { &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn avail_features(&self) -> u64 { self.avail_features } diff --git a/src/vmm/src/devices/virtio/vhost_user.rs b/src/vmm/src/devices/virtio/vhost_user.rs index 83174fbc4d3..c162b25a505 100644 --- a/src/vmm/src/devices/virtio/vhost_user.rs +++ b/src/vmm/src/devices/virtio/vhost_user.rs @@ -394,6 +394,7 @@ impl VhostUserHandleImpl { Ok(()) } + #[cfg_attr(target_arch = "riscv64", allow(unused_variables))] /// Set up vhost-user backend. 
This includes updating memory table, /// sending information about virtio rings and enabling them. pub fn setup_backend( @@ -439,6 +440,9 @@ impl VhostUserHandleImpl { .set_vring_base(*queue_index, queue.avail_ring_idx_get()) .map_err(VhostUserError::VhostUserSetVringBase)?; + // TODO: This is a temporary workaround to avoid `irq_trigger.irq_evt` unknown field + // error, since we don't implement vhost for RISC-V yet. + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] // No matter the queue, we set irq_evt for signaling the guest that buffers were // consumed. self.vu diff --git a/src/vmm/src/devices/virtio/vsock/device.rs b/src/vmm/src/devices/virtio/vsock/device.rs index aa114f6cccb..60d5e75182a 100644 --- a/src/vmm/src/devices/virtio/vsock/device.rs +++ b/src/vmm/src/devices/virtio/vsock/device.rs @@ -300,6 +300,11 @@ where &self.irq_trigger } + #[cfg(target_arch = "riscv64")] + fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { + unimplemented!() + } + fn read_config(&self, offset: u64, data: &mut [u8]) { match offset { 0 if data.len() == 8 => byte_order::write_le_u64(data, self.cid()), From 5ce8a7da8e001ed09731fd0928abf3b016871f35 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Tue, 27 May 2025 10:32:38 +0300 Subject: [PATCH 16/17] riscv64: add interrupt support for serial device Implement interrupt support for the legacy serial console device on riscv64, following the same logic as the VirtIO device implementation. Replace the `EventFdTrigger` structure, which triggers IRQFD-related interrupts on x86_64 and aarch64, with a new `IrqLineTrigger` type. This type encapsulates the two required pieces of information to fire a `KVM_IRQ_LINE` interrupt: the raw file descriptor behind the `VmFd` type and the corresponding GSI number. This change is necessary because the `VmFd` structure does not implement `Clone`, preventing it from being carried into the `trigger()` function when injecting an interrupt from the serial device. 
Additionally, make the `allocate_mmio_resources()` method of `MMIODeviceManager` public. This is required to allocate an IRQ number *before* setting up the serial device, since `setup_serial_device()` builds the `IrqLineTrigger` and needs to know the IRQ number associated with the serial device at that point. Signed-off-by: Dimitris Charisis --- src/vmm/src/builder.rs | 80 ++++++++++ src/vmm/src/device_manager/mmio.rs | 26 +++- src/vmm/src/devices/legacy/mod.rs | 74 +++++++++ src/vmm/src/devices/legacy/serial.rs | 220 +++++++++++++++++++++++++++ src/vmm/src/lib.rs | 2 +- 5 files changed, 399 insertions(+), 3 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1622ce0b59d..1521324edec 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -5,6 +5,8 @@ use std::fmt::Debug; use std::io; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; #[cfg(feature = "gdb")] use std::sync::mpsc; use std::sync::{Arc, Mutex}; @@ -31,6 +33,8 @@ use crate::cpu_config::templates::{ use crate::device_manager::acpi::ACPIDeviceManager; #[cfg(target_arch = "x86_64")] use crate::device_manager::legacy::PortIODeviceManager; +#[cfg(target_arch = "riscv64")] +use crate::device_manager::mmio::MMIODeviceInfo; use crate::device_manager::mmio::{MMIODeviceManager, MmioError}; use crate::device_manager::persist::{ ACPIDeviceManagerConstructorArgs, ACPIDeviceManagerRestoreError, MMIODevManagerConstructorArgs, @@ -39,6 +43,8 @@ use crate::device_manager::resources::ResourceAllocator; use crate::devices::BusDevice; #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::acpi::vmgenid::{VmGenId, VmGenIdError}; +#[cfg(target_arch = "riscv64")] +use crate::devices::legacy::IrqLineTrigger; #[cfg(target_arch = "aarch64")] use crate::devices::legacy::RTCDevice; use crate::devices::legacy::serial::SerialOut; @@ -306,6 +312,9 @@ pub fn build_microvm_for_boot( #[cfg(target_arch = "aarch64")] attach_legacy_devices_aarch64(event_manager, 
&mut vmm, &mut boot_cmdline)?; + #[cfg(target_arch = "riscv64")] + attach_legacy_devices_riscv64(event_manager, &mut vmm, &mut boot_cmdline)?; + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] attach_vmgenid_device(&mut vmm)?; @@ -566,6 +575,7 @@ pub fn build_microvm_from_snapshot( Ok(vmm) } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] /// Sets up the serial device. pub fn setup_serial_device( event_manager: &mut EventManager, @@ -589,6 +599,35 @@ pub fn setup_serial_device( Ok(serial) } +#[cfg(target_arch = "riscv64")] +/// Sets up the serial device. +pub fn setup_serial_device( + event_manager: &mut EventManager, + vmfd: &kvm_ioctls::VmFd, + input: std::io::Stdin, + out: std::io::Stdout, + device_info: &Option, +) -> Result>, VmmError> { + let interrupt_evt = IrqLineTrigger::new( + vmfd.as_raw_fd(), + device_info.as_ref().unwrap().irq.unwrap().get(), + ); + let kick_stdin_read_evt = + EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).map_err(VmmError::EventFd)?); + let serial = Arc::new(Mutex::new(BusDevice::Serial(SerialWrapper { + serial: Serial::with_events( + interrupt_evt, + SerialEventsWrapper { + buffer_ready_event_fd: Some(kick_stdin_read_evt), + }, + SerialOut::Stdout(out), + ), + input: Some(input), + }))); + event_manager.add_subscriber(serial.clone()); + Ok(serial) +} + /// 64 bytes due to alignment requirement in 3.1 of https://www.kernel.org/doc/html/v5.8/virt/kvm/devices/vcpu.html#attribute-kvm-arm-vcpu-pvtime-ipa #[cfg(target_arch = "aarch64")] const STEALTIME_STRUCT_MEM_SIZE: u64 = 64; @@ -661,6 +700,47 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } +#[cfg(target_arch = "riscv64")] +fn attach_legacy_devices_riscv64( + event_manager: &mut EventManager, + vmm: &mut Vmm, + cmdline: &mut LoaderKernelCmdline, +) -> Result<(), VmmError> { + // Serial device setup. + let cmdline_contains_console = cmdline + .as_cstring() + .map_err(|_| VmmError::Cmdline)? 
+ .into_string() + .map_err(|_| VmmError::Cmdline)? + .contains("console="); + + if cmdline_contains_console { + // Make stdout non-blocking. + set_stdout_nonblocking(); + let device_info = vmm + .mmio_device_manager + .allocate_mmio_resources(&mut vmm.resource_allocator, 1) + .map_err(|err| VmmError::DeviceManager(err))?; + + let serial = setup_serial_device( + event_manager, + vmm.vm.fd(), + std::io::stdin(), + std::io::stdout(), + &Some(device_info.clone()), + )?; + + vmm.mmio_device_manager + .register_mmio_serial(&mut vmm.resource_allocator, serial, Some(device_info)) + .map_err(VmmError::RegisterMMIODevice)?; + vmm.mmio_device_manager + .add_mmio_serial_to_cmdline(cmdline) + .map_err(VmmError::RegisterMMIODevice)?; + } + + Ok(()) +} + /// Attaches a VirtioDevice device to the device manager and event manager. fn attach_virtio_device( event_manager: &mut EventManager, diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index d5a4e1bd4a6..48c1dbbf81a 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -147,7 +147,7 @@ impl MMIODeviceManager { } /// Allocates resources for a new device to be added. - fn allocate_mmio_resources( + pub fn allocate_mmio_resources( &mut self, resource_allocator: &mut ResourceAllocator, irq_count: u32, @@ -312,7 +312,29 @@ impl MMIODeviceManager { self.register_mmio_device(identifier, device_info, serial) } - #[cfg(target_arch = "aarch64")] + #[cfg(target_arch = "riscv64")] + /// Register an early console at the specified MMIO configuration if given as parameter, + /// otherwise allocate a new MMIO resources for it. + pub fn register_mmio_serial( + &mut self, + resource_allocator: &mut ResourceAllocator, + serial: Arc>, + device_info_opt: Option, + ) -> Result<(), MmioError> { + // Create a new MMIODeviceInfo object on boot path or unwrap the + // existing object on restore path. 
+ let device_info = if let Some(device_info) = device_info_opt { + device_info + } else { + self.allocate_mmio_resources(resource_allocator, 1)? + }; + + let identifier = (DeviceType::Serial, DeviceType::Serial.to_string()); + // Register the newly created Serial object. + self.register_mmio_device(identifier, device_info, serial) + } + + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] /// Append the registered early console to the kernel cmdline. pub fn add_mmio_serial_to_cmdline( &self, diff --git a/src/vmm/src/devices/legacy/mod.rs b/src/vmm/src/devices/legacy/mod.rs index b28ae7082fe..5fea7574a4f 100644 --- a/src/vmm/src/devices/legacy/mod.rs +++ b/src/vmm/src/devices/legacy/mod.rs @@ -13,11 +13,15 @@ pub mod serial; use std::io; use std::ops::Deref; +#[cfg(target_arch = "riscv64")] +use std::os::fd::AsRawFd; use serde::Serializer; use serde::ser::SerializeMap; use vm_superio::Trigger; use vmm_sys_util::eventfd::EventFd; +#[cfg(target_arch = "riscv64")] +use vmm_sys_util::{errno, ioctl::ioctl_with_ref, ioctl_ioc_nr, ioctl_iow_nr}; pub use self::i8042::{I8042Device, I8042Error as I8042DeviceError}; #[cfg(target_arch = "aarch64")] @@ -25,6 +29,8 @@ pub use self::rtc_pl031::RTCDevice; pub use self::serial::{ IER_RDA_BIT, IER_RDA_OFFSET, SerialDevice, SerialEventsWrapper, SerialWrapper, }; +#[cfg(target_arch = "riscv64")] +use crate::logger::error; /// Wrapper for implementing the trigger functionality for `EventFd`. /// @@ -64,6 +70,74 @@ impl EventFdTrigger { } } +// TODO: raw_vmfd and gsi are actually never None. +#[cfg(target_arch = "riscv64")] +#[derive(Debug)] +pub struct IrqLineTrigger { + raw_vmfd: Option, + gsi: Option, +} + +#[cfg(target_arch = "riscv64")] +impl IrqLineTrigger { + pub fn new(raw_vmfd: i32, gsi: u32) -> Self { + Self { + raw_vmfd: Some(raw_vmfd), + gsi: Some(gsi), + } + } + + // This function is taken from kvm-ioctls because it requires VmFd, which we don't + // have at this point. 
However, it only uses the raw file descriptor, which is just + // an i32. So, we copy it here and use it directly with the raw fd. + fn set_irq_line(fd: F, irq: u32, active: bool) -> Result<(), kvm_ioctls::Error> { + let mut irq_level = kvm_bindings::kvm_irq_level::default(); + irq_level.__bindgen_anon_1.irq = irq; + irq_level.level = u32::from(active); + + // SAFETY: Safe because we know that our file is a VM fd, we know the kernel will only read + // the correct amount of memory from our pointer, and we verify the return result. + let ret = unsafe { ioctl_with_ref(&fd, IrqLineTrigger::KVM_IRQ_LINE(), &irq_level) }; + if ret == 0 { + Ok(()) + } else { + Err(errno::Error::last()) + } + } + + ioctl_iow_nr!( + KVM_IRQ_LINE, + kvm_bindings::KVMIO, + 0x61, + kvm_bindings::kvm_irq_level + ); +} + +#[cfg(target_arch = "riscv64")] +impl Trigger for IrqLineTrigger { + type E = ::std::io::Error; + + fn trigger(&self) -> ::std::io::Result<()> { + // Safe to unwrap since `gsi` and `vmfd` have been set + let gsi = self.gsi.unwrap(); + + IrqLineTrigger::set_irq_line(self.raw_vmfd.unwrap().as_raw_fd(), gsi, true).map_err( + |err| { + error!("set_irq_line() failed: {err:?}"); + std::io::Error::last_os_error() + }, + )?; + IrqLineTrigger::set_irq_line(self.raw_vmfd.unwrap().as_raw_fd(), gsi, false).map_err( + |err| { + error!("set_irq_line() failed: {err:?}"); + std::io::Error::last_os_error() + }, + )?; + + Ok(()) + } +} + /// Called by METRICS.flush(), this function facilitates serialization of aggregated metrics. 
pub fn flush_metrics(serializer: S) -> Result { let mut seq = serializer.serialize_map(Some(1))?; diff --git a/src/vmm/src/devices/legacy/serial.rs b/src/vmm/src/devices/legacy/serial.rs index 278c15a4464..5c993a4be68 100644 --- a/src/vmm/src/devices/legacy/serial.rs +++ b/src/vmm/src/devices/legacy/serial.rs @@ -19,6 +19,8 @@ use vm_superio::{Serial, Trigger}; use vmm_sys_util::epoll::EventSet; use crate::devices::legacy::EventFdTrigger; +#[cfg(target_arch = "riscv64")] +use crate::devices::legacy::IrqLineTrigger; use crate::logger::{IncMetric, SharedIncMetric}; /// Received Data Available interrupt - for letting the driver know that @@ -71,6 +73,7 @@ pub trait RawIOHandler { fn raw_input(&mut self, _data: &[u8]) -> Result<(), RawIOError>; } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl RawIOHandler for Serial { // This is not used for anything and is basically just a dummy implementation for `raw_input`. fn raw_input(&mut self, data: &[u8]) -> Result<(), RawIOError> { @@ -88,6 +91,24 @@ impl RawIOHandler for Serial RawIOHandler for Serial { + // This is not used for anything and is basically just a dummy implementation for `raw_input`. + fn raw_input(&mut self, data: &[u8]) -> Result<(), RawIOError> { + // Fail fast if the serial is serviced with more data than it can buffer. + if data.len() > self.fifo_capacity() { + return Err(RawIOError::Serial(SerialError::FullFifo)); + } + + // Before enqueuing bytes we first check if there is enough free space + // in the FIFO. + if self.fifo_capacity() >= data.len() { + self.enqueue_raw_bytes(data).map_err(RawIOError::Serial)?; + } + Ok(()) + } +} + /// Wrapper over available events (i.e metrics, buffer ready etc). 
#[derive(Debug)] pub struct SerialEventsWrapper { @@ -152,6 +173,7 @@ pub struct SerialWrapper pub input: Option, } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl SerialWrapper { fn handle_ewouldblock(&self, ops: &mut EventOps) { let buffer_ready_fd = self.buffer_ready_evt_fd(); @@ -219,9 +241,83 @@ impl SerialWrapper SerialWrapper { + fn handle_ewouldblock(&self, ops: &mut EventOps) { + let buffer_ready_fd = self.buffer_ready_evt_fd(); + let input_fd = self.serial_input_fd(); + if input_fd < 0 || buffer_ready_fd < 0 { + error!("Serial does not have a configured input source."); + return; + } + match ops.add(Events::new(&input_fd, EventSet::IN)) { + Err(event_manager::Error::FdAlreadyRegistered) => (), + Err(err) => { + error!( + "Could not register the serial input to the event manager: {:?}", + err + ); + } + Ok(()) => { + // Bytes might had come on the unregistered stdin. Try to consume any. + self.serial.events().in_buffer_empty() + } + }; + } + + fn recv_bytes(&mut self) -> io::Result { + let avail_cap = self.serial.fifo_capacity(); + if avail_cap == 0 { + return Err(io::Error::from_raw_os_error(libc::ENOBUFS)); + } + + if let Some(input) = self.input.as_mut() { + let mut out = vec![0u8; avail_cap]; + let count = input.read(&mut out)?; + if count > 0 { + self.serial + .raw_input(&out[..count]) + .map_err(|_| io::Error::from_raw_os_error(libc::ENOBUFS))?; + } + + return Ok(count); + } + + Err(io::Error::from_raw_os_error(libc::ENOTTY)) + } + + #[inline] + fn buffer_ready_evt_fd(&self) -> RawFd { + self.serial + .events() + .buffer_ready_event_fd + .as_ref() + .map_or(-1, |buf_ready| buf_ready.as_raw_fd()) + } + + #[inline] + fn serial_input_fd(&self) -> RawFd { + self.input.as_ref().map_or(-1, |input| input.as_raw_fd()) + } + + fn consume_buffer_ready_event(&self) -> io::Result { + self.serial + .events() + .buffer_ready_event_fd + .as_ref() + .map_or(Ok(0), |buf_ready| buf_ready.read()) + } +} + +#[cfg(any(target_arch = "x86_64", 
target_arch = "aarch64"))] /// Type for representing a serial device. pub type SerialDevice = SerialWrapper; +#[cfg(target_arch = "riscv64")] +/// Type for representing a serial device. +pub type SerialDevice = SerialWrapper; + +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl MutEventSubscriber for SerialWrapper { @@ -319,6 +415,104 @@ impl MutEventSubscriber } } +#[cfg(target_arch = "riscv64")] +impl MutEventSubscriber + for SerialWrapper +{ + /// Handle events on the serial input fd. + fn process(&mut self, event: Events, ops: &mut EventOps) { + #[inline] + fn unregister_source(ops: &mut EventOps, source: &T) { + match ops.remove(Events::new(source, EventSet::IN)) { + Ok(_) => (), + Err(_) => error!("Could not unregister source fd: {}", source.as_raw_fd()), + } + } + + let input_fd = self.serial_input_fd(); + let buffer_ready_fd = self.buffer_ready_evt_fd(); + if input_fd < 0 || buffer_ready_fd < 0 { + error!("Serial does not have a configured input source."); + return; + } + + if buffer_ready_fd == event.fd() { + match self.consume_buffer_ready_event() { + Ok(_) => (), + Err(err) => { + error!( + "Detach serial device input source due to error in consuming the buffer \ + ready event: {:?}", + err + ); + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + return; + } + } + } + + // We expect to receive: `EventSet::IN`, `EventSet::HANG_UP` or + // `EventSet::ERROR`. To process all these events we just have to + // read from the serial input. + match self.recv_bytes() { + Ok(count) => { + // Handle EOF if the event came from the input source. 
+ if input_fd == event.fd() && count == 0 { + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + warn!("Detached the serial input due to peer close/error."); + } + } + Err(err) => { + match err.raw_os_error() { + Some(errno) if errno == libc::ENOBUFS => { + unregister_source(ops, &input_fd); + } + Some(errno) if errno == libc::EWOULDBLOCK => { + self.handle_ewouldblock(ops); + } + Some(errno) if errno == libc::ENOTTY => { + error!("The serial device does not have the input source attached."); + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + } + Some(_) | None => { + // Unknown error, detach the serial input source. + unregister_source(ops, &input_fd); + unregister_source(ops, &buffer_ready_fd); + warn!("Detached the serial input due to peer close/error."); + } + } + } + } + } + + /// Initial registration of pollable objects. + /// If serial input is present, register the serial input FD as readable. + fn init(&mut self, ops: &mut EventOps) { + if self.input.is_some() && self.serial.events().buffer_ready_event_fd.is_some() { + let serial_fd = self.serial_input_fd(); + let buf_ready_evt = self.buffer_ready_evt_fd(); + + // If the jailer is instructed to daemonize before exec-ing into firecracker, we set + // stdin, stdout and stderr to be open('/dev/null'). However, if stdin is redirected + // from /dev/null then trying to register FILENO_STDIN to epoll will fail with EPERM. + // Therefore, only try to register stdin to epoll if it is a terminal or a FIFO pipe. + // SAFETY: isatty has no invariants that need to be upheld. If serial_fd is an invalid + // argument, it will return 0 and set errno to EBADF. 
+ if unsafe { libc::isatty(serial_fd) } == 1 || is_fifo(serial_fd) { + if let Err(err) = ops.add(Events::new(&serial_fd, EventSet::IN)) { + warn!("Failed to register serial input fd: {}", err); + } + } + if let Err(err) = ops.add(Events::new(&buf_ready_evt, EventSet::IN)) { + warn!("Failed to register serial buffer ready event: {}", err); + } + } + } +} + /// Checks whether the given file descriptor is a FIFO pipe. fn is_fifo(fd: RawFd) -> bool { let mut stat = std::mem::MaybeUninit::::uninit(); @@ -337,6 +531,7 @@ fn is_fifo(fd: RawFd) -> bool { (stat.st_mode & libc::S_IFIFO) != 0 } +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] impl SerialWrapper { @@ -361,6 +556,31 @@ impl } } +#[cfg(target_arch = "riscv64")] +impl + SerialWrapper +{ + pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { + if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { + data[0] = self.serial.read(offset); + } else { + METRICS.missed_read_count.inc(); + } + } + + pub fn bus_write(&mut self, offset: u64, data: &[u8]) { + if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { + if let Err(err) = self.serial.write(offset, data[0]) { + // Counter incremented for any handle_write() error. + error!("Failed the write to serial: {:?}", err); + METRICS.error_count.inc(); + } + } else { + METRICS.missed_write_count.inc(); + } + } +} + #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index fe2ac30a429..34ad18198ab 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -207,7 +207,7 @@ pub const HTTP_MAX_PAYLOAD_SIZE: usize = 51200; pub enum VmmError { /// Failed to allocate guest resource: {0} AllocateResources(#[from] vm_allocator::Error), - #[cfg(target_arch = "aarch64")] + #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] /// Invalid command line error. 
Cmdline, /// Device manager error: {0} From 141424bdeb1b31723b2290f47a319723b6e59049 Mon Sep 17 00:00:00 2001 From: Dimitris Charisis Date: Tue, 27 May 2025 10:50:42 +0300 Subject: [PATCH 17/17] riscv64: add interrupt support for net devices Add interrupt support for VirtIO net devices on riscv64. This follows the logic used for VirtIO block devices, inserting interrupts via the `KVM_IRQ_LINE` ioctl. Signed-off-by: Dimitris Charisis --- src/vmm/src/builder.rs | 3 --- src/vmm/src/devices/virtio/net/device.rs | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1521324edec..2251633be3b 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -54,7 +54,6 @@ use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::mmio::MmioTransport; -#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::net::Net; #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use crate::devices::virtio::rng::Entropy; @@ -291,7 +290,6 @@ pub fn build_microvm_for_boot( event_manager, )?; - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] attach_net_devices( &mut vmm, &mut boot_cmdline, @@ -847,7 +845,6 @@ fn attach_block_devices<'a, I: Iterator>> + Debug>( Ok(()) } -#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] fn attach_net_devices<'a, I: Iterator>> + Debug>( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index eec439f1387..29dd654e278 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -968,7 +968,7 @@ impl VirtioDevice for Net { #[cfg(target_arch = "riscv64")] fn interrupt_trigger_mut(&mut self) -> &mut IrqTrigger { - unimplemented!() + &mut self.irq_trigger } fn read_config(&self, 
offset: u64, data: &mut [u8]) {