Skip to content

Commit 15a2e6e

Browse files
committed
corndog: add settings generator for hugepages under kernel.sysctl
Signed-off-by: Yutong Sun <yutongsu@amazon.com>
1 parent fe722a5 commit 15a2e6e

File tree

4 files changed

+100
-2
lines changed

4 files changed

+100
-2
lines changed

sources/Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sources/api/corndog/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ exclude = ["README.md"]
1111

1212
[dependencies]
1313
log.workspace = true
14+
num_cpus.workspace = true
15+
pciclient.workspace = true
1416
serde = { workspace = true, features = ["derive"] }
1517
serde_json.workspace = true
1618
simplelog.workspace = true
@@ -20,3 +22,6 @@ bottlerocket-modeled-types.workspace = true
2022

2123
[build-dependencies]
2224
generate-readme.workspace = true
25+
26+
[dev-dependencies]
27+
test-case.workspace = true

sources/api/corndog/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ It sets kernel-related settings, for example:
77
* sysctl values, based on key/value pairs in `settings.kernel.sysctl`
88
* lockdown mode, based on the value of `settings.kernel.lockdown`
99

10+
corndog also provides a settings generator for hugepages, subcommand "generate-hugepages-setting".
11+
1012
## Colophon
1113

1214
This text was generated from `README.tpl` using [cargo-readme](https://crates.io/crates/cargo-readme), and includes the rustdoc from `src/main.rs`.

sources/api/corndog/src/main.rs

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ corndog is a delicious way to get at the meat inside the kernels.
33
It sets kernel-related settings, for example:
44
* sysctl values, based on key/value pairs in `settings.kernel.sysctl`
55
* lockdown mode, based on the value of `settings.kernel.lockdown`
6+
7+
corndog also provides a settings generator for hugepages, subcommand "generate-hugepages-setting".
68
*/
79

810
use bottlerocket_modeled_types::{Lockdown, SysctlKey};
@@ -20,6 +22,10 @@ use std::{env, process};
2022
const SYSCTL_PATH_PREFIX: &str = "/proc/sys";
2123
const LOCKDOWN_PATH: &str = "/sys/kernel/security/lockdown";
2224
const DEFAULT_CONFIG_PATH: &str = "/etc/corndog.toml";
25+
const NR_HUGEPAGES_PATH_SYSCTL: &str = "/proc/sys/vm/nr_hugepages";
26+
/// Number of hugepages we will assign per core.
27+
/// See [`compute_hugepages_for_efa`] for more detail on the computation consideration.
28+
const HUGEPAGES_2MB_PER_CORE: u64 = 110;
2329

2430
/// Store the args we receive on the command line.
2531
struct Args {
@@ -45,20 +51,30 @@ fn run() -> Result<()> {
4551
SimpleLogger::init(args.log_level, LogConfig::default()).context(error::LoggerSnafu)?;
4652

4753
// If the user has kernel settings, apply them.
48-
let kernel = get_kernel_settings(args.config_path)?;
4954
match args.subcommand.as_ref() {
5055
"sysctl" => {
56+
let kernel = get_kernel_settings(args.config_path)?;
5157
if let Some(sysctls) = kernel.sysctl {
5258
debug!("Applying sysctls: {:#?}", sysctls);
5359
set_sysctls(sysctls);
5460
}
5561
}
5662
"lockdown" => {
63+
let kernel = get_kernel_settings(args.config_path)?;
5764
if let Some(lockdown) = kernel.lockdown {
5865
debug!("Setting lockdown: {:#?}", lockdown);
5966
set_lockdown(&lockdown)?;
6067
}
6168
}
69+
"generate-hugepages-setting" => {
70+
let hugepages_setting = generate_hugepages_setting()?;
71+
// We will only fail if we cannot serialize the output to JSON string.
72+
// sundog expects JSON-serialized output so that many types can be represented, allowing the
73+
// API model to use more accurate types.
74+
let output =
75+
serde_json::to_string(&hugepages_setting).context(error::SerializeJsonSnafu)?;
76+
println!("{}", output);
77+
}
6278
_ => usage_msg(format!("Unknown subcommand '{}'", args.subcommand)), // should be unreachable
6379
}
6480

@@ -107,6 +123,55 @@ where
107123
}
108124
}
109125

126+
/// Generate the hugepages setting for defaults.
127+
fn generate_hugepages_setting() -> Result<String> {
128+
// Check if customer has directly written to the nr_hugepage file.
129+
let mut hugepages = fs::read_to_string(NR_HUGEPAGES_PATH_SYSCTL)
130+
.map(check_for_existing_hugepages)
131+
.unwrap_or("0".to_string());
132+
133+
// Check for EFA and compute if necessary, only when hugepages is "0".
134+
if &hugepages == "0" && pciclient::is_efa_attached().unwrap_or(false) {
135+
// We will use [`num_cpus`] to get the number of cores for the compute.
136+
hugepages = compute_hugepages_for_efa(num_cpus::get());
137+
}
138+
Ok(hugepages)
139+
}
140+
141+
// Check if customer has directly written to the nr_hugepage file.
142+
//
143+
// This would be a rare case to hit, as customer would normally modify the hugepages value
144+
// via settings API. (It could happen with a custom variant if hugepages
145+
// are set via a sysctl.d drop-in, for example.)
146+
//
147+
// We expect the existing_hugepages_value to be valid numeric digits. Otherwise, we will
148+
// use "0" as default.
149+
fn check_for_existing_hugepages(existing_hugepages_value: String) -> String {
150+
match existing_hugepages_value.trim().parse::<u64>() {
151+
Ok(value) => {
152+
return value.to_string();
153+
}
154+
Err(err) => {
155+
warn!(
156+
"Failed to parse the existing hugepage value, using 0 as default. Error: {}",
157+
err
158+
);
159+
}
160+
}
161+
"0".to_string()
162+
}
163+
164+
/// Computation:
165+
/// - We need to allocate 110MB memory for each libfabric endpoint.
166+
/// - For optimal setup, Open MPI will open 2 libfabric endpoints each core.
167+
/// - The total number of hugepages will be set as (110MB * 2) * number_of_cores / hugepage_size
168+
/// - We will allocate default hugepage_size = 2MB.
169+
/// - The number of hugepage per core would be 110MB * 2 / 2MB = 110.
170+
fn compute_hugepages_for_efa(num_cores: usize) -> String {
171+
let number_of_hugepages = num_cores as u64 * HUGEPAGES_2MB_PER_CORE;
172+
number_of_hugepages.to_string()
173+
}
174+
110175
/// Sets the requested lockdown mode in the kernel.
111176
///
112177
/// The Linux kernel won't allow lowering the lockdown setting, but we want to allow users to
@@ -165,6 +230,7 @@ fn usage() -> ! {
165230
Subcommands:
166231
sysctl
167232
lockdown
233+
generate-hugepages-setting
168234
169235
Global arguments:
170236
--config-path PATH
@@ -207,7 +273,7 @@ fn parse_args(args: env::Args) -> Args {
207273
)
208274
}
209275

210-
"sysctl" | "lockdown" => subcommand = Some(arg),
276+
"sysctl" | "lockdown" | "generate-hugepages-setting" => subcommand = Some(arg),
211277

212278
_ => usage(),
213279
}
@@ -251,6 +317,9 @@ mod error {
251317
source: Box<toml::de::Error>,
252318
},
253319

320+
#[snafu(display("Error serializing to JSON: {}", source))]
321+
SerializeJson { source: serde_json::error::Error },
322+
254323
#[snafu(display(
255324
"Failed to change lockdown from '{}' to '{}': {}",
256325
current,
@@ -271,6 +340,8 @@ type Result<T> = std::result::Result<T, error::Error>;
271340

272341
#[cfg(test)]
273342
mod test {
343+
use test_case::test_case;
344+
274345
use super::*;
275346

276347
#[test]
@@ -305,4 +376,21 @@ mod test {
305376
parse_kernel_setting("none integrity confidentiality\n")
306377
);
307378
}
379+
380+
#[test]
fn test_compute_hugepages_for_efa() {
    // Two cores at 110 hugepages per core should yield 220 hugepages.
    let core_count: usize = 2;
    assert_eq!(compute_hugepages_for_efa(core_count), "220");
}
386+
387+
#[test_case("".to_string(), "0".to_string())]
388+
#[test_case("0".to_string(), "0".to_string())]
389+
#[test_case("-1".to_string(), "0".to_string())]
390+
#[test_case("abc".to_string(), "0".to_string())]
391+
#[test_case("100".to_string(), "100".to_string())]
392+
fn test_check_for_existing_hugepages(existing_value: String, expected_hugepages: String) {
393+
let actual_hugepages = check_for_existing_hugepages(existing_value);
394+
assert_eq!(actual_hugepages, expected_hugepages);
395+
}
308396
}

0 commit comments

Comments
 (0)