@@ -3,6 +3,8 @@ corndog is a delicious way to get at the meat inside the kernels.
3
3
It sets kernel-related settings, for example:
4
4
* sysctl values, based on key/value pairs in `settings.kernel.sysctl`
5
5
* lockdown mode, based on the value of `settings.kernel.lockdown`
6
+
7
+ corndog also provides a settings generator for hugepages via the "generate-hugepages-setting" subcommand.
6
8
*/
7
9
8
10
use bottlerocket_modeled_types:: { Lockdown , SysctlKey } ;
@@ -20,6 +22,10 @@ use std::{env, process};
20
22
const SYSCTL_PATH_PREFIX : & str = "/proc/sys" ;
21
23
const LOCKDOWN_PATH : & str = "/sys/kernel/security/lockdown" ;
22
24
const DEFAULT_CONFIG_PATH : & str = "/etc/corndog.toml" ;
25
+ const NR_HUGEPAGES_PATH_SYSCTL : & str = "/proc/sys/vm/nr_hugepages" ;
26
+ /// Number of hugepages we will assign per core.
27
+ /// See [`compute_hugepages_for_efa`] for details on how this value is derived.
28
+ const HUGEPAGES_2MB_PER_CORE : u64 = 110 ;
23
29
24
30
/// Store the args we receive on the command line.
25
31
struct Args {
@@ -45,20 +51,30 @@ fn run() -> Result<()> {
45
51
SimpleLogger :: init ( args. log_level , LogConfig :: default ( ) ) . context ( error:: LoggerSnafu ) ?;
46
52
47
53
// Dispatch on the subcommand; kernel settings are loaded only by the subcommands that apply them.
48
- let kernel = get_kernel_settings ( args. config_path ) ?;
49
54
match args. subcommand . as_ref ( ) {
50
55
"sysctl" => {
56
+ let kernel = get_kernel_settings ( args. config_path ) ?;
51
57
if let Some ( sysctls) = kernel. sysctl {
52
58
debug ! ( "Applying sysctls: {:#?}" , sysctls) ;
53
59
set_sysctls ( sysctls) ;
54
60
}
55
61
}
56
62
"lockdown" => {
63
+ let kernel = get_kernel_settings ( args. config_path ) ?;
57
64
if let Some ( lockdown) = kernel. lockdown {
58
65
debug ! ( "Setting lockdown: {:#?}" , lockdown) ;
59
66
set_lockdown ( & lockdown) ?;
60
67
}
61
68
}
69
+ "generate-hugepages-setting" => {
70
+ let hugepages_setting = generate_hugepages_setting ( ) ?;
71
+ // We will only fail if we cannot serialize the output to JSON string.
72
+ // sundog expects JSON-serialized output so that many types can be represented, allowing the
73
+ // API model to use more accurate types.
74
+ let output =
75
+ serde_json:: to_string ( & hugepages_setting) . context ( error:: SerializeJsonSnafu ) ?;
76
+ println ! ( "{}" , output) ;
77
+ }
62
78
_ => usage_msg ( format ! ( "Unknown subcommand '{}'" , args. subcommand) ) , // should be unreachable
63
79
}
64
80
@@ -107,6 +123,55 @@ where
107
123
}
108
124
}
109
125
126
+ /// Generate the hugepages setting for defaults.
127
+ fn generate_hugepages_setting ( ) -> Result < String > {
128
+ // Check if customer has directly written to the nr_hugepage file.
129
+ let mut hugepages = fs:: read_to_string ( NR_HUGEPAGES_PATH_SYSCTL )
130
+ . map ( check_for_existing_hugepages)
131
+ . unwrap_or ( "0" . to_string ( ) ) ;
132
+
133
+ // Check for EFA and compute if necessary, only when hugepages is "0".
134
+ if & hugepages == "0" && pciclient:: is_efa_attached ( ) . unwrap_or ( false ) {
135
+ // We will use [`num_cpus`] to get the number of cores for the compute.
136
+ hugepages = compute_hugepages_for_efa ( num_cpus:: get ( ) ) ;
137
+ }
138
+ Ok ( hugepages)
139
+ }
140
+
141
+ // Check if customer has directly written to the nr_hugepage file.
142
+ //
143
+ // This would be a rare case to hit, as customer would normally modify the hugepages value
144
+ // via settings API. (It could happen with a custom variant if hugepages
145
+ // are set via a sysctl.d drop-in, for example.)
146
+ //
147
+ // We expect the existing_hugepages_value to be valid numeric digits. Otherwise, we will
148
+ // use "0" as default.
149
+ fn check_for_existing_hugepages ( existing_hugepages_value : String ) -> String {
150
+ match existing_hugepages_value. trim ( ) . parse :: < u64 > ( ) {
151
+ Ok ( value) => {
152
+ return value. to_string ( ) ;
153
+ }
154
+ Err ( err) => {
155
+ warn ! (
156
+ "Failed to parse the existing hugepage value, using 0 as default. Error: {}" ,
157
+ err
158
+ ) ;
159
+ }
160
+ }
161
+ "0" . to_string ( )
162
+ }
163
+
164
+ /// Computation:
165
+ /// - We need to allocate 110MB memory for each libfabric endpoint.
166
+ /// - For optimal setup, Open MPI will open 2 libfabric endpoints each core.
167
+ /// - The total number of hugepages will be set as (110MB * 2) * number_of_cores / hugepage_size
168
+ /// - We will allocate default hugepage_size = 2MB.
169
+ /// - The number of hugepage per core would be 110MB * 2 / 2MB = 110.
170
+ fn compute_hugepages_for_efa ( num_cores : usize ) -> String {
171
+ let number_of_hugepages = num_cores as u64 * HUGEPAGES_2MB_PER_CORE ;
172
+ number_of_hugepages. to_string ( )
173
+ }
174
+
110
175
/// Sets the requested lockdown mode in the kernel.
111
176
///
112
177
/// The Linux kernel won't allow lowering the lockdown setting, but we want to allow users to
@@ -165,6 +230,7 @@ fn usage() -> ! {
165
230
Subcommands:
166
231
sysctl
167
232
lockdown
233
+ generate-hugepages-setting
168
234
169
235
Global arguments:
170
236
--config-path PATH
@@ -207,7 +273,7 @@ fn parse_args(args: env::Args) -> Args {
207
273
)
208
274
}
209
275
210
- "sysctl" | "lockdown" => subcommand = Some ( arg) ,
276
+ "sysctl" | "lockdown" | "generate-hugepages-setting" => subcommand = Some ( arg) ,
211
277
212
278
_ => usage ( ) ,
213
279
}
@@ -251,6 +317,9 @@ mod error {
251
317
source : Box < toml:: de:: Error > ,
252
318
} ,
253
319
320
+ #[ snafu( display( "Error serializing to JSON: {}" , source) ) ]
321
+ SerializeJson { source : serde_json:: error:: Error } ,
322
+
254
323
#[ snafu( display(
255
324
"Failed to change lockdown from '{}' to '{}': {}" ,
256
325
current,
@@ -271,6 +340,8 @@ type Result<T> = std::result::Result<T, error::Error>;
271
340
272
341
#[ cfg( test) ]
273
342
mod test {
343
+ use test_case:: test_case;
344
+
274
345
use super :: * ;
275
346
276
347
#[ test]
@@ -305,4 +376,21 @@ mod test {
305
376
parse_kernel_setting( "none integrity confidentiality\n " )
306
377
) ;
307
378
}
379
+
380
/// Checks the per-core hugepage computation at the zero, single-core and multi-core points.
#[test]
fn test_compute_hugepages_for_efa() {
    // No cores means no hugepages.
    assert_eq!(compute_hugepages_for_efa(0), "0");
    // One core gets exactly the per-core allotment of 110 pages.
    assert_eq!(compute_hugepages_for_efa(1), "110");

    let num_cores: usize = 2;
    let computed_hugepages = compute_hugepages_for_efa(num_cores);
    assert_eq!(computed_hugepages, "220");
}
386
+
387
+ #[ test_case( "" . to_string( ) , "0" . to_string( ) ) ]
388
+ #[ test_case( "0" . to_string( ) , "0" . to_string( ) ) ]
389
+ #[ test_case( "-1" . to_string( ) , "0" . to_string( ) ) ]
390
+ #[ test_case( "abc" . to_string( ) , "0" . to_string( ) ) ]
391
+ #[ test_case( "100" . to_string( ) , "100" . to_string( ) ) ]
392
+ fn test_check_for_existing_hugepages ( existing_value : String , expected_hugepages : String ) {
393
+ let actual_hugepages = check_for_existing_hugepages ( existing_value) ;
394
+ assert_eq ! ( actual_hugepages, expected_hugepages) ;
395
+ }
308
396
}
0 commit comments