Skip to content

Commit 2ee905e

Browse files
committed
schnauzer: extend oci_defaults helper to apply higher MEMLOCK limits when EFA is attached
Signed-off-by: Yutong Sun <yutongsu@amazon.com>
1 parent 15a2e6e commit 2ee905e

File tree

3 files changed

+76
-1
lines changed

3 files changed

+76
-1
lines changed

sources/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sources/api/schnauzer/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ log.workspace = true
2929
maplit.workspace = true
3030
models.workspace = true
3131
num_cpus.workspace = true
32+
pciclient.workspace = true
3233
percent-encoding.workspace = true
3334
pest.workspace = true
3435
pest_derive.workspace = true

sources/api/schnauzer/src/helpers/mod.rs

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ const IPV6_LOCALHOST: IpAddr = IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1))
125125

126126
const DEFAULT_ECS_METADATA_SERVICE_RPS: i32 = 40;
127127
const DEFAULT_ECS_METADATA_SERVICE_BURST: i32 = 60;
128+
/// We use -1 to indicate an unlimited value for a resource limit.
129+
const RLIMIT_UNLIMITED: i64 = -1;
128130

129131
/// Potential errors during helper execution
130132
mod error {
@@ -258,6 +260,9 @@ mod error {
258260
source: std::net::AddrParseError,
259261
},
260262

263+
#[snafu(display("Failed to check if EFA device is attached: {}", source))]
264+
CheckEfaFailure { source: pciclient::PciClientError },
265+
261266
#[snafu(display(
262267
"Expected an absolute URL, got '{}' in template '{}': '{}'",
263268
url_str,
@@ -1250,7 +1255,7 @@ pub fn oci_defaults(
12501255
runtime.get_capabilities(capabilities)
12511256
}
12521257
OciSpecSection::ResourceLimits => {
1253-
let rlimits = oci_spec_resource_limits(oci_defaults_values)?;
1258+
let rlimits = generate_oci_resource_limits(oci_defaults_values, EfaLspciDetector {})?;
12541259
rlimits
12551260
.iter()
12561261
.map(|(rlimit_type, values)| runtime.get_resource_limits(rlimit_type, values))
@@ -1308,12 +1313,42 @@ fn oci_spec_capabilities(value: &Value) -> Result<String, RenderError> {
13081313
/// This helper function generates the resource limits section of
13091314
/// the OCI runtime spec from the provided `value` parameter, which is
13101315
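/// the settings data from the datastore (`settings.oci-defaults.resource-limits`). When an EFA device is attached and no `max-locked-memory` limit is configured, an unlimited one is added.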
1316+
fn generate_oci_resource_limits<T: EfaDetector>(
1317+
value: &Value,
1318+
efa_detector: T,
1319+
) -> Result<HashMap<OciDefaultsResourceLimitType, OciDefaultsResourceLimitV1>, RenderError> {
1320+
let mut rlimits = oci_spec_resource_limits(value)?;
1321+
if efa_detector.is_efa_attached()? {
1322+
// We need to increase the locked memory limits from the default 8096KB to unlimited
1323+
// to account for hugepages allocation.
1324+
rlimits
1325+
.entry(OciDefaultsResourceLimitType::MaxLockedMemory)
1326+
.or_insert(OciDefaultsResourceLimitV1 {
1327+
soft_limit: RLIMIT_UNLIMITED,
1328+
hard_limit: RLIMIT_UNLIMITED,
1329+
});
1330+
}
1331+
Ok(rlimits)
1332+
}
1333+
13111334
fn oci_spec_resource_limits(
13121335
value: &Value,
13131336
) -> Result<HashMap<OciDefaultsResourceLimitType, OciDefaultsResourceLimitV1>, RenderError> {
13141337
Ok(serde_json::from_value(value.clone())?)
13151338
}
13161339

1340+
trait EfaDetector {
1341+
fn is_efa_attached(&self) -> Result<bool, TemplateHelperError>;
1342+
}
1343+
1344+
struct EfaLspciDetector;
1345+
1346+
impl EfaDetector for EfaLspciDetector {
1347+
fn is_efa_attached(&self) -> Result<bool, TemplateHelperError> {
1348+
pciclient::is_efa_attached().context(error::CheckEfaFailureSnafu)
1349+
}
1350+
}
1351+
13171352
// =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^=
13181353
// helpers to the helpers
13191354

@@ -1992,6 +2027,22 @@ mod test_oci_spec {
19922027
use serde_json::json;
19932028
use OciDefaultsResourceLimitType::*;
19942029

2030+
// Test detector that always reports an EFA device as attached.
2031+
struct EfaPresentDetector;
2032+
impl EfaDetector for EfaPresentDetector {
2033+
fn is_efa_attached(&self) -> Result<bool, TemplateHelperError> {
2034+
Ok(true)
2035+
}
2036+
}
2037+
2038+
// Test detector that always reports that no EFA device is attached.
2039+
struct EfaNotPresentDetector;
2040+
impl EfaDetector for EfaNotPresentDetector {
2041+
fn is_efa_attached(&self) -> Result<bool, TemplateHelperError> {
2042+
Ok(false)
2043+
}
2044+
}
2045+
19952046
#[test]
19962047
fn oci_spec_capabilities_test() {
19972048
let json = json!({
@@ -2058,6 +2109,28 @@ mod test_oci_spec {
20582109
}
20592110
}
20602111

2112+
#[test]
2113+
fn generate_oci_resource_limits_efa_detected() {
2114+
let json = json!({"max-open-files": {"hard-limit": 1, "soft-limit": 2}});
2115+
let rlimits = generate_oci_resource_limits(&json, EfaPresentDetector {}).unwrap();
2116+
let rendered = Containerd::get_resource_limits(
2117+
&MaxLockedMemory,
2118+
rlimits.get(&MaxLockedMemory).unwrap(),
2119+
);
2120+
assert_eq!(
2121+
rendered,
2122+
r#"{ "type": "RLIMIT_MEMLOCK", "hard": 18446744073709551615, "soft": 18446744073709551615 }"#
2123+
);
2124+
}
2125+
2126+
#[test]
2127+
fn generate_oci_resource_limits_efa_not_detected() {
2128+
let json = json!({"max-open-files": {"hard-limit": 1, "soft-limit": 2}});
2129+
let rlimits = generate_oci_resource_limits(&json, EfaNotPresentDetector {}).unwrap();
2130+
// If EFA is not detected, we will not set the max-locked-memory rlimit
2131+
assert_eq!(rlimits.get(&MaxLockedMemory), None)
2132+
}
2133+
20612134
#[test]
20622135
fn oci_spec_max_locked_memory_as_unlimited_resource_limit_test() {
20632136
let json = json!({"max-locked-memory": {"hard-limit": "unlimited", "soft-limit": 18}});

0 commit comments

Comments
 (0)