Skip to content

Commit 9335835

Browse files
authored
Warn on startup when data dir is too small (#5601)
* Warn when data dir is too small * Canonicalize mount points
1 parent de78b89 commit 9335835

File tree

5 files changed

+172
-5
lines changed

5 files changed

+172
-5
lines changed

quickwit/Cargo.lock

Lines changed: 82 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ sqlx = { version = "0.7", features = [
230230
] }
231231
syn = { version = "2.0.11", features = ["extra-traits", "full", "parsing"] }
232232
sync_wrapper = "0.1.2"
233+
sysinfo = "0.33.1"
233234
tabled = { version = "0.14", features = ["color"] }
234235
tempfile = "3"
235236
thiserror = "1"

quickwit/quickwit-common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ rayon = { workspace = true }
3434
regex = { workspace = true }
3535
serde = { workspace = true }
3636
siphasher = { workspace = true }
37+
sysinfo = { workspace = true }
3738
tempfile = { workspace = true }
3839
thiserror = { workspace = true }
3940
tokio = { workspace = true }

quickwit/quickwit-common/src/fs.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
use std::path::{Path, PathBuf};
1616

17+
use bytesize::ByteSize;
18+
use sysinfo::{Disk, DiskRefreshKind};
1719
use tokio;
1820

1921
/// Deletes the contents of a directory.
@@ -34,6 +36,38 @@ pub fn get_cache_directory_path(data_dir_path: &Path) -> PathBuf {
3436
data_dir_path.join("indexer-split-cache").join("splits")
3537
}
3638

39+
/// Get the total size of the disk containing the given directory, or `None` if
40+
/// it couldn't be determined.
41+
pub fn get_disk_size(dir_path: &Path) -> Option<ByteSize> {
42+
let disks = sysinfo::Disks::new_with_refreshed_list_specifics(
43+
DiskRefreshKind::nothing().with_storage(),
44+
);
45+
let mut best_match: Option<(&Disk, PathBuf)> = None;
46+
let dir_path = dir_path.canonicalize().ok()?;
47+
for disk in disks.list() {
48+
let canonical_mount_path = disk.mount_point().canonicalize().ok()?;
49+
if dir_path.starts_with(&canonical_mount_path) {
50+
match best_match {
51+
Some((_, best_mount_point))
52+
if canonical_mount_path.starts_with(&best_mount_point) =>
53+
{
54+
best_match = Some((disk, canonical_mount_path.clone()));
55+
}
56+
None => {
57+
best_match = Some((disk, canonical_mount_path.clone()));
58+
}
59+
_ => {}
60+
}
61+
}
62+
if canonical_mount_path.starts_with(&dir_path) && canonical_mount_path != dir_path {
63+
// if a disk is mounted within the directory, we can't determine the
64+
// size of the directories disk
65+
return None;
66+
}
67+
}
68+
best_match.map(|(disk, _)| ByteSize::b(disk.total_space()))
69+
}
70+
3771
#[cfg(test)]
3872
mod tests {
3973
use tempfile;

quickwit/quickwit-config/src/node_config/serialize.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ use std::str::FromStr;
1818
use std::time::Duration;
1919

2020
use anyhow::{bail, Context};
21+
use bytesize::ByteSize;
2122
use http::HeaderMap;
23+
use quickwit_common::fs::get_disk_size;
2224
use quickwit_common::net::{find_private_ip, get_short_hostname, Host};
2325
use quickwit_common::new_coolid;
2426
use quickwit_common::uri::Uri;
@@ -338,9 +340,61 @@ fn validate(node_config: &NodeConfig) -> anyhow::Result<()> {
338340
if node_config.peer_seeds.is_empty() {
339341
warn!("peer seeds are empty");
340342
}
343+
validate_disk_usage(node_config);
341344
Ok(())
342345
}
343346

347+
/// A list of all the known disk budgets
348+
///
349+
/// External disk usage and unbounded disk usages, e.g the indexing workbench
350+
/// (indexing/) and the delete task workbench (delete_task_service/) are not included.
351+
#[derive(Default, Debug)]
352+
struct ExpectedDiskUsage {
353+
// indexer / ingester
354+
split_store_max_num_bytes: Option<ByteSize>,
355+
max_queue_disk_usage: Option<ByteSize>,
356+
// searcher
357+
split_cache: Option<ByteSize>,
358+
}
359+
360+
impl ExpectedDiskUsage {
361+
fn from_config(node_config: &NodeConfig) -> Self {
362+
let mut expected = Self::default();
363+
if node_config.is_service_enabled(QuickwitService::Indexer) {
364+
expected.max_queue_disk_usage =
365+
Some(node_config.ingest_api_config.max_queue_disk_usage);
366+
expected.split_store_max_num_bytes =
367+
Some(node_config.indexer_config.split_store_max_num_bytes);
368+
}
369+
if node_config.is_service_enabled(QuickwitService::Searcher) {
370+
expected.split_cache = node_config
371+
.searcher_config
372+
.split_cache
373+
.map(|limits| limits.max_num_bytes);
374+
}
375+
expected
376+
}
377+
378+
fn total(&self) -> ByteSize {
379+
self.split_store_max_num_bytes.unwrap_or_default()
380+
+ self.max_queue_disk_usage.unwrap_or_default()
381+
+ self.split_cache.unwrap_or_default()
382+
}
383+
}
384+
385+
fn validate_disk_usage(node_config: &NodeConfig) {
386+
if let Some(volume_size) = get_disk_size(&node_config.data_dir_path) {
387+
let expected_disk_usage = ExpectedDiskUsage::from_config(node_config);
388+
if expected_disk_usage.total() > volume_size {
389+
warn!(
390+
?volume_size,
391+
?expected_disk_usage,
392+
"data dir volume too small"
393+
);
394+
}
395+
}
396+
}
397+
344398
#[cfg(test)]
345399
impl Default for NodeConfigBuilder {
346400
fn default() -> Self {

0 commit comments

Comments
 (0)