[CI][Benchmarks] Archive cutoff date #19514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open · wants to merge 2 commits into sycl
4 changes: 1 addition & 3 deletions devops/scripts/benchmarks/CONTRIB.md
@@ -42,11 +42,9 @@ The suite is structured around three main components: Suites, Benchmarks, and Results
* **Fields (set by Benchmark):**
* `label`: Unique identifier for this *specific result type* within the benchmark instance (e.g., "Submit In Order Time"). Ideally contains `benchmark.name()`.
* `value`: The measured numerical result (float).
* `unit`: The unit of the value (string, e.g., "μs", "GB/s", "token/s").
* `command`: The command list used to run the benchmark (`list[str]`).
* `env`: Environment variables used (`dict[str, str]`).
* `stdout`: Full standard output of the benchmark run (string).
* `passed`: Boolean indicating if verification passed (default: `True`).
* `unit`: The unit of the value (string, e.g., "μs", "GB/s", "token/s").
* `stddev`: Standard deviation, if calculated by the benchmark itself (float, default: 0.0).
* `git_url`, `git_hash`: Git info for the benchmark's source code (string).
* **Fields (set by Framework):**
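
For orientation, a minimal dataclass-style sketch of the `Result` fields documented above. This is a hypothetical illustration only: names and defaults are taken from the list, and the real class in the suite's sources may differ in shape and in which fields survive this change:

```python
# Hypothetical sketch -- mirrors the documented fields, not the actual class.
from dataclasses import dataclass

@dataclass
class Result:
    label: str            # unique result-type id, e.g. "Submit In Order Time"
    value: float          # measured numerical result
    unit: str             # e.g. "μs", "GB/s", "token/s"
    passed: bool = True   # whether verification passed
    stddev: float = 0.0   # standard deviation, if computed by the benchmark itself
    git_url: str = ""     # git info for the benchmark's source code
    git_hash: str = ""
```
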
47 changes: 43 additions & 4 deletions devops/scripts/benchmarks/history.py
@@ -31,7 +31,12 @@ def load_result(self, file_path: Path) -> BenchmarkRun:
else:
return None

def load(self, n: int):
def load(self):
"""
Load benchmark runs from the results directory.
This method skips result files older than the archive cutoff dates,
sorts the remaining files by timestamp (newest first), and stores the
loaded runs in self.runs.
"""
results_dir = Path(self.dir) / "results"
if not results_dir.exists() or not results_dir.is_dir():
log.warning(
@@ -42,7 +42,7 @@ def load(self, n: int):
# Get all JSON files in the results directory
benchmark_files = list(results_dir.glob("*.json"))

# Extract timestamp and sort files by it
# Extract timestamp
def extract_timestamp(file_path: Path) -> str:
try:
# Assumes results are stored as <name>_YYYYMMDD_HHMMSS.json
@@ -51,11 +51,45 @@ def extract_timestamp(file_path: Path) -> str:
except IndexError:
return ""

baseline_drop_after = options.archive_baseline_days * 3
pr_drop_after = options.archive_pr_days * 3
baseline_cutoff_date = datetime.now(timezone.utc) - timedelta(
days=baseline_drop_after
)
log.debug(f"Baseline cutoff date: {baseline_cutoff_date}")
pr_cutoff_date = datetime.now(timezone.utc) - timedelta(days=pr_drop_after)
log.debug(f"PR cutoff date: {pr_cutoff_date}")

# Filter out files older than three times the configured archiving window
def is_file_too_old(file_path: Path) -> bool:
try:
if file_path.stem.startswith("Baseline_"):
cutoff_date = baseline_cutoff_date
else:
cutoff_date = pr_cutoff_date

timestamp_str = extract_timestamp(file_path)
if not timestamp_str:
return False

file_timestamp = datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
# Add timezone info for proper comparison
file_timestamp = file_timestamp.replace(tzinfo=timezone.utc)
return file_timestamp < cutoff_date
except Exception as e:
log.warning(f"Error processing timestamp for {file_path.name}: {e}")
return False

benchmark_files = [
file for file in benchmark_files if not is_file_too_old(file)
]

# Sort files by timestamp
benchmark_files.sort(key=extract_timestamp, reverse=True)

# Load the first n benchmark files
# Load benchmark files
benchmark_runs = []
for file_path in benchmark_files[:n]:
for file_path in benchmark_files:
benchmark_run = self.load_result(file_path)
if benchmark_run:
benchmark_runs.append(benchmark_run)
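
To make the cutoff arithmetic concrete, here is a standalone sketch of the filtering above, assuming the default options (`archive_baseline_days = 30`, `archive_pr_days = 7`) and a made-up file name:

```python
from datetime import datetime, timedelta, timezone

# Cutoffs are three times the archiving windows: 90 days for Baseline_* runs,
# 21 days for all other (PR/dev) runs.
baseline_cutoff = datetime.now(timezone.utc) - timedelta(days=30 * 3)
pr_cutoff = datetime.now(timezone.utc) - timedelta(days=7 * 3)

stem = "Baseline_20240101_120000"  # hypothetical <name>_YYYYMMDD_HHMMSS stem
timestamp_str = "_".join(stem.split("_")[-2:])  # -> "20240101_120000"
run_date = datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc)

cutoff = baseline_cutoff if stem.startswith("Baseline_") else pr_cutoff
print(run_date < cutoff)  # older than the cutoff -> load() skips the file
```
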
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/main.py
@@ -293,7 +293,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
# limit how many files we load.
# should this be configurable?
log.info(f"Loading benchmark history from {results_dir}...")
history.load(1000)
history.load()
log.info(f"Loaded {len(history.runs)} benchmark runs.")

if compare_names:
4 changes: 3 additions & 1 deletion devops/scripts/benchmarks/options.py
@@ -90,7 +90,9 @@ class Options:
git_commit_override: str = None
# Archiving settings
# Archived runs are stored separately from the main dataset but are still accessible
# via the HTML UI when "Include archived runs" is enabled
# via the HTML UI when "Include archived runs" is enabled.
# Archived runs older than 3 times the specified days are not included in the dashboard,
# i.e. when archiving data older than 7 days, runs older than 21 days are not included.
archive_baseline_days: int = 30 # Archive Baseline_* runs after 30 days
archive_pr_days: int = 7 # Archive other (PR/dev) runs after 7 days

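
Put differently, the two settings above double as hard cutoffs for `history.load()`. A hypothetical illustration of the arithmetic (only the two option names are real):

```python
archive_baseline_days = 30  # Baseline_* runs are archived after 30 days
archive_pr_days = 7         # other (PR/dev) runs are archived after 7 days

# history.py stops loading runs after three times the archive window:
print(archive_baseline_days * 3)  # 90 -> Baseline_* runs older than this are dropped
print(archive_pr_days * 3)        # 21 -> PR/dev runs older than this are dropped
```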