[CI][Benchmarks] Archive cutoff date #19514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open · wants to merge 2 commits into sycl
4 changes: 1 addition & 3 deletions devops/scripts/benchmarks/CONTRIB.md
@@ -42,11 +42,9 @@ The suite is structured around three main components: Suites, Benchmarks, and Results
* **Fields (set by Benchmark):**
* `label`: Unique identifier for this *specific result type* within the benchmark instance (e.g., "Submit In Order Time"). Ideally contains `benchmark.name()`.
* `value`: The measured numerical result (float).
* `unit`: The unit of the value (string, e.g., "μs", "GB/s", "token/s").
* `command`: The command list used to run the benchmark (`list[str]`).
* `env`: Environment variables used (`dict[str, str]`).
* `stdout`: Full standard output of the benchmark run (string).
* `passed`: Boolean indicating if verification passed (default: `True`).
* `unit`: The unit of the value (string, e.g., "μs", "GB/s", "token/s").
* `stddev`: Standard deviation, if calculated by the benchmark itself (float, default: 0.0).
* `git_url`, `git_hash`: Git info for the benchmark's source code (string).
* **Fields (set by Framework):**
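
For orientation, a minimal dataclass-style sketch of the `Result` fields documented above. This is a hypothetical illustration only: names and defaults are taken from the list, and the real class in the suite's sources may differ in shape and in which fields survive this change:

```python
# Hypothetical sketch -- mirrors the documented fields, not the actual class.
from dataclasses import dataclass

@dataclass
class Result:
    label: str            # unique result-type id, e.g. "Submit In Order Time"
    value: float          # measured numerical result
    unit: str             # e.g. "μs", "GB/s", "token/s"
    passed: bool = True   # whether verification passed
    stddev: float = 0.0   # standard deviation, if computed by the benchmark itself
    git_url: str = ""     # git info for the benchmark's source code
    git_hash: str = ""
```
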
47 changes: 43 additions & 4 deletions devops/scripts/benchmarks/history.py
@@ -31,7 +31,12 @@ def load_result(self, file_path: Path) -> BenchmarkRun:
else:
return None

def load(self, n: int):
def load(self):
"""
Load benchmark runs from the results directory.
This method skips result files older than the archive cutoff dates,
sorts the remaining files by timestamp (newest first), and stores the
loaded runs in self.runs.
"""
results_dir = Path(self.dir) / "results"
if not results_dir.exists() or not results_dir.is_dir():
log.warning(
@@ -42,7 +42,7 @@ def load(self, n: int):
# Get all JSON files in the results directory
benchmark_files = list(results_dir.glob("*.json"))

# Extract timestamp and sort files by it
# Extract timestamp
def extract_timestamp(file_path: Path) -> str:
try:
# Assumes results are stored as <name>_YYYYMMDD_HHMMSS.json
@@ -51,11 +51,45 @@ def extract_timestamp(file_path: Path) -> str:
except IndexError:
return ""

baseline_drop_after = options.archive_baseline_days * 3
pr_drop_after = options.archive_pr_days * 3
baseline_cutoff_date = datetime.now(timezone.utc) - timedelta(
days=baseline_drop_after
)
log.debug(f"Baseline cutoff date: {baseline_cutoff_date}")
pr_cutoff_date = datetime.now(timezone.utc) - timedelta(days=pr_drop_after)
log.debug(f"PR cutoff date: {pr_cutoff_date}")

# Filter out files older than three times the configured archiving window
def is_file_too_old(file_path: Path) -> bool:
try:
if file_path.stem.startswith("Baseline_"):
cutoff_date = baseline_cutoff_date
else:
cutoff_date = pr_cutoff_date

timestamp_str = extract_timestamp(file_path)
if not timestamp_str:
return False

file_timestamp = datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
# Add timezone info for proper comparison
file_timestamp = file_timestamp.replace(tzinfo=timezone.utc)
return file_timestamp < cutoff_date
except Exception as e:
log.warning(f"Error processing timestamp for {file_path.name}: {e}")
return False

benchmark_files = [
file for file in benchmark_files if not is_file_too_old(file)
]

# Sort files by timestamp
benchmark_files.sort(key=extract_timestamp, reverse=True)

# Load the first n benchmark files
# Load benchmark files
benchmark_runs = []
for file_path in benchmark_files[:n]:
for file_path in benchmark_files:
benchmark_run = self.load_result(file_path)
if benchmark_run:
benchmark_runs.append(benchmark_run)
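
To make the cutoff arithmetic concrete, here is a standalone sketch of the filtering above, assuming the default options (`archive_baseline_days = 30`, `archive_pr_days = 7`) and a made-up file name:

```python
from datetime import datetime, timedelta, timezone

# Cutoffs are three times the archiving windows: 90 days for Baseline_* runs,
# 21 days for all other (PR/dev) runs.
baseline_cutoff = datetime.now(timezone.utc) - timedelta(days=30 * 3)
pr_cutoff = datetime.now(timezone.utc) - timedelta(days=7 * 3)

stem = "Baseline_20240101_120000"  # hypothetical <name>_YYYYMMDD_HHMMSS stem
timestamp_str = "_".join(stem.split("_")[-2:])  # -> "20240101_120000"
run_date = datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc)

cutoff = baseline_cutoff if stem.startswith("Baseline_") else pr_cutoff
print(run_date < cutoff)  # older than the cutoff -> load() skips the file
```
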
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/main.py
@@ -293,7 +293,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
# limit how many files we load.
# should this be configurable?
log.info(f"Loading benchmark history from {results_dir}...")
history.load(1000)
history.load()
log.info(f"Loaded {len(history.runs)} benchmark runs.")

if compare_names:
4 changes: 3 additions & 1 deletion devops/scripts/benchmarks/options.py
@@ -90,7 +90,9 @@ class Options:
git_commit_override: str = None
# Archiving settings
# Archived runs are stored separately from the main dataset but are still accessible
# via the HTML UI when "Include archived runs" is enabled
# via the HTML UI when "Include archived runs" is enabled.
# Archived runs older than 3 times the specified days are not included in the dashboard,
# i.e. when archiving data older than 7 days, runs older than 21 days are not included.
archive_baseline_days: int = 30 # Archive Baseline_* runs after 30 days
archive_pr_days: int = 7 # Archive other (PR/dev) runs after 7 days

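
Put differently, the two settings above double as hard cutoffs for `history.load()`. A hypothetical illustration of the arithmetic (only the two option names are real):

```python
archive_baseline_days = 30  # Baseline_* runs are archived after 30 days
archive_pr_days = 7         # other (PR/dev) runs are archived after 7 days

# history.py stops loading runs after three times the archive window:
print(archive_baseline_days * 3)  # 90 -> Baseline_* runs older than this are dropped
print(archive_pr_days * 3)        # 21 -> PR/dev runs older than this are dropped
```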