[CI][Benchmarks] Limit dashboard data growth

PatKamin · PatKamin · commit cf4a9af33d4d · 2025-07-18T11:56:33.000Z
Load and parse only results newer than a cutoff age, which is
three times the configured archiving period.

Archived runs older than three times the configured archiving period are excluded from the dashboard,
e.g. when data older than 7 days is archived, runs older than 21 days are excluded.
diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
@@ -31,7 +31,12 @@ def load_result(self, file_path: Path) -> BenchmarkRun:
         else:
             return None
 
-    def load(self, n: int):
+    def load(self):
+        """ 
+        Load benchmark runs from the results directory.
+        Files older than three times the configured archiving period are skipped;
+        the remaining files are sorted by timestamp and the results stored in self.runs.
+        """
         results_dir = Path(self.dir) / "results"
         if not results_dir.exists() or not results_dir.is_dir():
             log.warning(
@@ -42,20 +47,52 @@ def load(self, n: int):
         # Get all JSON files in the results directory
         benchmark_files = list(results_dir.glob("*.json"))
 
-        # Extract timestamp and sort files by it
+        # Extract timestamp
         def extract_timestamp(file_path: Path) -> str:
             try:
                 # Assumes results are stored as <name>_YYYYMMDD_HHMMSS.json
                 ts = file_path.stem[-len("YYYYMMDD_HHMMSS") :]
                 return ts if Validate.timestamp(ts) else ""
             except IndexError:
                 return ""
-
+        
+        baseline_drop_after = options.archive_baseline_days * 3
+        pr_drop_after = options.archive_pr_days * 3
+        baseline_cutoff_date = datetime.now(timezone.utc) - timedelta(days=baseline_drop_after)
+        log.debug(f"Baseline cutoff date: {baseline_cutoff_date}")
+        pr_cutoff_date = datetime.now(timezone.utc) - timedelta(days=pr_drop_after)
+        log.debug(f"PR cutoff date: {pr_cutoff_date}")
+
+        # Filter out files older than three times the configured archiving period
+        def is_file_too_old(file_path: Path) -> bool:
+            try:
+                if file_path.stem.startswith("Baseline_"):
+                    cutoff_date = baseline_cutoff_date
+                else:
+                    cutoff_date = pr_cutoff_date
+                
+                timestamp_str = extract_timestamp(file_path)
+                if not timestamp_str:
+                    return False
+                
+                file_timestamp = datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
+                # Add timezone info for proper comparison
+                file_timestamp = file_timestamp.replace(tzinfo=timezone.utc)
+                return file_timestamp < cutoff_date
+            except Exception as e:
+                log.warning(f"Error processing timestamp for {file_path.name}: {e}")
+                return False
+        
+        benchmark_files = [
+            file for file in benchmark_files if not is_file_too_old(file)
+        ]
+
+        # Sort files by timestamp
         benchmark_files.sort(key=extract_timestamp, reverse=True)
 
-        # Load the first n benchmark files
+        # Load benchmark files
         benchmark_runs = []
-        for file_path in benchmark_files[:n]:
+        for file_path in benchmark_files:
             benchmark_run = self.load_result(file_path)
             if benchmark_run:
                 benchmark_runs.append(benchmark_run)
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
@@ -293,7 +293,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
     # limit how many files we load.
     # should this be configurable?
     log.info(f"Loading benchmark history from {results_dir}...")
-    history.load(1000)
+    history.load()
     log.info(f"Loaded {len(history.runs)} benchmark runs.")
 
     if compare_names:
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
@@ -90,7 +90,9 @@ class Options:
     git_commit_override: str = None
     # Archiving settings
     # Archived runs are stored separately from the main dataset but are still accessible
-    # via the HTML UI when "Include archived runs" is enabled
+    # via the HTML UI when "Include archived runs" is enabled.
+    # Archived runs older than 3 times the specified number of days are excluded from the dashboard,
+    # e.g. when data older than 7 days is archived, runs older than 21 days are excluded.
     archive_baseline_days: int = 30  # Archive Baseline_* runs after 30 days
     archive_pr_days: int = 7  # Archive other (PR/dev) runs after 7 days