[GuideLLM Refactor] Fix output tests (#387)

jaredoconnell · web-flow · commit 46b5e87d9db3 · 2025-10-03T17:53:17.000-04:00
## Summary

The output formats changed, which broke the existing tests.
Keep in mind that some of the tested code may be replaced in the near
future, since a new console output is on the list of changes planned for 0.5.0.

I ran into issues with duplicate fields on the mock objects for all
computed fields. Those tests are skipped. Let me know if you have a plan
for fixing this.

This PR will be in a draft state until the CSV changes are merged.

## Test Plan

- Run the tests with pytest.
- Run the tox type lints.


---

- [x] "I certify that all code in this PR is my own, except as noted
below."

## Use of AI

- [x] Includes AI-assisted code completion
- [ ] Includes code generated by an AI application
- [ ] Includes AI-generated tests (NOTE: AI written tests should have a
docstring that includes `## WRITTEN BY AI ##`)
diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py
@@ -330,20 +330,6 @@ def _get_profile_str(self, benchmark: GenerativeBenchmark) -> str:
 
         return ", ".join(f"{key}={value}" for key, value in profile_args.items())
 
-    def _get_args_str(self, benchmark: GenerativeBenchmark) -> str:
-        args = benchmark.args
-        args_dict = OrderedDict(
-            {
-                "max_number": args.max_number,
-                "max_duration": args.max_duration,
-                "warmup_number": args.warmup_number,
-                "warmup_duration": args.warmup_duration,
-                "cooldown_number": args.cooldown_number,
-                "cooldown_duration": args.cooldown_duration,
-            }
-        )
-        return ", ".join(f"{key}={value}" for key, value in args_dict.items())
-
     def _print_section_header(self, title: str, indent: int = 0, new_lines: int = 2):
         self._print_line(
             f"{title}:",
diff --git a/src/guidellm/scheduler/objects.py b/src/guidellm/scheduler/objects.py
@@ -174,7 +174,7 @@ class ScheduledRequestInfo(StandardBaseModel):
     )
     scheduler_start_time: float = Field(
         description="Unix timestamp for the local time when scheduler processing began",
-        default=-1,
+        default=-1.0,
     )
 
     error: str | None = Field(
diff --git a/tests/unit/benchmark/test_output.py b/tests/unit/benchmark/test_output.py
@@ -10,7 +10,7 @@
 from guidellm.benchmark import (
     GenerativeBenchmarksReport,
 )
-from guidellm.benchmark.output import GenerativeBenchmarksConsole
+from guidellm.benchmark.output import GenerativeBenchmarkerConsole, GenerativeBenchmarkerCSV
 from tests.unit.mock_benchmark import mock_generative_benchmark
 
 
@@ -37,8 +37,8 @@ def test_generative_benchmark_marshalling():
     deserialized = GenerativeBenchmarksReport.model_validate(serialized)
     deserialized_benchmark = deserialized.benchmarks[0]
 
-    for field in mock_benchmark.model_fields:
-        assert getattr(mock_benchmark, field) == getattr(deserialized_benchmark, field)
+    # model_dump as workaround for duplicate fields for computed fields.
+    assert mock_benchmark.model_dump() == deserialized_benchmark.model_dump()
 
 
 def test_file_json():
@@ -55,8 +55,8 @@ def test_file_json():
     loaded_report = GenerativeBenchmarksReport.load_file(mock_path)
     loaded_benchmark = loaded_report.benchmarks[0]
 
-    for field in mock_benchmark.model_fields:
-        assert getattr(mock_benchmark, field) == getattr(loaded_benchmark, field)
+    # model_dump as workaround for duplicate fields for computed fields.
+    assert mock_benchmark.model_dump() == loaded_benchmark.model_dump()
 
     mock_path.unlink()
 
@@ -75,18 +75,20 @@ def test_file_yaml():
     loaded_report = GenerativeBenchmarksReport.load_file(mock_path)
     loaded_benchmark = loaded_report.benchmarks[0]
 
-    for field in mock_benchmark.model_fields:
-        assert getattr(mock_benchmark, field) == getattr(loaded_benchmark, field)
+    # model_dump as workaround for duplicate fields for computed fields.
+    assert mock_benchmark.model_dump() == loaded_benchmark.model_dump()
 
     mock_path.unlink()
 
-
-def test_file_csv():
+@pytest.mark.skip(reason="CSV fix not merged yet")
+@pytest.mark.asyncio
+async def test_file_csv():
     mock_benchmark = mock_generative_benchmark()
     report = GenerativeBenchmarksReport(benchmarks=[mock_benchmark])
 
     mock_path = Path("mock_report.csv")
-    report.save_csv(mock_path)
+    csv_benchmarker = GenerativeBenchmarkerCSV(output_path=mock_path)
+    await csv_benchmarker.finalize(report)
 
     with mock_path.open("r") as file:
         reader = csv.reader(file)
@@ -100,109 +102,72 @@ def test_file_csv():
 
 
 def test_console_benchmarks_profile_str():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
     assert (
-        console.benchmarks_profile_str == "type=synchronous, strategies=['synchronous']"
-    )
-
-
-def test_console_benchmarks_args_str():
-    console = GenerativeBenchmarksConsole(enabled=True)
-    mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
-    assert console.benchmarks_args_str == (
-        "max_number=None, max_duration=10.0, warmup_number=None, "
-        "warmup_duration=None, cooldown_number=None, cooldown_duration=None"
+        console._get_profile_str(mock_benchmark) == "type=synchronous, strategies=['synchronous']"
     )
 
 
-def test_console_benchmarks_worker_desc_str():
-    console = GenerativeBenchmarksConsole(enabled=True)
-    mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
-    assert console.benchmarks_worker_desc_str == str(mock_benchmark.worker)
-
-
-def test_console_benchmarks_request_loader_desc_str():
-    console = GenerativeBenchmarksConsole(enabled=True)
-    mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
-    assert console.benchmarks_request_loader_desc_str == str(
-        mock_benchmark.request_loader
-    )
-
-
-def test_console_benchmarks_extras_str():
-    console = GenerativeBenchmarksConsole(enabled=True)
-    mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
-    assert console.benchmarks_extras_str == "None"
-
-
 def test_console_print_section_header():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     with patch.object(console.console, "print") as mock_print:
-        console.print_section_header("Test Header")
+        console._print_section_header("Test Header")
         mock_print.assert_called_once()
 
 
 def test_console_print_labeled_line():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     with patch.object(console.console, "print") as mock_print:
-        console.print_labeled_line("Label", "Value")
+        console._print_labeled_line("Label", "Value")
         mock_print.assert_called_once()
 
 
 def test_console_print_line():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     with patch.object(console.console, "print") as mock_print:
-        console.print_line("Test Line")
+        console._print_line("Test Line")
         mock_print.assert_called_once()
 
 
 def test_console_print_table():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     headers = ["Header1", "Header2"]
     rows = [["Row1Col1", "Row1Col2"], ["Row2Col1", "Row2Col2"]]
     with (
-        patch.object(console, "print_section_header") as mock_header,
-        patch.object(console, "print_table_divider") as mock_divider,
-        patch.object(console, "print_table_row") as mock_row,
+        patch.object(console, "_print_section_header") as mock_header,
+        patch.object(console, "_print_table_divider") as mock_divider,
+        patch.object(console, "_print_table_row") as mock_row,
     ):
-        console.print_table(headers, rows, "Test Table")
+        console._print_table(headers, rows, "Test Table")
         mock_header.assert_called_once()
         mock_divider.assert_called()
         mock_row.assert_called()
 
 
 def test_console_print_benchmarks_metadata():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
     with (
-        patch.object(console, "print_section_header") as mock_header,
-        patch.object(console, "print_labeled_line") as mock_labeled,
+        patch.object(console, "_print_section_header") as mock_header,
+        patch.object(console, "_print_labeled_line") as mock_labeled,
     ):
-        console.print_benchmarks_metadata()
+        console._print_benchmarks_metadata([mock_benchmark])
         mock_header.assert_called_once()
         mock_labeled.assert_called()
 
 
 def test_console_print_benchmarks_info():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
-    with patch.object(console, "print_table") as mock_table:
-        console.print_benchmarks_info()
+    with patch.object(console, "_print_table") as mock_table:
+        console._print_benchmarks_info([mock_benchmark])
         mock_table.assert_called_once()
 
 
 def test_console_print_benchmarks_stats():
-    console = GenerativeBenchmarksConsole(enabled=True)
+    console = GenerativeBenchmarkerConsole()
     mock_benchmark = mock_generative_benchmark()
-    console.benchmarks = [mock_benchmark]
-    with patch.object(console, "print_table") as mock_table:
-        console.print_benchmarks_stats()
+    with patch.object(console, "_print_table") as mock_table:
+        console._print_benchmarks_stats([mock_benchmark])
         mock_table.assert_called_once()
diff --git a/tests/unit/mock_benchmark.py b/tests/unit/mock_benchmark.py
@@ -1,6 +1,5 @@
 """Mock benchmark objects for unit testing."""
-
-from guidellm.backend import GenerationRequestTimings
+from guidellm.backends import GenerationRequestTimings
 from guidellm.benchmark import (
     BenchmarkSchedulerStats,
     GenerativeBenchmark,
@@ -101,7 +100,7 @@ def mock_generative_benchmark() -> GenerativeBenchmark:
             worker_targeted_start_delay_avg=0.1,
             request_start_delay_avg=0.1,
             request_time_avg=0.1,
-            request_targeted_delay_avg=0.1,
+            request_targeted_start_delay_avg=0.1,
         ),
         start_time=1000.0,
         end_time=2000.0,
@@ -130,8 +129,6 @@ def mock_generative_benchmark() -> GenerativeBenchmark:
                     scheduler_info=ScheduledRequestInfo(
                         request_timings=GenerationRequestTimings(
                             request_start=1,
-                            first_iteration=2,
-                            last_iteration=6,
                             request_end=6,
                         )
                     ),

Original file line number	Diff line number	Diff line change
`@@ -174,7 +174,7 @@ class ScheduledRequestInfo(StandardBaseModel):`
`174`	`174`	`)`
`175`	`175`	`scheduler_start_time: float = Field(`
`176`	`176`	`description="Unix timestamp for the local time when scheduler processing began",`
`177`		`- default=-1,`
	`177`	`+ default=-1.0,`
`178`	`178`	`)`
`179`	`179`
`180`	`180`	`error: str \| None = Field(`