4 changes: 1 addition & 3 deletions bin/config.py
@@ -72,9 +72,7 @@
*KNOWN_TESTCASE_EXTENSIONS,
*KNOWN_SAMPLE_TESTCASE_EXTENSIONS,
".interaction",
".hint",
".desc",
#'.args',
".yaml",
]

KNOWN_DATA_EXTENSIONS: Final[Sequence[str]] = [
23 changes: 14 additions & 9 deletions bin/export.py
@@ -213,7 +213,7 @@ def add_testcase(in_file: Path) -> None:
# substitute constants.
if problem.settings.constants:
constants_supported = [
"data/**/testdata.yaml",
"data/**/test_group.yaml",
f"{InputValidator.source_dir}/**/*",
f"{AnswerValidator.source_dir}/**/*",
f"{OutputValidator.source_dir}/**/*",
@@ -298,7 +298,7 @@ def add_testcase(in_file: Path) -> None:
ryaml_filter(limits, "time_limit")
# validator_flags
validator_flags = " ".join(
problem.get_testdata_yaml(
problem.get_test_case_yaml(
problem.path / "data",
OutputValidator.args_key,
PrintBar("Getting validator_flags for legacy export"),
@@ -325,13 +325,6 @@ def add_testcase(in_file: Path) -> None:
else:
util.error(f"{f}: no name set for language {lang}.")

# rename output_validator dir
if (export_dir / OutputValidator.source_dir).exists():
(export_dir / "output_validators").mkdir(parents=True)
(export_dir / OutputValidator.source_dir).rename(
export_dir / "output_validators" / OutputValidator.source_dir
)

# rename statement dirs
if (export_dir / "statement").exists():
(export_dir / "statement").rename(export_dir / "problem_statement")
@@ -352,6 +345,18 @@ def add_testcase(in_file: Path) -> None:
add_file(out, f)
shutil.rmtree(export_dir / d)

# rename output_validator dir
if (export_dir / OutputValidator.source_dir).exists():
(export_dir / "output_validators").mkdir(parents=True)
(export_dir / OutputValidator.source_dir).rename(
export_dir / "output_validators" / OutputValidator.source_dir
)

# rename test_group.yaml back to testdata.yaml
for f in (export_dir / "data").rglob("test_group.yaml"):
f.rename(f.with_name("testdata.yaml"))
# TODO potentially, some keys also need to be renamed, but we don't use this often enough for this to matter (I hope)

# handle yaml updates
yaml_path.unlink()
write_yaml(yaml_data, yaml_path)
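
For context, not part of the diff: the two relocated hunks above downgrade the export copy to the legacy layout only after the statement and attachment directories have been handled. A minimal standalone sketch of that downgrade step, with a hypothetical helper name and assuming `output_validator` is the value of `OutputValidator.source_dir`:

from pathlib import Path

def downgrade_export_to_legacy_layout(export_dir: Path, validator_dir: str = "output_validator") -> None:
    # Hypothetical helper, for illustration only; mirrors the relocated hunks above.
    # Legacy packages expect output_validators/<name>/ instead of output_validator/.
    if (export_dir / validator_dir).exists():
        (export_dir / "output_validators").mkdir(parents=True)
        (export_dir / validator_dir).rename(export_dir / "output_validators" / validator_dir)
    # Legacy packages expect testdata.yaml instead of test_group.yaml.
    for f in (export_dir / "data").rglob("test_group.yaml"):
        f.rename(f.with_name("testdata.yaml"))

Both renames touch only the temporary export copy, so the problem sources keep the new names.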
183 changes: 99 additions & 84 deletions bin/generate.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion bin/interactive.py
@@ -56,7 +56,7 @@ def get_validator_command():
run.testcase.ans_path.resolve(),
run.feedbackdir.resolve(),
]
+ run.testcase.testdata_yaml_args(
+ run.testcase.test_case_yaml_args(
output_validator,
bar or PrintBar("Run interactive test case"),
)
197 changes: 108 additions & 89 deletions bin/problem.py
@@ -283,7 +283,9 @@ def __init__(
self.limits = ProblemLimits(parse_setting(yaml_data, "limits", {}), problem, self)

parse_deprecated_setting(
yaml_data, "validator_flags", f"{validate.OutputValidator.args_key}' in 'testdata.yaml"
yaml_data,
"validator_flags",
f"{validate.OutputValidator.args_key}' in 'test_group.yaml",
)

self.keywords: list[str] = parse_optional_list_setting(yaml_data, "keywords", str)
@@ -362,9 +364,9 @@ def __init__(self, path: Path, tmpdir: Path, label: Optional[str] = None):
self._programs = dict[Path, "Program"]()
self._program_callbacks = dict[Path, list[Callable[["Program"], None]]]()
# Dictionary from path to parsed file contents.
# TODO #102: Add type for testdata.yaml (typed Namespace?)
self._testdata_yamls = dict[Path, dict[str, Any]]()
self._testdata_lock = threading.Lock()
# TODO #102: Add type for test_group.yaml (typed Namespace?)
self._test_case_yamls = dict[Path, dict[str, Any]]()
self._test_group_lock = threading.Lock()

# The label for the problem: A, B, A1, A2, X, ...
self.label = label
@@ -457,105 +459,102 @@ def _read_settings(self):
self.multi_pass: bool = self.settings.multi_pass
self.custom_output: bool = self.settings.custom_output

# TODO #102 move to TestData class
def _parse_testdata_yaml(p, path, bar):
# TODO #102 move to a new TestGroup class
def _parse_test_case_and_groups_yaml(p, path: Path, bar: BAR_TYPE):
assert path.is_relative_to(p.path / "data")
for dir in [path] + list(path.parents):
for f in [path] + list(path.parents):
# Do not go above the data directory.
if dir == p.path:
if f == p.path:
return

f = dir / "testdata.yaml"
if not f.is_file() or f in p._testdata_yamls:
continue
with p._testdata_lock:
if f not in p._testdata_yamls:
raw = substitute(
f.read_text(),
p.settings.constants,
pattern=config.CONSTANT_SUBSTITUTE_REGEX,
)
p._testdata_yamls[f] = flags = parse_yaml(raw, path=f, plain=True)
if f.is_dir():
f = f / "test_group.yaml"
with p._test_group_lock:
if not f.is_file() or f in p._test_case_yamls:
continue
raw = substitute(
f.read_text(),
p.settings.constants,
pattern=config.CONSTANT_SUBSTITUTE_REGEX,
)
p._test_case_yamls[f] = flags = parse_yaml(raw, path=f, plain=True)

parse_deprecated_setting(
flags, "output_validator_flags", validate.OutputValidator.args_key
)
parse_deprecated_setting(
flags, "input_validator_flags", validate.InputValidator.args_key
)
parse_deprecated_setting(
flags, "output_validator_flags", validate.OutputValidator.args_key
)
parse_deprecated_setting(
flags, "input_validator_flags", validate.InputValidator.args_key
)

# Verify testdata.yaml
for k in flags:
match k:
case (
validate.OutputValidator.args_key
| validate.AnswerValidator.args_key
| visualize.TestCaseVisualizer.args_key
| visualize.OutputVisualizer.args_key
):
if not isinstance(flags[k], list):
bar.error(
f"{k} must be a list of strings",
resume=True,
print_item=False,
)
case validate.InputValidator.args_key:
if not isinstance(flags[k], (list, dict)):
bar.error(
f"{k} must be list or map",
resume=True,
print_item=False,
)
if isinstance(flags[k], dict):
input_validator_names = set(
val.name for val in p.validators(validate.InputValidator)
)
for name in set(flags[k]) - input_validator_names:
bar.warn(
f"Unknown input validator {name}; expected {input_validator_names}",
print_item=False,
)
case (
"args"
| "description"
| "full_feedback"
| "hint"
| "scoring"
| "static_validation"
):
bar.warn(
f"{k} in testdata.yaml not implemented in BAPCtools",
print_item=False,
# Use variable kwargs so the type checker does not complain when passing them to a PrintBar (nothing happens in that case anyway)
bar_kwargs = {"resume": True, "print_item": False}

# Verify test_group.yaml
for k in flags:
match k:
case (
validate.OutputValidator.args_key
| validate.AnswerValidator.args_key
| visualize.TestCaseVisualizer.args_key
| visualize.OutputVisualizer.args_key
):
if not isinstance(flags[k], list):
bar.error(
f"{k} must be a list of strings",
None,
**bar_kwargs,
)
case _:
path = f.relative_to(p.path / "data")
bar.warn(f'Unknown key "{k}" in {path}', print_item=False)
# Do not go above the data directory.
if dir == p.path / "data":
break

def get_testdata_yaml(
case validate.InputValidator.args_key:
if not isinstance(flags[k], (list, dict)):
bar.error(
f"{k} must be list or map",
None,
**bar_kwargs,
)
if isinstance(flags[k], dict):
input_validator_names = set(
val.name for val in p.validators(validate.InputValidator)
)
for name in set(flags[k]) - input_validator_names:
bar.warn(
f"Unknown input validator {name}; expected {input_validator_names}",
None,
**bar_kwargs,
)
case "description" | "hint":
pass # We don't do anything with hint or description in BAPCtools, but no need to warn about this
case "args" | "full_feedback" | "scoring" | "static_validation":
bar.warn(
f"{k} in test_group.yaml not implemented in BAPCtools",
None,
**bar_kwargs,
)
case _:
path = f.relative_to(p.path / "data")
bar.warn(f'Unknown key "{k}" in {path}', None, **bar_kwargs)

def get_test_case_yaml(
p,
path: Path,
key: str,
bar: BAR_TYPE,
name: Optional[str] = None,
) -> list[str]:
"""
Find the testdata flags applying at the given path for the given key.
If necessary, walk up from `path` looking for the first testdata.yaml file that applies,
Find the value of the given test_group.yaml key applying at the given path.
If necessary, walk up from `path` looking for the first test_group.yaml file that applies.

Side effects: parses and caches the file.

Arguments
---------
path: absolute path (a file or a directory)
key: The testdata.yaml key to look for (TODO: 'grading' is not yet implemented)
key: The test_group.yaml key to look for (TODO: 'grading' is not yet implemented)
name: If key == 'input_validator_args', optionally the name of the input validator.

Returns:
--------
A list of string arguments, which is empty if no testdata.yaml is found.
A list of string arguments, which is empty if no test_group.yaml is found.
TODO: when 'grading' is supported, it also can return dict
"""
known_args_keys = [
@@ -572,19 +571,21 @@ def get_testdata_yaml(
f"Only input validators support flags by validator name, got {key} and {name}"
)

# parse and cache testdata.yaml
p._parse_testdata_yaml(path, bar)
# parse and cache <test_case>.yaml and test_group.yaml
path = path.with_suffix(".yaml")
p._parse_test_case_and_groups_yaml(path, bar)

# extract the flags
for dir in [path] + list(path.parents):
for f in [path] + list(path.parents):
# Do not go above the data directory.
if dir == p.path:
if f == p.path:
return []

f = dir / "testdata.yaml"
if f not in p._testdata_yamls:
if f.suffix != ".yaml":
f = f / "test_group.yaml"
if f not in p._test_case_yamls:
continue
flags = p._testdata_yamls[f]
flags = p._test_case_yamls[f]
if key in flags:
args = flags[key]
if key == validate.InputValidator.args_key:
Expand All @@ -611,6 +612,15 @@ def get_testdata_yaml(

return []

# Because Problem.testcases() may be called multiple times (e.g. validating multiple modes, or with `bt all`),
# this cache makes sure that some warnings (like malformed test case names) only appear once.
_warned_for_test_case = set[str]()

def _warn_once(p, test_name, msg):
if test_name not in p._warned_for_test_case:
p._warned_for_test_case.add(test_name)
warn(msg)

def testcases(
p,
*,
@@ -659,6 +669,15 @@ def testcases(
testcases = []
for f in in_paths:
t = testcase.Testcase(p, f)
if not config.COMPILED_FILE_NAME_REGEX.fullmatch(f.name):
p._warn_once(t.name, f"Test case name {t.name} is not valid. Skipping.")
continue
if f.with_suffix("").name == "test_group":
p._warn_once(
t.name,
"Test case must not be named 'test_group', this clashes with the group-level 'test_group.yaml'. Skipping.",
)
continue
if (
(p.interactive or p.multi_pass)
and mode in [validate.Mode.INVALID, validate.Mode.VALID_OUTPUT]
Expand All @@ -670,7 +689,7 @@ def testcases(
continue
if needans and not t.ans_path.is_file():
if t.root != "invalid_input":
warn(f"Found input file {f} without a .ans file. Skipping.")
p._warn_once(t.name, f"Found input file {f} without a .ans file. Skipping.")
continue
if mode == validate.Mode.VALID_OUTPUT:
if t.out_path is None:
@@ -1331,7 +1350,7 @@ def validate_valid_extra_data(p) -> bool:
if not p.validators(validate.OutputValidator, strict=True, print_warn=False):
return True

args = p.get_testdata_yaml(
args = p.get_test_case_yaml(
p.path / "data" / "valid_output",
"output_validator_args",
PrintBar("Generic Output Validation"),
@@ -1492,7 +1511,7 @@ def run_all(select_verdict, select):
return None, None, None

def get_slowest(result):
slowest_pair = result.slowest_testcase()
slowest_pair = result.slowest_test_case()
assert slowest_pair is not None
return slowest_pair
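
For context, not part of the diff: the renamed get_test_case_yaml first looks for a per-test-case <name>.yaml next to the .in file and then walks up through test_group.yaml files, stopping at the data directory. A simplified standalone sketch of that cascade, with a hypothetical function name and a plain dict standing in for the parsed-yaml cache (p._test_case_yamls in the real code):

from pathlib import Path
from typing import Any, Optional

def resolve_test_case_key(
    data_dir: Path, in_path: Path, key: str, cache: dict[Path, dict[str, Any]]
) -> Optional[Any]:
    # Hypothetical helper, for illustration only.
    candidate = in_path.with_suffix(".yaml")  # the per-test-case yaml wins first
    for f in [candidate] + list(candidate.parents):
        if f == data_dir.parent:  # never look above the data directory
            return None
        if f.suffix != ".yaml":  # a directory: consult its group-level yaml
            f = f / "test_group.yaml"
        flags = cache.get(f)
        if flags is not None and key in flags:
            return flags[key]
    return None

For data/secret/group1/huge.in this checks data/secret/group1/huge.yaml, then data/secret/group1/test_group.yaml, data/secret/test_group.yaml, and finally data/test_group.yaml.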

8 changes: 4 additions & 4 deletions bin/run.py
@@ -228,7 +228,7 @@ def _validate_output(self, bar: BAR_TYPE) -> Optional[ExecResult]:
return output_validator.run(
self.testcase,
self,
args=self.testcase.testdata_yaml_args(output_validator, bar),
args=self.testcase.test_case_yaml_args(output_validator, bar),
)

def _visualize_output(self, bar: BAR_TYPE) -> Optional[ExecResult]:
@@ -242,7 +242,7 @@ def _visualize_output(self, bar: BAR_TYPE) -> Optional[ExecResult]:
self.testcase.ans_path.resolve(),
self.out_path if not self.problem.interactive else None,
self.feedbackdir,
args=self.testcase.testdata_yaml_args(output_visualizer, bar),
args=self.testcase.test_case_yaml_args(output_visualizer, bar),
)


@@ -501,15 +501,15 @@ def process_run(run: Run):
else:
color = Fore.GREEN if self.verdict in self.expected_verdicts else Fore.RED

(salient_testcase, salient_duration) = verdicts.salient_testcase()
(salient_testcase, salient_duration) = verdicts.salient_test_case()
salient_print_verdict = self.verdict
salient_duration_style = Style.BRIGHT if salient_duration >= self.limits["timeout"] else ""

# Summary line is the only thing shown.
message = f"{color}{salient_print_verdict.short():>3}{salient_duration_style}{salient_duration:6.3f}s{Style.RESET_ALL} {Style.DIM}@ {salient_testcase:{max_testcase_len}}{Style.RESET_ALL}"

if verdicts.run_until in [RunUntil.DURATION, RunUntil.ALL]:
slowest_pair = verdicts.slowest_testcase()
slowest_pair = verdicts.slowest_test_case()
assert slowest_pair is not None
(slowest_testcase, slowest_duration) = slowest_pair
slowest_verdict = verdicts[slowest_testcase]