Skip to content

Commit 61de0c5

Browse files
Guard against duplicate builder_kwargs/config_kwargs in load_dataset_builder (#4910)
1 parent 3573d75 commit 61de0c5

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

src/datasets/load.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,6 +1053,14 @@ def load_dataset_builder(
10531053
storage_options: Optional[dict] = None,
10541054
**config_kwargs,
10551055
) -> DatasetBuilder:
1056+
# Error if builder_kwargs and config_kwargs share any keys
1057+
if "builder_kwargs" in config_kwargs and "config_kwargs" in config_kwargs:
1058+
bk = config_kwargs["builder_kwargs"]
1059+
ck = config_kwargs["config_kwargs"]
1060+
overlap = set(bk) & set(ck)
1061+
if overlap:
1062+
raise TypeError(f"Duplicate keys in builder_kwargs and config_kwargs: {overlap}")
1063+
10561064
"""Load a dataset builder which can be used to:
10571065
10581066
- Inspect general information that is required to build a dataset (cache directory, config, dataset info, features, data files, etc.)

tests/test_load_duplicate_keys.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import pytest
2+
from datasets.load import load_dataset_builder
3+
4+
def test_duplicate_builder_and_config_kwargs():
    """Check that overlapping keys in ``builder_kwargs`` and ``config_kwargs`` are rejected.

    Both dictionaries carry the same key, so ``load_dataset_builder`` is
    expected to raise ``TypeError`` with a message that names the duplicate-key
    problem and both offending arguments.
    """
    shared_key = "foo-key"

    with pytest.raises(TypeError) as excinfo:
        load_dataset_builder(
            "csv",
            builder_kwargs={shared_key: 1},
            config_kwargs={shared_key: 2},
        )

    # Compare case-insensitively so the test does not depend on capitalisation.
    message = str(excinfo.value).lower()
    for expected_fragment in ("duplicate keys", "builder_kwargs", "config_kwargs"):
        assert expected_fragment in message

0 commit comments

Comments
 (0)