Skip to content

Commit 37a6036

Browse files
Move info_utils errors to exceptions module (#6952)
* Move info_utils errors to exceptions module
* Create new errors and deprecate old ones
* Replace deprecated errors
* Make deprecation backward compatible
* Test deprecated and non-deprecated errors
* Fix non-deprecated errors
1 parent 9510252 commit 37a6036

File tree

4 files changed

+188
-36
lines changed

4 files changed

+188
-36
lines changed

src/datasets/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,9 @@
5858
# Deprecated modules
5959
from . import arrow_dataset as _arrow_dataset
6060
from . import utils as _utils
61+
from .exceptions import ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits
6162
from .utils import download_manager as _deprecated_download_manager
63+
from .utils import info_utils as _deprecated_info_utils
6264

6365

6466
_arrow_dataset.concatenate_datasets = concatenate_datasets
@@ -68,5 +70,10 @@
6870
_deprecated_download_manager.DownloadConfig = DownloadConfig
6971
_deprecated_download_manager.DownloadMode = DownloadMode
7072
_deprecated_download_manager.DownloadManager = DownloadManager
73+
_deprecated_info_utils.ExpectedMoreDownloadedFiles = ExpectedMoreDownloadedFiles
74+
_deprecated_info_utils.ExpectedMoreSplits = ExpectedMoreSplits
75+
_deprecated_info_utils.UnexpectedDownloadedFile = UnexpectedDownloadedFile
76+
_deprecated_info_utils.UnexpectedSplits = UnexpectedSplits
7177

7278
del _arrow_dataset, _utils, _deprecated_download_manager
79+
del _deprecated_info_utils, ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits

src/datasets/exceptions.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from . import config
88
from .table import CastError
9+
from .utils.deprecation_utils import deprecated
910
from .utils.track import TrackedIterable, tracked_list, tracked_str
1011

1112

@@ -83,3 +84,113 @@ def from_cast_error(
8384
explanation_message += f"\n\nThis happened while the {builder_name} dataset builder was generating data using\n\n{', '.join(formatted_tracked_gen_kwargs)}"
8485
help_message = "\n\nPlease either edit the data files to have matching columns, or separate them into different configurations (see docs at https://hf.co/docs/hub/datasets-manual-configuration#multiple-configurations)"
8586
return cls("An error occurred while generating the dataset" + explanation_message + help_message)
87+
88+
89+
@deprecated("Use 'ChecksumVerificationError' instead.")
90+
class ChecksumVerificationException(Exception):
91+
"""Exceptions during checksums verifications of downloaded files.
92+
93+
<Deprecated version="2.20.0">
94+
95+
Use `ChecksumVerificationError` instead.
96+
97+
</Deprecated>
98+
"""
99+
100+
101+
class ChecksumVerificationError(DatasetsError, ChecksumVerificationException):
102+
"""Error raised during checksums verifications of downloaded files."""
103+
104+
def __init__(self, *args, **kwargs):
105+
DatasetsError.__init__(self, *args, **kwargs)
106+
107+
108+
@deprecated("Use 'UnexpectedDownloadedFileError' instead.")
109+
class UnexpectedDownloadedFile(ChecksumVerificationException):
110+
"""Some downloaded files were not expected.
111+
112+
<Deprecated version="2.20.0">
113+
114+
Use `UnexpectedDownloadedFileError` instead.
115+
116+
</Deprecated>
117+
"""
118+
119+
120+
class UnexpectedDownloadedFileError(ChecksumVerificationError, UnexpectedDownloadedFile):
121+
"""Some downloaded files were not expected."""
122+
123+
124+
@deprecated("Use 'ExpectedMoreDownloadedFilesError' instead.")
125+
class ExpectedMoreDownloadedFiles(ChecksumVerificationException):
126+
"""Some files were supposed to be downloaded but were not.
127+
128+
<Deprecated version="2.20.0">
129+
130+
Use `ExpectedMoreDownloadedFilesError` instead.
131+
132+
</Deprecated>
133+
"""
134+
135+
136+
class ExpectedMoreDownloadedFilesError(ChecksumVerificationError, ExpectedMoreDownloadedFiles):
137+
"""Some files were supposed to be downloaded but were not."""
138+
139+
140+
class NonMatchingChecksumError(ChecksumVerificationError):
141+
"""The downloaded file checksum don't match the expected checksum."""
142+
143+
144+
@deprecated("Use 'SplitsVerificationError' instead.")
145+
class SplitsVerificationException(Exception):
146+
"""Exceptions during splits verifications.
147+
148+
<Deprecated version="2.20.0">
149+
150+
Use `SplitsVerificationError` instead.
151+
152+
</Deprecated>
153+
"""
154+
155+
156+
class SplitsVerificationError(DatasetsError, SplitsVerificationException):
157+
"""Error raised during splits verifications."""
158+
159+
def __init__(self, *args, **kwargs):
160+
DatasetsError.__init__(self, *args, **kwargs)
161+
162+
163+
@deprecated("Use 'UnexpectedSplitsError' instead.")
164+
class UnexpectedSplits(SplitsVerificationException):
165+
"""The expected splits of the downloaded file is missing.
166+
167+
<Deprecated version="2.20.0">
168+
169+
Use `UnexpectedSplitsError` instead.
170+
171+
</Deprecated>
172+
"""
173+
174+
175+
class UnexpectedSplitsError(SplitsVerificationError, UnexpectedSplits):
176+
"""The expected splits of the downloaded file is missing."""
177+
178+
179+
@deprecated("Use 'ExpectedMoreSplitsError' instead.")
180+
class ExpectedMoreSplits(SplitsVerificationException):
181+
"""Some recorded splits are missing.
182+
183+
<Deprecated version="2.20.0">
184+
185+
Use `ExpectedMoreSplitsError` instead.
186+
187+
</Deprecated>
188+
"""
189+
190+
191+
class ExpectedMoreSplitsError(SplitsVerificationError, ExpectedMoreSplits):
192+
"""Some recorded splits are missing."""
193+
194+
195+
class NonMatchingSplitsSizesError(SplitsVerificationError):
196+
"""The splits sizes don't match the expected splits sizes."""

src/datasets/utils/info_utils.py

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@
55
from huggingface_hub.utils import insecure_hashlib
66

77
from .. import config
8+
from ..exceptions import (
9+
ExpectedMoreDownloadedFilesError,
10+
ExpectedMoreSplitsError,
11+
NonMatchingChecksumError,
12+
NonMatchingSplitsSizesError,
13+
UnexpectedDownloadedFileError,
14+
UnexpectedSplitsError,
15+
)
816
from .logging import get_logger
917

1018

@@ -33,30 +41,14 @@ class VerificationMode(enum.Enum):
3341
NO_CHECKS = "no_checks"
3442

3543

36-
class ChecksumVerificationException(Exception):
37-
"""Exceptions during checksums verifications of downloaded files."""
38-
39-
40-
class UnexpectedDownloadedFile(ChecksumVerificationException):
41-
"""Some downloaded files were not expected."""
42-
43-
44-
class ExpectedMoreDownloadedFiles(ChecksumVerificationException):
45-
"""Some files were supposed to be downloaded but were not."""
46-
47-
48-
class NonMatchingChecksumError(ChecksumVerificationException):
49-
"""The downloaded file checksum don't match the expected checksum."""
50-
51-
5244
def verify_checksums(expected_checksums: Optional[dict], recorded_checksums: dict, verification_name=None):
5345
if expected_checksums is None:
5446
logger.info("Unable to verify checksums.")
5547
return
5648
if len(set(expected_checksums) - set(recorded_checksums)) > 0:
57-
raise ExpectedMoreDownloadedFiles(str(set(expected_checksums) - set(recorded_checksums)))
49+
raise ExpectedMoreDownloadedFilesError(str(set(expected_checksums) - set(recorded_checksums)))
5850
if len(set(recorded_checksums) - set(expected_checksums)) > 0:
59-
raise UnexpectedDownloadedFile(str(set(recorded_checksums) - set(expected_checksums)))
51+
raise UnexpectedDownloadedFileError(str(set(recorded_checksums) - set(expected_checksums)))
6052
bad_urls = [url for url in expected_checksums if expected_checksums[url] != recorded_checksums[url]]
6153
for_verification_name = " for " + verification_name if verification_name is not None else ""
6254
if len(bad_urls) > 0:
@@ -68,30 +60,14 @@ def verify_checksums(expected_checksums: Optional[dict], recorded_checksums: dic
6860
logger.info("All the checksums matched successfully" + for_verification_name)
6961

7062

71-
class SplitsVerificationException(Exception):
72-
"""Exceptions during splis verifications"""
73-
74-
75-
class UnexpectedSplits(SplitsVerificationException):
76-
"""The expected splits of the downloaded file is missing."""
77-
78-
79-
class ExpectedMoreSplits(SplitsVerificationException):
80-
"""Some recorded splits are missing."""
81-
82-
83-
class NonMatchingSplitsSizesError(SplitsVerificationException):
84-
"""The splits sizes don't match the expected splits sizes."""
85-
86-
8763
def verify_splits(expected_splits: Optional[dict], recorded_splits: dict):
8864
if expected_splits is None:
8965
logger.info("Unable to verify splits sizes.")
9066
return
9167
if len(set(expected_splits) - set(recorded_splits)) > 0:
92-
raise ExpectedMoreSplits(str(set(expected_splits) - set(recorded_splits)))
68+
raise ExpectedMoreSplitsError(str(set(expected_splits) - set(recorded_splits)))
9369
if len(set(recorded_splits) - set(expected_splits)) > 0:
94-
raise UnexpectedSplits(str(set(recorded_splits) - set(expected_splits)))
70+
raise UnexpectedSplitsError(str(set(recorded_splits) - set(expected_splits)))
9571
bad_splits = [
9672
{"expected": expected_splits[name], "recorded": recorded_splits[name]}
9773
for name in expected_splits

tests/test_exceptions.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import warnings
2+
3+
import pytest
4+
5+
import datasets.utils.deprecation_utils
6+
from datasets.exceptions import (
7+
ChecksumVerificationError,
8+
ChecksumVerificationException,
9+
ExpectedMoreDownloadedFiles,
10+
ExpectedMoreDownloadedFilesError,
11+
ExpectedMoreSplits,
12+
ExpectedMoreSplitsError,
13+
NonMatchingChecksumError,
14+
NonMatchingSplitsSizesError,
15+
SplitsVerificationError,
16+
SplitsVerificationException,
17+
UnexpectedDownloadedFile,
18+
UnexpectedDownloadedFileError,
19+
UnexpectedSplits,
20+
UnexpectedSplitsError,
21+
)
22+
23+
24+
@pytest.mark.parametrize(
25+
"error",
26+
[
27+
ChecksumVerificationException,
28+
UnexpectedDownloadedFile,
29+
ExpectedMoreDownloadedFiles,
30+
SplitsVerificationException,
31+
UnexpectedSplits,
32+
ExpectedMoreSplits,
33+
],
34+
)
35+
def test_error_deprecated(error, monkeypatch):
36+
monkeypatch.setattr(datasets.utils.deprecation_utils, "_emitted_deprecation_warnings", set())
37+
with pytest.deprecated_call():
38+
error()
39+
40+
41+
@pytest.mark.parametrize(
42+
"error",
43+
[
44+
ChecksumVerificationError,
45+
UnexpectedDownloadedFileError,
46+
ExpectedMoreDownloadedFilesError,
47+
NonMatchingChecksumError,
48+
SplitsVerificationError,
49+
UnexpectedSplitsError,
50+
ExpectedMoreSplitsError,
51+
NonMatchingSplitsSizesError,
52+
],
53+
)
54+
def test_error_not_deprecated(error, monkeypatch):
55+
monkeypatch.setattr(datasets.utils.deprecation_utils, "_emitted_deprecation_warnings", set())
56+
with warnings.catch_warnings():
57+
warnings.simplefilter("error")
58+
error()

0 commit comments

Comments
 (0)