Skip to content

Commit 6f2fa13

Browse files
author
atollk
committed
Changes from code review.
1 parent 3d127b5 commit 6f2fa13

File tree

5 files changed

+116
-58
lines changed

5 files changed

+116
-58
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1010

1111
### Added
1212

13-
- To `fs.walk.Walker`, added parameters `filter_glob` and `exclude_glob`.
13+
- Added `filter_glob` and `exclude_glob` parameters to `fs.walk.Walker`.
1414
Closes [#459](https://github.com/PyFilesystem/pyfilesystem2/issues/459).
1515
- Added `fs.copy.copy_file_if`, `fs.copy.copy_dir_if`, and `fs.copy.copy_fs_if`.
1616
Closes [#458](https://github.com/PyFilesystem/pyfilesystem2/issues/458).

fs/base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,8 +1648,8 @@ def check(self):
16481648
if self.isclosed():
16491649
raise errors.FilesystemClosed()
16501650

1651-
def match(self, patterns, name, accept_prefix=False):
1652-
# type: (Optional[Iterable[Text]], Text, bool) -> bool
1651+
def match(self, patterns, name):
1652+
# type: (Optional[Iterable[Text]], Text) -> bool
16531653
"""Check if a name matches any of a list of wildcards.
16541654
16551655
If a filesystem is case *insensitive* (such as Windows) then
@@ -1706,7 +1706,7 @@ def match_glob(self, patterns, path, accept_prefix=False):
17061706
``['*.py']``, or `None` to match everything.
17071707
path (str): A resource path, starting with "/".
17081708
accept_prefix (bool): If ``True``, the path is
1709-
not required to match the wildcards themselves
1709+
not required to match the patterns themselves
17101710
but only need to be a prefix of a string that does.
17111711
17121712
Returns:
@@ -1724,6 +1724,8 @@ def match_glob(self, patterns, path, accept_prefix=False):
17241724
False
17251725
>>> my_fs.match_glob(['dir/file.txt'], '/dir/', accept_prefix=True)
17261726
True
1727+
>>> my_fs.match_glob(['dir/file.txt'], '/dir/', accept_prefix=False)
1728+
False
17271729
>>> my_fs.match_glob(['dir/file.txt'], '/dir/gile.txt', accept_prefix=True)
17281730
False
17291731

fs/glob.py

Lines changed: 51 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333

3434
_PATTERN_CACHE = LRUCache(
3535
1000
36-
) # type: LRUCache[Tuple[Text, bool], Tuple[int, bool, Pattern]]
36+
) # type: LRUCache[Tuple[Text, bool], Tuple[Optional[int], Pattern]]
3737

3838

39-
def _split_pattern_by_rec(pattern):
39+
def _split_pattern_by_sep(pattern):
4040
# type: (Text) -> List[Text]
4141
"""Split a glob pattern at its directory seperators (/).
4242
@@ -56,28 +56,27 @@ def _split_pattern_by_rec(pattern):
5656
return [pattern[i + 1 : j] for i, j in zip(indices[:-1], indices[1:])]
5757

5858

59-
def _translate(pattern, case_sensitive=True):
60-
# type: (Text, bool) -> Text
61-
"""Translate a wildcard pattern to a regular expression.
59+
def _translate(pattern):
60+
# type: (Text) -> Text
61+
"""Translate a glob pattern without '**' to a regular expression.
6262
6363
There is no way to quote meta-characters.
64+
6465
Arguments:
65-
pattern (str): A wildcard pattern.
66-
case_sensitive (bool): Set to `False` to use a case
67-
insensitive regex (default `True`).
66+
pattern (str): A glob pattern.
6867
6968
Returns:
7069
str: A regex equivalent to the given pattern.
7170
7271
"""
73-
if not case_sensitive:
74-
pattern = pattern.lower()
7572
i, n = 0, len(pattern)
7673
res = []
7774
while i < n:
7875
c = pattern[i]
7976
i = i + 1
8077
if c == "*":
78+
if i < n and pattern[i] == "*":
79+
raise ValueError("glob._translate does not support '**' patterns.")
8180
res.append("[^/]*")
8281
elif c == "?":
8382
res.append("[^/]")
@@ -95,7 +94,7 @@ def _translate(pattern, case_sensitive=True):
9594
stuff = pattern[i:j].replace("\\", "\\\\")
9695
i = j + 1
9796
if stuff[0] == "!":
98-
stuff = "^" + stuff[1:]
97+
stuff = "^/" + stuff[1:]
9998
elif stuff[0] == "^":
10099
stuff = "\\" + stuff
101100
res.append("[%s]" % stuff)
@@ -104,27 +103,35 @@ def _translate(pattern, case_sensitive=True):
104103
return "".join(res)
105104

106105

107-
def _translate_glob(pattern, case_sensitive=True):
108-
levels = 0
106+
def _translate_glob(pattern):
107+
# type: (Text) -> Tuple[Optional[int], Text]
108+
"""Translate a glob pattern to a regular expression.
109+
110+
There is no way to quote meta-characters.
111+
112+
Arguments:
113+
pattern (str): A glob pattern.
114+
115+
Returns:
116+
Tuple[Optional[int], Text]: The first component describes the levels
117+
of depth this glob pattern goes to; basically the number of "/" in
118+
the pattern. If there is a "**" in the glob pattern, the depth is
119+
basically unbounded, and this component is `None` instead.
120+
The second component is the regular expression.
121+
122+
"""
109123
recursive = False
110124
re_patterns = [""]
111125
for component in iteratepath(pattern):
112126
if "**" in component:
113127
recursive = True
114128
split = component.split("**")
115-
split_re = [_translate(s, case_sensitive=case_sensitive) for s in split]
129+
split_re = [_translate(s) for s in split]
116130
re_patterns.append("/?" + ".*/?".join(split_re))
117131
else:
118-
re_patterns.append(
119-
"/" + _translate(component, case_sensitive=case_sensitive)
120-
)
121-
levels += 1
132+
re_patterns.append("/" + _translate(component))
122133
re_glob = "(?ms)^" + "".join(re_patterns) + ("/$" if pattern.endswith("/") else "$")
123-
return (
124-
levels,
125-
recursive,
126-
re.compile(re_glob, 0 if case_sensitive else re.IGNORECASE),
127-
)
134+
return pattern.count("/") + 1 if not recursive else None, re_glob
128135

129136

130137
def match(pattern, path):
@@ -146,10 +153,11 @@ def match(pattern, path):
146153
147154
"""
148155
try:
149-
levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, True)]
156+
levels, re_pattern = _PATTERN_CACHE[(pattern, True)]
150157
except KeyError:
151-
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
152-
_PATTERN_CACHE[(pattern, True)] = (levels, recursive, re_pattern)
158+
levels, re_str = _translate_glob(pattern)
159+
re_pattern = re.compile(re_str)
160+
_PATTERN_CACHE[(pattern, True)] = (levels, re_pattern)
153161
if path and path[0] != "/":
154162
path = "/" + path
155163
return bool(re_pattern.match(path))
@@ -168,10 +176,11 @@ def imatch(pattern, path):
168176
169177
"""
170178
try:
171-
levels, recursive, re_pattern = _PATTERN_CACHE[(pattern, False)]
179+
levels, re_pattern = _PATTERN_CACHE[(pattern, False)]
172180
except KeyError:
173-
levels, recursive, re_pattern = _translate_glob(pattern, case_sensitive=True)
174-
_PATTERN_CACHE[(pattern, False)] = (levels, recursive, re_pattern)
181+
levels, re_str = _translate_glob(pattern)
182+
re_pattern = re.compile(re_str, re.IGNORECASE)
183+
_PATTERN_CACHE[(pattern, False)] = (levels, re_pattern)
175184
if path and path[0] != "/":
176185
path = "/" + path
177186
return bool(re_pattern.match(path))
@@ -186,7 +195,7 @@ def match_any(patterns, path):
186195
Arguments:
187196
patterns (list): A list of wildcard pattern, e.g ``["*.py",
188197
"*.pyc"]``
189-
name (str): A filename.
198+
path (str): A resource path.
190199
191200
Returns:
192201
bool: `True` if the path matches at least one of the patterns.
@@ -206,7 +215,7 @@ def imatch_any(patterns, path):
206215
Arguments:
207216
patterns (list): A list of wildcard pattern, e.g ``["*.py",
208217
"*.pyc"]``
209-
name (str): A filename.
218+
path (str): A resource path.
210219
211220
Returns:
212221
bool: `True` if the path matches at least one of the patterns.
@@ -227,29 +236,30 @@ def get_matcher(patterns, case_sensitive, accept_prefix=False):
227236
case_sensitive (bool): If ``True``, then the callable will be case
228237
sensitive, otherwise it will be case insensitive.
229238
accept_prefix (bool): If ``True``, the name is
230-
not required to match the wildcards themselves
239+
not required to match the patterns themselves
231240
but only need to be a prefix of a string that does.
232241
233242
Returns:
234243
callable: a matcher that will return `True` if the paths given as
235-
an argument matches any of the given patterns.
244+
an argument matches any of the given patterns, or if no patterns
245+
exist.
236246
237247
Example:
238-
>>> from fs import wildcard
239-
>>> is_python = wildcard.get_matcher(['*.py'], True)
248+
>>> from fs import glob
249+
>>> is_python = glob.get_matcher(['*.py'], True)
240250
>>> is_python('__init__.py')
241251
True
242252
>>> is_python('foo.txt')
243253
False
244254
245255
"""
246256
if not patterns:
247-
return lambda name: True
257+
return lambda path: True
248258

249259
if accept_prefix:
250260
new_patterns = []
251261
for pattern in patterns:
252-
split = _split_pattern_by_rec(pattern)
262+
split = _split_pattern_by_sep(pattern)
253263
for i in range(1, len(split)):
254264
new_pattern = "/".join(split[:i])
255265
new_patterns.append(new_pattern)
@@ -309,18 +319,15 @@ def __repr__(self):
309319
def _make_iter(self, search="breadth", namespaces=None):
310320
# type: (str, List[str]) -> Iterator[GlobMatch]
311321
try:
312-
levels, recursive, re_pattern = _PATTERN_CACHE[
313-
(self.pattern, self.case_sensitive)
314-
]
322+
levels, re_pattern = _PATTERN_CACHE[(self.pattern, self.case_sensitive)]
315323
except KeyError:
316-
levels, recursive, re_pattern = _translate_glob(
317-
self.pattern, case_sensitive=self.case_sensitive
318-
)
324+
levels, re_str = _translate_glob(self.pattern)
325+
re_pattern = re.compile(re_str, 0 if self.case_sensitive else re.IGNORECASE)
319326

320327
for path, info in self.fs.walk.info(
321328
path=self.path,
322329
namespaces=namespaces or self.namespaces,
323-
max_depth=None if recursive else levels,
330+
max_depth=levels,
324331
search=search,
325332
exclude_dirs=self.exclude_dirs,
326333
):

fs/walk.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,21 +86,21 @@ def __init__(
8686
a list of filename patterns, e.g. ``["~*"]``. Files matching
8787
any of these patterns will be removed from the walk.
8888
filter_dirs (list, optional): A list of patterns that will be used
89-
to match directories paths. The walk will only open directories
89+
to match directories names. The walk will only open directories
9090
that match at least one of these patterns. Directories will
9191
only be returned if the final component matches one of the
9292
patterns.
9393
exclude_dirs (list, optional): A list of patterns that will be
9494
used to filter out directories from the walk. e.g.
95-
``['*.svn', '*.git']``. Directories matching any of these
95+
``['*.svn', '*.git']``. Directory names matching any of these
9696
patterns will be removed from the walk.
9797
max_depth (int, optional): Maximum directory depth to walk.
9898
filter_glob (list, optional): If supplied, this parameter
9999
should be a list of path patterns e.g. ``["foo/**/*.py"]``.
100100
Resources will only be returned if their global path or
101101
an extension of it matches one of the patterns.
102102
exclude_glob (list, optional): If supplied, this parameter
103-
should be a list of path patterns e.g. ``["foo/**/*.py"]``.
103+
should be a list of path patterns e.g. ``["foo/**/*.pyc"]``.
104104
Resources will not be returned if their global path or
105105
an extension of it matches one of the patterns.
106106
@@ -215,7 +215,7 @@ def _iter_walk(
215215
def _check_open_dir(self, fs, path, info):
216216
# type: (FS, Text, Info) -> bool
217217
"""Check if a directory should be considered in the walk."""
218-
full_path = ("" if path == "/" else path) + "/" + info.name
218+
full_path = combine(path, info.name)
219219
if self.exclude_dirs is not None and fs.match(self.exclude_dirs, info.name):
220220
return False
221221
if self.exclude_glob is not None and fs.match_glob(

tests/test_glob.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
from __future__ import unicode_literals
22

3+
import re
34
import unittest
45

6+
from parameterized import parameterized
7+
58
from fs import glob
69
from fs import open_fs
710

@@ -18,8 +21,8 @@ def setUp(self):
1821
fs.makedirs("a/b/c/").writetext("foo.py", "import fs")
1922
repr(fs.glob)
2023

21-
def test_match(self):
22-
tests = [
24+
@parameterized.expand(
25+
[
2326
("*.?y", "/test.py", True),
2427
("*.py", "/test.py", True),
2528
("*.py", "__init__.py", True),
@@ -42,11 +45,11 @@ def test_match(self):
4245
("**/", "/test/", True),
4346
("**/", "/test.py", False),
4447
]
45-
for pattern, path, expected in tests:
46-
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
48+
)
49+
def test_match(self, pattern, path, expected):
50+
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
4751
# Run a second time to test cache
48-
for pattern, path, expected in tests:
49-
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
52+
self.assertEqual(glob.match(pattern, path), expected, msg=(pattern, path))
5053

5154
def test_count_1dir(self):
5255
globber = glob.BoundGlobber(self.fs)
@@ -100,3 +103,49 @@ def test_remove_all(self):
100103
globber = glob.BoundGlobber(self.fs)
101104
globber("**").remove()
102105
self.assertEqual(sorted(self.fs.listdir("/")), [])
106+
107+
translate_test_cases = [
108+
("foo.py", ["foo.py"], ["Foo.py", "foo_py", "foo", ".py"]),
109+
("foo?py", ["foo.py", "fooapy"], ["foo/py", "foopy", "fopy"]),
110+
("bar/foo.py", ["bar/foo.py"], []),
111+
("bar?foo.py", ["barafoo.py"], ["bar/foo.py"]),
112+
("???.py", ["foo.py", "bar.py", "FOO.py"], [".py", "foo.PY"]),
113+
("bar/*.py", ["bar/.py", "bar/foo.py"], ["bar/foo"]),
114+
("bar/foo*.py", ["bar/foo.py", "bar/foobaz.py"], ["bar/foo", "bar/.py"]),
115+
("*/[bar]/foo.py", ["/b/foo.py", "x/a/foo.py", "/r/foo.py"], ["b/foo.py", "/bar/foo.py"]),
116+
("[!bar]/foo.py", ["x/foo.py"], ["//foo.py"]),
117+
("[.py", ["[.py"], [".py", "."]),
118+
]
119+
120+
@parameterized.expand(translate_test_cases)
121+
def test_translate(self, glob_pattern, expected_matches, expected_not_matches):
122+
translated = glob._translate(glob_pattern)
123+
for m in expected_matches:
124+
self.assertTrue(re.match(translated, m))
125+
for m in expected_not_matches:
126+
self.assertFalse(re.match(translated, m))
127+
128+
@parameterized.expand(translate_test_cases)
129+
def test_translate_glob_simple(self, glob_pattern, expected_matches, expected_not_matches):
130+
levels, translated = glob._translate_glob(glob_pattern)
131+
self.assertEqual(levels, glob_pattern.count("/") + 1)
132+
for m in expected_matches:
133+
self.assertTrue(re.match(translated, "/" + m))
134+
for m in expected_not_matches:
135+
self.assertFalse(re.match(translated, m))
136+
self.assertFalse(re.match(translated, "/" + m))
137+
138+
@parameterized.expand(
139+
[
140+
("foo/**/bar", ["/foo/bar", "/foo/baz/bar", "/foo/baz/qux/bar"], ["/foo"]),
141+
("**/*/bar", ["/foo/bar", "/foo/bar"], ["/bar", "/bar"]),
142+
("/**/foo/**/bar", ["/baz/foo/qux/bar", "/foo/bar"], ["/bar"]),
143+
]
144+
)
145+
def test_translate_glob(self, glob_pattern, expected_matches, expected_not_matches):
146+
levels, translated = glob._translate_glob(glob_pattern)
147+
self.assertIsNone(levels)
148+
for m in expected_matches:
149+
self.assertTrue(re.match(translated, m))
150+
for m in expected_not_matches:
151+
self.assertFalse(re.match(translated, m))

0 commit comments

Comments
 (0)