206 lines
7.3 KiB
Python
206 lines
7.3 KiB
Python
import fnmatch
|
|
import functools
|
|
import re
|
|
|
|
#
|
|
# Globbing helpers
|
|
#
|
|
|
|
|
|
@functools.cache
def _is_case_sensitive(flavour):
    """Return True if *flavour* leaves the case of path strings untouched.

    *flavour* is any hashable object exposing a ``normcase()`` callable
    (presumably an ``os.path``-style module such as ``posixpath`` or
    ``ntpath`` -- confirm against callers). A mixed-case probe string is
    normalised; if it comes back unchanged the flavour does not fold case.
    The answer is memoised per flavour by ``functools.cache``.
    """
    probe = "Aa"
    return flavour.normcase(probe) == probe
|
|
|
|
|
|
# fnmatch.translate() returns a regular expression that includes a prefix and
|
|
# a suffix, which enable matching newlines and ensure the end of the string is
|
|
# matched, respectively. These features are undesirable for our implementation
|
|
# of PurePath.match(), which represents path separators as newlines and joins
|
|
# pattern segments together. As a workaround, we define a slice object that
|
|
# can remove the prefix and suffix from any translate() result. See the
|
|
# _compile_pattern_lines() function for more details.
|
|
# Translate a one-character placeholder pattern and split around it to learn
# the text that translate() wraps around every pattern it produces.
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate("_").split("_")

# Slicing a translate() result with this object strips that wrapper text.
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))

# One str.translate() table per supported separator; each table exchanges the
# separator with the newline character (and vice versa).
_SWAP_SEP_AND_NEWLINE = {
    sep: str.maketrans({sep: "\n", "\n": sep}) for sep in ("/", "\\")
}
|
|
|
|
|
|
@functools.lru_cache()
def _make_selector(pattern_parts, flavour, case_sensitive):
    """Build the selector for the first component of *pattern_parts*.

    *pattern_parts* is a sequence of glob pattern components; it must be
    hashable (e.g. a tuple) because results are memoised with ``lru_cache``.
    The selector class is chosen from the first component:

    * empty string -> ``_TerminatingSelector``
    * ``'**'``     -> ``_RecursiveWildcardSelector``, or
      ``_DoubleRecursiveWildcardSelector`` when another ``'**'`` appears
      later, after any directly adjacent ``'**'`` components are skipped
    * ``'..'``     -> ``_ParentSelector``
    * anything else -> ``_WildcardSelector``

    The remaining components are handed to the chosen class together with
    *flavour* and *case_sensitive*.

    Raises ValueError if a component contains ``'**'`` without being exactly
    ``'**'``.
    """
    head = pattern_parts[0]
    if not head:
        return _TerminatingSelector()

    if head != "**":
        tail = pattern_parts[1:]
        if head == "..":
            selector_cls = _ParentSelector
        elif "**" in head:
            raise ValueError("Invalid pattern: '**' can only be an entire path component")
        else:
            selector_cls = _WildcardSelector
        return selector_cls(head, tail, flavour, case_sensitive)

    # A run of adjacent '**' components is treated as a single '**': skip
    # past every repeat that directly follows the first one.
    total = len(pattern_parts)
    skip = 1
    while skip < total and pattern_parts[skip] == "**":
        skip += 1
    tail = pattern_parts[skip:]

    # A further, non-adjacent '**' means the de-duplicating variant is used.
    if "**" in tail:
        selector_cls = _DoubleRecursiveWildcardSelector
    else:
        selector_cls = _RecursiveWildcardSelector
    return selector_cls(head, tail, flavour, case_sensitive)
|
|
|
|
|
|
@functools.lru_cache(maxsize=256)
|
|
def _compile_pattern(pat, case_sensitive):
|
|
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
|
|
return re.compile(fnmatch.translate(pat), flags).match
|
|
|
|
|
|
@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    r"""Compile the given pattern lines to an `re.Pattern` object.

    *pattern_lines* is a glob-style pattern (e.g. '*/*.py') in which path
    separators and newlines have been exchanged (e.g. '*\n*.py'). Because
    components are delimited by newlines and `re.DOTALL` is not set, a `*`
    wildcard can never match across a path separator.

    Call `match()` on the returned `re.Pattern` to match a complete pattern,
    or `search()` to match from the right. The string passed to either
    method must have its separators and newlines exchanged in the same way.

    When *case_sensitive* is false, the pattern is compiled with
    `re.IGNORECASE`.
    """
    # A component consisting solely of '*' gets a hand-written regex that
    # requires at least one character, instead of going through translate().
    bare_wildcards = {"*\n": r".+\n", "*": r".+"}

    # Anchor at the start of the path, or just after a path separator.
    regex = ["^"]
    for line in pattern_lines.splitlines(keepends=True):
        fragment = bare_wildcards.get(line)
        if fragment is None:
            # Any other component goes through fnmatch.translate(). The
            # common prefix and suffix that translate() adds are sliced off
            # so that re.DOTALL stays unset and the end of the string is not
            # matched here. Without DOTALL the '.' characters in the regex
            # do not match newlines, so wildcards stay inside one component.
            fragment = fnmatch.translate(line)[_FNMATCH_SLICE]
        regex.append(fragment)
    # Anchor at the end of the path, always.
    regex.append(r"\Z")

    flags = re.MULTILINE if case_sensitive else re.MULTILINE | re.IGNORECASE
    return re.compile("".join(regex), flags=flags)
|
|
|
|
|
|
class _Selector:
    """Base class for selectors: each one handles a single glob pattern
    component and hands its results on to a successor selector."""

    def __init__(self, child_parts, flavour, case_sensitive):
        """Record *child_parts* and build the selector that follows this one.

        When pattern components remain, the successor is created through
        ``_make_selector()`` and ``dironly`` is set to True; otherwise the
        successor is a ``_TerminatingSelector`` and ``dironly`` is False.
        """
        self.child_parts = child_parts
        has_more = bool(child_parts)
        self.successor = (
            _make_selector(child_parts, flavour, case_sensitive)
            if has_more
            else _TerminatingSelector()
        )
        self.dironly = has_more

    def select_from(self, parent_path):
        """Return an iterator over the paths under `parent_path` that this
        selector matches; `parent_path` itself may be among them. The
        iterator is empty when `parent_path` is not a directory."""
        # The scandir callable is looked up on the path's class and handed
        # down to _select_from().
        scandir = type(parent_path)._scandir
        if parent_path.is_dir():
            return self._select_from(parent_path, scandir)
        return iter([])
|
|
|
|
|
|
class _TerminatingSelector:
    """Selector that ends a chain: it yields the path it is given."""

    def _select_from(self, parent_path, scandir):
        """Yield *parent_path* unchanged; *scandir* is not used."""
        yield from (parent_path,)
|
|
|
|
|
|
class _ParentSelector(_Selector):
    """Selector that steps to the '..' entry of the path it is given."""

    def __init__(self, name, child_parts, flavour, case_sensitive):
        # *name* is accepted but not used by this selector.
        super().__init__(child_parts, flavour, case_sensitive)

    def _select_from(self, parent_path, scandir):
        """Yield whatever the successor selects from `parent_path/..`."""
        parent = parent_path._make_child_relpath("..")
        yield from self.successor._select_from(parent, scandir)
|
|
|
|
|
|
class _WildcardSelector(_Selector):
    """Selector that matches directory entry names against one pattern
    component."""

    def __init__(self, pat, child_parts, flavour, case_sensitive):
        super().__init__(child_parts, flavour, case_sensitive)
        if case_sensitive is None:
            # TODO: evaluate case-sensitivity of each directory in _select_from()
            case_sensitive = _is_case_sensitive(flavour)
        # Callable that returns a match object (or None) for an entry name.
        self.match = _compile_pattern(pat, case_sensitive)

    def _select_from(self, parent_path, scandir):
        """Yield the successor's selections for every matching entry of
        `parent_path`. A directory that cannot be scanned yields nothing."""
        try:
            # We must close the scandir() object before proceeding to
            # avoid exhausting file descriptors when globbing deep trees.
            with scandir(parent_path) as entry_iter:
                children = list(entry_iter)
        except OSError:
            return

        for child in children:
            if self.dironly:
                # Entries whose type cannot be determined are skipped, just
                # like entries that are not directories.
                try:
                    is_directory = child.is_dir()
                except OSError:
                    continue
                if not is_directory:
                    continue
            child_name = child.name
            if self.match(child_name):
                child_path = parent_path._make_child_relpath(child_name)
                yield from self.successor._select_from(child_path, scandir)
|
|
|
|
|
|
class _RecursiveWildcardSelector(_Selector):
    """Selector that applies its successor to a starting path and to every
    directory found by walking it."""

    def __init__(self, pat, child_parts, flavour, case_sensitive):
        # *pat* is accepted but not used by this selector.
        super().__init__(child_parts, flavour, case_sensitive)

    def _iterate_directories(self, parent_path):
        """Yield `parent_path`, then each directory reported by
        `parent_path.walk()`."""
        yield parent_path
        for current_dir, subdir_names, _files in parent_path.walk():
            yield from (
                current_dir._make_child_relpath(subdir_name)
                for subdir_name in subdir_names
            )

    def _select_from(self, parent_path, scandir):
        """Yield the successor's selections from every directory produced by
        `_iterate_directories()`."""
        expand = self.successor._select_from
        for directory in self._iterate_directories(parent_path):
            yield from expand(directory, scandir)
|
|
|
|
|
|
class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
    """
    Variant of _RecursiveWildcardSelector that suppresses repeated results
    coming from the selectors after it. This is needed when the pattern
    contains more than one '**' segment and they are not adjacent.
    """

    def _select_from(self, parent_path, scandir):
        """Yield each distinct path produced by the parent class once."""
        seen = set()
        try:
            for candidate in super()._select_from(parent_path, scandir):
                if candidate in seen:
                    continue
                yield candidate
                seen.add(candidate)
        finally:
            # Empty the set as soon as iteration ends or is abandoned.
            seen.clear()
|