Merge pull request #77 from asottile/regex_flags

leverage new regex flags
This commit is contained in:
Anthony Sottile
2020-07-01 17:34:20 -07:00
committed by GitHub
3 changed files with 25 additions and 97 deletions

View File

@@ -6,80 +6,36 @@ from typing import Tuple
import onigurumacffi
from babi.cached_property import cached_property
_BACKREF_RE = re.compile(r'((?<!\\)(?:\\\\)*)\\([0-9]+)')
def _replace_esc(s: str, chars: str) -> str:
"""replace the given escape sequences of `chars` with \\uffff"""
for c in chars:
if f'\\{c}' in s:
break
else:
return s
b = []
i = 0
length = len(s)
while i < length:
try:
sbi = s.index('\\', i)
except ValueError:
b.append(s[i:])
break
if sbi > i:
b.append(s[i:sbi])
b.append('\\')
i = sbi + 1
if i < length:
if s[i] in chars:
b.append('\uffff')
else:
b.append(s[i])
i += 1
return ''.join(b)
# Search-option bitmasks handed to oniguruma as `flags=` on each
# search / match call, keyed by the `(first_line, boundary)` pair.
# `NOT_END_STRING` is part of every entry; `NOT_BEGIN_STRING` is added when
# `first_line` is false and `NOT_BEGIN_POSITION` when `boundary` is false.
# NOTE(review): presumably these options stop the `\z`, `\A` and `\G`
# anchors (respectively) from matching -- confirm against the oniguruma docs.
_FLAGS = {
# (first_line, boundary)
(False, False): (
onigurumacffi.OnigSearchOption.NOT_END_STRING |
onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING |
onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
),
(False, True): (
onigurumacffi.OnigSearchOption.NOT_END_STRING |
onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING
),
(True, False): (
onigurumacffi.OnigSearchOption.NOT_END_STRING |
onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
),
# first line at a boundary: only the end-of-string option is applied
(True, True): onigurumacffi.OnigSearchOption.NOT_END_STRING,
}
class _Reg:
def __init__(self, s: str) -> None:
self._pattern = s
self._reg = onigurumacffi.compile(self._pattern)
def __repr__(self) -> str:
return f'{type(self).__name__}({self._pattern!r})'
@cached_property
def _reg(self) -> onigurumacffi._Pattern:
return onigurumacffi.compile(_replace_esc(self._pattern, 'z'))
@cached_property
def _reg_no_A(self) -> onigurumacffi._Pattern:
return onigurumacffi.compile(_replace_esc(self._pattern, 'Az'))
@cached_property
def _reg_no_G(self) -> onigurumacffi._Pattern:
return onigurumacffi.compile(_replace_esc(self._pattern, 'Gz'))
@cached_property
def _reg_no_A_no_G(self) -> onigurumacffi._Pattern:
return onigurumacffi.compile(_replace_esc(self._pattern, 'AGz'))
def _get_reg(
self,
first_line: bool,
boundary: bool,
) -> onigurumacffi._Pattern:
if boundary:
if first_line:
return self._reg
else:
return self._reg_no_A
else:
if first_line:
return self._reg_no_G
else:
return self._reg_no_A_no_G
def search(
self,
line: str,
@@ -87,7 +43,7 @@ class _Reg:
first_line: bool,
boundary: bool,
) -> Optional[Match[str]]:
return self._get_reg(first_line, boundary).search(line, pos)
return self._reg.search(line, pos, flags=_FLAGS[first_line, boundary])
def match(
self,
@@ -96,36 +52,18 @@ class _Reg:
first_line: bool,
boundary: bool,
) -> Optional[Match[str]]:
return self._get_reg(first_line, boundary).match(line, pos)
return self._reg.match(line, pos, flags=_FLAGS[first_line, boundary])
class _RegSet:
def __init__(self, *s: str) -> None:
self._patterns = s
self._set = onigurumacffi.compile_regset(*self._patterns)
def __repr__(self) -> str:
args = ', '.join(repr(s) for s in self._patterns)
return f'{type(self).__name__}({args})'
@cached_property
def _set(self) -> onigurumacffi._RegSet:
return onigurumacffi.compile_regset(*self._patterns)
@cached_property
def _set_no_A(self) -> onigurumacffi._RegSet:
patterns = (_replace_esc(p, 'A') for p in self._patterns)
return onigurumacffi.compile_regset(*patterns)
@cached_property
def _set_no_G(self) -> onigurumacffi._RegSet:
patterns = (_replace_esc(p, 'G') for p in self._patterns)
return onigurumacffi.compile_regset(*patterns)
@cached_property
def _set_no_A_no_G(self) -> onigurumacffi._RegSet:
patterns = (_replace_esc(p, 'AG') for p in self._patterns)
return onigurumacffi.compile_regset(*patterns)
def search(
self,
line: str,
@@ -133,16 +71,7 @@ class _RegSet:
first_line: bool,
boundary: bool,
) -> Tuple[int, Optional[Match[str]]]:
if boundary:
if first_line:
return self._set.search(line, pos)
else:
return self._set_no_A.search(line, pos)
else:
if first_line:
return self._set_no_G.search(line, pos)
else:
return self._set_no_A_no_G.search(line, pos)
return self._set.search(line, pos, flags=_FLAGS[first_line, boundary])
def expand_escaped(match: Match[str], s: str) -> str:
@@ -151,4 +80,4 @@ def expand_escaped(match: Match[str], s: str) -> str:
make_reg = functools.lru_cache(maxsize=None)(_Reg)
make_regset = functools.lru_cache(maxsize=None)(_RegSet)
ERR_REG = make_reg(')this pattern always triggers an error when used(')
ERR_REG = make_reg('$ ^')

View File

@@ -24,7 +24,7 @@ packages = find:
install_requires =
babi-grammars
identify
onigurumacffi>=0.0.10
onigurumacffi>=0.0.18
importlib_metadata>=1;python_version<"3.8"
windows-curses;sys_platform=="win32"
python_requires = >=3.6.1

View File

@@ -35,9 +35,8 @@ def test_reg_other_escapes_left_untouched():
def test_reg_not_out_of_bounds_at_end():
# the only way to trigger this is with an illegal regex; we'd rather
# produce an error about the regex being wrong than an IndexError
reg = _Reg('\\A\\')
with pytest.raises(onigurumacffi.OnigError) as excinfo:
reg.search('\\', 0, first_line=False, boundary=False)
_Reg('\\A\\')
msg, = excinfo.value.args
assert msg == 'end pattern at escape'