Merge pull request #77 from asottile/regex_flags
leverage new regex flags
This commit is contained in:
117
babi/reg.py
117
babi/reg.py
@@ -6,80 +6,36 @@ from typing import Tuple
|
||||
|
||||
import onigurumacffi
|
||||
|
||||
from babi.cached_property import cached_property
|
||||
|
||||
_BACKREF_RE = re.compile(r'((?<!\\)(?:\\\\)*)\\([0-9]+)')
|
||||
|
||||
|
||||
def _replace_esc(s: str, chars: str) -> str:
|
||||
"""replace the given escape sequences of `chars` with \\uffff"""
|
||||
for c in chars:
|
||||
if f'\\{c}' in s:
|
||||
break
|
||||
else:
|
||||
return s
|
||||
|
||||
b = []
|
||||
i = 0
|
||||
length = len(s)
|
||||
while i < length:
|
||||
try:
|
||||
sbi = s.index('\\', i)
|
||||
except ValueError:
|
||||
b.append(s[i:])
|
||||
break
|
||||
if sbi > i:
|
||||
b.append(s[i:sbi])
|
||||
b.append('\\')
|
||||
i = sbi + 1
|
||||
if i < length:
|
||||
if s[i] in chars:
|
||||
b.append('\uffff')
|
||||
else:
|
||||
b.append(s[i])
|
||||
i += 1
|
||||
return ''.join(b)
|
||||
# Search-option flags to use for each `(first_line, boundary)` combination.
#
# As the table shows: NOT_END_STRING is always set, NOT_BEGIN_STRING is set
# whenever `first_line` is false, and NOT_BEGIN_POSITION is set whenever
# `boundary` is false.
# NOTE(review): presumably these options stop `\z`, `\A` and `\G`
# (respectively) from matching -- confirm against the oniguruma docs.
_FLAGS = {
    # (first_line, boundary)
    (False, False): (
        onigurumacffi.OnigSearchOption.NOT_END_STRING |
        onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING |
        onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
    ),
    (False, True): (
        onigurumacffi.OnigSearchOption.NOT_END_STRING |
        onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING
    ),
    (True, False): (
        onigurumacffi.OnigSearchOption.NOT_END_STRING |
        onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
    ),
    (True, True): onigurumacffi.OnigSearchOption.NOT_END_STRING,
}
|
||||
|
||||
|
||||
class _Reg:
|
||||
def __init__(self, s: str) -> None:
|
||||
self._pattern = s
|
||||
self._reg = onigurumacffi.compile(self._pattern)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f'{type(self).__name__}({self._pattern!r})'
|
||||
|
||||
@cached_property
|
||||
def _reg(self) -> onigurumacffi._Pattern:
|
||||
return onigurumacffi.compile(_replace_esc(self._pattern, 'z'))
|
||||
|
||||
@cached_property
|
||||
def _reg_no_A(self) -> onigurumacffi._Pattern:
|
||||
return onigurumacffi.compile(_replace_esc(self._pattern, 'Az'))
|
||||
|
||||
@cached_property
|
||||
def _reg_no_G(self) -> onigurumacffi._Pattern:
|
||||
return onigurumacffi.compile(_replace_esc(self._pattern, 'Gz'))
|
||||
|
||||
@cached_property
|
||||
def _reg_no_A_no_G(self) -> onigurumacffi._Pattern:
|
||||
return onigurumacffi.compile(_replace_esc(self._pattern, 'AGz'))
|
||||
|
||||
def _get_reg(
|
||||
self,
|
||||
first_line: bool,
|
||||
boundary: bool,
|
||||
) -> onigurumacffi._Pattern:
|
||||
if boundary:
|
||||
if first_line:
|
||||
return self._reg
|
||||
else:
|
||||
return self._reg_no_A
|
||||
else:
|
||||
if first_line:
|
||||
return self._reg_no_G
|
||||
else:
|
||||
return self._reg_no_A_no_G
|
||||
|
||||
def search(
|
||||
self,
|
||||
line: str,
|
||||
@@ -87,7 +43,7 @@ class _Reg:
|
||||
first_line: bool,
|
||||
boundary: bool,
|
||||
) -> Optional[Match[str]]:
|
||||
return self._get_reg(first_line, boundary).search(line, pos)
|
||||
return self._reg.search(line, pos, flags=_FLAGS[first_line, boundary])
|
||||
|
||||
def match(
|
||||
self,
|
||||
@@ -96,36 +52,18 @@ class _Reg:
|
||||
first_line: bool,
|
||||
boundary: bool,
|
||||
) -> Optional[Match[str]]:
|
||||
return self._get_reg(first_line, boundary).match(line, pos)
|
||||
return self._reg.match(line, pos, flags=_FLAGS[first_line, boundary])
|
||||
|
||||
|
||||
class _RegSet:
|
||||
def __init__(self, *s: str) -> None:
|
||||
self._patterns = s
|
||||
self._set = onigurumacffi.compile_regset(*self._patterns)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
args = ', '.join(repr(s) for s in self._patterns)
|
||||
return f'{type(self).__name__}({args})'
|
||||
|
||||
@cached_property
|
||||
def _set(self) -> onigurumacffi._RegSet:
|
||||
return onigurumacffi.compile_regset(*self._patterns)
|
||||
|
||||
@cached_property
|
||||
def _set_no_A(self) -> onigurumacffi._RegSet:
|
||||
patterns = (_replace_esc(p, 'A') for p in self._patterns)
|
||||
return onigurumacffi.compile_regset(*patterns)
|
||||
|
||||
@cached_property
|
||||
def _set_no_G(self) -> onigurumacffi._RegSet:
|
||||
patterns = (_replace_esc(p, 'G') for p in self._patterns)
|
||||
return onigurumacffi.compile_regset(*patterns)
|
||||
|
||||
@cached_property
|
||||
def _set_no_A_no_G(self) -> onigurumacffi._RegSet:
|
||||
patterns = (_replace_esc(p, 'AG') for p in self._patterns)
|
||||
return onigurumacffi.compile_regset(*patterns)
|
||||
|
||||
def search(
|
||||
self,
|
||||
line: str,
|
||||
@@ -133,16 +71,7 @@ class _RegSet:
|
||||
first_line: bool,
|
||||
boundary: bool,
|
||||
) -> Tuple[int, Optional[Match[str]]]:
|
||||
if boundary:
|
||||
if first_line:
|
||||
return self._set.search(line, pos)
|
||||
else:
|
||||
return self._set_no_A.search(line, pos)
|
||||
else:
|
||||
if first_line:
|
||||
return self._set_no_G.search(line, pos)
|
||||
else:
|
||||
return self._set_no_A_no_G.search(line, pos)
|
||||
return self._set.search(line, pos, flags=_FLAGS[first_line, boundary])
|
||||
|
||||
|
||||
def expand_escaped(match: Match[str], s: str) -> str:
|
||||
@@ -151,4 +80,4 @@ def expand_escaped(match: Match[str], s: str) -> str:
|
||||
|
||||
make_reg = functools.lru_cache(maxsize=None)(_Reg)
|
||||
make_regset = functools.lru_cache(maxsize=None)(_RegSet)
|
||||
ERR_REG = make_reg(')this pattern always triggers an error when used(')
|
||||
ERR_REG = make_reg('$ ^')
|
||||
|
||||
@@ -24,7 +24,7 @@ packages = find:
|
||||
install_requires =
|
||||
babi-grammars
|
||||
identify
|
||||
onigurumacffi>=0.0.10
|
||||
onigurumacffi>=0.0.18
|
||||
importlib_metadata>=1;python_version<"3.8"
|
||||
windows-curses;sys_platform=="win32"
|
||||
python_requires = >=3.6.1
|
||||
|
||||
@@ -35,9 +35,8 @@ def test_reg_other_escapes_left_untouched():
|
||||
def test_reg_not_out_of_bounds_at_end():
|
||||
# the only way this is triggerable is with an illegal regex, we'd rather
|
||||
# produce an error about the regex being wrong than an IndexError
|
||||
reg = _Reg('\\A\\')
|
||||
with pytest.raises(onigurumacffi.OnigError) as excinfo:
|
||||
reg.search('\\', 0, first_line=False, boundary=False)
|
||||
_Reg('\\A\\')
|
||||
msg, = excinfo.value.args
|
||||
assert msg == 'end pattern at escape'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user