Compare commits
9 Commits
| SHA1 |
|---|
| 41880d5f8c |
| effe988f60 |
| 84b20a4016 |
| 5d2c9532a3 |
| 33ff8d9726 |
| f0b2af9a9f |
| fc21a144aa |
| 973b4c3cf8 |
| bd60977438 |
@@ -12,7 +12,7 @@ a text editor, eventually...
 
 ### why is it called babi?
 
-I usually use the text editor `nano`, frequently I typo this. on a qwerty
+I used to use the text editor `nano`, frequently I typo this. on a qwerty
 keyboard, when the right hand is shifted left by one, `nano` becomes `babi`.
 
 ### quitting babi
@@ -273,6 +273,7 @@ class CompiledRegsetRule(CompiledRule, Protocol):
 class Entry(NamedTuple):
     scope: Tuple[str, ...]
     rule: CompiledRule
+    start: Tuple[str, int]
     reg: _Reg = ERR_REG
     boundary: bool = False
 
@@ -284,7 +285,7 @@ def _inner_capture_parse(
         scope: Scope,
         rule: CompiledRule,
 ) -> Regions:
-    state = State.root(Entry(scope + rule.name, rule))
+    state = State.root(Entry(scope + rule.name, rule, (s, 0)))
     _, regions = highlight_line(compiler, state, s, first_line=False)
     return tuple(
         r._replace(start=r.start + start, end=r.end + start) for r in regions
@@ -440,7 +441,8 @@ class EndRule(NamedTuple):
 
         boundary = match.end() == len(match.string)
         reg = make_reg(expand_escaped(match, self.end))
-        state = state.push(Entry(next_scope, self, reg, boundary))
+        start = (match.string, match.start())
+        state = state.push(Entry(next_scope, self, start, reg, boundary))
         regions = _captures(compiler, scope, match, self.begin_captures)
         return state, True, regions
 
@@ -455,7 +457,16 @@ class EndRule(NamedTuple):
         if m.start() > pos:
             ret.append(Region(pos, m.start(), state.cur.scope))
         ret.extend(_captures(compiler, state.cur.scope, m, self.end_captures))
-        return state.pop(), m.end(), False, tuple(ret)
+        # this is probably a bug in the grammar, but it pushed and popped at
+        # the same position.
+        # we'll advance the highlighter by one position to get past the loop
+        # this appears to be what vs code does as well
+        if state.entries[-1].start == (m.string, m.end()):
+            ret.append(Region(m.end(), m.end() + 1, state.cur.scope))
+            end = m.end() + 1
+        else:
+            end = m.end()
+        return state.pop(), end, False, tuple(ret)
 
     def search(
             self,
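The guard added above relies on the new `start` field on `Entry`: when a begin/end pair both match zero-width at the same offset, the rule would otherwise be pushed and popped at that offset forever without consuming any input. Here is a minimal, self-contained sketch of the idea; the `PushedAt` tuple and `next_position` helper are illustrative stand-ins, not babi's actual `Entry`/`State` API:

```python
from typing import NamedTuple, Tuple


class PushedAt(NamedTuple):
    # mirrors the new Entry.start field: the (line, offset) at which the
    # rule was pushed onto the state stack
    start: Tuple[str, int]


def next_position(pushed: PushedAt, line: str, match_end: int) -> int:
    """Where highlighting should resume after popping this rule."""
    if pushed.start == (line, match_end):
        # pushed and popped at the exact same position, e.g. a grammar with
        # begin=(?=</style) and end=(?=</style): without this bump the
        # highlighter would loop on the same offset forever
        return match_end + 1
    return match_end


line = 'test </style'
# the zero-width begin/end pair steps forward by one character...
assert next_position(PushedAt((line, 5)), line, 5) == 6
# ...while a normal pop resumes exactly at the end of the end-match
assert next_position(PushedAt((line, 5)), line, 12) == 12
```

The `test_buggy_begin_end_grammar` test further down in this compare exercises exactly this case.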
@@ -501,7 +512,9 @@ class WhileRule(NamedTuple):
 
         boundary = match.end() == len(match.string)
         reg = make_reg(expand_escaped(match, self.while_))
-        state = state.push_while(self, Entry(next_scope, self, reg, boundary))
+        start = (match.string, match.start())
+        entry = Entry(next_scope, self, start, reg, boundary)
+        state = state.push_while(self, entry)
         regions = _captures(compiler, scope, match, self.begin_captures)
         return state, True, regions
 
@@ -541,7 +554,7 @@ class Compiler:
         self._rule_to_grammar: Dict[_Rule, Grammar] = {}
         self._c_rules: Dict[_Rule, CompiledRule] = {}
         root = self._compile_root(grammar)
-        self.root_state = State.root(Entry(root.name, root))
+        self.root_state = State.root(Entry(root.name, root, ('', 0)))
 
     def _visit_rule(self, grammar: Grammar, rule: _Rule) -> _Rule:
         self._rule_to_grammar[rule] = grammar
@@ -2,6 +2,7 @@ import argparse
 import curses
 import os
 import re
+import signal
 import sys
 from typing import List
 from typing import Optional
@@ -138,6 +139,11 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
     else:
         stdin = ''
 
+    # ignore backgrounding signals, we'll handle those in curses
+    # fixes a problem with ^Z on termination which would break the terminal
+    if sys.platform != 'win32':  # pragma: win32 no cover # pragma: no branch
+        signal.signal(signal.SIGTSTP, signal.SIG_IGN)
+
     with perf_log(args.perf_log) as perf, make_stdscr() as stdscr:
         if args.key_debug:
             return _key_debug(stdscr, perf)
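The new `SIGTSTP` handling is installed before `make_stdscr()` enters curses mode: per the comment, a ^Z delivered around termination could otherwise leave the terminal in a broken state. A standalone sketch of the same pattern, using the standard-library `curses.wrapper` instead of babi's own `make_stdscr` helper:

```python
import curses
import signal
import sys


def main() -> int:
    # ignore ^Z before any curses setup so a suspend signal cannot fire
    # while the terminal is in curses' raw mode
    if sys.platform != 'win32':
        signal.signal(signal.SIGTSTP, signal.SIG_IGN)

    def ui(stdscr):
        stdscr.addstr(0, 0, 'SIGTSTP (^Z) is ignored; press any key to exit')
        stdscr.getkey()

    # curses.wrapper takes care of initscr()/endwin() and restores the
    # terminal on the way out
    curses.wrapper(ui)
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
```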
babi/reg.py
@@ -6,80 +6,36 @@ from typing import Tuple
 
 import onigurumacffi
 
-from babi.cached_property import cached_property
-
 _BACKREF_RE = re.compile(r'((?<!\\)(?:\\\\)*)\\([0-9]+)')
 
 
-def _replace_esc(s: str, chars: str) -> str:
-    """replace the given escape sequences of `chars` with \\uffff"""
-    for c in chars:
-        if f'\\{c}' in s:
-            break
-    else:
-        return s
-
-    b = []
-    i = 0
-    length = len(s)
-    while i < length:
-        try:
-            sbi = s.index('\\', i)
-        except ValueError:
-            b.append(s[i:])
-            break
-        if sbi > i:
-            b.append(s[i:sbi])
-        b.append('\\')
-        i = sbi + 1
-        if i < length:
-            if s[i] in chars:
-                b.append('\uffff')
-            else:
-                b.append(s[i])
-            i += 1
-    return ''.join(b)
+_FLAGS = {
+    # (first_line, boundary)
+    (False, False): (
+        onigurumacffi.OnigSearchOption.NOT_END_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
+    ),
+    (False, True): (
+        onigurumacffi.OnigSearchOption.NOT_END_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING
+    ),
+    (True, False): (
+        onigurumacffi.OnigSearchOption.NOT_END_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
+    ),
+    (True, True): onigurumacffi.OnigSearchOption.NOT_END_STRING,
+}
 
 
 class _Reg:
     def __init__(self, s: str) -> None:
         self._pattern = s
+        self._reg = onigurumacffi.compile(self._pattern)
 
     def __repr__(self) -> str:
         return f'{type(self).__name__}({self._pattern!r})'
 
-    @cached_property
-    def _reg(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'z'))
-
-    @cached_property
-    def _reg_no_A(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'Az'))
-
-    @cached_property
-    def _reg_no_G(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'Gz'))
-
-    @cached_property
-    def _reg_no_A_no_G(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'AGz'))
-
-    def _get_reg(
-            self,
-            first_line: bool,
-            boundary: bool,
-    ) -> onigurumacffi._Pattern:
-        if boundary:
-            if first_line:
-                return self._reg
-            else:
-                return self._reg_no_A
-        else:
-            if first_line:
-                return self._reg_no_G
-            else:
-                return self._reg_no_A_no_G
-
     def search(
             self,
             line: str,
@@ -87,7 +43,7 @@ class _Reg:
             first_line: bool,
             boundary: bool,
     ) -> Optional[Match[str]]:
-        return self._get_reg(first_line, boundary).search(line, pos)
+        return self._reg.search(line, pos, flags=_FLAGS[first_line, boundary])
 
     def match(
             self,
@@ -96,36 +52,18 @@ class _Reg:
             first_line: bool,
             boundary: bool,
    ) -> Optional[Match[str]]:
-        return self._get_reg(first_line, boundary).match(line, pos)
+        return self._reg.match(line, pos, flags=_FLAGS[first_line, boundary])
 
 
 class _RegSet:
     def __init__(self, *s: str) -> None:
         self._patterns = s
+        self._set = onigurumacffi.compile_regset(*self._patterns)
 
     def __repr__(self) -> str:
         args = ', '.join(repr(s) for s in self._patterns)
         return f'{type(self).__name__}({args})'
 
-    @cached_property
-    def _set(self) -> onigurumacffi._RegSet:
-        return onigurumacffi.compile_regset(*self._patterns)
-
-    @cached_property
-    def _set_no_A(self) -> onigurumacffi._RegSet:
-        patterns = (_replace_esc(p, 'A') for p in self._patterns)
-        return onigurumacffi.compile_regset(*patterns)
-
-    @cached_property
-    def _set_no_G(self) -> onigurumacffi._RegSet:
-        patterns = (_replace_esc(p, 'G') for p in self._patterns)
-        return onigurumacffi.compile_regset(*patterns)
-
-    @cached_property
-    def _set_no_A_no_G(self) -> onigurumacffi._RegSet:
-        patterns = (_replace_esc(p, 'AG') for p in self._patterns)
-        return onigurumacffi.compile_regset(*patterns)
-
     def search(
             self,
             line: str,
@@ -133,16 +71,7 @@ class _RegSet:
             first_line: bool,
             boundary: bool,
     ) -> Tuple[int, Optional[Match[str]]]:
-        if boundary:
-            if first_line:
-                return self._set.search(line, pos)
-            else:
-                return self._set_no_A.search(line, pos)
-        else:
-            if first_line:
-                return self._set_no_G.search(line, pos)
-            else:
-                return self._set_no_A_no_G.search(line, pos)
+        return self._set.search(line, pos, flags=_FLAGS[first_line, boundary])
 
 
 def expand_escaped(match: Match[str], s: str) -> str:
@@ -151,4 +80,4 @@ def expand_escaped(match: Match[str], s: str) -> str:
 
 make_reg = functools.lru_cache(maxsize=None)(_Reg)
 make_regset = functools.lru_cache(maxsize=None)(_RegSet)
-ERR_REG = make_reg(')this pattern always triggers an error when used(')
+ERR_REG = make_reg('$ ^')
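The babi/reg.py rewrite replaces pattern munging with search-time options: rather than compiling up to four variants of every pattern with `\A`/`\G` rewritten away by `_replace_esc`, a single compiled pattern is reused and the `(first_line, boundary)` pair selects an entry from `_FLAGS`. A rough demonstration of what those options do, assuming onigurumacffi>=0.0.18 (the version required by the setup.cfg change below):

```python
import onigurumacffi

reg = onigurumacffi.compile(r'\Aimport')

# on the first line of a file, \A is allowed to match at position 0
assert reg.search('import os', 0) is not None

# for any later line, NOT_BEGIN_STRING tells the engine that position 0 is
# not the beginning of the string, so \A cannot match -- previously this
# required rewriting the pattern text itself
flags = onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING
assert reg.search('import os', 0, flags=flags) is None

# the new ERR_REG pattern compiles cleanly but can never match: it demands a
# line end immediately followed by a space and then a line start
assert onigurumacffi.compile('$ ^').search('anything', 0) is None
```

Since `_Reg` now compiles its pattern eagerly in `__init__`, `ERR_REG` can no longer be an intentionally invalid pattern; `'$ ^'` compiles fine but never matches, which serves the same purpose.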
@@ -1,6 +1,6 @@
 [metadata]
 name = babi
-version = 0.0.11
+version = 0.0.13
 description = a text editor
 long_description = file: README.md
 long_description_content_type = text/markdown
@@ -24,7 +24,7 @@ packages = find:
 install_requires =
     babi-grammars
     identify
-    onigurumacffi>=0.0.10
+    onigurumacffi>=0.0.18
     importlib_metadata>=1;python_version<"3.8"
     windows-curses;sys_platform=="win32"
 python_requires = >=3.6.1
@@ -637,3 +637,25 @@ def test_backslash_z(compiler_state):
     assert regions2 == (
         Region(0, 6, ('test', 'comment')),
     )
+
+
+def test_buggy_begin_end_grammar(compiler_state):
+    # before this would result in an infinite loop of start / end
+    compiler, state = compiler_state({
+        'scopeName': 'test',
+        'patterns': [
+            {
+                'begin': '(?=</style)',
+                'end': '(?=</style)',
+                'name': 'css',
+            },
+        ],
+    })
+
+    state, regions = highlight_line(compiler, state, 'test </style', True)
+
+    assert regions == (
+        Region(0, 5, ('test',)),
+        Region(5, 6, ('test', 'css')),
+        Region(6, 12, ('test',)),
+    )
@@ -35,9 +35,8 @@ def test_reg_other_escapes_left_untouched():
 def test_reg_not_out_of_bounds_at_end():
     # the only way this is triggerable is with an illegal regex, we'd rather
     # produce an error about the regex being wrong than an IndexError
-    reg = _Reg('\\A\\')
     with pytest.raises(onigurumacffi.OnigError) as excinfo:
-        reg.search('\\', 0, first_line=False, boundary=False)
+        _Reg('\\A\\')
     msg, = excinfo.value.args
     assert msg == 'end pattern at escape'