Compare commits
9 Commits
| SHA1 |
|---|
| 41880d5f8c |
| effe988f60 |
| 84b20a4016 |
| 5d2c9532a3 |
| 33ff8d9726 |
| f0b2af9a9f |
| fc21a144aa |
| 973b4c3cf8 |
| bd60977438 |
@@ -12,7 +12,7 @@ a text editor, eventually...
 
 ### why is it called babi?
 
-I usually use the text editor `nano`, frequently I typo this. on a qwerty
+I used to use the text editor `nano`, frequently I typo this. on a qwerty
 keyboard, when the right hand is shifted left by one, `nano` becomes `babi`.
 
 ### quitting babi
@@ -273,6 +273,7 @@ class CompiledRegsetRule(CompiledRule, Protocol):
 class Entry(NamedTuple):
     scope: Tuple[str, ...]
     rule: CompiledRule
+    start: Tuple[str, int]
     reg: _Reg = ERR_REG
     boundary: bool = False
 
@@ -284,7 +285,7 @@ def _inner_capture_parse(
         scope: Scope,
         rule: CompiledRule,
 ) -> Regions:
-    state = State.root(Entry(scope + rule.name, rule))
+    state = State.root(Entry(scope + rule.name, rule, (s, 0)))
     _, regions = highlight_line(compiler, state, s, first_line=False)
     return tuple(
         r._replace(start=r.start + start, end=r.end + start) for r in regions
@@ -440,7 +441,8 @@ class EndRule(NamedTuple):
 
         boundary = match.end() == len(match.string)
         reg = make_reg(expand_escaped(match, self.end))
-        state = state.push(Entry(next_scope, self, reg, boundary))
+        start = (match.string, match.start())
+        state = state.push(Entry(next_scope, self, start, reg, boundary))
         regions = _captures(compiler, scope, match, self.begin_captures)
         return state, True, regions
 
@@ -455,7 +457,16 @@ class EndRule(NamedTuple):
         if m.start() > pos:
             ret.append(Region(pos, m.start(), state.cur.scope))
         ret.extend(_captures(compiler, state.cur.scope, m, self.end_captures))
-        return state.pop(), m.end(), False, tuple(ret)
+        # this is probably a bug in the grammar, but it pushed and popped at
+        # the same position.
+        # we'll advance the highlighter by one position to get past the loop
+        # this appears to be what vs code does as well
+        if state.entries[-1].start == (m.string, m.end()):
+            ret.append(Region(m.end(), m.end() + 1, state.cur.scope))
+            end = m.end() + 1
+        else:
+            end = m.end()
+        return state.pop(), end, False, tuple(ret)
 
     def search(
             self,
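The guard added above relies on the new `start` field on `Entry`: when a begin/end pair both match zero-width at the same offset, the rule would otherwise be pushed and popped at that offset forever without consuming any input. Here is a minimal, self-contained sketch of the idea; the `PushedAt` tuple and `next_position` helper are illustrative stand-ins, not babi's actual `Entry`/`State` API:

```python
from typing import NamedTuple, Tuple


class PushedAt(NamedTuple):
    # mirrors the new Entry.start field: the (line, offset) at which the
    # rule was pushed onto the state stack
    start: Tuple[str, int]


def next_position(pushed: PushedAt, line: str, match_end: int) -> int:
    """Where highlighting should resume after popping this rule."""
    if pushed.start == (line, match_end):
        # pushed and popped at the exact same position, e.g. a grammar with
        # begin=(?=</style) and end=(?=</style): without this bump the
        # highlighter would loop on the same offset forever
        return match_end + 1
    return match_end


line = 'test </style'
# the zero-width begin/end pair steps forward by one character...
assert next_position(PushedAt((line, 5)), line, 5) == 6
# ...while a normal pop resumes exactly at the end of the end-match
assert next_position(PushedAt((line, 5)), line, 12) == 12
```

The `test_buggy_begin_end_grammar` test further down in this compare exercises exactly this case.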
@@ -501,7 +512,9 @@ class WhileRule(NamedTuple):
 
         boundary = match.end() == len(match.string)
         reg = make_reg(expand_escaped(match, self.while_))
-        state = state.push_while(self, Entry(next_scope, self, reg, boundary))
+        start = (match.string, match.start())
+        entry = Entry(next_scope, self, start, reg, boundary)
+        state = state.push_while(self, entry)
         regions = _captures(compiler, scope, match, self.begin_captures)
         return state, True, regions
 
@@ -541,7 +554,7 @@ class Compiler:
         self._rule_to_grammar: Dict[_Rule, Grammar] = {}
         self._c_rules: Dict[_Rule, CompiledRule] = {}
         root = self._compile_root(grammar)
-        self.root_state = State.root(Entry(root.name, root))
+        self.root_state = State.root(Entry(root.name, root, ('', 0)))
 
     def _visit_rule(self, grammar: Grammar, rule: _Rule) -> _Rule:
         self._rule_to_grammar[rule] = grammar
@@ -2,6 +2,7 @@ import argparse
 import curses
 import os
 import re
+import signal
 import sys
 from typing import List
 from typing import Optional
@@ -138,6 +139,11 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
     else:
         stdin = ''
 
+    # ignore backgrounding signals, we'll handle those in curses
+    # fixes a problem with ^Z on termination which would break the terminal
+    if sys.platform != 'win32':  # pragma: win32 no cover # pragma: no branch
+        signal.signal(signal.SIGTSTP, signal.SIG_IGN)
+
     with perf_log(args.perf_log) as perf, make_stdscr() as stdscr:
         if args.key_debug:
             return _key_debug(stdscr, perf)
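The new `SIGTSTP` handling is installed before `make_stdscr()` enters curses mode: per the comment, a ^Z delivered around termination could otherwise leave the terminal in a broken state. A standalone sketch of the same pattern, using the standard-library `curses.wrapper` instead of babi's own `make_stdscr` helper:

```python
import curses
import signal
import sys


def main() -> int:
    # ignore ^Z before any curses setup so a suspend signal cannot fire
    # while the terminal is in curses' raw mode
    if sys.platform != 'win32':
        signal.signal(signal.SIGTSTP, signal.SIG_IGN)

    def ui(stdscr):
        stdscr.addstr(0, 0, 'SIGTSTP (^Z) is ignored; press any key to exit')
        stdscr.getkey()

    # curses.wrapper takes care of initscr()/endwin() and restores the
    # terminal on the way out
    curses.wrapper(ui)
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
```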
babi/reg.py
@@ -6,80 +6,36 @@ from typing import Tuple
 
 import onigurumacffi
 
-from babi.cached_property import cached_property
-
 _BACKREF_RE = re.compile(r'((?<!\\)(?:\\\\)*)\\([0-9]+)')
 
 
-def _replace_esc(s: str, chars: str) -> str:
-    """replace the given escape sequences of `chars` with \\uffff"""
-    for c in chars:
-        if f'\\{c}' in s:
-            break
-    else:
-        return s
-
-    b = []
-    i = 0
-    length = len(s)
-    while i < length:
-        try:
-            sbi = s.index('\\', i)
-        except ValueError:
-            b.append(s[i:])
-            break
-        if sbi > i:
-            b.append(s[i:sbi])
-        b.append('\\')
-        i = sbi + 1
-        if i < length:
-            if s[i] in chars:
-                b.append('\uffff')
-            else:
-                b.append(s[i])
-            i += 1
-    return ''.join(b)
+_FLAGS = {
+    # (first_line, boundary)
+    (False, False): (
+        onigurumacffi.OnigSearchOption.NOT_END_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
+    ),
+    (False, True): (
+        onigurumacffi.OnigSearchOption.NOT_END_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING
+    ),
+    (True, False): (
+        onigurumacffi.OnigSearchOption.NOT_END_STRING |
+        onigurumacffi.OnigSearchOption.NOT_BEGIN_POSITION
+    ),
+    (True, True): onigurumacffi.OnigSearchOption.NOT_END_STRING,
+}
 
 
 class _Reg:
     def __init__(self, s: str) -> None:
         self._pattern = s
+        self._reg = onigurumacffi.compile(self._pattern)
 
     def __repr__(self) -> str:
         return f'{type(self).__name__}({self._pattern!r})'
 
-    @cached_property
-    def _reg(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'z'))
-
-    @cached_property
-    def _reg_no_A(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'Az'))
-
-    @cached_property
-    def _reg_no_G(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'Gz'))
-
-    @cached_property
-    def _reg_no_A_no_G(self) -> onigurumacffi._Pattern:
-        return onigurumacffi.compile(_replace_esc(self._pattern, 'AGz'))
-
-    def _get_reg(
-            self,
-            first_line: bool,
-            boundary: bool,
-    ) -> onigurumacffi._Pattern:
-        if boundary:
-            if first_line:
-                return self._reg
-            else:
-                return self._reg_no_A
-        else:
-            if first_line:
-                return self._reg_no_G
-            else:
-                return self._reg_no_A_no_G
-
     def search(
             self,
             line: str,
@@ -87,7 +43,7 @@ class _Reg:
             first_line: bool,
             boundary: bool,
     ) -> Optional[Match[str]]:
-        return self._get_reg(first_line, boundary).search(line, pos)
+        return self._reg.search(line, pos, flags=_FLAGS[first_line, boundary])
 
     def match(
             self,
@@ -96,36 +52,18 @@ class _Reg:
             first_line: bool,
             boundary: bool,
    ) -> Optional[Match[str]]:
-        return self._get_reg(first_line, boundary).match(line, pos)
+        return self._reg.match(line, pos, flags=_FLAGS[first_line, boundary])
 
 
 class _RegSet:
     def __init__(self, *s: str) -> None:
         self._patterns = s
+        self._set = onigurumacffi.compile_regset(*self._patterns)
 
     def __repr__(self) -> str:
         args = ', '.join(repr(s) for s in self._patterns)
         return f'{type(self).__name__}({args})'
 
-    @cached_property
-    def _set(self) -> onigurumacffi._RegSet:
-        return onigurumacffi.compile_regset(*self._patterns)
-
-    @cached_property
-    def _set_no_A(self) -> onigurumacffi._RegSet:
-        patterns = (_replace_esc(p, 'A') for p in self._patterns)
-        return onigurumacffi.compile_regset(*patterns)
-
-    @cached_property
-    def _set_no_G(self) -> onigurumacffi._RegSet:
-        patterns = (_replace_esc(p, 'G') for p in self._patterns)
-        return onigurumacffi.compile_regset(*patterns)
-
-    @cached_property
-    def _set_no_A_no_G(self) -> onigurumacffi._RegSet:
-        patterns = (_replace_esc(p, 'AG') for p in self._patterns)
-        return onigurumacffi.compile_regset(*patterns)
-
     def search(
             self,
             line: str,
@@ -133,16 +71,7 @@ class _RegSet:
             first_line: bool,
             boundary: bool,
     ) -> Tuple[int, Optional[Match[str]]]:
-        if boundary:
-            if first_line:
-                return self._set.search(line, pos)
-            else:
-                return self._set_no_A.search(line, pos)
-        else:
-            if first_line:
-                return self._set_no_G.search(line, pos)
-            else:
-                return self._set_no_A_no_G.search(line, pos)
+        return self._set.search(line, pos, flags=_FLAGS[first_line, boundary])
 
 
 def expand_escaped(match: Match[str], s: str) -> str:
@@ -151,4 +80,4 @@ def expand_escaped(match: Match[str], s: str) -> str:
 
 make_reg = functools.lru_cache(maxsize=None)(_Reg)
 make_regset = functools.lru_cache(maxsize=None)(_RegSet)
-ERR_REG = make_reg(')this pattern always triggers an error when used(')
+ERR_REG = make_reg('$ ^')
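The babi/reg.py rewrite replaces pattern munging with search-time options: rather than compiling up to four variants of every pattern with `\A`/`\G` rewritten away by `_replace_esc`, a single compiled pattern is reused and the `(first_line, boundary)` pair selects an entry from `_FLAGS`. A rough demonstration of what those options do, assuming onigurumacffi>=0.0.18 (the version required by the setup.cfg change below):

```python
import onigurumacffi

reg = onigurumacffi.compile(r'\Aimport')

# on the first line of a file, \A is allowed to match at position 0
assert reg.search('import os', 0) is not None

# for any later line, NOT_BEGIN_STRING tells the engine that position 0 is
# not the beginning of the string, so \A cannot match -- previously this
# required rewriting the pattern text itself
flags = onigurumacffi.OnigSearchOption.NOT_BEGIN_STRING
assert reg.search('import os', 0, flags=flags) is None

# the new ERR_REG pattern compiles cleanly but can never match: it demands a
# line end immediately followed by a space and then a line start
assert onigurumacffi.compile('$ ^').search('anything', 0) is None
```

Since `_Reg` now compiles its pattern eagerly in `__init__`, `ERR_REG` can no longer be an intentionally invalid pattern; `'$ ^'` compiles fine but never matches, which serves the same purpose.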
@@ -1,6 +1,6 @@
 [metadata]
 name = babi
-version = 0.0.11
+version = 0.0.13
 description = a text editor
 long_description = file: README.md
 long_description_content_type = text/markdown
@@ -24,7 +24,7 @@ packages = find:
 install_requires =
     babi-grammars
     identify
-    onigurumacffi>=0.0.10
+    onigurumacffi>=0.0.18
     importlib_metadata>=1;python_version<"3.8"
     windows-curses;sys_platform=="win32"
 python_requires = >=3.6.1
@@ -637,3 +637,25 @@ def test_backslash_z(compiler_state):
     assert regions2 == (
         Region(0, 6, ('test', 'comment')),
     )
+
+
+def test_buggy_begin_end_grammar(compiler_state):
+    # before this would result in an infinite loop of start / end
+    compiler, state = compiler_state({
+        'scopeName': 'test',
+        'patterns': [
+            {
+                'begin': '(?=</style)',
+                'end': '(?=</style)',
+                'name': 'css',
+            },
+        ],
+    })
+
+    state, regions = highlight_line(compiler, state, 'test </style', True)
+
+    assert regions == (
+        Region(0, 5, ('test',)),
+        Region(5, 6, ('test', 'css')),
+        Region(6, 12, ('test',)),
+    )
@@ -35,9 +35,8 @@ def test_reg_other_escapes_left_untouched():
 def test_reg_not_out_of_bounds_at_end():
     # the only way this is triggerable is with an illegal regex, we'd rather
     # produce an error about the regex being wrong than an IndexError
-    reg = _Reg('\\A\\')
     with pytest.raises(onigurumacffi.OnigError) as excinfo:
-        reg.search('\\', 0, first_line=False, boundary=False)
+        _Reg('\\A\\')
     msg, = excinfo.value.args
     assert msg == 'end pattern at escape'