switch to babi-grammars for syntax

This commit is contained in:
Anthony Sottile
2020-03-26 19:43:01 -07:00
parent 2123e6ee84
commit 9f8e400d32
11 changed files with 145 additions and 183 deletions

View File

@@ -63,9 +63,7 @@ in prompts (search, search replace, command):
the syntax highlighting setup is a bit manual right now
1. from a clone of babi, run `./bin/download-syntax` -- you will likely need
to install some additional packages to download them (`pip install cson`)
2. find a visual studio code theme, convert it to json (if it is not already
1. find a visual studio code theme, convert it to json (if it is not already
json) and put it at `~/.config/babi/theme.json`. a helper script is
provided to make this easier: `./bin/download-theme NAME URL`

View File

@@ -3,11 +3,11 @@ import json
import os.path
from typing import Any
from typing import Dict
from typing import FrozenSet
from typing import List
from typing import Match
from typing import NamedTuple
from typing import Optional
from typing import Sequence
from typing import Tuple
from typing import TypeVar
@@ -628,27 +628,39 @@ class Compiler:
class Grammars:
def __init__(self, grammars: Sequence[Dict[str, Any]]) -> None:
self._raw = {grammar['scopeName']: grammar for grammar in grammars}
self._find_scope = [
(
frozenset(grammar.get('fileTypes', ())),
make_reg(grammar.get('firstLineMatch', '$impossible^')),
grammar['scopeName'],
)
for grammar in grammars
]
self._parsed: Dict[str, Grammar] = {}
self._compilers: Dict[str, Compiler] = {}
def __init__(self, *directories: str) -> None:
self._scope_to_files = {
os.path.splitext(filename)[0]: os.path.join(directory, filename)
for directory in directories
if os.path.exists(directory)
for filename in os.listdir(directory)
if filename.endswith('.json')
}
@classmethod
def from_syntax_dir(cls, syntax_dir: str) -> 'Grammars':
grammars = [{'scopeName': 'source.unknown', 'patterns': []}]
if os.path.exists(syntax_dir):
for filename in os.listdir(syntax_dir):
with open(os.path.join(syntax_dir, filename)) as f:
grammars.append(json.load(f))
return cls(grammars)
unknown_grammar = {'scopeName': 'source.unknown', 'patterns': []}
self._raw = {'source.unknown': unknown_grammar}
self._file_types: List[Tuple[FrozenSet[str], str]] = []
self._first_line: List[Tuple[_Reg, str]] = []
self._parsed: Dict[str, Grammar] = {}
self._compiled: Dict[str, Compiler] = {}
def _raw_for_scope(self, scope: str) -> Dict[str, Any]:
    """Return the raw grammar dict for ``scope``, loading it on first use.

    Already-loaded grammars are served from ``self._raw``.  Otherwise the
    grammar's json file is looked up in ``self._scope_to_files``, parsed,
    cached, and its ``fileTypes`` / ``firstLineMatch`` entries are
    registered in ``self._file_types`` / ``self._first_line``.

    Raises:
        KeyError: ``scope`` is neither cached nor backed by a grammar file
            (``compiler_for_file`` catches this while probing scopes).
    """
    try:
        return self._raw[scope]
    except KeyError:
        pass

    # pop (not index): each grammar file is read at most once, and what
    # remains in the mapping is exactly the set of not-yet-loaded grammars
    grammar_path = self._scope_to_files.pop(scope)
    # be explicit about the encoding: the default depends on the locale and
    # can fail on grammars containing non-ascii characters
    with open(grammar_path, encoding='UTF-8') as f:
        ret = self._raw[scope] = json.load(f)

    file_types = frozenset(ret.get('fileTypes', ()))
    # grammars without `firstLineMatch` get a pattern that can never match
    first_line = make_reg(ret.get('firstLineMatch', '$impossible^'))

    self._file_types.append((file_types, scope))
    self._first_line.append((first_line, scope))

    return ret
def grammar_for_scope(self, scope: str) -> Grammar:
try:
@@ -656,17 +668,18 @@ class Grammars:
except KeyError:
pass
ret = self._parsed[scope] = Grammar.from_data(self._raw[scope])
raw = self._raw_for_scope(scope)
ret = self._parsed[scope] = Grammar.from_data(raw)
return ret
def compiler_for_scope(self, scope: str) -> Compiler:
try:
return self._compilers[scope]
return self._compiled[scope]
except KeyError:
pass
grammar = self.grammar_for_scope(scope)
ret = self._compilers[scope] = Compiler(grammar, self)
ret = self._compiled[scope] = Compiler(grammar, self)
return ret
def blank_compiler(self) -> Compiler:
@@ -675,20 +688,24 @@ class Grammars:
def compiler_for_file(self, filename: str, first_line: str) -> Compiler:
for tag in tags_from_filename(filename) - {'text'}:
try:
# TODO: this doesn't always match even if we detect it
return self.compiler_for_scope(f'source.{tag}')
except KeyError:
pass
# didn't find it in the fast path, need to read all the json
for k in tuple(self._scope_to_files):
self._raw_for_scope(k)
_, _, ext = os.path.basename(filename).rpartition('.')
for extensions, first_line_match, scope_name in self._find_scope:
if (
ext in extensions or
first_line_match.match(
first_line, 0, first_line=True, boundary=True,
)
):
return self.compiler_for_scope(scope_name)
else:
for extensions, scope in self._file_types:
if ext in extensions:
return self.compiler_for_scope(scope)
for reg, scope in self._first_line:
if reg.match(first_line, 0, first_line=True, boundary=True):
return self.compiler_for_scope(scope)
return self.compiler_for_scope('source.unknown')

View File

@@ -17,6 +17,7 @@ from babi.hl.interface import HLs
from babi.list_spy import SequenceNoSlice
from babi.theme import Style
from babi.theme import Theme
from babi.user_data import prefix_data
from babi.user_data import xdg_config
from babi.user_data import xdg_data
@@ -144,7 +145,7 @@ class Syntax(NamedTuple):
stdscr: 'curses._CursesWindow',
color_manager: ColorManager,
) -> 'Syntax':
grammars = Grammars.from_syntax_dir(xdg_data('textmate_syntax'))
grammars = Grammars(prefix_data('grammar_v1'), xdg_data('grammar_v1'))
theme = Theme.from_filename(xdg_config('theme.json'))
ret = cls(grammars, theme, color_manager)
ret._init_screen(stdscr)

View File

@@ -1,4 +1,5 @@
import os.path
import sys
def _xdg(*path: str, env: str, default: str) -> str:
@@ -14,3 +15,7 @@ def xdg_data(*path: str) -> str:
def xdg_config(*path: str) -> str:
    """Resolve ``path`` via ``_xdg`` using the config-directory settings.

    The lookup is keyed on the ``XDG_CONFIG_HOME`` environment variable with
    ``~/.config`` as the default.
    """
    settings = {'env': 'XDG_CONFIG_HOME', 'default': '~/.config'}
    return _xdg(*path, **settings)
def prefix_data(*path: str) -> str:
    """Return a path below ``share/babi`` inside ``sys.prefix``.

    The given ``path`` components are joined onto that directory; with no
    components the directory itself is returned.
    """
    data_root = os.path.join(sys.prefix, 'share/babi')
    return os.path.join(data_root, *path)

View File

@@ -1,86 +0,0 @@
#!/usr/bin/env python3
import argparse
import enum
import json
import os.path
import plistlib
import urllib.request
from typing import NamedTuple
import cson # pip install cson
DEFAULT_DIR = os.path.join(
os.environ.get('XDG_DATA_HOME') or
os.path.expanduser('~/.local/share'),
'babi/textmate_syntax',
)
Ext = enum.Enum('Ext', 'CSON PLIST JSON')
def _convert_cson(src: bytes) -> str:
    """Parse ``src`` as CSON and return the result serialized as JSON."""
    decoded = cson.loads(src)
    return json.dumps(decoded)
def _convert_json(src: bytes) -> str:
return json.dumps(json.loads(src))
def _convert_plist(src: bytes) -> str:
return json.dumps(plistlib.loads(src))
EXT_CONVERT = {
Ext.CSON: _convert_cson,
Ext.JSON: _convert_json,
Ext.PLIST: _convert_plist,
}
class Syntax(NamedTuple):
    """One downloadable grammar: where to fetch it and how to convert it."""
    # short name; `main` writes the converted grammar to `<name>.json`
    name: str
    # source format; selects the converter looked up in `EXT_CONVERT`
    ext: Ext
    # location the grammar source is downloaded from
    url: str
SYNTAXES = (
Syntax('c', Ext.JSON, 'https://raw.githubusercontent.com/jeff-hykin/cpp-textmate-grammar/53e39b1c/syntaxes/c.tmLanguage.json'), # noqa: E501
Syntax('css', Ext.CSON, 'https://raw.githubusercontent.com/atom/language-css/9feb69c081308b63f78bb0d6a2af2ff5eb7d869b/grammars/css.cson'), # noqa: E501
Syntax('docker', Ext.PLIST, 'https://raw.githubusercontent.com/moby/moby/c7ad2b866/contrib/syntax/textmate/Docker.tmbundle/Syntaxes/Dockerfile.tmLanguage'), # noqa: E501
Syntax('diff', Ext.PLIST, 'https://raw.githubusercontent.com/textmate/diff.tmbundle/0593bb77/Syntaxes/Diff.plist'), # noqa: E501
Syntax('html', Ext.PLIST, 'https://raw.githubusercontent.com/textmate/html.tmbundle/0c3d5ee5/Syntaxes/HTML.plist'), # noqa: E501
Syntax('html-derivative', Ext.PLIST, 'https://raw.githubusercontent.com/textmate/html.tmbundle/0c3d5ee54de3a993f747f54186b73a4d2d3c44a2/Syntaxes/HTML%20(Derivative).tmLanguage'), # noqa: E501
Syntax('ini', Ext.PLIST, 'https://raw.githubusercontent.com/textmate/ini.tmbundle/7d8c7b55/Syntaxes/Ini.plist'), # noqa: E501
Syntax('json', Ext.PLIST, 'https://raw.githubusercontent.com/microsoft/vscode-JSON.tmLanguage/d113e90937ed3ecc31ac54750aac2e8efa08d784/JSON.tmLanguage'), # noqa: E501
Syntax('make', Ext.PLIST, 'https://raw.githubusercontent.com/fadeevab/make.tmbundle/fd57c0552/Syntaxes/Makefile.plist'), # noqa: E501
Syntax('markdown', Ext.PLIST, 'https://raw.githubusercontent.com/microsoft/vscode-markdown-tm-grammar/59a5962/syntaxes/markdown.tmLanguage'), # noqa: E501
Syntax('powershell', Ext.PLIST, 'https://raw.githubusercontent.com/PowerShell/EditorSyntax/4a0a0766/PowerShellSyntax.tmLanguage'), # noqa: E501
Syntax('puppet', Ext.PLIST, 'https://raw.githubusercontent.com/lingua-pupuli/puppet-editor-syntax/dc414b8a/syntaxes/puppet.tmLanguage'), # noqa: E501
Syntax('python', Ext.PLIST, 'https://raw.githubusercontent.com/MagicStack/MagicPython/c9b3409d/grammars/MagicPython.tmLanguage'), # noqa: E501
# TODO: https://github.com/zargony/atom-language-rust/pull/149
Syntax('rust', Ext.CSON, 'https://raw.githubusercontent.com/asottile/atom-language-rust/e113ca67/grammars/rust.cson'), # noqa: E501
Syntax('shell', Ext.CSON, 'https://raw.githubusercontent.com/atom/language-shellscript/7008ea926867d8a231003e78094091471c4fccf8/grammars/shell-unix-bash.cson'), # noqa: E501
# TODO: https://github.com/atom/language-xml/pull/99
Syntax('xml', Ext.CSON, 'https://raw.githubusercontent.com/asottile/language-xml/2d76bc1f/grammars/xml.cson'), # noqa: E501
Syntax('yaml', Ext.PLIST, 'https://raw.githubusercontent.com/textmate/yaml.tmbundle/e54ceae3/Syntaxes/YAML.tmLanguage'), # noqa: E501
)
def main() -> int:
    """Download every grammar in ``SYNTAXES`` and store it as JSON.

    The destination directory is taken from ``--dest`` (default
    ``DEFAULT_DIR``) and is created when missing.  Each grammar is fetched
    from its url, converted with the ``EXT_CONVERT`` entry for its format,
    and written to ``<dest>/<name>.json``.

    Returns:
        ``0``, which the script passes on as its exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dest', default=DEFAULT_DIR)
    args = parser.parse_args()

    os.makedirs(args.dest, exist_ok=True)
    for syntax in SYNTAXES:
        print(f'downloading {syntax.name}...')
        # close the http response deterministically rather than leaving the
        # connection open until the object is garbage collected
        with urllib.request.urlopen(syntax.url) as resp:
            contents = resp.read()
        converted = EXT_CONVERT[syntax.ext](contents)
        with open(os.path.join(args.dest, f'{syntax.name}.json'), 'w') as f:
            f.write(converted)

    return 0
if __name__ == '__main__':
    # `exit` is injected by the `site` module for interactive use and is
    # missing under `python -S`; raising SystemExit always works
    raise SystemExit(main())

View File

@@ -22,6 +22,7 @@ classifiers =
[options]
packages = find:
install_requires =
babi-grammars
identify
onigurumacffi>=0.0.10
importlib_metadata>=1;python_version<"3.8"

17
tests/conftest.py Normal file
View File

@@ -0,0 +1,17 @@
import json
import pytest
from babi.highlight import Grammars
@pytest.fixture
def make_grammars(tmpdir):
    """Fixture returning a factory which builds ``Grammars`` from dicts.

    Each grammar dict passed to the factory is written as
    ``<scopeName>.json`` into a ``grammars`` directory under ``tmpdir``; the
    factory then returns a ``Grammars`` pointed at that directory.
    """
    target_dir = tmpdir.join('grammars').ensure_dir()

    def _factory(*grammar_dcts):
        for dct in grammar_dcts:
            serialized = json.dumps(dct)
            target_dir.join(f'{dct["scopeName"]}.json').write(serialized)
        return Grammars(target_dir)

    return _factory

View File

@@ -16,6 +16,13 @@ from babi.screen import VERSION_STR
from testing.runner import PrintsErrorRunner
@pytest.fixture(autouse=True)
def prefix_home(tmpdir):
    """Patch ``sys.prefix`` to a directory under ``tmpdir`` for each test.

    Yields the (not necessarily existing) ``prefix_home`` path object while
    the patch is active.
    """
    fake_prefix = tmpdir.join('prefix_home')
    patcher = mock.patch.object(sys, 'prefix', str(fake_prefix))
    with patcher:
        yield fake_prefix
@pytest.fixture(autouse=True)
def xdg_data_home(tmpdir):
data_home = tmpdir.join('data_home')

View File

@@ -43,7 +43,7 @@ still more
@pytest.fixture(autouse=True)
def theme_and_grammar(xdg_data_home, xdg_config_home):
xdg_config_home.join('babi/theme.json').ensure().write(THEME)
xdg_data_home.join('babi/textmate_syntax/demo.json').ensure().write(SYNTAX)
xdg_data_home.join('babi/grammar_v1/demo.json').ensure().write(SYNTAX)
@pytest.fixture

View File

@@ -1,34 +1,37 @@
from babi.highlight import Grammars
import pytest
from babi.highlight import highlight_line
from babi.highlight import Region
def test_grammar_matches_extension_only_name():
def test_grammar_matches_extension_only_name(make_grammars):
data = {'scopeName': 'shell', 'patterns': [], 'fileTypes': ['bashrc']}
grammars = Grammars([data])
grammars = make_grammars(data)
compiler = grammars.compiler_for_file('.bashrc', 'alias nano=babi')
assert compiler.root_state.entries[0].scope[0] == 'shell'
def test_grammar_matches_via_identify_tag():
data = {'scopeName': 'source.ini', 'patterns': []}
grammars = Grammars([data])
def test_grammar_matches_via_identify_tag(make_grammars):
grammars = make_grammars({'scopeName': 'source.ini', 'patterns': []})
compiler = grammars.compiler_for_file('setup.cfg', '')
assert compiler.root_state.entries[0].scope[0] == 'source.ini'
@pytest.fixture
def compiler_state(make_grammars):
def _compiler_state(*grammar_dcts):
grammars = Grammars(grammar_dcts)
grammars = make_grammars(*grammar_dcts)
compiler = grammars.compiler_for_scope(grammar_dcts[0]['scopeName'])
return compiler, compiler.root_state
return _compiler_state
def test_backslash_a():
def test_backslash_a(compiler_state):
grammar = {
'scopeName': 'test',
'patterns': [{'name': 'aaa', 'match': r'\Aa+'}],
}
compiler, state = _compiler_state(grammar)
compiler, state = compiler_state(grammar)
state, (region_0,) = highlight_line(compiler, state, 'aaa', True)
state, (region_1,) = highlight_line(compiler, state, 'aaa', False)
@@ -51,8 +54,8 @@ BEGIN_END_NO_NL = {
}
def test_backslash_g_inline():
compiler, state = _compiler_state(BEGIN_END_NO_NL)
def test_backslash_g_inline(compiler_state):
compiler, state = compiler_state(BEGIN_END_NO_NL)
_, regions = highlight_line(compiler, state, 'xaax', True)
assert regions == (
@@ -63,8 +66,8 @@ def test_backslash_g_inline():
)
def test_backslash_g_next_line():
compiler, state = _compiler_state(BEGIN_END_NO_NL)
def test_backslash_g_next_line(compiler_state):
compiler, state = compiler_state(BEGIN_END_NO_NL)
state, regions1 = highlight_line(compiler, state, 'x\n', True)
state, regions2 = highlight_line(compiler, state, 'aax\n', False)
@@ -81,8 +84,8 @@ def test_backslash_g_next_line():
)
def test_end_before_other_match():
compiler, state = _compiler_state(BEGIN_END_NO_NL)
def test_end_before_other_match(compiler_state):
compiler, state = compiler_state(BEGIN_END_NO_NL)
state, regions = highlight_line(compiler, state, 'xazzx', True)
@@ -107,8 +110,8 @@ BEGIN_END_NL = {
}
def test_backslash_g_captures_nl():
compiler, state = _compiler_state(BEGIN_END_NL)
def test_backslash_g_captures_nl(compiler_state):
compiler, state = compiler_state(BEGIN_END_NL)
state, regions1 = highlight_line(compiler, state, 'x\n', True)
state, regions2 = highlight_line(compiler, state, 'aax\n', False)
@@ -124,8 +127,8 @@ def test_backslash_g_captures_nl():
)
def test_backslash_g_captures_nl_next_line():
compiler, state = _compiler_state(BEGIN_END_NL)
def test_backslash_g_captures_nl_next_line(compiler_state):
compiler, state = compiler_state(BEGIN_END_NL)
state, regions1 = highlight_line(compiler, state, 'x\n', True)
state, regions2 = highlight_line(compiler, state, 'aa\n', False)
@@ -147,8 +150,8 @@ def test_backslash_g_captures_nl_next_line():
)
def test_while_no_nl():
compiler, state = _compiler_state({
def test_while_no_nl(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [{
'begin': '> ',
@@ -182,8 +185,8 @@ def test_while_no_nl():
)
def test_complex_captures():
compiler, state = _compiler_state({
def test_complex_captures(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -213,8 +216,8 @@ def test_complex_captures():
)
def test_captures_multiple_applied_to_same_capture():
compiler, state = _compiler_state({
def test_captures_multiple_applied_to_same_capture(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -256,8 +259,8 @@ def test_captures_multiple_applied_to_same_capture():
)
def test_captures_ignores_empty():
compiler, state = _compiler_state({
def test_captures_ignores_empty(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [{
'match': '(.*) hi',
@@ -279,8 +282,8 @@ def test_captures_ignores_empty():
)
def test_captures_ignores_invalid_out_of_bounds():
compiler, state = _compiler_state({
def test_captures_ignores_invalid_out_of_bounds(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [{'match': '.', 'captures': {'1': {'name': 'oob'}}}],
})
@@ -292,8 +295,8 @@ def test_captures_ignores_invalid_out_of_bounds():
)
def test_captures_begin_end():
compiler, state = _compiler_state({
def test_captures_begin_end(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -314,8 +317,8 @@ def test_captures_begin_end():
)
def test_captures_while_captures():
compiler, state = _compiler_state({
def test_captures_while_captures(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -343,8 +346,8 @@ def test_captures_while_captures():
)
def test_captures_implies_begin_end_captures():
compiler, state = _compiler_state({
def test_captures_implies_begin_end_captures(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -364,8 +367,8 @@ def test_captures_implies_begin_end_captures():
)
def test_captures_implies_begin_while_captures():
compiler, state = _compiler_state({
def test_captures_implies_begin_while_captures(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -392,8 +395,8 @@ def test_captures_implies_begin_while_captures():
)
def test_include_self():
compiler, state = _compiler_state({
def test_include_self(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -416,8 +419,8 @@ def test_include_self():
)
def test_include_repository_rule():
compiler, state = _compiler_state({
def test_include_repository_rule(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [{'include': '#impl'}],
'repository': {
@@ -438,8 +441,8 @@ def test_include_repository_rule():
)
def test_include_other_grammar():
compiler, state = _compiler_state(
def test_include_other_grammar(compiler_state):
compiler, state = compiler_state(
{
'scopeName': 'test',
'patterns': [
@@ -494,8 +497,8 @@ def test_include_other_grammar():
)
def test_include_base():
compiler, state = _compiler_state(
def test_include_base(compiler_state):
compiler, state = compiler_state(
{
'scopeName': 'test',
'patterns': [
@@ -542,8 +545,8 @@ def test_include_base():
)
def test_rule_with_begin_and_no_end():
compiler, state = _compiler_state({
def test_rule_with_begin_and_no_end(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [
{
@@ -566,8 +569,8 @@ def test_rule_with_begin_and_no_end():
)
def test_begin_end_substitute_special_chars():
compiler, state = _compiler_state({
def test_begin_end_substitute_special_chars(compiler_state):
compiler, state = compiler_state({
'scopeName': 'test',
'patterns': [{'begin': r'(\*)', 'end': r'\1', 'name': 'italic'}],
})

View File

@@ -5,7 +5,6 @@ from unittest import mock
import pytest
from babi.color_manager import ColorManager
from babi.highlight import Grammars
from babi.hl.interface import HL
from babi.hl.syntax import Syntax
from babi.theme import Color
@@ -72,8 +71,8 @@ THEME = Theme.from_dct({
@pytest.fixture
def syntax(tmpdir):
return Syntax(Grammars.from_syntax_dir(tmpdir), THEME, ColorManager.make())
def syntax(make_grammars):
return Syntax(make_grammars(), THEME, ColorManager.make())
def test_init_screen_low_color(stdscr, syntax):
@@ -152,13 +151,13 @@ def test_style_attributes_applied(stdscr, syntax):
assert attr == 2 << 8 | curses.A_BOLD
def test_syntax_highlight_cache_first_line(stdscr):
def test_syntax_highlight_cache_first_line(stdscr, make_grammars):
with FakeCurses.patch(n_colors=256, can_change_color=False):
grammars = Grammars([{
grammars = make_grammars({
'scopeName': 'source.demo',
'fileTypes': ['demo'],
'patterns': [{'match': r'\Aint', 'name': 'keyword'}],
}])
})
syntax = Syntax(grammars, THEME, ColorManager.make())
syntax._init_screen(stdscr)
file_hl = syntax.file_highlighter('foo.demo', '')