Initial commit of a pygments plugin for Fluent formatting.
Showing 10 changed files with 204 additions and 1 deletion.
@@ -0,0 +1 @@
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
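This pkgutil boilerplate makes fluent a shared namespace package, so fluent.pygments can be installed alongside the other python-fluent distributions such as fluent.syntax.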
Empty file.
@@ -0,0 +1,21 @@
from __future__ import absolute_import, print_function, unicode_literals

import argparse
import sys

from pygments import highlight
from pygments.formatters import Terminal256Formatter
from fluent.pygments.lexer import FluentLexer


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('path')
    args = parser.parse_args()
    with open(args.path) as fh:
        code = fh.read()
    highlight(code, FluentLexer(), Terminal256Formatter(), sys.stdout)


if __name__ == '__main__':
    main()
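The same pipeline can also be driven in-process rather than through the command line; a minimal sketch (not part of the commit), highlighting a Fluent string directly:

    from pygments import highlight
    from pygments.formatters import Terminal256Formatter
    from fluent.pygments.lexer import FluentLexer

    # With no output file argument, highlight() returns the
    # ANSI-colored text as a string instead of writing to a stream.
    code = 'hello = Hello, { $name }!\n'
    print(highlight(code, FluentLexer(), Terminal256Formatter()))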
@@ -0,0 +1,91 @@
from __future__ import absolute_import, print_function, unicode_literals

from fluent.syntax import ast as FTL
from fluent.syntax import parse

from pygments.lexer import Lexer
from pygments.token import Token


class FluentLexer(Lexer):
    name = 'Fluent Lexer'
    aliases = ['fluent', 'ftl']
    filenames = ['*.ftl']

    def get_tokens_unprocessed(self, text):
        last_end = 0
        tokenizer = Tokenizer(text)
        for token in tokenizer.tokenize():
            node, start, token, span = token
            if start > last_end:
                yield last_end, Token.Punctuation, text[last_end:start]
            last_end = node.span.end
            yield start, token, span
        if last_end < len(text):
            yield last_end, Token.Punctuation, text[last_end:]


ATOMIC = {
    'Comment': Token.Comment.Multiline,
    'GroupComment': Token.Comment.Multiline,
    'ResourceComment': Token.Comment.Multiline,
    'Identifier': Token.Name.Constant,
    'TextElement': Token.Literal,
    'NumberLiteral': Token.Literal.Number,
    'StringLiteral': Token.Literal.String,
    'VariableReference': Token.Name.Variable,
    'Junk': Token.Generic.Error,
}


class Tokenizer(object):
    def __init__(self, text):
        self.text = text
        self.ast = parse(text)

    def tokenize(self, node=None):
        if node is None:
            node = self.ast
        if isinstance(node, (FTL.Annotation, FTL.Span)):
            return
        if isinstance(node, FTL.SyntaxNode):
            for token in self.tokenize_node(node):
                yield token
        elif isinstance(node, list):
            for child in node:
                for token in self.tokenize(child):
                    yield token

    def tokenize_node(self, node):
        nodename = type(node).__name__
        if nodename in ATOMIC:
            yield self._token(node, ATOMIC[nodename])
        else:
            tokenize = getattr(self, 'tokenize_{}'.format(nodename), self.generic_tokenize)
            for token in tokenize(node):
                yield token

    def generic_tokenize(self, node):
        children = [
            child for child in vars(node).values()
            if isinstance(child, (FTL.SyntaxNode, list)) and child != []
        ]
        children.sort(
            key=lambda child: child.span.start if isinstance(child, FTL.SyntaxNode) else child[0].span.start
        )
        for child in children:
            for token in self.tokenize(child):
                yield token

    def tokenize_Variant(self, node):
        yield self._token(node.key, Token.Name.Attribute)
        for token in self.tokenize(node.value):
            yield token

    def _token(self, node, token):
        return (
            node,
            node.span.start,
            token,
            self.text[node.span.start:node.span.end],
        )
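As a quick illustration of how the ATOMIC mapping surfaces in practice (a sketch, not part of the commit), running a fragment through the lexer's public get_tokens() shows Identifier and VariableReference nodes coming back as Name tokens, with the unclaimed text between spans emitted as Punctuation:

    from fluent.pygments.lexer import FluentLexer

    # 'welcome' is an Identifier  -> Token.Name.Constant
    # '$user' is a VariableReference -> Token.Name.Variable
    # 'Hi, ' and '!' are TextElements -> Token.Literal
    for token_type, value in FluentLexer().get_tokens('welcome = Hi, { $user }!\n'):
        print(token_type, repr(value))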
@@ -0,0 +1,21 @@
[bdist_wheel]
universal=1

[flake8]
exclude=.tox
max-line-length=120

[isort]
line_length=120
skip_glob=.tox
not_skip=__init__.py

[options]
install_requires =
    pygments
    fluent.syntax
    six

[options.entry_points]
pygments.lexers =
    fluent=fluent.pygments.lexer:FluentLexer
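Once the package is installed, the pygments.lexers entry point above lets Pygments discover the lexer by its alias, with no explicit import of fluent.pygments. A minimal sketch:

    from pygments.lexers import get_lexer_by_name

    # Resolved through the 'pygments.lexers' entry point registered in setup.cfg.
    lexer = get_lexer_by_name('fluent')
    print(type(lexer).__name__)  # FluentLexer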
@@ -0,0 +1,23 @@
#!/usr/bin/env python
from setuptools import setup

setup(name='fluent.pygments',
      version='0.1.0',
      description='Pygments lexer for Fluent.',
      long_description='See https://github.com/projectfluent/python-fluent/ for more info.',
      author='Mozilla',
      author_email='[email protected]',
      license='APL 2',
      url='https://github.com/projectfluent/python-fluent',
      keywords=['fluent', 'pygments'],
      classifiers=[
          'Development Status :: 3 - Alpha',
          'Intended Audience :: Developers',
          'License :: OSI Approved :: Apache Software License',
          'Programming Language :: Python :: 2.7',
          'Programming Language :: Python :: 3.5',
      ],
      packages=['fluent', 'fluent.pygments'],
      tests_require=['six'],
      test_suite='tests.pygments'
      )
Empty file.
Empty file.
@@ -0,0 +1,41 @@
from __future__ import absolute_import, print_function, unicode_literals

import unittest
from pygments.token import Token

from fluent.pygments.lexer import FluentLexer


class LexerTest(unittest.TestCase):
    def setUp(self):
        self.lexer = FluentLexer()

    def test_comment(self):
        fragment = '# comment\n'
        tokens = [
            (Token.Comment.Multiline, '# comment'),
            (Token.Punctuation, '\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

    def test_message(self):
        fragment = 'msg = some value\n'
        tokens = [
            (Token.Name.Constant, 'msg'),
            (Token.Punctuation, ' = '),
            (Token.Literal, 'some value'),
            (Token.Punctuation, '\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

    def test_message_with_comment(self):
        fragment = '# good comment\nmsg = some value\n'
        tokens = [
            (Token.Comment.Multiline, '# good comment'),
            (Token.Punctuation, '\n'),
            (Token.Name.Constant, 'msg'),
            (Token.Punctuation, ' = '),
            (Token.Literal, 'some value'),
            (Token.Punctuation, '\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
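The suite can be run through the test_suite declared in setup.py, or directly with unittest; a sketch assuming the tests live under a tests/ directory, as that declaration implies:

    import unittest

    # Discover and run the lexer tests (test_suite = 'tests.pygments' in setup.py).
    suite = unittest.defaultTestLoader.discover('tests')
    unittest.TextTestRunner(verbosity=2).run(suite)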