diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b989be6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,105 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+.static_storage/
+.media/
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/selections/__init__.py b/selections/__init__.py
new file mode 100644
index 0000000..95d0076
--- /dev/null
+++ b/selections/__init__.py
@@ -0,0 +1,25 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from pyparsing import ParserElement
+
+from .backends import from_numexpr, to_numexpr
+from .backends import from_root, to_root
+from .expression import Expression
+from .parser import ParsingException
+
+
+__all__ = [
+    'Expression',
+    'ParsingException',
+    # numexpr
+    'from_numexpr',
+    'to_numexpr',
+    # ROOT
+    'from_root',
+    'to_root',
+]
+
+
+ParserElement.enablePackrat()
diff --git a/selections/backends/ROOT.py b/selections/backends/ROOT.py
new file mode 100644
index 0000000..9a0a918
--- /dev/null
+++ b/selections/backends/ROOT.py
@@ -0,0 +1,53 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from ..identifiers import IDs
+from ..parser import Operator, Function, Parser
+
+
+__all__ = [
+    'root_parser',
+]
+
+
+config = [
+    Operator(IDs.MINUS, '-', rhs_only=True),
+    Operator(IDs.PLUS, '+', rhs_only=True),
+    Operator(IDs.ADD, '+'),
+    Operator(IDs.SUB, '-'),
+    Operator(IDs.MUL, '*'),
+    Operator(IDs.DIV, '/'),
+
+    Function(IDs.SQRT, 'sqrt'),
+    Function(IDs.SQRT, 'TMath::Sqrt'),
+    Function(IDs.ABS, 'TMath::Abs'),
+
+    Function(IDs.LOG, 'log'),
+    Function(IDs.LOG, 'TMath::Log'),
+    Function(IDs.LOG2, 'log2'),
+    Function(IDs.LOG2, 'TMath::Log2'),
+    Function(IDs.LOG10, 'log10'),
+    Function(IDs.LOG10, 'TMath::Log10'),
+
+    Function(IDs.EXP, 'exp'),
+    Function(IDs.EXP, 'TMath::Exp'),
+
+    Function(IDs.SIN, 'sin'),
+    Function(IDs.SIN, 'TMath::Sin'),
+    Function(IDs.ASIN, 'arcsin'),
+    Function(IDs.ASIN, 'TMath::ASin'),
+    Function(IDs.COS, 'cos'),
+    Function(IDs.COS, 'TMath::Cos'),
+    Function(IDs.ACOS, 'arccos'),
+    Function(IDs.ACOS, 'TMath::ACos'),
+    Function(IDs.TAN, 'tan'),
+    Function(IDs.TAN, 'TMath::Tan'),
+    Function(IDs.ATAN, 'arctan'),
+    Function(IDs.ATAN, 'TMath::ATan'),
+    Function(IDs.ATAN2, 'arctan2', 2),
+    Function(IDs.ATAN2, 'TMath::ATan2', 2),
+]
+
+
+root_parser = Parser('ROOT', config)
diff --git a/selections/backends/__init__.py b/selections/backends/__init__.py
new file mode 100644
index 0000000..520c984
--- /dev/null
+++ b/selections/backends/__init__.py
@@ -0,0 +1,20 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from .numexpr import numexpr_parser
+from .ROOT import root_parser
+
+
+__all__ = [
+    'from_numexpr',
+    'to_numexpr',
+    'from_root',
+    'to_root',
+]
+
+from_numexpr = numexpr_parser.to_expression
+to_numexpr = numexpr_parser.to_string
+
+from_root = root_parser.to_expression
+to_root = root_parser.to_string
diff --git a/selections/backends/numexpr.py b/selections/backends/numexpr.py
new file mode 100644
index 0000000..86f20de
--- /dev/null
+++ b/selections/backends/numexpr.py
@@ -0,0 +1,50 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from ..identifiers import IDs
+from ..parser import Operator, Function, Parser
+
+
+__all__ = [
+    'numexpr_parser',
+]
+
+
+config = [
+    Operator(IDs.MINUS, '-', rhs_only=True),
+    Operator(IDs.PLUS, '+', rhs_only=True),
+    Operator(IDs.ADD, '+'),
+    Operator(IDs.SUB, '-'),
+    Operator(IDs.MUL, '*'),
+    Operator(IDs.DIV, '/'),
+
+    Function(IDs.SQRT, 'sqrt'),
+    Function(IDs.ABS, 'abs'),
+    Function(IDs.WHERE, 'where', 3),
+
+    Function(IDs.LOG, 'log'),
+    Function(IDs.LOG10, 'log10'),
+    Function(IDs.LOG1p, 'log1p'),
+
+    Function(IDs.EXP, 'exp'),
+    Function(IDs.EXPM1, 'expm1'),
+
+    Function(IDs.SIN, 'sin'),
+    Function(IDs.ASIN, 'arcsin'),
+    Function(IDs.COS, 'cos'),
+    Function(IDs.ACOS, 'arccos'),
+    Function(IDs.TAN, 'tan'),
+    Function(IDs.ATAN, 'arctan'),
+    Function(IDs.ATAN2, 'arctan2', 2),
+
+    Function(IDs.SINH, 'sinh'),
+    Function(IDs.ASINH, 'arcsinh'),
+    Function(IDs.COSH, 'cosh'),
+    Function(IDs.ACOSH, 'arccosh'),
+    Function(IDs.TANH, 'tanh'),
+    Function(IDs.ATANH, 'arctanh'),
+]
+
+
+numexpr_parser = Parser('numexpr', config)
diff --git a/selections/expression.py b/selections/expression.py
new file mode 100644
index 0000000..6e0933f
--- /dev/null
+++ b/selections/expression.py
@@ -0,0 +1,179 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from .identifiers import IDs
+
+
+__all__ = [
+    'Expression'
+]
+
+
+class Expression(object):
+    """Node of a backend independent expression tree
+
+    Parameters
+    ----------
+    id : :IDs:
+        Element of the IDs enum representing the operation of this node
+    *args
+        Operands of the operation, plain values or nested expressions
+    """
+
+    def __init__(self, id, *args):
+        self._id = id
+        self._args = args
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}<{self._id.name}>({", ".join(map(repr, self._args))})'
+
+    def __str__(self):
+        return repr(self)
+
+    def equivalent(self, other):
+        """Check if two expression objects are the same
+
+        Nested expressions are compared recursively, all other arguments
+        are compared using ``==``.
+        """
+        if not isinstance(other, self.__class__):
+            return False
+        if self._id != other._id or len(self._args) != len(other._args):
+            return False
+        for a, b in zip(self._args, other._args):
+            if isinstance(a, Expression):
+                # Expression has no __eq__ so "a == b" would compare identity
+                if not a.equivalent(b):
+                    return False
+            elif isinstance(b, Expression) or a != b:
+                return False
+        return True
+
+    # Misspelt name kept so existing callers continue to work
+    equivilent = equivalent
+
+    # Binary arithmetic operators
+    def __add__(self, value):
+        return Expression(IDs.ADD, self, value)
+
+    def __radd__(self, value):
+        return Expression(IDs.ADD, value, self)
+
+    def __sub__(self, value):
+        return Expression(IDs.SUB, self, value)
+
+    def __rsub__(self, value):
+        return Expression(IDs.SUB, value, self)
+
+    def __mul__(self, value):
+        return Expression(IDs.MUL, self, value)
+
+    def __rmul__(self, value):
+        return Expression(IDs.MUL, value, self)
+
+    def __truediv__(self, value):
+        # Python 2 only dispatches "/" here under "from __future__ import division"
+        return Expression(IDs.DIV, self, value)
+
+    def __rtruediv__(self, value):
+        return Expression(IDs.DIV, value, self)
+
+    def __floordiv__(self, value):
+        # TODO No identifier exists for floor division yet
+        raise NotImplementedError()
+
+    def __rfloordiv__(self, value):
+        # TODO No identifier exists for floor division yet
+        raise NotImplementedError()
+
+    def __abs__(self):
+        raise NotImplementedError()
+
+    def __pow__(self, other):
+        raise NotImplementedError()
+
+    def __mod__(self, other):
+        raise NotImplementedError()
+
+    def __lshift__(self, other):
+        raise NotImplementedError()
+
+    def __rshift__(self, other):
+        raise NotImplementedError()
+
+    # Functions
+    def where(self):
+        raise NotImplementedError()
+
+    def sin(self):
+        raise NotImplementedError()
+
+    def cos(self):
+        raise NotImplementedError()
+
+    def tan(self):
+        raise NotImplementedError()
+
+    def arcsin(self):
+        raise NotImplementedError()
+
+    def arccos(self):
+        raise NotImplementedError()
+
+    def arctan(self):
+        raise NotImplementedError()
+
+    def arctan2(self, other):
+        raise NotImplementedError()
+
+    def sinh(self):
+        raise NotImplementedError()
+
+    def cosh(self):
+        raise NotImplementedError()
+
+    def tanh(self):
+        raise NotImplementedError()
+
+    def arcsinh(self):
+        raise NotImplementedError()
+
+    def arccosh(self):
+        raise NotImplementedError()
+
+    def arctanh(self):
+        raise NotImplementedError()
+
+    def log(self):
+        raise NotImplementedError()
+
+    def log10(self):
+        raise NotImplementedError()
+
+    def log1p(self):
+        raise NotImplementedError()
+
+    def exp(self):
+        raise NotImplementedError()
+
+    def expm1(self):
+        raise NotImplementedError()
+
+    def sqrt(self):
+        raise NotImplementedError()
+
+    def abs(self):
+        raise NotImplementedError()
+
+    def conj(self):
+        raise NotImplementedError()
+
+    def real(self):
+        raise NotImplementedError()
+
+    def imag(self):
+        raise NotImplementedError()
+
+    def complex(self):
+        raise NotImplementedError()
diff --git a/selections/identifiers.py b/selections/identifiers.py
new file mode 100644
index 0000000..ae9a5a4
--- /dev/null
+++ b/selections/identifiers.py
@@ -0,0 +1,72 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from enum import Enum, auto
+
+
+__all__ = [
+    'IDs',
+    'order_of_operations',
+]
+
+
+class IDs(Enum):
+    """Backend independent identifiers for operators and functions"""
+
+    MINUS = auto()
+    PLUS = auto()
+    ADD = auto()
+    SUB = auto()
+    MUL = auto()
+    DIV = auto()
+
+    AND = auto()
+    OR = auto()
+    XOR = auto()
+
+    SQRT = auto()
+    ABS = auto()
+    WHERE = auto()
+
+    LOG = auto()
+    LOG2 = auto()
+    LOG10 = auto()
+    LOG1p = auto()
+
+    EXP = auto()
+    EXPM1 = auto()
+
+    SIN = auto()
+    ASIN = auto()
+    COS = auto()
+    ACOS = auto()
+    TAN = auto()
+    ATAN = auto()
+    ATAN2 = auto()
+
+    SINH = auto()
+    ASINH = auto()
+    COSH = auto()
+    ACOSH = auto()
+    TANH = auto()
+    ATANH = auto()
+
+
+# Operators listed first bind the tightest. Each operator is parsed on its own
+# precedence level, so DIV has to bind tighter than MUL and SUB tighter than
+# ADD for "a / b * c" and "a - b + c" to keep their left to right meaning of
+# "(a / b) * c" and "(a - b) + c".
+order_of_operations = [
+    IDs.MINUS,
+    IDs.PLUS,
+
+    IDs.DIV,
+    IDs.MUL,
+    IDs.SUB,
+    IDs.ADD,
+
+    IDs.AND,
+    IDs.OR,
+    IDs.XOR,
+]
diff --git a/selections/logger.py b/selections/logger.py
new file mode 100644
index 0000000..4a53751
--- /dev/null
+++ b/selections/logger.py
@@ -0,0 +1,5 @@
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger('selections.backends.ROOT')
+logger.setLevel(logging.DEBUG)
diff --git a/selections/parser.py b/selections/parser.py
new file mode 100644
index 0000000..65f4154
--- /dev/null
+++ b/selections/parser.py
@@ -0,0 +1,254 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import wraps
+import logging
+
+import pyparsing
+from pyparsing import Suppress, pyparsing_common, opAssoc
+
+from .expression import Expression
+from .identifiers import order_of_operations
+
+
+__all__ = [
+    'Constant',
+    'Function',
+    'Operator',
+    'Parser',
+    'ParsingException',
+]
+
+import colorlog
+
+handler = colorlog.StreamHandler()
+handler.setFormatter(colorlog.ColoredFormatter(
+    '%(log_color)s%(levelname)s:%(name)s:%(message)s'))
+
+logger = colorlog.getLogger('selections.parser')
+logger.addHandler(handler)
+logger.setLevel(logging.DEBUG)
+
+
+def add_logging(func):
+    """Wrap func so each call and its result are logged at DEBUG level"""
+    @wraps(func)
+    def new_func(*args, **kwargs):
+        # %-style arguments are only formatted if the record is emitted
+        logger.debug('Calling %s with %s and %s', func.__qualname__, args, kwargs)
+        result = func(*args, **kwargs)
+        logger.debug(' - Got result %s', result)
+        return result
+    return new_func
+
+
+# Shared placeholder kept for users of Function.parser. create_parser makes a
+# separate Forward for each grammar so backends cannot overwrite each other.
+EXPRESSION = pyparsing.Forward()
+
+
+class Constant(object):
+    def __init__(self, value):
+        raise NotImplementedError()
+
+
+class Function(object):
+    def __init__(self, id, name, n_args=1):
+        """Represents an function call with augments
+
+        Parameters
+        ----------
+        id : :IDs:
+            Element of the IDs enum representing this operation
+        name : str
+            String representing this function
+        n_args : int
+            Number of arguments required by this function
+
+        Examples
+        --------
+        >>> str(Function(IDs.SQRT, 'sqrt')(4))
+        'Expression<SQRT>(4)'
+        >>> str(Function(IDs.ATAN2, 'arctan2', 2)(2, 4))
+        'Expression<ATAN2>(2, 4)'
+        """
+        assert n_args >= 1, n_args
+        self._id = id
+        self._name = name
+        self._n_args = n_args
+
+    def __str__(self):
+        return f'{self._name}<{self._n_args}>'
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}<{self._id.name},{self._name},n_args={self._n_args}>'
+
+    @add_logging
+    def __call__(self, *args):
+        """Return the Expression for this function applied to args"""
+        if len(args) != self._n_args:
+            raise TypeError('Function({name}) requires {n} arguments, {x} given'
+                            .format(name=self._name, n=self._n_args, x=len(args)))
+        return Expression(self._id, *args)
+
+    def build_parser(self, expression):
+        """Create the pyparsing element matching a call of this function
+
+        Parameters
+        ----------
+        expression : pyparsing.ParserElement
+            Element used to parse each of the arguments
+        """
+        result = Suppress(self._name) + Suppress('(') + expression
+        for _ in range(self._n_args - 1):
+            result += Suppress(',') + expression
+        result += Suppress(')')
+        result.setName('Function({name})'.format(name=self._name))
+        result.setParseAction(self._parse_action)
+        return result
+
+    @property
+    def parser(self):
+        """Parser for this function with arguments matched by EXPRESSION"""
+        return self.build_parser(EXPRESSION)
+
+    def _parse_action(self, string, location, result):
+        # TODO Replace with logging decorator
+        return self(*result)
+
+
+class Operator(object):
+    def __init__(self, id, op, rhs_only=False):
+        """Represents an operator of the form "A x B"
+
+        Parameters
+        ----------
+        id : :IDs:
+            Element of the IDs enum representing this operation
+        op : str
+            String representing this operator
+        rhs_only : bool
+            Apply this operation to the right hand side only
+
+        Examples
+        --------
+        >>> str(Operator(IDs.MUL, '*')(4, 5))
+        'Expression<MUL>(4, 5)'
+        >>> str(Operator(IDs.MINUS, '-', rhs_only=True)(2))
+        'Expression<MINUS>(2)'
+        """
+        self._id = id
+        self._op = op
+        self._rhs_only = rhs_only
+
+    def __str__(self):
+        # Mirrors Function.__str__: the symbol followed by the operand count
+        return f'{self._op}<{1 if self._rhs_only else 2}>'
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}<{self._id.name},{self._op},rhs_only={self._rhs_only}>'
+
+    # Set order of operations
+    def __gt__(self, other):
+        return self.__class__.__lt__(other, self)
+
+    def __lt__(self, other):
+        return order_of_operations.index(self._id) < order_of_operations.index(other._id)
+
+    @add_logging
+    def __call__(self, a, b=None):
+        if self._rhs_only:
+            assert b is None
+            return Expression(self._id, a)
+        else:
+            assert b is not None
+            return Expression(self._id, a, b)
+
+    @property
+    def parser_description(self):
+        """Precedence level in the tuple form taken by pyparsing.infixNotation"""
+        if self._rhs_only:
+            return (Suppress(self._op), 1, opAssoc.RIGHT, self._parse_action)
+        else:
+            return (Suppress(self._op), 2, opAssoc.LEFT, self._parse_action)
+
+    def _parse_action(self, string, location, result):
+        # TODO Replace with logging decorator
+        assert len(result) == 1, result
+        operands = list(result[0])
+        if self._rhs_only:
+            assert len(operands) == 1, operands
+            return self(operands[0])
+        assert len(operands) >= 2, operands
+        # infixNotation groups chains such as "a - b - c" into one result,
+        # combine them from left to right to get "(a - b) - c"
+        expression = self(operands[0], operands[1])
+        for operand in operands[2:]:
+            expression = self(expression, operand)
+        return expression
+
+
+class Parser(object):
+    def __init__(self, name, config):
+        """Grammar for a single backend
+
+        Parameters
+        ----------
+        name : str
+            Name of the backend
+        config : list
+            Operator and Function objects supported by the backend
+        """
+        self._name = name
+        self._parser = create_parser(config)
+
+    def to_expression(self, string):
+        """Parse string and return the resulting object
+
+        Raises
+        ------
+        ParsingException
+            If string does not match the grammar of this backend
+        """
+        try:
+            result = self._parser.parseString(string, parseAll=True)
+        except pyparsing.ParseException as e:
+            # Extra arguments need a %s placeholder, without one logging
+            # reports a formatting error instead of the message
+            marker = ' ' * (e.col - 1)
+            logger.error('TODO TRACEBACK: %s', e.args)
+            logger.error('Error parsing: %s', e.line)
+            logger.error('               %s▲', marker)
+            logger.error('               %s┃', marker)
+            logger.error('               %s┗━━━━━━ Error here or shortly after', marker)
+            raise ParsingException(str(e)) from e
+        assert len(result) == 1, result
+        return result[0]
+
+    def to_string(self, expression):
+        """Convert expression to a string of this backend, not implemented yet"""
+        raise NotImplementedError()
+
+
+class ParsingException(Exception):
+    pass
+
+
+def create_parser(config):
+    """Build the grammar for the Operator and Function objects in config"""
+    # A Forward per grammar, sharing one would make every Parser use the
+    # grammar of whichever backend was created last
+    expression = pyparsing.Forward()
+    COMPONENT = pyparsing.Or(
+        [f.build_parser(expression) for f in config if isinstance(f, Function)] +
+        [pyparsing_common.number, pyparsing_common.sci_real]
+    )
+
+    # Sort the order of operations as appropriate
+    # TODO should this be in the backend configuration?
+    operations = [o for o in config if isinstance(o, Operator)]
+    operations = [o.parser_description for o in sorted(operations)]
+    expression << pyparsing.infixNotation(COMPONENT, operations)
+
+    return expression
diff --git a/tests/backends/test_numexpr.py b/tests/backends/test_numexpr.py
new file mode 100644
index 0000000..5802351
--- /dev/null
+++ b/tests/backends/test_numexpr.py
@@ -0,0 +1,58 @@
+from selections import Expression
+from selections import from_numexpr
+from selections.identifiers import IDs
+
+
+def assert_equal_expressions(lhs, rhs):
+    assert isinstance(lhs, Expression)
+    assert isinstance(rhs, Expression)
+    assert lhs._id == rhs._id
+    assert len(lhs._args) == len(rhs._args)
+    for a, b in zip(lhs._args, rhs._args):
+        assert isinstance(b, a.__class__)
+        if isinstance(a, Expression):
+            assert_equal_expressions(a, b)
+        else:
+            assert a == b
+    # TODO Check the equivalent method always gets it right
+    # assert lhs.equivilent(rhs)
+    # assert rhs.equivilent(lhs)
+
+
+# def test_empty():
+#     from_numexpr('')
+
+# TODO What should this do? Just a number?
+# assert_equal_expressions(from_numexpr('-1'), Expression(ID, 1, 1))
+
+def test_basic_parsing():
+    assert_equal_expressions(from_numexpr('1 + 1'), Expression(IDs.ADD, 1, 1))
+    assert_equal_expressions(from_numexpr('sqrt(1)'), Expression(IDs.SQRT, 1))
+    assert_equal_expressions(from_numexpr('arctan2(1, 1)'), Expression(IDs.ATAN2, 1, 1))
+
+
+def test_nested_parsing():
+    assert_equal_expressions(
+        from_numexpr('-(1 + 1)'),
+        Expression(IDs.MINUS, Expression(IDs.ADD, 1, 1))
+    )
+    assert_equal_expressions(
+        from_numexpr('+sqrt(3)'),
+        Expression(IDs.PLUS, Expression(IDs.SQRT, 3))
+    )
+    assert_equal_expressions(
+        from_numexpr('-(4) * (3)'),
+        Expression(IDs.MUL, Expression(IDs.MINUS, 4), 3)
+    )
+    assert_equal_expressions(
+        from_numexpr('-(1 + 1) * +(5 + 4)'),
+        Expression(IDs.MUL,
+                   Expression(IDs.MINUS, Expression(IDs.ADD, 1, 1)),
+                   Expression(IDs.PLUS, Expression(IDs.ADD, 5, 4)))
+    )
+    assert_equal_expressions(
+        from_numexpr('-sqrt(3 + 4) / arctan2(5 + sqrt(4), 1)'),
+        Expression(IDs.DIV,
+                   Expression(IDs.MINUS, Expression(IDs.SQRT, Expression(IDs.ADD, 3, 4))),
+                   Expression(IDs.ATAN2, Expression(IDs.ADD, 5, Expression(IDs.SQRT, 4)), 1))
+    )
diff --git a/tests/backends/test_ROOT.py b/tests/backends/test_ROOT.py
new file mode 100644
index 0000000..45e66e6
--- /dev/null
+++ b/tests/backends/test_ROOT.py
@@ -0,0 +1,70 @@
+from selections import Expression
+from selections import from_root
+from selections.identifiers import IDs
+
+
+def assert_equal_expressions(lhs, rhs):
+    assert isinstance(lhs, Expression)
+    assert isinstance(rhs, Expression)
+    assert lhs._id == rhs._id
+    assert len(lhs._args) == len(rhs._args)
+    for a, b in zip(lhs._args, rhs._args):
+        assert isinstance(b, a.__class__)
+        if isinstance(a, Expression):
+            assert_equal_expressions(a, b)
+        else:
+            assert a == b
+    # TODO Check the equivalent method always gets it right
+    # assert lhs.equivilent(rhs)
+    # assert rhs.equivilent(lhs)
+
+
+# def test_empty():
+#     from_root('')
+
+# TODO What should this do? Just a number?
+# assert_equal_expressions(from_root('-1'), Expression(ID, 1, 1))
+
+def test_basic_parsing():
+    assert_equal_expressions(from_root('1 + 1'), Expression(IDs.ADD, 1, 1))
+    assert_equal_expressions(from_root('sqrt(1)'), Expression(IDs.SQRT, 1))
+    assert_equal_expressions(from_root('TMath::Sqrt(1)'), Expression(IDs.SQRT, 1))
+    assert_equal_expressions(from_root('arctan2(1, 1)'), Expression(IDs.ATAN2, 1, 1))
+    assert_equal_expressions(from_root('TMath::ATan2(1, 1)'), Expression(IDs.ATAN2, 1, 1))
+
+
+def test_nested_parsing():
+    assert_equal_expressions(
+        from_root('-(1 + 1)'),
+        Expression(IDs.MINUS, Expression(IDs.ADD, 1, 1))
+    )
+    assert_equal_expressions(
+        from_root('+sqrt(3)'),
+        Expression(IDs.PLUS, Expression(IDs.SQRT, 3))
+    )
+    assert_equal_expressions(
+        from_root('+TMath::Sqrt(3)'),
+        Expression(IDs.PLUS, Expression(IDs.SQRT, 3))
+    )
+    assert_equal_expressions(
+        from_root('-(4) * (3)'),
+        Expression(IDs.MUL, Expression(IDs.MINUS, 4), 3)
+    )
+    assert_equal_expressions(
+        from_root('-(1 + 1) * +(5 + 4)'),
+        Expression(IDs.MUL,
+                   Expression(IDs.MINUS, Expression(IDs.ADD, 1, 1)),
+                   Expression(IDs.PLUS, Expression(IDs.ADD, 5, 4)))
+    )
+    assert_equal_expressions(
+        from_root('-sqrt(3 + 4) / arctan2(5 + sqrt(4), 1)'),
+        Expression(IDs.DIV,
+                   Expression(IDs.MINUS, Expression(IDs.SQRT, Expression(IDs.ADD, 3, 4))),
+                   Expression(IDs.ATAN2, Expression(IDs.ADD, 5, Expression(IDs.SQRT, 4)), 1))
+    )
+    assert_equal_expressions(
+        from_root('-TMath::Sqrt(3 + 4) / TMath::ATan2(5 + TMath::Sqrt(4), 1)'),
+        Expression(IDs.DIV,
+                   Expression(IDs.MINUS, Expression(IDs.SQRT, Expression(IDs.ADD, 3, 4))),
+                   Expression(IDs.ATAN2, Expression(IDs.ADD, 5, Expression(IDs.SQRT, 4)), 1))
+    )