diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9a1d7f4..433c0e0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,6 +25,7 @@ jobs: strategy: matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] + with-fastparser: [false, true] runs-on: ubuntu-20.04 @@ -34,6 +35,14 @@ jobs: with: python-version: ${{ matrix.python-version }} install-just: true + - name: Install dev requirements + run: just devenv + - name: Install fast YAML parser + if: ${{ matrix.with-fastparser }} + run: | + .venv/bin/python -m pip install \ + --no-deps --only-binary ':all:' \ + -r requirements.fastparser.txt - name: Run tests run: just test diff --git a/pipeline/loading.py b/pipeline/loading.py index df83d85..153dc09 100644 --- a/pipeline/loading.py +++ b/pipeline/loading.py @@ -14,6 +14,20 @@ YAMLWarning, ) + +# Import and construct a fast YAML parser, if it's available +try: + import ruamel.yaml.cyaml # type: ignore + + # Unlike `PARSER` below we don't use the round-trip (`rt`) option because we don't + # intend to use this for user-facing error reporting and so we're not interested in + # retaining line-numbers – we just want it to be as fast as possible. + FAST_PARSER: ruamel.yaml.YAML | None = ruamel.yaml.YAML( + typ=["safe"], pure=False + ) # pragma: no cover +except ImportError: # pragma: no cover + FAST_PARSER = None + from . import exceptions @@ -56,6 +70,19 @@ def make_yaml_error_more_helpful( def parse_yaml_file(data: str | Path, filename: str | None = None) -> dict[str, Any]: + # If a fast parser is available and can parse the input without error then we just + # return it. This results in a very significant speed-up for large files. If there + # are errors then we re-parse using the pure Python parser which gives much more + # helpful error messages. + # + # Note that this _is_ covered by tests in CI but, because we're not combining + # coverage across multiple runs, we have to mark it as uncovered. 
+ if FAST_PARSER is not None: # pragma: no cover + try: + return FAST_PARSER.load(data) # type: ignore[no-any-return] + except Exception: + pass + try: return PARSER.load(data) # type: ignore[no-any-return] # ruyaml doesn't have a nice exception hierarchy so we have to catch these diff --git a/requirements.fastparser.in b/requirements.fastparser.in new file mode 100644 index 0000000..f9c5661 --- /dev/null +++ b/requirements.fastparser.in @@ -0,0 +1,4 @@ +# These are the optional packages which, if installed, we will use to parse +# YAML but are not hard dependencies +ruamel.yaml +ruamel.yaml.clib diff --git a/requirements.fastparser.txt b/requirements.fastparser.txt new file mode 100644 index 0000000..9fd20b8 --- /dev/null +++ b/requirements.fastparser.txt @@ -0,0 +1,12 @@ +# +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: +# +# pip-compile requirements.fastparser.in +# +ruamel-yaml==0.18.6 + # via -r requirements.fastparser.in +ruamel-yaml-clib==0.2.8 + # via + # -r requirements.fastparser.in + # ruamel-yaml