Skip to content

Commit

Permalink
Optionally use fast YAML parser if available
Browse files Browse the repository at this point in the history
Closes #340
  • Loading branch information
evansd committed Oct 9, 2024
1 parent e4fe728 commit 71a8709
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
with-fastparser: [false, true]

runs-on: ubuntu-20.04

Expand All @@ -34,6 +35,14 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
install-just: true
- name: Install dev requirements
run: just devenv
- name: Install fast YAML parser
if: ${{ matrix.with-fastparser }}
run: |
.venv/bin/python -m pip install \
--no-deps --only-binary ':all:' \
-r requirements.fastparser.txt
- name: Run tests
run: just test

Expand Down
27 changes: 27 additions & 0 deletions pipeline/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,20 @@
YAMLWarning,
)


# Import and construct a fast YAML parser, if it's available. When the import below
# fails, `FAST_PARSER` is set to `None` so that callers can test for its presence
# before attempting to use it.
try:
    # NOTE(review): presumably this import is only here to trigger an ImportError when
    # the compiled parser extension is missing; the parser object itself is built from
    # `ruamel.yaml.YAML` below – confirm.
    import ruamel.yaml.cyaml  # type: ignore

    # Unlike `PARSER` below we don't use the round-trip (`rt`) option because we don't
    # intend to use this for user-facing error reporting and so we're not interested in
    # retaining line-numbers – we just want it to be as fast as possible.
    FAST_PARSER: ruamel.yaml.YAML | None = ruamel.yaml.YAML(
        typ=["safe"], pure=False
    )  # pragma: no cover
except ImportError:  # pragma: no cover
    # The optional fast parser isn't installed: record its absence rather than failing.
    FAST_PARSER = None

from . import exceptions


Expand Down Expand Up @@ -56,6 +70,19 @@ def make_yaml_error_more_helpful(


def parse_yaml_file(data: str | Path, filename: str | None = None) -> dict[str, Any]:
# If a fast parser is available and can parse the input without error then we just
# return it. This results in a very significant speed-up for large files. If there
# are errors then we re-parse using the pure Python parser which gives much more
# helpful error messages.
#
# Note that this _is_ covered by tests in CI but, because we're not combining
# coverage across multiple runs, we have to mark it as uncovered.
if FAST_PARSER is not None: # pragma: no cover
try:
return FAST_PARSER.load(data) # type: ignore[no-any-return]
except Exception:
pass

try:
return PARSER.load(data) # type: ignore[no-any-return]
# ruyaml doesn't have a nice exception hierarchy so we have to catch these
Expand Down
4 changes: 4 additions & 0 deletions requirements.fastparser.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# These are optional packages which, if installed, we will use to parse YAML.
# They are not hard dependencies.
ruamel.yaml
ruamel.yaml.clib
12 changes: 12 additions & 0 deletions requirements.fastparser.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile requirements.fastparser.in
#
ruamel-yaml==0.18.6
# via -r requirements.fastparser.in
ruamel-yaml-clib==0.2.8
# via
# -r requirements.fastparser.in
# ruamel-yaml

0 comments on commit 71a8709

Please sign in to comment.