Skip to content

Commit

Permalink
use CLoader if available to speed up yaml parsing;
Browse files Browse the repository at this point in the history
  • Loading branch information
scarlehoff committed Feb 15, 2024
1 parent 0523d3f commit 21bbc39
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions validphys2/src/validphys/commondataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,28 @@

import numpy as np
import pandas as pd
from ruamel import yaml
from validobj import ValidationError, parse_input
from validobj.custom import Parser

from reportengine.compat import yaml
from validphys.coredata import KIN_NAMES, CommonData
from validphys.datafiles import new_to_legacy_map, path_commondata
from validphys.plotoptions.plottingoptions import PlottingOptions, labeler_functions
from validphys.utils import parse_yaml_inp

try:
    # If libyaml is available, use the C loader to speed up some of the reads
    # https://pyyaml.org/wiki/LibYAML
    # libyaml is available for most linux distributions.
    # The *safe* variant is selected on purpose: it only builds plain YAML
    # types (mappings, sequences, scalars), like ``yaml.safe_load`` does,
    # whereas ``CLoader``/``Loader`` can construct arbitrary python objects
    # from tagged input.
    from ruamel.yaml import CSafeLoader as Loader
except ImportError:
    # Pure-python fallback: slower, but with the same restrictions
    from ruamel.yaml import SafeLoader as Loader


def _quick_yaml_load(filepath):
    """Read the yaml file ``filepath`` and return the parsed content.

    The file is read as utf-8 text and parsed with the module-level ``Loader``,
    i.e., the libyaml-backed safe loader when it can be imported and the
    pure-python safe loader otherwise.

    Parameters
    ----------
    filepath: pathlib.Path
        path of the yaml file; any object exposing a compatible ``read_text``
        method is accepted

    Returns
    -------
    The python object (usually a dictionary) described by the yaml document
    """
    return yaml.load(filepath.read_text(encoding="utf-8"), Loader=Loader)


# JCM:
# Some notes for developers
# The usage of `frozen` in the definitions of the dataclass is not strictly necessary
Expand Down Expand Up @@ -471,7 +484,7 @@ def load_data_central(self):
if self.is_lagrange_multiplier:
data = np.zeros(self.ndata)
else:
datayaml = yaml.safe_load(self.path_data_central.read_text(encoding="utf-8"))
datayaml = _quick_yaml_load(self.path_data_central)
data = datayaml["data_central"]
data_df = pd.DataFrame(data, index=range(1, self.ndata + 1), columns=["data"])
data_df.index.name = _INDEX_NAME
Expand All @@ -494,7 +507,7 @@ def load_uncertainties(self):

all_df = []
for ufile in self.paths_uncertainties:
uncyaml = yaml.safe_load(ufile.read_text())
uncyaml = _quick_yaml_load(ufile)

mindex = pd.MultiIndex.from_tuples(
[(k, v["treatment"], v["type"]) for k, v in uncyaml["definitions"].items()],
Expand Down Expand Up @@ -531,7 +544,7 @@ def load_kinematics(self, fill_to_three=True, drop_minmax=True):
a dataframe containing the kinematics
"""
kinematics_file = self.path_kinematics
kinyaml = yaml.safe_load(kinematics_file.read_text())
kinyaml = _quick_yaml_load(kinematics_file)

kin_dict = {}
for i, dbin in enumerate(kinyaml["bins"]):
Expand Down

0 comments on commit 21bbc39

Please sign in to comment.