diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py
new file mode 100644
index 0000000000..fea2fe5ad7
--- /dev/null
+++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py
@@ -0,0 +1,100 @@
+from nnpdf_data.filter_utils.hera_utils import commondata #, covmat_is_close
+from pathlib import Path
+from dataclasses import dataclass
+import typing
+from typing import List
+import numpy as np
+import pandas as pd
+from os import PathLike
+import yaml
+
+# Centre-of-mass energy in GeV (SQRT(S) entry of the HepData table headers).
+SQRTS = 38.8
+
+# Rapidity of rawdata/Table1.csv ... rawdata/Table7.csv, in this order.
+YRAP = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]
+
+# Column names given to the eight columns of each raw table.
+COL_NAMES = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"]
+
+
+def mergetables() -> pd.DataFrame:
+    """Merge the seven raw HepData tables into a single DataFrame.
+
+    Every table is tagged with its rapidity ("y") and with the
+    centre-of-mass energy ("sqrts"). Rows whose cross section cannot be
+    parsed as a number (placeholder entries such as "-") are dropped.
+    The first column of the tables holds sqrt(tau) and is converted to
+    M2 = tau * s.
+
+    The tables are read from ./rawdata relative to the working directory.
+    """
+    frames = []
+    for itab, y in enumerate(YRAP, start=1):
+        df = pd.read_csv(Path(f"./rawdata/Table{itab}.csv"), header=11, names=COL_NAMES)
+        df["y"] = y
+        df["sqrts"] = SQRTS
+        # Keep only the rows with a numeric cross section.
+        frames.append(df[pd.to_numeric(df["dsig"], errors="coerce").notnull()])
+
+    # Concatenate once instead of appending to an initially empty frame.
+    combined_df = pd.concat(frames, ignore_index=True)
+
+    # In the table we have sqrt(tau) not M2; compute M2=tau*s
+    combined_df["M2"] = (combined_df["M2"].astype(float) * SQRTS) ** 2
+
+    return combined_df
+
+
+@dataclass
+class E605_commondata(commondata):
+    """Commondata of the E605 measurement built from the merged raw tables."""
+
+    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
+        """Fill the commondata fields from the output of mergetables().
+
+        data: frame with at least the columns "dsig", "y", "M2", "sqrts",
+            "statp" and "normp".
+        dataset_name: stored as self.dataset_name.
+        process: stored as self.process.
+        """
+        # Kinematic quantities.
+        self.central_values = data["dsig"].astype(float).to_numpy()
+        self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
+        self.kinematic_quantities = ["y", "M2", "sqrts"]
+
+        # Statistical uncertainties, as a float array like the other fields.
+        self.statistical_uncertainties = data["statp"].astype(float).to_numpy()
+
+        # Systematic uncertainties.
+        # Normalisation uncertainty, e.g. "15.0%" -> 0.15.
+        norm = data["normp"].str.strip("%").astype(float).to_numpy() / 100
+
+        # The overall 10% uncertainty is treated as additive, so it is
+        # converted to an absolute value, while the normalisation
+        # uncertainty is always treated multiplicatively.
+        # NOTE(review): norm stays a fraction while the 10% term is
+        # absolute - confirm this is what the commondata reader expects.
+        flat = np.full_like(norm, 0.1) * self.central_values
+
+        self.systematic_uncertainties = np.column_stack((flat, norm))
+        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]
+
+        self.process = process
+        self.dataset_name = dataset_name
+
+
+def main():
+    """Build the E605 commondata and write its three yaml files."""
+    data = mergetables()
+    # First create the commondata variant without the nuclear uncertainties.
+    DYE605 = E605_commondata(data, "DYE605_Z0_38P8GEV", "Z0")
+    DYE605.write_new_commondata(
+        Path("data_reimplemented_PXSEC.yaml"),
+        Path("kinematics_reimplemented_PXSEC.yaml"),
+        Path("uncertainties_reimplemented_PXSEC.yaml"),
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table1.csv b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table1.csv
new file mode 100644
index 0000000000..095a5d5e25
--- /dev/null
+++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table1.csv
@@ -0,0 +1,31 @@
+#: table_doi: 10.17182/hepdata.22831.v1/t1
+#: name: Table 1
+#: description: No description provided.
+#: data_file: Table1.yaml +#: keyword reactions: P CU --> MU+ MU- X | P NUCLEUS --> MU+ MU- X +#: keyword observables: D2SIG/DSQRTTAU/DYRAP +#: keyword phrases: Inclusive | Double Differential Cross Section | Rapidity Dependence | Di-Muon Production | Muon production +#: keyword cmenergies: 38.8 +#: RE,P CU --> MU+ MU- X +#: SQRT(S) [GeV],38.8 +#: YRAP,-0.2 +SQRT(TAU),S*D2(SIG)/D(SQRT(TAU))/DYRAP [NB*GEV**2/NUCLEON],error +,error -,"sys,Normalisation error +","sys,Normalisation error -","sys,Point-to-point systematic error +","sys,Point-to-point systematic error -" +0.1831,364.0,106.0,-106.0,15.0%,-15.0%,5.0%,-5.0% +0.1883,209.0,45.9,-45.9,15.0%,-15.0%,5.0%,-5.0% +0.1935,220.0,40.3,-40.3,15.0%,-15.0%,5.0%,-5.0% +0.1986,243.0,45.5,-45.5,15.0%,-15.0%,5.0%,-5.0% +0.2038,119.0,21.5,-21.5,15.0%,-15.0%,5.0%,-5.0% +0.2089,176.0,31.2,-31.2,15.0%,-15.0%,5.0%,-5.0% +0.2141,174.0,28.5,-28.5,15.0%,-15.0%,5.0%,-5.0% +0.2192,140.0,19.8,-19.8,15.0%,-15.0%,5.0%,-5.0% +0.2244,105.0,13.4,-13.4,15.0%,-15.0%,5.0%,-5.0% +0.2296,123.0,13.9,-13.9,15.0%,-15.0%,5.0%,-5.0% +0.2708,34.9,4.32,-4.32,15.0%,-15.0%,5.0%,-5.0% +0.276,28.9,4.31,-4.31,15.0%,-15.0%,5.0%,-5.0% +0.2812,27.4,4.33,-4.33,15.0%,-15.0%,5.0%,-5.0% +0.2915,16.2,1.62,-1.62,15.0%,-15.0%,5.0%,-5.0% +0.3121,10.7,1.1,-1.1,15.0%,-15.0%,5.0%,-5.0% +0.3431,3.57,0.566,-0.566,15.0%,-15.0%,5.0%,-5.0% +0.3843,1.7,0.467,-0.467,15.0%,-15.0%,5.0%,-5.0% +0.4359,-,0,0,0,0,0,0 + diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table2.csv b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table2.csv new file mode 100644 index 0000000000..7e2075b044 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table2.csv @@ -0,0 +1,31 @@ +#: table_doi: 10.17182/hepdata.22831.v1/t2 +#: name: Table 2 +#: description: No description provided. 
+#: data_file: Table2.yaml +#: keyword reactions: P CU --> MU+ MU- X | P NUCLEUS --> MU+ MU- X +#: keyword observables: D2SIG/DSQRTTAU/DYRAP +#: keyword phrases: Inclusive | Double Differential Cross Section | Rapidity Dependence | Di-Muon Production | Muon production +#: keyword cmenergies: 38.8 +#: RE,P CU --> MU+ MU- X +#: SQRT(S) [GeV],38.8 +#: YRAP,-0.1 +SQRT(TAU),S*D2(SIG)/D(SQRT(TAU))/DYRAP [NB*GEV**2/NUCLEON],error +,error -,"sys,Normalisation error +","sys,Normalisation error -","sys,Point-to-point systematic error +","sys,Point-to-point systematic error -" +0.1831,399.0,85.1,-85.1,15.0%,-15.0%,5.0%,-5.0% +0.1883,315.0,55.5,-55.5,15.0%,-15.0%,5.0%,-5.0% +0.1935,277.0,45.4,-45.4,15.0%,-15.0%,5.0%,-5.0% +0.1986,244.0,35.0,-35.0,15.0%,-15.0%,5.0%,-5.0% +0.2038,237.0,35.4,-35.4,15.0%,-15.0%,5.0%,-5.0% +0.2089,192.0,24.9,-24.9,15.0%,-15.0%,5.0%,-5.0% +0.2141,166.0,19.2,-19.2,15.0%,-15.0%,5.0%,-5.0% +0.2192,161.0,17.9,-17.9,15.0%,-15.0%,5.0%,-5.0% +0.2244,145.0,12.9,-12.9,15.0%,-15.0%,5.0%,-5.0% +0.2296,127.0,10.4,-10.4,15.0%,-15.0%,5.0%,-5.0% +0.2708,45.6,3.95,-3.95,15.0%,-15.0%,5.0%,-5.0% +0.276,28.9,3.04,-3.04,15.0%,-15.0%,5.0%,-5.0% +0.2812,30.9,3.2,-3.2,15.0%,-15.0%,5.0%,-5.0% +0.2915,19.9,1.35,-1.35,15.0%,-15.0%,5.0%,-5.0% +0.3121,12.5,0.828,-0.828,15.0%,-15.0%,5.0%,-5.0% +0.3431,5.8,0.47,-0.47,15.0%,-15.0%,5.0%,-5.0% +0.3843,1.79,0.272,-0.272,15.0%,-15.0%,5.0%,-5.0% +0.4359,0.474,0.18,-0.18,15.0%,-15.0%,5.0%,-5.0% + diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table3.csv b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table3.csv new file mode 100644 index 0000000000..9dcf49710d --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table3.csv @@ -0,0 +1,31 @@ +#: table_doi: 10.17182/hepdata.22831.v1/t3 +#: name: Table 3 +#: description: No description provided. 
+#: data_file: Table3.yaml +#: keyword reactions: P CU --> MU+ MU- X | P NUCLEUS --> MU+ MU- X +#: keyword observables: D2SIG/DSQRTTAU/DYRAP +#: keyword phrases: Inclusive | Double Differential Cross Section | Rapidity Dependence | Di-Muon Production | Muon production +#: keyword cmenergies: 38.8 +#: RE,P CU --> MU+ MU- X +#: SQRT(S) [GeV],38.8 +#: YRAP,0.0 +SQRT(TAU),S*D2(SIG)/D(SQRT(TAU))/DYRAP [NB*GEV**2/NUCLEON],error +,error -,"sys,Normalisation error +","sys,Normalisation error -","sys,Point-to-point systematic error +","sys,Point-to-point systematic error -" +0.1831,424.0,77.1,-77.1,15.0%,-15.0%,5.0%,-5.0% +0.1883,350.0,57.7,-57.7,15.0%,-15.0%,5.0%,-5.0% +0.1935,363.0,55.7,-55.7,15.0%,-15.0%,5.0%,-5.0% +0.1986,248.0,34.0,-34.0,15.0%,-15.0%,5.0%,-5.0% +0.2038,208.0,26.9,-26.9,15.0%,-15.0%,5.0%,-5.0% +0.2089,212.0,23.4,-23.4,15.0%,-15.0%,5.0%,-5.0% +0.2141,148.0,13.3,-13.3,15.0%,-15.0%,5.0%,-5.0% +0.2192,144.0,11.9,-11.9,15.0%,-15.0%,5.0%,-5.0% +0.2244,143.0,11.1,-11.1,15.0%,-15.0%,5.0%,-5.0% +0.2296,114.0,7.69,-7.69,15.0%,-15.0%,5.0%,-5.0% +0.2708,39.7,3.05,-3.05,15.0%,-15.0%,5.0%,-5.0% +0.276,33.1,2.8,-2.8,15.0%,-15.0%,5.0%,-5.0% +0.2812,27.4,2.42,-2.42,15.0%,-15.0%,5.0%,-5.0% +0.2915,21.6,1.3,-1.3,15.0%,-15.0%,5.0%,-5.0% +0.3121,15.1,0.903,-0.903,15.0%,-15.0%,5.0%,-5.0% +0.3431,6.05,0.372,-0.372,15.0%,-15.0%,5.0%,-5.0% +0.3843,1.86,0.194,-0.194,15.0%,-15.0%,5.0%,-5.0% +0.4359,0.404,0.101,-0.101,15.0%,-15.0%,5.0%,-5.0% + diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table4.csv b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table4.csv new file mode 100644 index 0000000000..0128d856a1 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table4.csv @@ -0,0 +1,31 @@ +#: table_doi: 10.17182/hepdata.22831.v1/t4 +#: name: Table 4 +#: description: No description provided. 
+#: data_file: Table4.yaml +#: keyword reactions: P CU --> MU+ MU- X | P NUCLEUS --> MU+ MU- X +#: keyword observables: D2SIG/DSQRTTAU/DYRAP +#: keyword phrases: Inclusive | Double Differential Cross Section | Rapidity Dependence | Di-Muon Production | Muon production +#: keyword cmenergies: 38.8 +#: RE,P CU --> MU+ MU- X +#: SQRT(S) [GeV],38.8 +#: YRAP,0.1 +SQRT(TAU),S*D2(SIG)/D(SQRT(TAU))/DYRAP [NB*GEV**2/NUCLEON],error +,error -,"sys,Normalisation error +","sys,Normalisation error -","sys,Point-to-point systematic error +","sys,Point-to-point systematic error -" +0.1831,431.0,75.3,-75.3,15.0%,-15.0%,5.0%,-5.0% +0.1883,347.0,52.5,-52.5,15.0%,-15.0%,5.0%,-5.0% +0.1935,386.0,58.5,-58.5,15.0%,-15.0%,5.0%,-5.0% +0.1986,274.0,36.6,-36.6,15.0%,-15.0%,5.0%,-5.0% +0.2038,294.0,39.4,-39.4,15.0%,-15.0%,5.0%,-5.0% +0.2089,223.0,19.4,-19.4,15.0%,-15.0%,5.0%,-5.0% +0.2141,169.0,12.7,-12.7,15.0%,-15.0%,5.0%,-5.0% +0.2192,137.0,9.91,-9.91,15.0%,-15.0%,5.0%,-5.0% +0.2244,137.0,9.28,-9.28,15.0%,-15.0%,5.0%,-5.0% +0.2296,126.0,7.99,-7.99,15.0%,-15.0%,5.0%,-5.0% +0.2708,46.0,3.25,-3.25,15.0%,-15.0%,5.0%,-5.0% +0.276,37.0,2.93,-2.93,15.0%,-15.0%,5.0%,-5.0% +0.2812,30.2,2.58,-2.58,15.0%,-15.0%,5.0%,-5.0% +0.2915,23.1,1.39,-1.39,15.0%,-15.0%,5.0%,-5.0% +0.3121,14.7,0.884,-0.884,15.0%,-15.0%,5.0%,-5.0% +0.3431,7.0,0.42,-0.42,15.0%,-15.0%,5.0%,-5.0% +0.3843,2.24,0.176,-0.176,15.0%,-15.0%,5.0%,-5.0% +0.4359,0.495,0.0853,-0.0853,15.0%,-15.0%,5.0%,-5.0% + diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table5.csv b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table5.csv new file mode 100644 index 0000000000..8d5f0b84f6 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table5.csv @@ -0,0 +1,31 @@ +#: table_doi: 10.17182/hepdata.22831.v1/t5 +#: name: Table 5 +#: description: No description provided. 
+#: data_file: Table5.yaml +#: keyword reactions: P CU --> MU+ MU- X | P NUCLEUS --> MU+ MU- X +#: keyword observables: D2SIG/DSQRTTAU/DYRAP +#: keyword phrases: Inclusive | Double Differential Cross Section | Rapidity Dependence | Di-Muon Production | Muon production +#: keyword cmenergies: 38.8 +#: RE,P CU --> MU+ MU- X +#: SQRT(S) [GeV],38.8 +#: YRAP,0.2 +SQRT(TAU),S*D2(SIG)/D(SQRT(TAU))/DYRAP [NB*GEV**2/NUCLEON],error +,error -,"sys,Normalisation error +","sys,Normalisation error -","sys,Point-to-point systematic error +","sys,Point-to-point systematic error -" +0.1831,560.0,110.0,-110.0,15.0%,-15.0%,5.0%,-5.0% +0.1883,405.0,64.4,-64.4,15.0%,-15.0%,5.0%,-5.0% +0.1935,315.0,49.8,-49.8,15.0%,-15.0%,5.0%,-5.0% +0.1986,274.0,39.4,-39.4,15.0%,-15.0%,5.0%,-5.0% +0.2038,318.0,47.7,-47.7,15.0%,-15.0%,5.0%,-5.0% +0.2089,214.0,16.7,-16.7,15.0%,-15.0%,5.0%,-5.0% +0.2141,162.0,11.0,-11.0,15.0%,-15.0%,5.0%,-5.0% +0.2192,159.0,10.7,-10.7,15.0%,-15.0%,5.0%,-5.0% +0.2244,135.0,9.14,-9.14,15.0%,-15.0%,5.0%,-5.0% +0.2296,133.0,8.43,-8.43,15.0%,-15.0%,5.0%,-5.0% +0.2708,57.1,3.99,-3.99,15.0%,-15.0%,5.0%,-5.0% +0.276,41.1,3.42,-3.42,15.0%,-15.0%,5.0%,-5.0% +0.2812,34.5,2.96,-2.96,15.0%,-15.0%,5.0%,-5.0% +0.2915,25.1,1.51,-1.51,15.0%,-15.0%,5.0%,-5.0% +0.3121,16.4,0.981,-0.981,15.0%,-15.0%,5.0%,-5.0% +0.3431,6.43,0.386,-0.386,15.0%,-15.0%,5.0%,-5.0% +0.3843,2.2,0.178,-0.178,15.0%,-15.0%,5.0%,-5.0% +0.4359,0.56,0.0878,-0.0878,15.0%,-15.0%,5.0%,-5.0% + diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table6.csv b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table6.csv new file mode 100644 index 0000000000..f2dc466eb1 --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table6.csv @@ -0,0 +1,31 @@ +#: table_doi: 10.17182/hepdata.22831.v1/t6 +#: name: Table 6 +#: description: No description provided. 
+#: data_file: Table6.yaml +#: keyword reactions: P CU --> MU+ MU- X | P NUCLEUS --> MU+ MU- X +#: keyword observables: D2SIG/DSQRTTAU/DYRAP +#: keyword phrases: Inclusive | Double Differential Cross Section | Rapidity Dependence | Di-Muon Production | Muon production +#: keyword cmenergies: 38.8 +#: RE,P CU --> MU+ MU- X +#: SQRT(S) [GeV],38.8 +#: YRAP,0.3 +SQRT(TAU),S*D2(SIG)/D(SQRT(TAU))/DYRAP [NB*GEV**2/NUCLEON],error +,error -,"sys,Normalisation error +","sys,Normalisation error -","sys,Point-to-point systematic error +","sys,Point-to-point systematic error -" +0.1831,419.0,98.8,-98.8,15.0%,-15.0%,5.0%,-5.0% +0.1883,396.0,89.7,-89.7,15.0%,-15.0%,5.0%,-5.0% +0.1935,331.0,70.2,-70.2,15.0%,-15.0%,5.0%,-5.0% +0.1986,377.0,79.3,-79.3,15.0%,-15.0%,5.0%,-5.0% +0.2038,401.0,84.8,-84.8,15.0%,-15.0%,5.0%,-5.0% +0.2089,193.0,17.1,-17.1,15.0%,-15.0%,5.0%,-5.0% +0.2141,153.0,12.7,-12.7,15.0%,-15.0%,5.0%,-5.0% +0.2192,151.0,12.6,-12.6,15.0%,-15.0%,5.0%,-5.0% +0.2244,134.0,10.4,-10.4,15.0%,-15.0%,5.0%,-5.0% +0.2296,149.0,11.6,-11.6,15.0%,-15.0%,5.0%,-5.0% +0.2708,59.5,5.31,-5.31,15.0%,-15.0%,5.0%,-5.0% +0.276,40.3,4.35,-4.35,15.0%,-15.0%,5.0%,-5.0% +0.2812,31.3,3.44,-3.44,15.0%,-15.0%,5.0%,-5.0% +0.2915,23.2,1.6,-1.6,15.0%,-15.0%,5.0%,-5.0% +0.3121,15.9,0.957,-0.957,15.0%,-15.0%,5.0%,-5.0% +0.3431,6.3,0.449,-0.449,15.0%,-15.0%,5.0%,-5.0% +0.3843,2.38,0.223,-0.223,15.0%,-15.0%,5.0%,-5.0% +0.4359,0.607,0.11,-0.11,15.0%,-15.0%,5.0%,-5.0% + diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table7.csv b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table7.csv new file mode 100644 index 0000000000..730b39bdda --- /dev/null +++ b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/rawdata/Table7.csv @@ -0,0 +1,25 @@ +#: table_doi: 10.17182/hepdata.22831.v1/t7 +#: name: Table 7 +#: description: No description provided. 
+#: data_file: Table7.yaml
+#: keyword reactions: P CU --> MU+ MU- X | P NUCLEUS --> MU+ MU- X
+#: keyword observables: D2SIG/DSQRTTAU/DYRAP
+#: keyword phrases: Inclusive | Double Differential Cross Section | Rapidity Dependence | Di-Muon Production | Muon production
+#: keyword cmenergies: 38.8
+#: RE,P CU --> MU+ MU- X
+#: SQRT(S) [GeV],38.8
+#: YRAP,0.4
+SQRT(TAU),S*D2(SIG)/D(SQRT(TAU))/DYRAP [NB*GEV**2/NUCLEON],error +,error -,"sys,Normalisation error +","sys,Normalisation error -","sys,Point-to-point systematic error +","sys,Point-to-point systematic error -"
+0.2141,198.0,33.3,-33.3,15.0%,-15.0%,5.0%,-5.0%
+0.2192,173.0,26.8,-26.8,15.0%,-15.0%,5.0%,-5.0%
+0.2244,149.0,25.6,-25.6,15.0%,-15.0%,5.0%,-5.0%
+0.2296,98.6,14.8,-14.8,15.0%,-15.0%,5.0%,-5.0%
+0.2708,71.1,11.3,-11.3,15.0%,-15.0%,5.0%,-5.0%
+0.276,28.7,6.61,-6.61,15.0%,-15.0%,5.0%,-5.0%
+0.2812,36.8,8.08,-8.08,15.0%,-15.0%,5.0%,-5.0%
+0.2915,24.7,3.03,-3.03,15.0%,-15.0%,5.0%,-5.0%
+0.3121,14.6,1.55,-1.55,15.0%,-15.0%,5.0%,-5.0%
+0.3431,6.14,0.709,-0.709,15.0%,-15.0%,5.0%,-5.0%
+0.3843,1.95,0.342,-0.342,15.0%,-15.0%,5.0%,-5.0%
+0.4359,0.423,0.142,-0.142,15.0%,-15.0%,5.0%,-5.0%
+
diff --git a/nnpdf_data/nnpdf_data/filter_utils/hera_utils.py b/nnpdf_data/nnpdf_data/filter_utils/hera_utils.py
new file mode 100644
index 0000000000..e76eaef8f2
--- /dev/null
+++ b/nnpdf_data/nnpdf_data/filter_utils/hera_utils.py
@@ -0,0 +1,105 @@
+from pathlib import Path
+from dataclasses import dataclass
+import typing
+from typing import List
+import numpy as np
+import pandas as pd
+from os import PathLike
+import yaml
+#from validphys.api import API
+
+
+@dataclass
+class commondata:
+    """Container for one dataset, able to write itself in the new yaml layout.
+
+    The arrays are indexed by datapoint: row i of kinematics and of
+    systematic_uncertainties belongs to central_values[i].
+    """
+
+    central_values: np.ndarray
+    kinematics: np.ndarray
+    statistical_uncertainties: np.ndarray
+    systematic_uncertainties: np.ndarray
+    # (treatment, type) of each systematic, e.g. ("ADD", "UNCORR").
+    systypes: List[tuple[str, str]]
+    process: str
+    dataset_name: str
+    # Names of the kinematic variables, one per column of kinematics.
+    kinematic_quantities: List[str]
+
+    # Procedure to create data_*.yaml, kinematics_*.yaml and uncertainties_*.yaml
+    def write_new_commondata(self, data_filename: str | PathLike,
+                             kinematics_filename: str | PathLike,
+                             uncertainties_filename: str | PathLike):
+        """Write central values, kinematics and uncertainties to yaml files.
+
+        Each filename may be a str or any path-like object; existing
+        files are overwritten.
+        """
+        # Accept plain strings as well as Path objects, as annotated.
+        data_path = Path(data_filename)
+        kinematics_path = Path(kinematics_filename)
+        uncertainties_path = Path(uncertainties_filename)
+
+        # central data values
+        data = {"data_central": np.asarray(self.central_values, dtype=float).tolist()}
+        with data_path.open("w") as f:
+            yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False)
+
+        # kinematic quantities
+        # TODO add arrays for min and max values to derived type?
+        bins = [
+            {
+                name: {"min": None, "mid": mid, "max": None}
+                for name, mid in zip(self.kinematic_quantities, kin)
+            }
+            for kin in np.asarray(self.kinematics, dtype=float).tolist()
+        ]
+        with kinematics_path.open("w") as f:
+            yaml.safe_dump({"bins": bins}, f, default_flow_style=False, sort_keys=False)
+
+        # uncertainties
+        # There is only one statistical uncertainty per datapoint.
+        definitions = {
+            "stat": {
+                "description": "Statistical uncertainty.",
+                "treatment": "ADD",
+                "type": "UNCORR",
+            }
+        }
+        for isys, systype in enumerate(self.systypes):
+            definitions[f"sys_corr_{isys}"] = {
+                "description": f"Systematic uncertainty {isys}",
+                "treatment": systype[0],
+                "type": systype[1],
+            }
+
+        # Convert once to plain Python floats: this is what safe_dump can
+        # represent, and it avoids rebuilding the lists for every datapoint.
+        stat_values = np.asarray(self.statistical_uncertainties, dtype=float).tolist()
+        sys_values = np.asarray(self.systematic_uncertainties, dtype=float).tolist()
+        unc_bins = []
+        for i, _ in enumerate(self.central_values):
+            systematics = {"stat": stat_values[i]}
+            for isys, value in enumerate(sys_values[i]):
+                systematics[f"sys_corr_{isys}"] = value
+            unc_bins.append(systematics)
+
+        # TODO Notation of reals is inconsistent from yaml.safe_dump
+        # sometimes it is in scientific notation sometimes not...
+        data = {"definitions": definitions, "bins": unc_bins}
+        with uncertainties_path.open("w") as f:
+            yaml.safe_dump(data, f, default_flow_style=False, sort_keys=False)
+
+
+# Subroutines for testing the implementation of the commondata.
+
+## Obtain the covariance matrix for a given variant.
+#def _covmat(name: str, var: str):
+#   inp = {"dataset_input": {"dataset": name, "variant": var}, "theoryid": 700, "use_cuts": "internal"}
+#   return API.covmat_from_systematics(**inp)
+#
+## Compare the covariance matrices of two different variants. True if close.
+#def covmat_is_close(name: str, var1: str, var2: str) -> bool:
+#   return np.isclose(_covmat(name,var1),_covmat(name,var2)).all()