
Commit

Format code with black
Format some modules, especially the new ones.
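
The changes below are whitespace-only reformatting as produced by black's default settings. A minimal sketch of reproducing the formatting programmatically (assuming black is installed; the CLI run over the repository is the usual entry point, while black.format_str is the library-level equivalent):

import black

# A line from the diff below, formatted the way this commit does.
# Assumes `pip install black`; format_str is black's library entry point.
src = "result.append([b[0][1],b[-1][1]])\n"
print(black.format_str(src, mode=black.FileMode()))
# -> result.append([b[0][1], b[-1][1]])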
ggonzr committed Jan 20, 2025
1 parent 3b292fb commit 80ef044
Showing 3 changed files with 56 additions and 35 deletions.
1 change: 1 addition & 0 deletions core/controller/ticket_controller.py
@@ -1,6 +1,7 @@
"""
Module that contains TicketController class
"""

import json
from copy import deepcopy
from environment import (
61 changes: 36 additions & 25 deletions core/utils/das.py
@@ -20,9 +20,10 @@ def get_lumi_ranges(i):
     result = []
     for _, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]):
         b = list(b)
-        result.append([b[0][1],b[-1][1]])
+        result.append([b[0][1], b[-1][1]])
     return result
 
+
 def das_do_command(query):
     """
     A simple wrapper for dasgoclient.
@@ -34,10 +35,13 @@ def das_do_command(query):
         list[str]: the dasgoclient command output split by newlines.
     """
-    cmd = 'dasgoclient --query="%s"'%(query)
-    out = subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode('utf8')
+    cmd = 'dasgoclient --query="%s"' % (query)
+    out = subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode(
+        "utf8"
+    )
     return out.split("\n")
 
+
 def das_file_data(dataset):
     """
     Given a dataset create a pandas DataFrame with the
@@ -51,15 +55,16 @@ def das_file_data(dataset):
     - the file name;
     - the number of events in each file.
     """
-    query = 'file dataset=%s | grep file.name, file.nevents'%(dataset)
+    query = "file dataset=%s | grep file.name, file.nevents" % (dataset)
     out = das_do_command(query)
-    out = [np.array(r.split(" "))[[0,3]] for r in out if len(r) > 0]
+    out = [np.array(r.split(" "))[[0, 3]] for r in out if len(r) > 0]
 
-    df = pd.DataFrame(out,columns=["file","events"])
+    df = pd.DataFrame(out, columns=["file", "events"])
     df.events = df.events.values.astype(int)
 
     return df
 
+
 def das_lumi_data(dataset):
     """
     Produces a file by file+lumi+run pandas DataFrame
@@ -73,17 +78,17 @@ def das_lumi_data(dataset):
     - the lumisections.
     """
-    query = 'file,lumi,run dataset=%s '%(dataset)
+    query = "file,lumi,run dataset=%s " % (dataset)
 
     out = das_do_command(query)
-    out = [r.split(" ") for r in out if len(r)>0]
+    out = [r.split(" ") for r in out if len(r) > 0]
 
-    df = pd.DataFrame(out,columns=["file","run","lumis"])
+    df = pd.DataFrame(out, columns=["file", "run", "lumis"])
 
     return df
 
-def get_events_df(golden,dataset,events):
+
+def get_events_df(golden, dataset, events):
     """
     Produces a file by file pandas DataFrame
@@ -104,9 +109,11 @@ def get_events_df(golden,dataset,events):
     lumi_df = das_lumi_data(dataset)
     file_df = das_file_data(dataset)
 
-    df = lumi_df.merge(file_df,on="file",how="inner") # merge file informations with run and lumis
+    df = lumi_df.merge(
+        file_df, on="file", how="inner"
+    )  # merge file informations with run and lumis
     df["lumis"] = [
-        [int(ff) for ff in f.replace("[","").replace("]","").split(",")]
+        [int(ff) for ff in f.replace("[", "").replace("]", "").split(",")]
         for f in df.lumis.values
     ]
 
@@ -123,29 +130,32 @@ def get_events_df(golden,dataset,events):
         if df_r["events"].sum() < 10000:
             continue
 
-        good_lumis = np.array([len([ll for ll in l if ll in golden[r]]) for l in df_r.lumis])
+        good_lumis = np.array(
+            [len([ll for ll in l if ll in golden[r]]) for l in df_r.lumis]
+        )
         n_lumis = np.array([len(l) for l in df_r.lumis])
-        df_rs.append(df_r[good_lumis==n_lumis])
+        df_rs.append(df_r[good_lumis == n_lumis])
 
-    if len(df_rs)==0:
+    if len(df_rs) == 0:
         return pd.DataFrame([])
-    if len(df_rs)==1:
+    if len(df_rs) == 1:
         df = df_rs
     else:
         df = pd.concat(df_rs)
 
     ## lumi sorting
-    df.loc[:,"min_lumi"] = [min(f) for f in df.lumis]
-    df.loc[:,"max_lumi"] = [max(f) for f in df.lumis]
-    df = df.sort_values(["run","min_lumi","max_lumi"])
+    df.loc[:, "min_lumi"] = [min(f) for f in df.lumis]
+    df.loc[:, "max_lumi"] = [max(f) for f in df.lumis]
+    df = df.sort_values(["run", "min_lumi", "max_lumi"])
 
     ## events skimming
-    df = df[df["events"] <= events] #jump too big files
-    df.loc[:,"sum_evs"] = df.loc[:,"events"].cumsum()
+    df = df[df["events"] <= events]  # jump too big files
+    df.loc[:, "sum_evs"] = df.loc[:, "events"].cumsum()
     df = df[df["sum_evs"] < events]
 
     return df
 
+
 def get_run_lumi(df):
     """
     Produces the lumi mask dict starting from a pandas DataFrame
@@ -170,17 +180,18 @@ def get_run_lumi(df):
     lumi_list = [
         get_lumi_ranges(
             np.sort(
-                np.concatenate(df.loc[df["run"]==r,"lumis"].values).ravel()
+                np.concatenate(df.loc[df["run"] == r, "lumis"].values).ravel()
             ).tolist()
         )
         for r in run_list
     ]
 
-    lumi_ranges = dict(zip(run_list,lumi_list))
+    lumi_ranges = dict(zip(run_list, lumi_list))
 
     return lumi_ranges
 
-def get_lumi_dict(golden,dataset,events):
+
+def get_lumi_dict(golden, dataset, events):
     """
     Produces a lumi mask for a given dataset, up to events, using a certification json
@@ -197,7 +208,7 @@ def get_lumi_dict(golden,dataset,events):
     E.g. {run : [[lumi_1,lumi_2],[lumi_3,lumi_4]]}
     """
 
-    df = get_events_df(golden,dataset,events)
+    df = get_events_df(golden, dataset, events)
     lumi = get_run_lumi(df)
 
     return lumi
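
For context, the helpers in core/utils/das.py compose into a single pipeline from certification json to lumi mask. A minimal usage sketch, assuming the repository root is on sys.path, dasgoclient is available in the PATH, and using an illustrative dataset name:

from core.utils.das import get_lumi_dict
from core.utils.dqm import get_golden_json

# Illustrative dataset name; a real CMS dataset path is required.
dataset = "/Muon0/Run2023C-PromptReco-v1/AOD"
golden = get_golden_json(dataset)  # flat {run: [lumi, lumi, ...]}
# Lumi mask covering at most 100000 events from fully certified files.
lumi_mask = get_lumi_dict(golden, dataset, 100000)
print(lumi_mask)  # {run: [[lumi_1, lumi_2], [lumi_3, lumi_4]], ...}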
29 changes: 19 additions & 10 deletions core/utils/dqm.py
@@ -2,16 +2,17 @@
 import json
 import os
 
-#pylint: disable=import-error
+# pylint: disable=import-error
 import numpy as np
 import requests
 from bs4 import BeautifulSoup
 from requests.exceptions import HTTPError
 
-#pylint: enable=import-error
+# pylint: enable=import-error
 
 base_cert_path = "/eos/user/c/cmsdqm/www/CAF/certification/"
 
+
 def list_certification_files(cert_type):
     """
     List all the certification files related to a certification type
@@ -48,6 +49,7 @@ def list_certification_files(cert_type):
 
     return file_names
 
+
 def get_certification_file(path):
     """
     Get a certification file from the CMS DQM certification
@@ -68,6 +70,7 @@ def get_certification_file(path):
 
     return file.json()
 
+
 def get_cert_type(dataset):
     """
     List all the certification files related to a certification type
@@ -81,8 +84,8 @@ def get_cert_type(dataset):
         or Commisioning).
     """
-    year = dataset.split("Run")[1][2:4] # from 20XX to XX
-    PD = dataset.split("/")[1] # pylint: disable=invalid-name
+    year = dataset.split("Run")[1][2:4]  # from 20XX to XX
+    PD = dataset.split("/")[1]  # pylint: disable=invalid-name
     cert_type = "Collisions" + str(year)
     if "Cosmics" in dataset:
         cert_type = "Cosmics" + str(year)
@@ -93,7 +96,8 @@ def get_cert_type(dataset):
 
     return cert_type
 
-def get_json_list(cert_type,web_fallback):
+
+def get_json_list(cert_type, web_fallback):
     """
     List all the certification files related to a certification type
     either stored on CMS DQM EOS either, as a fallback,
@@ -113,7 +117,9 @@ def get_json_list(cert_type,web_fallback):
         cert_path = base_cert_path + cert_type + "/"
         json_list = os.listdir(cert_path)
         json_list = [c for c in json_list if "Golden" in c and "era" not in c]
-        json_list = [c for c in json_list if c.startswith("Cert_C") and c.endswith("json")]
+        json_list = [
+            c for c in json_list if c.startswith("Cert_C") and c.endswith("json")
+        ]
     ## ... if not we go to the website
     else:
         json_list = list_certification_files(cert_type=cert_type)
@@ -127,6 +133,7 @@ def get_json_list(cert_type,web_fallback):
 
     return json_list
 
+
 def get_golden_json(dataset):
     """
     Output a the golden certification dictionary (json) for a specific datasets.
@@ -148,12 +155,14 @@ def get_golden_json(dataset):
     cert_path = base_cert_path + cert_type + "/"
     web_fallback = not os.path.isdir(cert_path)
 
-    json_list = get_json_list(cert_type,web_fallback)
+    json_list = get_json_list(cert_type, web_fallback)
 
     # the larger the better, assuming file naming schema
     # Cert_X_RunStart_RunFinish_Type.json
     run_ranges = [int(c.split("_")[3]) - int(c.split("_")[2]) for c in json_list]
-    latest_json = np.array(json_list[np.argmax(run_ranges)]).reshape(1,-1)[0].astype(str)
+    latest_json = (
+        np.array(json_list[np.argmax(run_ranges)]).reshape(1, -1)[0].astype(str)
+    )
     best_json = str(latest_json[0])
     if not web_fallback:
         with codecs.open(cert_path + "/" + best_json, encoding="utf-8") as js:
@@ -164,9 +173,9 @@ def get_golden_json(dataset):
 
     # golden json with all the lumisections one by one
     for k in golden:
-        R = [] # pylint: disable=invalid-name
+        R = []  # pylint: disable=invalid-name
         for r in golden[k]:
-            R = R + list(range(r[0], r[1] + 1)) # pylint: disable=invalid-name
+            R = R + list(range(r[0], r[1] + 1))  # pylint: disable=invalid-name
         golden_flat[k] = R
 
     return golden_flat

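The file selection in get_golden_json relies only on the Cert_X_RunStart_RunFinish_Type.json naming schema noted in the comments above. A standalone sketch of that heuristic, with illustrative (not real) filenames:

# Standalone sketch of the run-range heuristic in get_golden_json;
# the filenames are illustrative, not actual certification files.
json_list = [
    "Cert_Collisions2023_366442_368823_Golden.json",
    "Cert_Collisions2023_366442_370790_Golden.json",
]
run_ranges = [int(c.split("_")[3]) - int(c.split("_")[2]) for c in json_list]
best_json = json_list[run_ranges.index(max(run_ranges))]
print(best_json)  # the file covering the widest run range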