From c08683fe1d551d6a18adea88d8e839f95c04b018 Mon Sep 17 00:00:00 2001
From: fleg
Date: Wed, 6 Mar 2024 11:55:29 +0100
Subject: [PATCH 01/85] Script to transfer data from observatories to CCIN2P3

---
 scripts/transfers/transfer_from_obs.bash | 141 +++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 scripts/transfers/transfer_from_obs.bash

diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash
new file mode 100644
index 00000000..e924c8af
--- /dev/null
+++ b/scripts/transfers/transfer_from_obs.bash
@@ -0,0 +1,141 @@
+#!/bin/bash
+# Script to transfer data from a GRAND observatory to CCIN2P3 (or to any site)
+# Fleg: 03/2024
+# Copyright : Grand Observatory 2024
+
+##### Configuration part #####
+# Please adjust the following variables to your site
+
+# Local database name (sqlite filename)
+dbfile='grand_transfer.db'
+
+# Local directory where the data to be transferred are stored (will be explored recursively)
+localdatadir='/sps/grand/data/gp13/raw/2024/'
+
+# Site name prefix in filenames
+site='GP13'
+
+# Remote server to transfer
+remote_server='lpnws5131.in2p3.fr' # 'cca.in2p3.fr'
+
+# Account on remote server
+remote_account='fleg' # 'prod_grand'
+
+# Target directory on remote server
+remotedatadir='/data/tmp/' #'/sps/grand/data/gp13/raw'
+
+# Start date for transfer (all files older than this date will be skipped)
+first_transfer='20240302'
+
+##### End of Configuration section (do not modify below) #####
+
+# Create database if not exists
+sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTOINCREMENT, directory TEXT, file TEXT, date INT, success BOOLEAN, UNIQUE (directory,file));"
+sqlite3 $dbfile "create table if not exists transfer (id, date_transfer DATETIME, success BOOLEAN, comment TEXTE);"
+
+
+# Define some useful stuff
+
+# Last date of files already registered
+last_transfer=$(sqlite3 $dbfile "select max(date) from gfiles;")
+last_transfer=$(( last_transfer > first_transfer ? last_transfer : first_transfer ))
+
+# Colors
+Default='\033[0m'       # Text Reset
+Red='\033[0;31m'        # Red
+Green='\033[0;32m'      # Green
+
+scriptname=$(basename "$0")
+
+#List of files to be inserted into the db in batches of 500 (larger batches may produce errors)
+declare -A toins=([1]="")
+i=1
+j=0
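+# Note: data filenames are expected to begin with the site prefix followed by
+# the 8-digit observation date, e.g. GP13_20240302_xxxx (hypothetical name);
+# dateobs below is extracted as the 8 characters right after the prefix
+#find all files in localdatadir corresponding to data (i.e. 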
names starting with the site prefix) and add them to the database if not already registered
+for file in $(find $localdatadir -type f -newermt $last_transfer| grep /${site}_ |sort)
+do
+ filename=$(basename $file)
+ tmp=${filename#${site}_}
+ dateobs=${tmp:0:8}
+ #Add file to be registered to the list (and start new list if more than 500 to avoid request limit in insert below)
+ if [ $j -ge 500 ];
+ then
+ i=$((i + 1))
+ toins+=([$i]="")
+ j=0
+ fi
+ toins[$i]+=",('$(dirname $file)', '$(basename $file)', ${dateobs}, false)"
+ j=$((j + 1))
+done
+
+#Add all files at once (10x faster than adding them individually)
+# but iterate over the various lists in case of a huge number of files
+for key in "${!toins[@]}"; do
+ value=${toins[${key}]}
+ if [ -n "$value" ];
+ then
+ res=$(sqlite3 $dbfile "INSERT OR IGNORE INTO gfiles (directory,file,date,success) values ${value:1}")
+ fi
+
+done
+
+# Open an ssh connection that will be used for all transfers (avoids reopening an rsync tunnel for each file)
+mkdir ~/.ssh/ctl
+ssh -nNf -o ControlMaster=yes -o ControlPath="$HOME/.ssh/ctl/%L-%r@%h:%p" ${remote_account}@${remote_server}
+
+declare -A translog=([1]="")
+i=1
+j=0
+#select files not transferred successfully
+for file in $(sqlite3 $dbfile "select directory, file, date, success, id from gfiles where success=false ORDER BY file;")
+do
+ #transform result into array (easier to manipulate)
+ fileinfo=(${file//|/ })
+ #Transfer files (one by one to get info on each transfer)
+ printf "\nSending ${fileinfo[1]} "
+ trans=$(rsync -e "ssh -o 'ControlPath=$HOME/.ssh/ctl/%L-%r@%h:%p'" --out-format="%t %b %n" -au --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1)
+
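+ # ($? below reflects rsync's exit status: a command substitution returns the
+ # exit code of the command it ran, and comments/blank lines do not reset it)
+ if [ "$?" 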
-eq "0" ] + then + #Transfer successful : store info to update database at the end + translog[$i]+=";UPDATE gfiles SET success=true WHERE id=${fileinfo[4]};INSERT INTO transfer (id,success,date_transfer,comment) VALUES (${fileinfo[4]},true,datetime('now','utc'),'${trans}')" + printf "${Green}Ok${Default}" + + else + #Transfer failed : just log errors + translog[$i]+=";INSERT INTO transfer (id,success,date_transfer,comment) VALUES (${fileinfo[4]},false,datetime('now','utc'),'${trans}')" + printf "${Red}ERROR:${Default} \n ${trans} " + fi + + # split info to store into db in case of large number of files + if [ $j -ge 100 ]; + then + i=$((i + 1)) + translog+=([$i]="") + j=0 + fi + + j=$((j + 1)) +done + +printf "\n" + +#update DB with all results (iterate over logs) +for key in "${!translog[@]}"; do + value=${translog[${key}]} + if [ -n "$value" ]; + then + res=$(sqlite3 $dbfile "${value:1}") + fi + +done + +#finally also rsync the database +#rsync -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${dbfile}_$(date +'%Y%m%d-%H%M%S') +rsync -e "ssh -o 'ControlPath=$HOME/.ssh/ctl/%L-%r@%h:%p'" -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${dbfile}_$(date +'%Y%m%d-%H%M%S') + +#close ssh connection +ssh -O exit -o ControlPath="$HOME/.ssh/ctl/%L-%r@%h:%p" ${remote_account}@${remote_server} +rm -rf ~/.ssh/ctl + + From 8a58a4a50e686b48e95731324e2d7f51772155c9 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Thu, 7 Mar 2024 11:18:56 +0100 Subject: [PATCH 02/85] Added tag to identify transfer in a run --- scripts/transfers/transfer_from_obs.bash | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index e924c8af..5f7a36ec 100644 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -31,7 +31,7 @@ first_transfer='20240302' # Create database if not exists sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTOINCREMENT, directory TEXT, file TEXT, date INT, success BOOLEAN, UNIQUE (directory,file));" -sqlite3 $dbfile "create table if not exists transfer (id, date_transfer DATETIME, success BOOLEAN, comment TEXTE);" +sqlite3 $dbfile "create table if not exists transfer (id, tag INTEGER, date_transfer DATETIME, success BOOLEAN, comment TEXTE);" # Define some useful stuff @@ -40,13 +40,14 @@ sqlite3 $dbfile "create table if not exists transfer (id, date_transfer DATETIM last_transfer=$(sqlite3 $dbfile "select max(date) from gfiles;") last_transfer=$(( last_transfer > first_transfer ? last_transfer : first_transfer )) +#tag to identify files treated in the current run +tag=$(date +'%Y%m%d%H%M%S') + # Colors Default='\033[0m' # Text Reset Red='\033[0;31m' # Red Green='\033[0;32m' # Green -scriptname=$(basename "$0") - #List of files to be inserted into the db by bunchs of 500 (larger should produce errors) declare -A toins=([1]="") i=1 @@ -98,12 +99,12 @@ do if [ "$?" 
-eq "0" ] then #Transfer successful : store info to update database at the end - translog[$i]+=";UPDATE gfiles SET success=true WHERE id=${fileinfo[4]};INSERT INTO transfer (id,success,date_transfer,comment) VALUES (${fileinfo[4]},true,datetime('now','utc'),'${trans}')" + translog[$i]+=";UPDATE gfiles SET success=true WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]},${tag}, true,datetime('now','utc'),'${trans}')" printf "${Green}Ok${Default}" else #Transfer failed : just log errors - translog[$i]+=";INSERT INTO transfer (id,success,date_transfer,comment) VALUES (${fileinfo[4]},false,datetime('now','utc'),'${trans}')" + translog[$i]+=";INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]}, ${tag}, false,datetime('now','utc'),'${trans}')" printf "${Red}ERROR:${Default} \n ${trans} " fi @@ -132,7 +133,7 @@ done #finally also rsync the database #rsync -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${dbfile}_$(date +'%Y%m%d-%H%M%S') -rsync -e "ssh -o 'ControlPath=$HOME/.ssh/ctl/%L-%r@%h:%p'" -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${dbfile}_$(date +'%Y%m%d-%H%M%S') +rsync -e "ssh -o 'ControlPath=$HOME/.ssh/ctl/%L-%r@%h:%p'" -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${tag}_${dbfile} #close ssh connection ssh -O exit -o ControlPath="$HOME/.ssh/ctl/%L-%r@%h:%p" ${remote_account}@${remote_server} From af5d95f9c5c202bacd863656fea97767bf655b3d Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Mon, 11 Mar 2024 16:38:37 +0100 Subject: [PATCH 03/85] Added pre_run and post_run scripts (for grand@auger) --- scripts/transfers/setup_network_auger.bash | 91 ++++++++++++++++++++++ scripts/transfers/transfer_from_obs.bash | 24 +++++- 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100755 scripts/transfers/setup_network_auger.bash mode change 100644 => 100755 scripts/transfers/transfer_from_obs.bash diff --git a/scripts/transfers/setup_network_auger.bash b/scripts/transfers/setup_network_auger.bash new file mode 100755 index 00000000..90fcba02 --- /dev/null +++ b/scripts/transfers/setup_network_auger.bash @@ -0,0 +1,91 @@ +#!/bin/bash +# Script to activate/desactivate network in Grand@auger DAQ +# Need on parameter in -init and -close +# Fred & Fleg: 03/2024 +# Copyright : Grand Observatory 2024 + +claro_hop="claro.net.ar"; # 4G operator tracepath hop +auger_hop="auger.org.ar"; # auger network tracepath hop +wwan_con="netplan-cdc-wdm0"; # NetworkManager wwan connection name +max_hop=5; # max hop for tracepath test +sleep_delay=2; # sleep delay to wait after NetworkManager (de)activation calls +verbose=false; # true or false + +# usage: verbose_echo +verbose_echo () { + $verbose && echo "$*"; +} + +wwan_activated() { + local wwan_state=$(nmcli c show $wwan_con | awk '/^GENERAL.STATE:/{print $2}'); + verbose_echo "wwan_activated - wwan_state: $wwan_state" + test "$wwan_state" = "activated"; +} + +up_wwan() { + nmcli c up $wwan_con + sleep $sleep_delay +} + +down_wwan() { + nmcli c down $wwan_con + sleep $sleep_delay +} + +in2p3_route_claro() { + local tracepath_output; + tracepath_output=$(tracepath -m $max_hop $in2p3_machine) + verbose_echo "in2p3_route_claro - tracepath_output: $tracepath_output" + echo $tracepath_output | grep -q $claro_hop; +} + +in2p3_route_auger() { + local tracepath_output; + tracepath_output=$(tracepath -m $max_hop $in2p3_machine) + verbose_echo "in2p3_route_auger - tracepath_output: $tracepath_output" + echo 
$tracepath_output | grep -q $auger_hop; +} + +switch_on() { + # switch on interface if not activated + if ! wwan_activated ; then + up_wwan + elif in2p3_route_auger ; then # interface already activated but still auger route + # up/down cycle + down_wwan + up_wwan + fi + + # exit if wwan still not activated + if ! wwan_activated ; then + echo "cannot activate $wwan_con"; + down_wwan + exit 1 + fi + + # wwan supposed to be activated, test if route to in2p3 is still through auger network + if in2p3_route_auger ; then + echo "wrong route to $in2p3_machine, still going through auger network" + down_wwan + exit 1 + fi +} + +# Main +if [ "$#" -ne 1 ] +then + echo "Incorrect number of arguments : 1 needed, ${#} given" + exit 2 +else + if [ "$1" = "-init" ]; then + switch_on + elif [ "$1" = "-close" ]; then + down_wwan + else + echo "Bad option" + exit 2 + fi +fi + + + diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash old mode 100644 new mode 100755 index 5f7a36ec..2e0ebd8e --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -27,6 +27,13 @@ remotedatadir='/data/tmp/' #'/sps/grand/data/gp13/raw' # Start date for transfer (all files older than this date will be skipped first_transfer='20240302' +# Local script to be launched before run +#pre_run_script="setup_network_auger.bash -init" + +# Local script to be launched after run +#post_run_script='setup_network_auger.bash -close' + + ##### End of Configuration section (do not modify below) ##### # Create database if not exists @@ -48,6 +55,17 @@ Default='\033[0m' # Text Reset Red='\033[0;31m' # Red Green='\033[0;32m' # Green +# run pre script +if [ -n "$pre_run_script" ] +then + pre=$($pre_run_script) + ret=$? + if [ "$ret" -ne "0" ]; then + printf "Error ${ret} in pre run script : ${pre} \n" + exit ${ret} + fi +fi + #List of files to be inserted into the db by bunchs of 500 (larger should produce errors) declare -A toins=([1]="") i=1 @@ -139,4 +157,8 @@ rsync -e "ssh -o 'ControlPath=$HOME/.ssh/ctl/%L-%r@%h:%p'" -au $dbfile ${remote_ ssh -O exit -o ControlPath="$HOME/.ssh/ctl/%L-%r@%h:%p" ${remote_account}@${remote_server} rm -rf ~/.ssh/ctl - +# run post script +if [ -n "$post_run_script" ] +then + eval $post_run_script +fi From 66fcb1cf89abbd9aa6a10f9d0651bb11483f7521 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Mon, 11 Mar 2024 17:05:34 +0100 Subject: [PATCH 04/85] Added pre_run and post_run scripts (for grand@auger) --- scripts/transfers/setup_network_auger.bash | 5 ++++- scripts/transfers/transfer_from_obs.bash | 11 ++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/transfers/setup_network_auger.bash b/scripts/transfers/setup_network_auger.bash index 90fcba02..10565150 100755 --- a/scripts/transfers/setup_network_auger.bash +++ b/scripts/transfers/setup_network_auger.bash @@ -29,7 +29,9 @@ up_wwan() { down_wwan() { nmcli c down $wwan_con + status=$? sleep $sleep_delay + return $status } in2p3_route_claro() { @@ -81,9 +83,10 @@ else switch_on elif [ "$1" = "-close" ]; then down_wwan + exit $? 
else echo "Bad option" - exit 2 + exit 2 fi fi diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 2e0ebd8e..ad40de9f 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -31,7 +31,7 @@ first_transfer='20240302' #pre_run_script="setup_network_auger.bash -init" # Local script to be launched after run -#post_run_script='setup_network_auger.bash -close' +post_run_script='setup_network_auger.bash -close' ##### End of Configuration section (do not modify below) ##### @@ -160,5 +160,10 @@ rm -rf ~/.ssh/ctl # run post script if [ -n "$post_run_script" ] then - eval $post_run_script -fi + post=$($post_run_script) + ret=$? + if [ "$ret" -ne "0" ]; then + printf "Error ${ret} in post run script : ${post} \n" + exit ${ret} + fi +fi \ No newline at end of file From 560ff3e9a70a4b2e81d412792752cc2e7d857ac2 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 12 Mar 2024 11:23:06 +0100 Subject: [PATCH 05/85] Added choice of ssh key --- scripts/transfers/transfer_from_obs.bash | 57 ++++++++++++++---------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index ad40de9f..a2f61e7f 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -21,18 +21,20 @@ remote_server='lpnws5131.in2p3.fr' # 'cca.in2p3.fr' # Account on remote server remote_account='fleg' # 'prod_grand' +#ssh key +ssh_key="/pbs/home/l/legrand/.ssh/id_rsa" # "/root/.ssh/id_ed25519-nopw" + # Target directory on remote server remotedatadir='/data/tmp/' #'/sps/grand/data/gp13/raw' # Start date for transfer (all files older than this date will be skipped -first_transfer='20240302' +first_transfer='20240301' # Local script to be launched before run -#pre_run_script="setup_network_auger.bash -init" +pre_run_script='' #'setup_network_auger.bash -init' # Local script to be launched after run -post_run_script='setup_network_auger.bash -close' - +post_run_script='' # 'setup_network_auger.bash -close' ##### End of Configuration section (do not modify below) ##### @@ -40,9 +42,14 @@ post_run_script='setup_network_auger.bash -close' sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTOINCREMENT, directory TEXT, file TEXT, date INT, success BOOLEAN, UNIQUE (directory,file));" sqlite3 $dbfile "create table if not exists transfer (id, tag INTEGER, date_transfer DATETIME, success BOOLEAN, comment TEXTE);" - # Define some useful stuff +#ssh options +ssh_options="-o ControlPath=\"$HOME/.ssh/ctl/%L-%r@%h:%p\"" +if [ -n "$ssh_key" ]; then + ssh_options+=" -i ${ssh_key}" +fi + # Last date of files already registered last_transfer=$(sqlite3 $dbfile "select max(date) from gfiles;") last_transfer=$(( last_transfer > first_transfer ? last_transfer : first_transfer )) @@ -71,20 +78,24 @@ declare -A toins=([1]="") i=1 j=0 #find all files in localdatadir corresponding to datas (i.e. 
name starting by site) and add them to the database if not here already + for file in $(find $localdatadir -type f -newermt $last_transfer| grep /${site}_ |sort) do - filename=$(basename $file) - tmp=${filename#${site}_} - dateobs=${tmp:0:8} - #Add file to be registered to the list (and start new list if more than 500 to avoid request limit in insert below) - if [ $j -ge 500 ]; - then - i=$((i + 1)) - toins+=([$i]="") - j=0 - fi - toins[$i]+=",('$(dirname $file)', '$(basename $file)', ${dateobs}, false)" - j=$((j + 1)) + # skip opened files + if [ !$(fuser "$file" &> /dev/null) ]; then + filename=$(basename $file) + tmp=${filename#${site}_} + dateobs=${tmp:0:8} + #Add file to be registered to the list (and start new list if more than 500 to avoid request limit in insert below) + if [ $j -ge 500 ]; + then + i=$((i + 1)) + toins+=([$i]="") + j=0 + fi + toins[$i]+=",('$(dirname $file)', '$(basename $file)', ${dateobs}, false)" + j=$((j + 1)) + fi done #Add all files at a time (10x faster that adding them individually) @@ -99,9 +110,8 @@ for key in "${!toins[@]}"; do done # Open a ssh connection that will be used for all transfers (avoid to reopen rsync tunnel for each file) -mkdir ~/.ssh/ctl -ssh -nNf -o ControlMaster=yes -o ControlPath="$HOME/.ssh/ctl/%L-%r@%h:%p" ${remote_account}@${remote_server} - +mkdir ~/.ssh/ctl >/dev/null 2>&1 +ssh -nNf -o ControlMaster=yes ${ssh_options} ${remote_account}@${remote_server} declare -A translog=([1]="") i=1 j=0 @@ -112,7 +122,7 @@ do fileinfo=(${file//|/ }) #Transfer files (one by one to get info on each transfer) printf "\nSending ${fileinfo[1]} " - trans=$(rsync -e "ssh -o 'ControlPath=$HOME/.ssh/ctl/%L-%r@%h:%p'" --out-format="%t %b %n" -au --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) + trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b %n" -au --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) if [ "$?" 
-eq "0" ] then @@ -150,11 +160,10 @@ for key in "${!translog[@]}"; do done #finally also rsync the database -#rsync -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${dbfile}_$(date +'%Y%m%d-%H%M%S') -rsync -e "ssh -o 'ControlPath=$HOME/.ssh/ctl/%L-%r@%h:%p'" -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${tag}_${dbfile} +rsync -e "ssh ${ssh_options}" -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${tag}_${dbfile} #close ssh connection -ssh -O exit -o ControlPath="$HOME/.ssh/ctl/%L-%r@%h:%p" ${remote_account}@${remote_server} +ssh -O exit $ssh_options ${remote_account}@${remote_server} rm -rf ~/.ssh/ctl # run post script From ade206ddc1338ffc9c5a1ec4648645eb1ec14971 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 12 Mar 2024 14:24:20 +0100 Subject: [PATCH 06/85] Added rsync options parameter --- scripts/transfers/transfer_from_obs.bash | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index a2f61e7f..88260ec3 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -28,7 +28,7 @@ ssh_key="/pbs/home/l/legrand/.ssh/id_rsa" # "/root/.ssh/id_ed25519-nopw" remotedatadir='/data/tmp/' #'/sps/grand/data/gp13/raw' # Start date for transfer (all files older than this date will be skipped -first_transfer='20240301' +first_transfer='20240311' # Local script to be launched before run pre_run_script='' #'setup_network_auger.bash -init' @@ -36,6 +36,9 @@ pre_run_script='' #'setup_network_auger.bash -init' # Local script to be launched after run post_run_script='' # 'setup_network_auger.bash -close' +# rsync_options : a to keep the creation time of files, z to compress if bandwidth is limited (but it's ~5 times slower). Please keep the "a" option ! +rsync_options="-az" + ##### End of Configuration section (do not modify below) ##### # Create database if not exists @@ -122,7 +125,7 @@ do fileinfo=(${file//|/ }) #Transfer files (one by one to get info on each transfer) printf "\nSending ${fileinfo[1]} " - trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b %n" -au --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) + trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b %n" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) if [ "$?" 
-eq "0" ] then @@ -160,7 +163,7 @@ for key in "${!translog[@]}"; do done #finally also rsync the database -rsync -e "ssh ${ssh_options}" -au $dbfile ${remote_account}@${remote_server}:$remotedatadir/${tag}_${dbfile} +rsync -e "ssh ${ssh_options}" ${rsync_options} $dbfile ${remote_account}@${remote_server}:$remotedatadir/${tag}_${dbfile} #close ssh connection ssh -O exit $ssh_options ${remote_account}@${remote_server} From f4fc70bd1f046dc5a51145dd59c83410c7118893 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 12 Mar 2024 17:44:40 +0100 Subject: [PATCH 07/85] Added md5sum --- scripts/transfers/transfer_from_obs.bash | 26 +++++++++++------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 88260ec3..edc20591 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -16,16 +16,16 @@ localdatadir='/sps/grand/data/gp13/raw/2024/' site='GP13' # Remote server to transfer -remote_server='lpnws5131.in2p3.fr' # 'cca.in2p3.fr' +remote_server='cca.in2p3.fr' # Account on remote server -remote_account='fleg' # 'prod_grand' +remote_account='prod_grand' # 'prod_grand' #ssh key -ssh_key="/pbs/home/l/legrand/.ssh/id_rsa" # "/root/.ssh/id_ed25519-nopw" +ssh_key="/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" # Target directory on remote server -remotedatadir='/data/tmp/' #'/sps/grand/data/gp13/raw' +remotedatadir='/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13/raw' # Start date for transfer (all files older than this date will be skipped first_transfer='20240311' @@ -37,12 +37,12 @@ pre_run_script='' #'setup_network_auger.bash -init' post_run_script='' # 'setup_network_auger.bash -close' # rsync_options : a to keep the creation time of files, z to compress if bandwidth is limited (but it's ~5 times slower). Please keep the "a" option ! -rsync_options="-az" +rsync_options="-a" ##### End of Configuration section (do not modify below) ##### # Create database if not exists -sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTOINCREMENT, directory TEXT, file TEXT, date INT, success BOOLEAN, UNIQUE (directory,file));" +sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTOINCREMENT, directory TEXT, file TEXT, date INT, success BOOLEAN, md5sum VARCHAR(35), UNIQUE (directory,file));" sqlite3 $dbfile "create table if not exists transfer (id, tag INTEGER, date_transfer DATETIME, success BOOLEAN, comment TEXTE);" # Define some useful stuff @@ -81,12 +81,12 @@ declare -A toins=([1]="") i=1 j=0 #find all files in localdatadir corresponding to datas (i.e. 
name starting by site) and add them to the database if not here already - for file in $(find $localdatadir -type f -newermt $last_transfer| grep /${site}_ |sort) do # skip opened files if [ !$(fuser "$file" &> /dev/null) ]; then filename=$(basename $file) + md5=$(md5sum ${file}| awk '{print $1}') tmp=${filename#${site}_} dateobs=${tmp:0:8} #Add file to be registered to the list (and start new list if more than 500 to avoid request limit in insert below) @@ -96,7 +96,7 @@ do toins+=([$i]="") j=0 fi - toins[$i]+=",('$(dirname $file)', '$(basename $file)', ${dateobs}, false)" + toins[$i]+=",('$(dirname $file)', '$(basename $file)', ${dateobs}, 0, '${md5}')" j=$((j + 1)) fi done @@ -107,9 +107,8 @@ for key in "${!toins[@]}"; do value=${toins[${key}]} if [ -n "$value" ]; then - res=$(sqlite3 $dbfile "INSERT OR IGNORE INTO gfiles (directory,file,date,success) values ${value:1}") + res=$(sqlite3 $dbfile "INSERT OR IGNORE INTO gfiles (directory,file,date,success,md5sum) values ${value:1}") fi - done # Open a ssh connection that will be used for all transfers (avoid to reopen rsync tunnel for each file) @@ -119,7 +118,7 @@ declare -A translog=([1]="") i=1 j=0 #select files not transfered successfully -for file in $(sqlite3 $dbfile "select directory, file, date, success, id from gfiles where success=false ORDER BY file;") +for file in $(sqlite3 $dbfile "select directory, file, date, success, id from gfiles where success=0 ORDER BY file;") do #transform result into array (more easy to manipulate) fileinfo=(${file//|/ }) @@ -130,12 +129,12 @@ do if [ "$?" -eq "0" ] then #Transfer successful : store info to update database at the end - translog[$i]+=";UPDATE gfiles SET success=true WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]},${tag}, true,datetime('now','utc'),'${trans}')" + translog[$i]+=";UPDATE gfiles SET success=1 WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'),'${trans}')" printf "${Green}Ok${Default}" else #Transfer failed : just log errors - translog[$i]+=";INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]}, ${tag}, false,datetime('now','utc'),'${trans}')" + translog[$i]+=";INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]}, ${tag}, 0,datetime('now','utc'),'${trans}')" printf "${Red}ERROR:${Default} \n ${trans} " fi @@ -159,7 +158,6 @@ for key in "${!translog[@]}"; do then res=$(sqlite3 $dbfile "${value:1}") fi - done #finally also rsync the database From 9e8b55cb9f404f0e0d0120c00990438851fd943c Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 12 Mar 2024 18:52:25 +0100 Subject: [PATCH 08/85] Added md5sum --- scripts/transfers/transfer_from_obs.bash | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index edc20591..2801a372 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -80,13 +80,13 @@ fi declare -A toins=([1]="") i=1 j=0 +md5="0" #find all files in localdatadir corresponding to datas (i.e. 
name starting by site) and add them to the database if not here already for file in $(find $localdatadir -type f -newermt $last_transfer| grep /${site}_ |sort) do # skip opened files if [ !$(fuser "$file" &> /dev/null) ]; then filename=$(basename $file) - md5=$(md5sum ${file}| awk '{print $1}') tmp=${filename#${site}_} dateobs=${tmp:0:8} #Add file to be registered to the list (and start new list if more than 500 to avoid request limit in insert below) @@ -124,15 +124,17 @@ do fileinfo=(${file//|/ }) #Transfer files (one by one to get info on each transfer) printf "\nSending ${fileinfo[1]} " - trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b %n" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) - + trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b %n md5:%C" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) if [ "$?" -eq "0" ] then + md5=${trans#*md5:} + #echo $md52 #Transfer successful : store info to update database at the end - translog[$i]+=";UPDATE gfiles SET success=1 WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'),'${trans}')" + translog[$i]+=";UPDATE gfiles SET success=1, md5sum='${md5}' WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'),'${trans}')" printf "${Green}Ok${Default}" else + md5=$(echo ${trans}|awk -F"md5:" '{print $2}') #Transfer failed : just log errors translog[$i]+=";INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]}, ${tag}, 0,datetime('now','utc'),'${trans}')" printf "${Red}ERROR:${Default} \n ${trans} " From c690f6d1f56f2db798d8acc7501b734fb83d4c5f Mon Sep 17 00:00:00 2001 From: fleg Date: Fri, 15 Mar 2024 18:22:15 +0100 Subject: [PATCH 09/85] Added scripts to convert file to Grandroot format --- scripts/transfers/bintoroot.bash | 49 +++++++++++++++ scripts/transfers/ccscript.bash | 80 ++++++++++++++++++++++++ scripts/transfers/transfer_from_obs.bash | 34 ++++++---- 3 files changed, 152 insertions(+), 11 deletions(-) create mode 100644 scripts/transfers/bintoroot.bash create mode 100644 scripts/transfers/ccscript.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash new file mode 100644 index 00000000..6ad35228 --- /dev/null +++ b/scripts/transfers/bintoroot.bash @@ -0,0 +1,49 @@ +#!/bin/bash + +# path to gtot +gtot_path='/pbs/home/p/prod_grand/softs/gtot/cmake-build-release/gtot' + +# Get tag and database file to use +while getopts ":d:g:" option; do + case $option in + d) + root_dest=${OPTARG};; + g) + gtot_options=${OPTARG};; + :) + printf "option -${OPTARG} need an argument\n" + exit 1;; + ?) 
# Invalid option
+    printf "Error: Invalid option -${OPTARG}\n"
+    exit 1;;
+  esac
+done
+
+shift $(($OPTIND - 1))
+
+# set up the GRAND conda software environment before conversion
+cd /pbs/home/p/prod_grand/softs/grand
+source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh
+conda activate /sps/grand/software/conda/grandlib_2304
+source env/setup.sh
+cd /pbs/home/p/prod_grand/scripts/transfers
+
+
+
+for file in "$@"
+do
+  echo "converting ${file} to GrandRoot"
+  filename=$(basename $file)
+  tmp=${filename#*_}
+  dateobs=${tmp:0:8}
+  dest="${root_dest}/${dateobs:0:4}/${dateobs:4:2}"
+  if [ ! -d $dest ];then
+    mkdir -p $dest >/dev/null 2>&1
+  fi
+  dirlogs=${root_dest}/../logs
+  logfile=${dirlogs}/bin2root-${filename%.*}
+  if [ ! -d $dirlogs ];then
+    mkdir -p $dirlogs >/dev/null 2>&1
+  fi
+  ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile}
+done
+
diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash
new file mode 100644
index 00000000..c995f1fe
--- /dev/null
+++ b/scripts/transfers/ccscript.bash
@@ -0,0 +1,80 @@
+#!/bin/bash
+# path to bin2root file
+bin2root='/pbs/home/p/prod_grand/scripts/transfers/bintoroot.bash'
+
+# gtot options for conversion
+gtot_option="-g1"
+
+# number of files to group in the same submission
+nbfiles=3
+
+
+# Get tag and database file to use
+while getopts ":t:d:" option; do
+  case $option in
+    t)
+      tag=${OPTARG};;
+    d)
+      db=${OPTARG};;
+    :)
+      printf "option -${OPTARG} needs an argument\n"
+      exit 1;;
+    ?) # Invalid option
+      printf "Error: Invalid option -${OPTARG}\n"
+      exit 1;;
+  esac
+done
+
+
+#test that dbfile exists and tag is set
+if [ -z "$tag" ] || [ -z "$db" ];then
+  printf "Missing option -t or -d\n"
+  exit 1
+elif [ ! -f $db ];then
+  printf "Database file does not exist\n"
+  exit 1
+fi
+
+# Determine root_dri from database path
+root_dest=${db%/database*}/GrandRoot/
+submit_dir=${db%/database*}/logs/
+submit_base_name=submit_${tag}
+if [ ! -d $root_dest ];then
+  mkdir -p $root_dest >/dev/null 2>&1
+fi
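+# (layout note: transfer_from_obs.bash uploads the sqlite database to
+# <datadir>/database/, so the converted files land in <datadir>/GrandRoot/
+# and the submission scripts/logs in <datadir>/logs/)
+if [ ! 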
-d $submit_dir ];then + mkdir -p $submit_dir >/dev/null 2>&1 +fi + + +i=1 +j=1 +listoffiles="" + +echo "loop" +for file in $(sqlite3 $db "select target from transfer,gfiles where gfiles.id=transfer.id and tag='${tag}' and transfer.success=1;") +do + echo $file + #define the submission and log files + outfile="${submit_dir}/${submit_base_name}-${j}.bash" + logfile="${submit_dir}/${submit_base_name}-${j}.log" + + #add file to the list of files to be treated + listoffiles+=" ${file}" + + # When reach the number of files to treat in a run then write the submission file and submit it + if [ "$i" -eq "${nbfiles}" ];then + echo "#!/bin/bash" > $outfile + # add conversion from bin to Grandroot + echo "$bin2root -g '$gtot_option' -d $root_dest $listoffiles" >> $outfile + + #submit script + sbatch -t 0-08:30 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile + # reset iterators and list of files + i=0 + ((j++)) + listoffiles="" + fi + ((i++)) +done + diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 2801a372..05d3f999 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -21,14 +21,17 @@ remote_server='cca.in2p3.fr' # Account on remote server remote_account='prod_grand' # 'prod_grand' -#ssh key -ssh_key="/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" +#ssh key for rsync +ssh_key_rsync="/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" + +#ssh key for exec remote scripts +ssh_key_exec="/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" # Target directory on remote server -remotedatadir='/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13/raw' +remotedatadir='/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13' # Start date for transfer (all files older than this date will be skipped -first_transfer='20240311' +first_transfer='20240312' # Local script to be launched before run pre_run_script='' #'setup_network_auger.bash -init' @@ -39,18 +42,21 @@ post_run_script='' # 'setup_network_auger.bash -close' # rsync_options : a to keep the creation time of files, z to compress if bandwidth is limited (but it's ~5 times slower). Please keep the "a" option ! 
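 # (-a is rsync's archive mode: recursive copy preserving modification times, permissions and ownership)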
rsync_options="-a" +# treatment scripts location @CCIN2P3 +ccscripts='/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' + ##### End of Configuration section (do not modify below) ##### # Create database if not exists sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTOINCREMENT, directory TEXT, file TEXT, date INT, success BOOLEAN, md5sum VARCHAR(35), UNIQUE (directory,file));" -sqlite3 $dbfile "create table if not exists transfer (id, tag INTEGER, date_transfer DATETIME, success BOOLEAN, comment TEXTE);" +sqlite3 $dbfile "create table if not exists transfer (id, tag INTEGER, date_transfer DATETIME, success BOOLEAN, target TEXT, comment TEXTE);" # Define some useful stuff #ssh options ssh_options="-o ControlPath=\"$HOME/.ssh/ctl/%L-%r@%h:%p\"" -if [ -n "$ssh_key" ]; then - ssh_options+=" -i ${ssh_key}" +if [ -n "$ssh_key_rsync" ]; then + ssh_options+=" -i ${ssh_key_rsync}" fi # Last date of files already registered @@ -124,19 +130,19 @@ do fileinfo=(${file//|/ }) #Transfer files (one by one to get info on each transfer) printf "\nSending ${fileinfo[1]} " - trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b %n md5:%C" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) + trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b md5:%C" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) if [ "$?" -eq "0" ] then md5=${trans#*md5:} #echo $md52 #Transfer successful : store info to update database at the end - translog[$i]+=";UPDATE gfiles SET success=1, md5sum='${md5}' WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'),'${trans}')" + translog[$i]+=";UPDATE gfiles SET success=1, md5sum='${md5}' WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,target,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'), \"${remotedatadir}/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${fileinfo[1]}\", '${trans}')" printf "${Green}Ok${Default}" else md5=$(echo ${trans}|awk -F"md5:" '{print $2}') #Transfer failed : just log errors - translog[$i]+=";INSERT INTO transfer (id, tag, success,date_transfer,comment) VALUES (${fileinfo[4]}, ${tag}, 0,datetime('now','utc'),'${trans}')" + translog[$i]+=";INSERT INTO transfer (id, tag, success, date_transfer, target, comment) VALUES (${fileinfo[4]}, ${tag}, 0,datetime('now','utc'), '${remotedatadir}', '${trans}')" printf "${Red}ERROR:${Default} \n ${trans} " fi @@ -163,7 +169,7 @@ for key in "${!translog[@]}"; do done #finally also rsync the database -rsync -e "ssh ${ssh_options}" ${rsync_options} $dbfile ${remote_account}@${remote_server}:$remotedatadir/${tag}_${dbfile} +rsync -e "ssh ${ssh_options}" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/database && rsync" $dbfile ${remote_account}@${remote_server}:$remotedatadir/database/${tag}_${dbfile} #close ssh connection ssh -O exit $ssh_options ${remote_account}@${remote_server} @@ -178,4 +184,10 @@ then printf "Error ${ret} in post run script : ${post} \n" exit ${ret} fi +fi + +#Run conversion scripts @ccin2p3 +if [ -n "$ccscripts" ] +then + ssh -i ${ssh_key_exec} 
${remote_account}@${remote_server} ${ccscripts} -d ${remotedatadir}/database/${tag}_${dbfile} -t ${tag} fi \ No newline at end of file From e5ed8007599f1cf154d63eb428599e67304b8b90 Mon Sep 17 00:00:00 2001 From: fleg Date: Fri, 15 Mar 2024 18:54:43 +0100 Subject: [PATCH 10/85] corrected bug --- scripts/transfers/ccscript.bash | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index c995f1fe..77ef3b32 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -78,3 +78,9 @@ do ((i++)) done +# finally submit last files if needed +if [ -n "$listoffiles" ];then + echo "#!/bin/bash" > $outfile + echo "$bin2root -g '$gtot_option' -d $root_dest $listoffiles" >> $outfile + sbatch -t 0-08:30 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile +fi \ No newline at end of file From 9231ce9876f93b9214806d73be5d8f1b19194091 Mon Sep 17 00:00:00 2001 From: fleg Date: Fri, 15 Mar 2024 19:01:09 +0100 Subject: [PATCH 11/85] added renaming of files to force .bin extension --- scripts/transfers/transfer_from_obs.bash | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 05d3f999..b7eccca3 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -128,21 +128,23 @@ for file in $(sqlite3 $dbfile "select directory, file, date, success, id from gf do #transform result into array (more easy to manipulate) fileinfo=(${file//|/ }) + # Ensure extension is .bin + finalname="${fileinfo[1]%.*}.bin" #Transfer files (one by one to get info on each transfer) printf "\nSending ${fileinfo[1]} " - trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b md5:%C" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/ 2>&1) + trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b md5:%C" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${finalname} 2>&1) if [ "$?" 
-eq "0" ] then md5=${trans#*md5:} #echo $md52 #Transfer successful : store info to update database at the end - translog[$i]+=";UPDATE gfiles SET success=1, md5sum='${md5}' WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,target,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'), \"${remotedatadir}/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${fileinfo[1]}\", '${trans}')" + translog[$i]+=";UPDATE gfiles SET success=1, md5sum='${md5}' WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,target,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'), \"${remotedatadir}/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${finalname}\", '${trans}')" printf "${Green}Ok${Default}" else md5=$(echo ${trans}|awk -F"md5:" '{print $2}') #Transfer failed : just log errors - translog[$i]+=";INSERT INTO transfer (id, tag, success, date_transfer, target, comment) VALUES (${fileinfo[4]}, ${tag}, 0,datetime('now','utc'), '${remotedatadir}', '${trans}')" + translog[$i]+=";INSERT INTO transfer (id, tag, success, date_transfer, target, comment) VALUES (${fileinfo[4]}, ${tag}, 0,datetime('now','utc'), '${remotedatadir}/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${finalname}', '${trans}')" printf "${Red}ERROR:${Default} \n ${trans} " fi From 07a1e03fe48d5f2a6e91f0b3dbd7d85facf42a90 Mon Sep 17 00:00:00 2001 From: fleg Date: Sun, 17 Mar 2024 13:47:10 +0100 Subject: [PATCH 12/85] Better loop and associative array for submission --- scripts/transfers/ccscript.bash | 54 ++++++++++++++------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 77ef3b32..cf2fd21f 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -47,40 +47,30 @@ if [ ! 
-d $submit_dir ];then fi -i=1 -j=1 -listoffiles="" - -echo "loop" +i=0 +j=0 +declare -A listoffiles for file in $(sqlite3 $db "select target from transfer,gfiles where gfiles.id=transfer.id and tag='${tag}' and transfer.success=1;") do - echo $file - #define the submission and log files - outfile="${submit_dir}/${submit_base_name}-${j}.bash" - logfile="${submit_dir}/${submit_base_name}-${j}.log" - + if [ "$((i % nbfiles))" -eq "0" ]; then + ((j++)) + fi + #add file to the list of files to be treated - listoffiles+=" ${file}" - - # When reach the number of files to treat in a run then write the submission file and submit it - if [ "$i" -eq "${nbfiles}" ];then - echo "#!/bin/bash" > $outfile - # add conversion from bin to Grandroot - echo "$bin2root -g '$gtot_option' -d $root_dest $listoffiles" >> $outfile - - #submit script - sbatch -t 0-08:30 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile - # reset iterators and list of files - i=0 - ((j++)) - listoffiles="" - fi - ((i++)) + listoffiles[$j]+=" ${file}" + + ((i++)) done -# finally submit last files if needed -if [ -n "$listoffiles" ];then - echo "#!/bin/bash" > $outfile - echo "$bin2root -g '$gtot_option' -d $root_dest $listoffiles" >> $outfile - sbatch -t 0-08:30 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile -fi \ No newline at end of file +for j in "${!listoffiles[@]}" +do + outfile="${submit_dir}/${submit_base_name}-${j}.bash" + logfile="${submit_dir}/${submit_base_name}-${j}.log" + echo "#!/bin/bash" > $outfile + echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile + #submit script + echo "submit $outfile" + sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile +done + + From 908ef567aab4b6ed732f795e13fbfce9beab3744 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Mon, 18 Mar 2024 14:12:10 +0100 Subject: [PATCH 13/85] Improvements --- granddb/config.ini | 16 ++++++++++------ granddb/granddatalib.py | 34 ++++++++++++++++++++++++++-------- granddb/granddblib.py | 16 +++++++++++----- granddb/rootdblib.py | 10 +++++++--- 4 files changed, 54 insertions(+), 22 deletions(-) diff --git a/granddb/config.ini b/granddb/config.ini index 0c976242..d009efcf 100644 --- a/granddb/config.ini +++ b/granddb/config.ini @@ -10,7 +10,7 @@ socket_timeout = 5 ; At least one localdir (incoming) is needed. 
; incoming directory must be an absolute path [directories] -localdir = ["/home/fleg/DEV/myincoming","/home/fleg/DEV/GRAND/incoming","/home/fleg/"] +localdir = ["/home/fleg/DEV/GRAND/incoming/"] ; remote repositories to search for data if not present in local directories ; repositories are given as list : @@ -22,8 +22,10 @@ localdir = ["/home/fleg/DEV/myincoming","/home/fleg/DEV/GRAND/incoming","/home/f ; repository CCIN2P3 is already defined in the database (so it's not necessary to define it here), but credentials for it have ; to be supplied in the [credentials] section below [repositories] -CC = ["ssh","cca.in2p3.fr",22,["/sps/grand/pengxiong/GP81_interpolation/GP81_1000m/SignalTimeSeries/","/sps/grand/pengxiong/Proton_ROOT/","/sps/trend/fleg/INCOMING"]] -WEB = [ "https", "github.com" , 443, ["/grand-mother/data_challenge1/raw/main/coarse_subei_traces_root/"]] +#CC = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/auger/GRANDfiles","/sps/grand/pengxiong/GP81_interpolation/GP81_1000m/SignalTimeSeries/","/sps/grand/pengxiong/Proton_ROOT/","/sps/trend/fleg/INCOMING"]] +CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/sep2023/GRANDfiles"]] + +#WEB = [ "https", "github.com" , 443, ["/grand-mother/data_challenge1/raw/main/coarse_subei_traces_root/"]] ; Credentials for repositories given as : ; Name = [user, keyfile] @@ -41,8 +43,8 @@ SSHTUNNEL = ["fleg",""] ; database to use (only one database can be defined) ; Name = [server, port, database, login, passwd, sshtunnel_server, sshtunnel_port, sshtunnel_credentials ] [database] -database = ["localhost", "" ,"granddb", "grandadmin", "popo","", "", ""] -#database = ["ccpgsqlexpe.in2p3.fr",6550,"granddb","grandadmin","rSM8X7vmB7Up2ngR","lpnclaude.in2p3.fr", 22, "SSHTUNNEL"] +#database = ["localhost", "" ,"granddb", "grandadmin", "password","", "", ""] + ; The following section is optional. ; it defines the repository where registered files need to go. @@ -50,4 +52,6 @@ database = ["localhost", "" ,"granddb", "grandadmin", "popo","", "", ""] ; if not provided, the files will go to the incoming section provided in section directories ; Useful only if you want to work on "localdir" but register files in a remote directory #[registerer] -#CC = "/sps/grand/fleg/INCOMING" +#CCIN2P3 = "/sps/grand/fleg/INCOMING" +#CCIN2P3 = "/sps/grand/data/auger/GRANDfiles" +#CCIN2P3 = "/sps/grand/data/nancay/sep2023/GRANDfiles" \ No newline at end of file diff --git a/granddb/granddatalib.py b/granddb/granddatalib.py index 2a7ac331..bbc3b736 100644 --- a/granddb/granddatalib.py +++ b/granddb/granddatalib.py @@ -55,7 +55,7 @@ class DataManager: _database = None _provider = None - def __init__(self, file="config.ini"): + def __init__(self, file=os.path.join(os.path.dirname(__file__), 'config.ini')): configur = ConfigParser() # by default configparser convert all keys to lowercase... but we don't want ! configur.optionxform = lambda option: option @@ -97,7 +97,9 @@ def __init__(self, file="config.ini"): self._directories.append(Datasource("localdir", "local", "localhost", "", dirlist, self.incoming())) # We also append localdirs to repositories... 
so search method will first look at local dirs before searching on remote locations # self._repositories.append(Datasource("localdir", "local", "localhost", "", dirlist, self.incoming())) - self._repositories["localdir"] = Datasource("localdir", "local", "localhost", "", dirlist, self.incoming()) + # But instead of localhost and localdir we use the name of the machine + hostname = socket.getfqdn(os.environ["HOSTNAME"]) + self._repositories["localdir"] = Datasource("localdir", "local", hostname, "", dirlist, self.incoming()) else: logger.error(f"Section directories is mandatory in config file {file}") exit(1) @@ -184,17 +186,23 @@ def SearchFileInDB(self, filename): # If not, search first in localdirs and then in remote repositories. First match is returned. def get(self, file, repository=None, path=None): res = None + # Check if file is a simple name or full path name + if (os.path.dirname(file) != ""): + if (not (path is None) and (path != os.path.dirname(file))): + logger.warning(f"path given in filename ({os.path.dirname(file)}) and in repository path ({path}) are different ! The path {os.path.dirname(file)} from file will be used !") + path = os.path.dirname(file) + file = os.path.basename(file) # if repository is given we get file directly from this repo if not (repository is None): rep = self.getrepo(repository) if not (rep is None): - logger.debug(f"search in repository {rep.name()}") + logger.debug(f"search in repository {rep.name()} {path}") res = rep.get(file, path) # if no repo specified, we search everywhere else: for name, rep in self.repositories().items(): - logger.debug(f"search in repository {rep.name()}") - res = rep.get(file) + logger.debug(f"search in repository {rep.name()} {path}") + res = rep.get(file, path) if not (res is None): break @@ -225,7 +233,17 @@ def register_file(self,filename): newfilename = None file = self.get(filename) if file is not None: - newfilename = self.referer().copy(file) + # If filename in referer repository then keep it + #print(os.path.basename(filename)+" "+self.referer().name()+" "+os.path.dirname(filename)) + newfilename = self.get(os.path.basename(filename),self.referer().name()) + + if newfilename is None: + newfilename = self.referer().copy(file) + else: + newfilename = str(newfilename) + + #print("newfilename = "+str(newfilename)) + self.database().register_file(file, newfilename, self.referer().id_repository, self.provider()) return newfilename @@ -384,7 +402,7 @@ def get(self, file, path=None): else: # No path given : we recursively search in all dirs and subdirs for path in self.paths(): - logger.debug(f"search in localdir {path}{file}") + logger.debug(f"search in localdir {path} for file {file}") #my_file = Path(path + file) my_file = None @@ -404,7 +422,7 @@ def get(self, file, path=None): if not found_file is None: logger.debug(f"file found in localdir {found_file}") - return found_file + return str(found_file) def copy(self, pathfile): newname = self.incoming() + uniquename(pathfile) diff --git a/granddb/granddblib.py b/granddb/granddblib.py index f0c601f3..b8b1675d 100644 --- a/granddb/granddblib.py +++ b/granddb/granddblib.py @@ -17,7 +17,7 @@ import grand.manage_log as mlg import ROOT logger = mlg.get_logger_for_script(__name__) -mlg.create_output_for_logger("debug", log_stdout=False) +mlg.create_output_for_logger("debug", log_stdout=True) @@ -79,7 +79,8 @@ def __init__(self, host, port, dbname, user, passwd, sshserv="", sshport=22, cre (self._sshserv, self.sshport()), ssh_username=self._cred.user(), 
ssh_pkey=self._cred.keyfile(), - remote_bind_address=(self._host, self._port) + remote_bind_address=(self._host, self._port), + allow_agent=True ) self.server.start() local_port = str(self.server.local_bind_port) @@ -94,7 +95,9 @@ def __init__(self, host, port, dbname, user, passwd, sshserv="", sshport=22, cre Base.prepare(engine, reflect=True) self.sqlalchemysession = Session(engine) - for table in engine.table_names(): + inspection = inspect(engine) + for table in inspection.get_table_names(): + #for table in engine.table_names(): #this is obsolete self._tables[table] = getattr(Base.classes, table) def __del__(self): @@ -342,7 +345,7 @@ def register_filecontent(self, file, idfile): rfile = rdb.RootFile(str(file)) # We iterate over all trees for treename in rfile.TreeList: - print(treename) + logger.debug(f" Debug reading tree {treename}") treetype = treename.split('_', 1)[0] #We register only known and identified trees defined in rootdblib if hasattr(rfile, treetype + "ToDB"): @@ -458,7 +461,8 @@ def register_filecontent(self, file, idfile): #idtree = "id_"+treename et = time.time() elapsed_time = et - st - print('Execution time:', elapsed_time, 'seconds') + #print('Execution time:', elapsed_time, 'seconds') + logger.debug(f"execution time {elapsed_time} seconds") def register_file(self, orgfilename, newfilename, id_repository, provider): @@ -467,4 +471,6 @@ def register_file(self, orgfilename, newfilename, id_repository, provider): #We read the localfile and not the remote one self.register_filecontent(orgfilename,idfile) #self.register_filecontent(newfilename,idfile) + else: + logger.info(f"file {orgfilename} already registered.") self.sqlalchemysession.commit() diff --git a/granddb/rootdblib.py b/granddb/rootdblib.py index 7341721c..fe2da570 100644 --- a/granddb/rootdblib.py +++ b/granddb/rootdblib.py @@ -1,6 +1,8 @@ import ROOT import grand.dataio.root_trees as groot - +import grand.manage_log as mlg +logger = mlg.get_logger_for_script(__name__) +#mlg.create_output_for_logger("debug", log_stdout=True) class RootFile: # Use dict to associate rootfile ttree class to root_tree classe @@ -319,7 +321,9 @@ def __init__(self, f_name): if ttype in self.TreeToClass: self.TreeList[tname] = self.TreeToClass[ttype](f_name) else: - print(ttype + " is unknown") + logger.warning(f"{ttype} is unknown") + + def copy_content_to(self, file): @@ -350,7 +354,7 @@ def dataset_name(self): extra = "" serial = "1" name = source+"_"+site+"_"+mydate+"_"+mytime+"_"+extra+"_"+serial - print(name) + #print(name) #We use only first run break return name From c179c11cd33f702ed8f84c931a815b5b75c88a5f Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 19 Mar 2024 10:26:16 +0100 Subject: [PATCH 14/85] Conf for GP13 --- scripts/transfers/transfer_from_obs.bash | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index b7eccca3..682b8ff2 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -10,7 +10,7 @@ dbfile='grand_transfer.db' # Local directory where are stored the data to be transfered (will be explored recursively) -localdatadir='/sps/grand/data/gp13/raw/2024/' +localdatadir='/home/mapx/mapx/DunhuangData/Rawdata/20dB/2024/03/14/' #/sps/grand/data/gp13/raw/2024/' # Site name prefix in filenames site='GP13' @@ -19,19 +19,19 @@ site='GP13' remote_server='cca.in2p3.fr' # Account on remote server -remote_account='prod_grand' # 'prod_grand' 
+remote_account='pma'#prod_grand' # 'prod_grand' #ssh key for rsync -ssh_key_rsync="/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" +ssh_key_rsync="/home/mapx/.ssh/id_ed25519"#/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" #ssh key for exec remote scripts -ssh_key_exec="/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" +ssh_key_exec="/home/mapx/.ssh/id_ed25519" #"/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" # Target directory on remote server -remotedatadir='/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13' +remotedatadir='/sps/grand/data/gp13/test'#'/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13' # Start date for transfer (all files older than this date will be skipped -first_transfer='20240312' +first_transfer='20240314' # Local script to be launched before run pre_run_script='' #'setup_network_auger.bash -init' @@ -43,7 +43,7 @@ post_run_script='' # 'setup_network_auger.bash -close' rsync_options="-a" # treatment scripts location @CCIN2P3 -ccscripts='/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' +ccscripts=''#/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' ##### End of Configuration section (do not modify below) ##### From b60cb48dbc64c67f3ff9e3edee0e48f598683b79 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 19 Mar 2024 11:25:53 +0100 Subject: [PATCH 15/85] Conf for GP13 --- scripts/transfers/transfer_from_obs.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 682b8ff2..c80e5af2 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -10,7 +10,7 @@ dbfile='grand_transfer.db' # Local directory where are stored the data to be transfered (will be explored recursively) -localdatadir='/home/mapx/mapx/DunhuangData/Rawdata/20dB/2024/03/14/' #/sps/grand/data/gp13/raw/2024/' +localdatadir='/home/mapx/mapx/DunhuangData/Rawdata/20dB/2024/' #/sps/grand/data/gp13/raw/2024/' # Site name prefix in filenames site='GP13' From 7d9d38f3fad58dc2a5a1484f86fdc4421cf4a473 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 19 Mar 2024 14:43:59 +0100 Subject: [PATCH 16/85] Conf for GP13 (missing spaces corrected) --- scripts/transfers/transfer_from_obs.bash | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index c80e5af2..fb384f85 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -19,16 +19,16 @@ site='GP13' remote_server='cca.in2p3.fr' # Account on remote server -remote_account='pma'#prod_grand' # 'prod_grand' +remote_account='pma' #prod_grand' # 'prod_grand' #ssh key for rsync -ssh_key_rsync="/home/mapx/.ssh/id_ed25519"#/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" +ssh_key_rsync="/home/mapx/.ssh/id_ed25519" #/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" #ssh key for exec remote scripts ssh_key_exec="/home/mapx/.ssh/id_ed25519" #"/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" # Target directory on remote server -remotedatadir='/sps/grand/data/gp13/test'#'/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13' +remotedatadir='/sps/grand/data/gp13/test' #'/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13' # Start date for transfer (all files older than this date will be skipped 
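 # (dates use the YYYYMMDD format)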
first_transfer='20240314' @@ -43,7 +43,7 @@ post_run_script='' # 'setup_network_auger.bash -close' rsync_options="-a" # treatment scripts location @CCIN2P3 -ccscripts=''#/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' +ccscripts='' #/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' ##### End of Configuration section (do not modify below) ##### From ad2f04d21e096839d02c4d522c65e03c6fe51c16 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Thu, 21 Mar 2024 17:46:30 +0100 Subject: [PATCH 17/85] Version compatible with restrictions on ssh keys --- scripts/transfers/ccscript.bash | 37 +++++++++-- scripts/transfers/setup_network_auger.bash | 1 + scripts/transfers/transfer_from_obs.bash | 77 ++++++++++++++++------ 3 files changed, 88 insertions(+), 27 deletions(-) mode change 100644 => 100755 scripts/transfers/ccscript.bash diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash old mode 100644 new mode 100755 index cf2fd21f..ea2ef7ea --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -2,29 +2,54 @@ # path to bin2root file bin2root='/pbs/home/p/prod_grand/scripts/transfers/bintoroot.bash' -# gtot options for convertion + +# gtot options for convertion -g1 for gp13 -f2 for gaa gtot_option="-g1" # number of files to group in same submission nbfiles=3 +# manage call from remote restricted ssh command (extracr opt parameters) +# default args +fullscriptpath=${BASH_SOURCE[0]} +args="$*" +case $SSH_ORIGINAL_COMMAND in + "$fullscriptpath "*) + args=$(echo "${SSH_ORIGINAL_COMMAND}" | sed -e "s,^${fullscriptpath} ,,") + ;; + *) + echo "Permission denied." + exit 1 + ;; +esac # Get tag and database file to use -while getopts ":t:d:" option; do +while getopts ":t:d:s:" option ${args}; do case $option in t) tag=${OPTARG};; d) db=${OPTARG};; + s) + site=${OPTARG};; :) printf "option -${OPTARG} need an argument\n" - exit 1;; + exit 1;; ?) # Invalid option printf "Error: Invalid option -${OPTARG}\n" exit 1;; esac done +case $site in + gp13) + gtot_option="-g1";; + gaa) + gtot_option="-v2";; + ?) + gtot_option="-g1";; +esac + #test dbfile exists and tag is set if [ -z "$tag" ] || [ -z "$db" ];then @@ -35,9 +60,9 @@ elif [ ! -f $db ];then exit 1 fi -# Determine root_dri from database path -root_dest=${db%/database*}/GrandRoot/ -submit_dir=${db%/database*}/logs/ +# Determine root_dir from database path +root_dest=${db%/logs*}/GrandRoot/ +submit_dir=$(dirname db) submit_base_name=submit_${tag} if [ ! 
-d $root_dest ];then mkdir -p $root_dest >/dev/null 2>&1 diff --git a/scripts/transfers/setup_network_auger.bash b/scripts/transfers/setup_network_auger.bash index 10565150..ba560602 100755 --- a/scripts/transfers/setup_network_auger.bash +++ b/scripts/transfers/setup_network_auger.bash @@ -7,6 +7,7 @@ claro_hop="claro.net.ar"; # 4G operator tracepath hop auger_hop="auger.org.ar"; # auger network tracepath hop wwan_con="netplan-cdc-wdm0"; # NetworkManager wwan connection name +in2p3_machine="cca.in2p3.fr"; # CCIN2P3 ssh machine max_hop=5; # max hop for tracepath test sleep_delay=2; # sleep delay to wait after NetworkManager (de)activation calls verbose=false; # true or false diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index fb384f85..5d6a76a2 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -1,49 +1,71 @@ #!/bin/bash # Script to transfert data from a GRAND observatory to CCIN2P3 (or to any site) -# Fleg: 03/2024 +# Fleg & Fred: 03/2024 # Copyright : Grand Observatory 2024 ##### Configuration part ##### # Please adjust the following variable to your site # Local database name (sqlite filename) +# '/home/grand/data-transfer/grand_transfer.db' for gaa +# dbfile='grand_transfer.db' # Local directory where are stored the data to be transfered (will be explored recursively) -localdatadir='/home/mapx/mapx/DunhuangData/Rawdata/20dB/2024/' #/sps/grand/data/gp13/raw/2024/' +# '/home/mapx/mapx/DunhuangData/Rawdata/20dB/' for gp13 +# '/home/grand/Malargue/' for gaa +localdatadir='/home/mapx/mapx/DunhuangData/Rawdata/20dB/' + +#path to local rsync to use (leave blank if your default rsync --version >= 3.2.3 +# define the path to rsync version >= 3.2.3 otherwise +rsyncpath='' # Site name prefix in filenames +# 'GP13' for gp13 +# gaa for gaa site='GP13' # Remote server to transfer remote_server='cca.in2p3.fr' # Account on remote server -remote_account='pma' #prod_grand' # 'prod_grand' +remote_account='prod_grand' #ssh key for rsync -ssh_key_rsync="/home/mapx/.ssh/id_ed25519" #/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" +# "/home/mapx/.ssh/id_ed25519" for gp13 +# "/root/.ssh/id_ed25519-nopw" for gaa +ssh_key_rsync="/home/mapx/.ssh/id_ed25519" #ssh key for exec remote scripts -ssh_key_exec="/home/mapx/.ssh/id_ed25519" #"/pbs/home/p/prod_grand/.ssh/id_ed25519" # "/root/.ssh/id_ed25519-nopw" +# "/home/mapx/.ssh/id_ed25519-scrips" for gp13 +# "/root/.ssh/id_ed25519-nopw-scripts" for gaa +ssh_key_exec="/home/mapx/.ssh/id_ed25519-scrips" -# Target directory on remote server -remotedatadir='/sps/grand/data/gp13/test' #'/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13' +# Target directory on remote server, must be the same directory as the one defined in remote_account@remote_server:~/.ssh/authorized_keys for ssh_key_rsync +# '/sps/grand/data/gp13' for gp13 +# '/sps/grand/data/gaa' for gaa +remotedatadir='/sps/grand/data/gp13' #'/sps/grand/prod_grand/tests' #'/sps/grand/data/gp13' # Start date for transfer (all files older than this date will be skipped first_transfer='20240314' # Local script to be launched before run -pre_run_script='' #'setup_network_auger.bash -init' +# '' for gp13 +#'setup_network_auger.bash -init' for gaa +pre_run_script='' # Local script to be launched after run -post_run_script='' # 'setup_network_auger.bash -close' +# '' for gp13 +# '/root/bin/setup_network_auger.bash -close' for gaa +post_run_script='' # rsync_options : a to keep the creation time of 
files, z to compress if bandwidth is limited (but it's ~5 times slower). Please keep the "a" option ! -rsync_options="-a" +rsync_options="-az --mkpath" # treatment scripts location @CCIN2P3 -ccscripts='' #/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' +# '/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' for gp13 +# '/pbs/home/p/prod_grand/scripts/transfers/ccscript_gaa.bash' for gaa +ccscripts='/pbs/home/p/prod_grand/scripts/transfers/ccscript.bash' ##### End of Configuration section (do not modify below) ##### @@ -52,7 +74,13 @@ sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTO sqlite3 $dbfile "create table if not exists transfer (id, tag INTEGER, date_transfer DATETIME, success BOOLEAN, target TEXT, comment TEXTE);" # Define some useful stuff - +# +if [ -n "$rsyncpath" ] && [ -f $rsyncpath ]; +then + rsync_command=$rsyncpath +else + rsync_command=$(which rsync) +fi #ssh options ssh_options="-o ControlPath=\"$HOME/.ssh/ctl/%L-%r@%h:%p\"" if [ -n "$ssh_key_rsync" ]; then @@ -65,6 +93,7 @@ last_transfer=$(( last_transfer > first_transfer ? last_transfer : first_transfe #tag to identify files treated in the current run tag=$(date +'%Y%m%d%H%M%S') +dbname=$(basename dbfile) # Colors Default='\033[0m' # Text Reset @@ -117,7 +146,7 @@ for key in "${!toins[@]}"; do fi done -# Open a ssh connection that will be used for all transfers (avoid to reopen rsync tunnel for each file) +# Open a ssh connection that will be used for all transfers (avoid to reopen rsync tunnel for each file) for rsync mkdir ~/.ssh/ctl >/dev/null 2>&1 ssh -nNf -o ControlMaster=yes ${ssh_options} ${remote_account}@${remote_server} declare -A translog=([1]="") @@ -132,7 +161,10 @@ do finalname="${fileinfo[1]%.*}.bin" #Transfer files (one by one to get info on each transfer) printf "\nSending ${fileinfo[1]} " - trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b md5:%C" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${finalname} 2>&1) + # no remotedatadir specified on rsync command line, as it's already the defined restricted directory in ccin2p3:~/.ssh/authorized_keys + # /raw directory in the rsync thus has /sps/grand/data/gaa as root dir + trans=$(${rsync_command} -e "ssh ${ssh_options}" --out-format="%t %b md5:%C" ${rsync_options} ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${finalname} 2>&1) + #trans=$(rsync -e "ssh ${ssh_options}" --out-format="%t %b md5:%C" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2} && rsync" ${fileinfo[0]}/${fileinfo[1]} ${remote_account}@${remote_server}:$remotedatadir/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${finalname} 2>&1) if [ "$?" 
-eq "0" ] then md5=${trans#*md5:} @@ -140,7 +172,6 @@ do #Transfer successful : store info to update database at the end translog[$i]+=";UPDATE gfiles SET success=1, md5sum='${md5}' WHERE id=${fileinfo[4]};INSERT INTO transfer (id, tag, success,date_transfer,target,comment) VALUES (${fileinfo[4]},${tag}, 1,datetime('now','utc'), \"${remotedatadir}/raw/${fileinfo[2]:0:4}/${fileinfo[2]:4:2}/${finalname}\", '${trans}')" printf "${Green}Ok${Default}" - else md5=$(echo ${trans}|awk -F"md5:" '{print $2}') #Transfer failed : just log errors @@ -171,12 +202,21 @@ for key in "${!translog[@]}"; do done #finally also rsync the database -rsync -e "ssh ${ssh_options}" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/database && rsync" $dbfile ${remote_account}@${remote_server}:$remotedatadir/database/${tag}_${dbfile} +# no remotedatadir specified on rsync command line, as it's already the defined restricted directory in ccin2p3:~/.ssh/authorized_keys +# /raw directory in the rsync thus has /sps/grand/data/gaa as root dir +${rsync_command} -e "ssh ${ssh_options}" ${rsync_options} $dbfile ${remote_account}@${remote_server}:/logs/${tag}_${dbname} +#rsync -e "ssh ${ssh_options}" ${rsync_options} --rsync-path="mkdir -p $remotedatadir/logs && rsync" $dbfile ${remote_account}@${remote_server}:$remotedatadir/logs/${tag}_${dbname} #close ssh connection ssh -O exit $ssh_options ${remote_account}@${remote_server} rm -rf ~/.ssh/ctl +#Run conversion scripts @ccin2p3 +if [ -n "$ccscripts" ] +then + ssh -i ${ssh_key_exec} ${remote_account}@${remote_server} ${ccscripts} -s ${site} -d ${remotedatadir}/logs/${tag}_${dbname} -t ${tag} +fi + # run post script if [ -n "$post_run_script" ] then @@ -188,8 +228,3 @@ then fi fi -#Run conversion scripts @ccin2p3 -if [ -n "$ccscripts" ] -then - ssh -i ${ssh_key_exec} ${remote_account}@${remote_server} ${ccscripts} -d ${remotedatadir}/database/${tag}_${dbfile} -t ${tag} -fi \ No newline at end of file From 5615e90e2dee4563847bccc72ae1f52eea423f8e Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Thu, 21 Mar 2024 18:06:16 +0100 Subject: [PATCH 18/85] Removed ccscripts from configuration. --- scripts/transfers/transfer_from_obs.bash | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 5d6a76a2..1ed0ffd5 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -62,13 +62,11 @@ post_run_script='' # rsync_options : a to keep the creation time of files, z to compress if bandwidth is limited (but it's ~5 times slower). Please keep the "a" option ! 
rsync_options="-az --mkpath" +##### End of Configuration section (do not modify below) ##### + # treatment scripts location @CCIN2P3 -# '/pbs/home/p/prod_grand/scripts/transfers/ccscript_GP13.bash' for gp13 -# '/pbs/home/p/prod_grand/scripts/transfers/ccscript_gaa.bash' for gaa ccscripts='/pbs/home/p/prod_grand/scripts/transfers/ccscript.bash' -##### End of Configuration section (do not modify below) ##### - # Create database if not exists sqlite3 $dbfile "create table if not exists gfiles (id INTEGER PRIMARY KEY AUTOINCREMENT, directory TEXT, file TEXT, date INT, success BOOLEAN, md5sum VARCHAR(35), UNIQUE (directory,file));" sqlite3 $dbfile "create table if not exists transfer (id, tag INTEGER, date_transfer DATETIME, success BOOLEAN, target TEXT, comment TEXTE);" From 83c6627d5c28ef0d3fef5651560cf134073feafe Mon Sep 17 00:00:00 2001 From: fleg Date: Sun, 24 Mar 2024 10:15:39 +0100 Subject: [PATCH 19/85] Corrected type in submit_dir --- scripts/transfers/ccscript.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 scripts/transfers/ccscript.bash diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash old mode 100755 new mode 100644 index ea2ef7ea..bfd0cbc5 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -62,7 +62,7 @@ fi # Determine root_dir from database path root_dest=${db%/logs*}/GrandRoot/ -submit_dir=$(dirname db) +submit_dir=$(dirname "${db}") submit_base_name=submit_${tag} if [ ! -d $root_dest ];then mkdir -p $root_dest >/dev/null 2>&1 From 3a79fcd03e0e7f56d1df93e3133abcff9e3562b1 Mon Sep 17 00:00:00 2001 From: fleg Date: Sun, 24 Mar 2024 10:16:10 +0100 Subject: [PATCH 20/85] Corrected typo in submit_dir --- scripts/transfers/ccscript.bash | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index bfd0cbc5..c9ab9771 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -64,6 +64,7 @@ fi root_dest=${db%/logs*}/GrandRoot/ submit_dir=$(dirname "${db}") submit_base_name=submit_${tag} + if [ ! -d $root_dest ];then mkdir -p $root_dest >/dev/null 2>&1 fi From a63ded98562efeb7636a5cf7703ed1e49c0ba46b Mon Sep 17 00:00:00 2001 From: fleg Date: Wed, 27 Mar 2024 10:45:21 +0100 Subject: [PATCH 21/85] Added registration of transfer logs into the database --- scripts/transfers/bintoroot.bash | 35 +++++++++--------- scripts/transfers/ccscript.bash | 10 ++++- scripts/transfers/register_transfers.py | 49 +++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 18 deletions(-) create mode 100644 scripts/transfers/register_transfers.py diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index 6ad35228..201b061d 100644 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -5,18 +5,18 @@ gtot_path='/pbs/home/p/prod_grand/softs/gtot/cmake-build-release/gtot' # Get tag and database file to use while getopts ":d:g:" option; do - case $option in - d) - root_dest=${OPTARG};; - g) - gtot_options=${OPTARG};; - :) - printf "option -${OPTARG} need an argument\n" - exit 1;; - ?) # Invalid option - printf "Error: Invalid option -${OPTARG}\n" - exit 1;; - esac + case $option in + d) + root_dest=${OPTARG};; + g) + gtot_options=${OPTARG};; + :) + printf "option -${OPTARG} need an argument\n" + exit 1;; + ?) 
# Invalid option + printf "Error: Invalid option -${OPTARG}\n" + exit 1;; + esac done shift $(($OPTIND - 1)) @@ -33,17 +33,18 @@ for file in "$@" do echo "converting ${file} to GrandRoot" filename=$(basename $file) - tmp=${filename#*_} - dateobs=${tmp:0:8} - dest="${root_dest}/${dateobs:0:4}/${dateobs:4:2}" + tmp=${filename#*_} + dateobs=${tmp:0:8} + dest="${root_dest}/${dateobs:0:4}/${dateobs:4:2}" if [ ! -d $dest ];then - mkdir -p $dest >/dev/null 2>&1 + mkdir -p $dest >/dev/null 2>&1 fi dirlogs=${root_dest}/../logs logfile=${dirlogs}/bin2root-${filename%.*} if [ ! -d $dirlogs ];then mkdir -p $dirlogs >/dev/null 2>&1 fi - ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} + ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} + echo $? >> ${logfile} done diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index c9ab9771..edf2a30c 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -1,7 +1,12 @@ #!/bin/bash +# Script triggered after transfering data from a GRAND observatory to CCIN2P3 (or to any site) +# It will launch the jobs to convert binary files into GrandRoot and register the results of the transfers and convertions into the database +# Fleg & Fred: 03/2024 +# Copyright : Grand Observatory 2024 + # path to bin2root file bin2root='/pbs/home/p/prod_grand/scripts/transfers/bintoroot.bash' - +register_transfers='/pbs/home/p/prod_grand/scripts/transfers/register_transfers.py' # gtot options for convertion -g1 for gp13 -f2 for gaa gtot_option="-g1" @@ -99,4 +104,7 @@ do sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile done +outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" +echo "python3 $register_transfers -d $db -t $tag" >> $outfile +sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 8G $outfile diff --git a/scripts/transfers/register_transfers.py b/scripts/transfers/register_transfers.py new file mode 100644 index 00000000..910a9cf4 --- /dev/null +++ b/scripts/transfers/register_transfers.py @@ -0,0 +1,49 @@ +from granddb.granddatalib import DataManager +import os +import sqlite3 +import argparse + +argParser = argparse.ArgumentParser() +argParser.add_argument("-c", "--config",default="config.ini", help="Config file to use") +argParser.add_argument("-d", "--database", help="Database file to use", required=True) +argParser.add_argument("-t", "--tag", default="*", help="Tag for the files to register") +args = argParser.parse_args() + +dm = DataManager(os.path.dirname(__file__)+"/"+args.config) + +db = args.database +tag = args.tag + +#db = "/sps/grand/data/gp13/logs/20240325225834_dbfile" +#tag = "20240325225834" + +connection = sqlite3.connect(db) +connection.row_factory = sqlite3.Row +cursor = connection.cursor() +cursor.execute("SELECT target as file, md5sum, transfer.success, transfer.date_transfer, transfer.comment, transfer.tag FROM gfiles, transfer WHERE gfiles.id = transfer.id AND transfer.tag = "+tag+";") +rows = cursor.fetchall() +connection.close() + +for row in rows: + trans = dict(row) + rawfile = {'filename': os.path.basename(trans["file"]), 'md5': trans["md5sum"]} + fname = os.path.basename(trans["file"]) + + myobject = dm.database().sqlalchemysession.query(dm.database().tables()['rawfile']).filter_by(filename=fname).first() + if not myobject: + container = 
dm.database().tables()['rawfile'](**rawfile) + dm.database().sqlalchemysession.add(container) + dm.database().sqlalchemysession.flush() + id_raw_file = container.id_raw_file + else: + id_raw_file = myobject.id_raw_file + + transfer = {'id_raw_file': id_raw_file, 'tag': trans["tag"], 'date_transfer': trans["date_transfer"], 'success': trans["success"], 'target': trans["file"], 'comments': trans["comment"]} + myobject = dm.database().sqlalchemysession.query(dm.database().tables()['transfer']).filter_by(id_raw_file=id_raw_file, date_transfer=trans["date_transfer"],success=trans["success"]).first() + if not myobject: + container = dm.database().tables()['transfer'](**transfer) + dm.database().sqlalchemysession.add(container) + dm.database().sqlalchemysession.flush() + id_raw_file = container.id_raw_file + + dm.database().sqlalchemysession.commit() From 23f4d6f698098d0392e2b2e069403b03262dd2e8 Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 28 Mar 2024 17:52:32 +0100 Subject: [PATCH 22/85] Added registration of transfer and convertion logs into the database --- scripts/transfers/bintoroot.bash | 8 +++++-- scripts/transfers/ccscript.bash | 19 +++++++++------ scripts/transfers/register_convert.py | 28 ++++++++++++++++++++++ scripts/transfers/register_transfer.bash | 30 ++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 scripts/transfers/register_convert.py create mode 100644 scripts/transfers/register_transfer.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index 201b061d..8f550066 100644 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -2,7 +2,7 @@ # path to gtot gtot_path='/pbs/home/p/prod_grand/softs/gtot/cmake-build-release/gtot' - +register_path='/pbs/home/p/prod_grand/scripts/transfers/register_convert.py' # Get tag and database file to use while getopts ":d:g:" option; do case $option in @@ -44,7 +44,11 @@ do if [ ! -d $dirlogs ];then mkdir -p $dirlogs >/dev/null 2>&1 fi + # Convert file ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} - echo $? >> ${logfile} + conv_status=$? + echo $conv_status >> ${logfile} + # Register conversion result into the database + python3 ${register_path} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} done diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index edf2a30c..ed3c9d5e 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -6,7 +6,7 @@ # path to bin2root file bin2root='/pbs/home/p/prod_grand/scripts/transfers/bintoroot.bash' -register_transfers='/pbs/home/p/prod_grand/scripts/transfers/register_transfers.py' +register_transfers='/pbs/home/p/prod_grand/scripts/transfers/register_transfer.bash' # gtot options for convertion -g1 for gp13 -f2 for gaa gtot_option="-g1" @@ -14,7 +14,7 @@ gtot_option="-g1" # number of files to group in same submission nbfiles=3 -# manage call from remote restricted ssh command (extracr opt parameters) +# manage call from remote restricted ssh command (extract opt parameters) # default args fullscriptpath=${BASH_SOURCE[0]} args="$*" @@ -23,7 +23,7 @@ case $SSH_ORIGINAL_COMMAND in args=$(echo "${SSH_ORIGINAL_COMMAND}" | sed -e "s,^${fullscriptpath} ,,") ;; *) - echo "Permission denied." + echo "Permission denied. You are not authorized to run ${fullscriptpath}. Check ssh key ?" exit 1 ;; esac @@ -77,7 +77,14 @@ if [ ! 
-d $submit_dir ];then mkdir -p $submit_dir >/dev/null 2>&1 fi +# First register raw files transfers into the DB and get the id of the registration job +outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" +echo "#!/bin/bash" > $outfile +echo "$register_transfers -d $db -t $tag" >> $outfile +jregid=$(sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 8G ${outfile}) +jregid=$(echo $jregid |awk '{print $NF}') +# List files to be converted and group them by bunchs of nbfiles i=0 j=0 declare -A listoffiles @@ -93,6 +100,7 @@ do ((i++)) done +# Launch convertion of files (but after the registration has finished) for j in "${!listoffiles[@]}" do outfile="${submit_dir}/${submit_base_name}-${j}.bash" @@ -101,10 +109,7 @@ do echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile #submit script echo "submit $outfile" - sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile + sbatch --dependency=afterok:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile done -outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" -echo "python3 $register_transfers -d $db -t $tag" >> $outfile -sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 8G $outfile diff --git a/scripts/transfers/register_convert.py b/scripts/transfers/register_convert.py new file mode 100644 index 00000000..00db96d6 --- /dev/null +++ b/scripts/transfers/register_convert.py @@ -0,0 +1,28 @@ +from granddb.granddatalib import DataManager +import os +import argparse +from datetime import datetime + +argParser = argparse.ArgumentParser() +argParser.add_argument("-c", "--config",default="config.ini", help="Config file to use") +argParser.add_argument("-s", "--status", help="Status of convertion", required=True) +argParser.add_argument("-i","--file",help="Bin file converted", required=True) +argParser.add_argument("-o","--root",help="Root file created", required=True) +argParser.add_argument("-l","--logfile",help="Logfile of convertion", required=True) + +args = argParser.parse_args() + +dm = DataManager(os.path.dirname(__file__)+"/"+args.config) + +print(args.file) +print(args.status) +myfile = dm.database().sqlalchemysession.query(dm.database().tables()['rawfile']).filter_by(filename=args.file).first() +if not myfile: + print("Error file not registerd") + exit(0) +else: + id_raw_file = myfile.id_raw_file + converted = {'id_raw_file': id_raw_file, 'date_convertion': datetime.now(), 'logfile': args.logfile, 'root_filename': args.root, 'retcode': args.status} + container = dm.database().tables()['convertion'](**converted) + dm.database().sqlalchemysession.add(container) + dm.database().sqlalchemysession.commit() diff --git a/scripts/transfers/register_transfer.bash b/scripts/transfers/register_transfer.bash new file mode 100644 index 00000000..5e4f9d39 --- /dev/null +++ b/scripts/transfers/register_transfer.bash @@ -0,0 +1,30 @@ +#!/bin/bash + +register_transfers='python3 /pbs/home/p/prod_grand/scripts/transfers/register_transfers.py' + +while getopts ":d:t:" option; do + case $option in + d) + db=${OPTARG};; + t) + tag=${OPTARG};; + c) + config=${OPTARG};; + :) + printf "option -${OPTARG} need an argument\n" + exit 1;; + ?) 
# Invalid option
+ printf "Error: Invalid option -${OPTARG}\n"
+ exit 1;;
+ esac
+done
+
+
+cd /pbs/home/p/prod_grand/softs/grand
+source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh
+conda activate /sps/grand/software/conda/grandlib_2304
+source env/setup.sh
+cd /pbs/home/p/prod_grand/scripts/transfers
+
+#${register_transfers} -d ${db} -t ${tag} -c ${config}
+${register_transfers} -d ${db} -t ${tag}
\ No newline at end of file

From caaaff79c5c488344984f1505f907455a6836c5f Mon Sep 17 00:00:00 2001
From: fleg
Date: Thu, 28 Mar 2024 19:01:03 +0100
Subject: [PATCH 23/85] use "after" instead of "afterok" in submission

---
 scripts/transfers/ccscript.bash | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash
index ed3c9d5e..9d7a4196 100644
--- a/scripts/transfers/ccscript.bash
+++ b/scripts/transfers/ccscript.bash
@@ -109,7 +109,7 @@ do
 echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile
 #submit script
 echo "submit $outfile"
- sbatch --dependency=afterok:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile
+ sbatch --dependency=after:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile
 done

From fba1b97b2f9b60c0a2b1d21b8bb823e648d9ddb1 Mon Sep 17 00:00:00 2001
From: fleg
Date: Thu, 28 Mar 2024 19:47:02 +0100
Subject: [PATCH 24/85] use "afterany" instead of "afterok" in submission

---
 scripts/transfers/ccscript.bash | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash
index 9d7a4196..5353189c 100644
--- a/scripts/transfers/ccscript.bash
+++ b/scripts/transfers/ccscript.bash
@@ -109,7 +109,7 @@ do
 echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile
 #submit script
 echo "submit $outfile"
- sbatch --dependency=after:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile
+ sbatch --dependency=afterany:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile
 done

From f59b4a18cf75c3cc4d911c083cf9102ce09b2925 Mon Sep 17 00:00:00 2001
From: fleg
Date: Fri, 29 Mar 2024 16:25:47 +0100
Subject: [PATCH 25/85] Added registration into the database of newly converted files to GrandRoot format

---
 granddb/granddatalib.py          |  5 +++--
 granddb/register_file_in_db.py   | 31 ++++++++++++++++-----------
 scripts/transfers/bintoroot.bash | 10 ++++++++--
 3 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/granddb/granddatalib.py b/granddb/granddatalib.py
index bbc3b736..4cfde33e 100644
--- a/granddb/granddatalib.py
+++ b/granddb/granddatalib.py
@@ -192,6 +192,7 @@ def get(self, file, repository=None, path=None):
 logger.warning(f"path given in filename ({os.path.dirname(file)}) and in repository path ({path}) are different ! The path {os.path.dirname(file)} from file will be used !")
 path = os.path.dirname(file)
 file = os.path.basename(file)
+
 # if repository is given we get file directly from this repo
 if not (repository is None):
 rep = self.getrepo(repository)
@@ -229,9 +230,9 @@ def getrepo(self, repo):

 ##Function to register a file into the database. Returns the path to the file in the repository where the file was registered.
- def register_file(self,filename): + def register_file(self,filename, repository=None, path=None): newfilename = None - file = self.get(filename) + file = self.get(filename,repository,path) if file is not None: # If filename in referer repository then keep it #print(os.path.basename(filename)+" "+self.referer().name()+" "+os.path.dirname(filename)) diff --git a/granddb/register_file_in_db.py b/granddb/register_file_in_db.py index 64872cda..63b1df87 100644 --- a/granddb/register_file_in_db.py +++ b/granddb/register_file_in_db.py @@ -1,20 +1,29 @@ import sys, os, getopt import grand.manage_log as mlg from granddatalib import DataManager - - import argparse +logger = mlg.get_logger_for_script(__name__) argParser = argparse.ArgumentParser() -argParser.add_argument("-c", "--config",default="config.ini", help="Config file to use") -argParser.add_argument('file', type=str, help='File to register') +argParser.add_argument("-c", "--config", default="config.ini", help="Config file to use") +argParser.add_argument("-r", "--repository", default="", help="Repository") +argParser.add_argument('files', nargs='+', default=[], help='Files to register') args = argParser.parse_args() +# if config is given as absolute path, use it. If not then use path relative to script +if args.config[0] == '/': + config_path = args.config +else: + config_path = os.path.dirname(__file__)+"/"+args.config -dm = DataManager(os.path.dirname(__file__)+"/"+args.config) - -try: - print(dm.register_file(args.file)) -except Exception as e: - logger.error(f'Error when importing {path}. Skipping.') - logger.error(f'Error was {e}.') \ No newline at end of file +dm = DataManager(config_path) +if args.repository == '': + repo_name = None +else: + repo_name = args.repository +for file in args.files: + try: + dm.register_file(file, repo_name) + except Exception as e: + logger.error(f'Error when importing {file}. Skipping.') + logger.error(f'Error was {e}.') \ No newline at end of file diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index 8f550066..6f2dca45 100644 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -2,7 +2,11 @@ # path to gtot gtot_path='/pbs/home/p/prod_grand/softs/gtot/cmake-build-release/gtot' -register_path='/pbs/home/p/prod_grand/scripts/transfers/register_convert.py' +# path to script to register convertion results +register_convertion='/pbs/home/p/prod_grand/scripts/transfers/register_convert.py' +# path to script to register root file into the DB +register_root='/pbs/home/p/prod_grand/softs/grand/granddb/register_file_in_db.py' +config_file='/pbs/home/p/prod_grand/softs/grand/scripts/transfers/config-prod.ini' # Get tag and database file to use while getopts ":d:g:" option; do case $option in @@ -49,6 +53,8 @@ do conv_status=$? 
echo $conv_status >> ${logfile} # Register conversion result into the database - python3 ${register_path} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} + python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} + # Register root file into db + python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root done From be0e3c9c3cc60223dc2e56d4bc280e291aac3593 Mon Sep 17 00:00:00 2001 From: fleg Date: Sat, 30 Mar 2024 16:58:14 +0100 Subject: [PATCH 26/85] Added execute_sql --- granddb/granddblib.py | 195 +++++++++++++++++++++++------------------- 1 file changed, 107 insertions(+), 88 deletions(-) diff --git a/granddb/granddblib.py b/granddb/granddblib.py index b8b1675d..7c1cd3c1 100644 --- a/granddb/granddblib.py +++ b/granddb/granddblib.py @@ -16,12 +16,11 @@ from sqlalchemy.dialects import postgresql import grand.manage_log as mlg import ROOT + logger = mlg.get_logger_for_script(__name__) mlg.create_output_for_logger("debug", log_stdout=True) - - def casttodb(value): if isinstance(value, numpy.uint32): value = int(value) @@ -74,7 +73,7 @@ def __init__(self, host, port, dbname, user, passwd, sshserv="", sshport=22, cre self._cred = cred if self._sshserv != "" and self._cred is not None: - #TODO: Check credentials for ssh tunnel and ask for passwds + # TODO: Check credentials for ssh tunnel and ask for passwds self.server = SSHTunnelForwarder( (self._sshserv, self.sshport()), ssh_username=self._cred.user(), @@ -87,17 +86,18 @@ def __init__(self, host, port, dbname, user, passwd, sshserv="", sshport=22, cre self._host = "127.0.0.1" self._port = local_port - #self.connect() + # self.connect() engine = create_engine( - 'postgresql+psycopg2://' + self.user() + ':' + self.passwd() + '@' + self.host() + ':' + str(self.port()) + '/' + self._dbname) + 'postgresql+psycopg2://' + self.user() + ':' + self.passwd() + '@' + self.host() + ':' + str( + self.port()) + '/' + self._dbname) Base = automap_base() Base.prepare(engine, reflect=True) self.sqlalchemysession = Session(engine) inspection = inspect(engine) for table in inspection.get_table_names(): - #for table in engine.table_names(): #this is obsolete + # for table in engine.table_names(): #this is obsolete self._tables[table] = getattr(Base.classes, table) def __del__(self): @@ -155,31 +155,43 @@ def select(self, query): logger.error(f"Error {e}") return record -# def insert(self, query): -# record = [] -# try: -# cursor = self.dbconnection.cursor(cursor_factory=psycopg2.extras.DictCursor) -# cursor.execute(query) -# print(cursor.statusmessage) -# self.dbconnection.commit() -# record.append(cursor.fetchone()[0]) -# cursor.close() -# except psycopg2.DatabaseError as e: -# print(f'Error {e}') -# return record -# -# def insert2(self, query, values): -# record = [] -# try: -# cursor = self.dbconnection.cursor(cursor_factory=psycopg2.extras.DictCursor) -# cursor.execute(query, values) -# print(cursor.statusmessage) -# self.dbconnection.commit() -# record.append(cursor.fetchone()[0]) -# cursor.close() -# except psycopg2.DatabaseError as e: -# print(f'Error {e}') -# return record + def execute_sql(self, query): + try: + res = True + self.connect() + cursor = self.dbconnection.cursor(cursor_factory=psycopg2.extras.DictCursor) + cursor.execute(query) + cursor.close() + except psycopg2.DatabaseError as e: + logger.error(f"Error {e}") + res = False + return res + + # def insert(self, query): + # record = [] + # try: + # cursor = 
self.dbconnection.cursor(cursor_factory=psycopg2.extras.DictCursor) + # cursor.execute(query) + # print(cursor.statusmessage) + # self.dbconnection.commit() + # record.append(cursor.fetchone()[0]) + # cursor.close() + # except psycopg2.DatabaseError as e: + # print(f'Error {e}') + # return record + # + # def insert2(self, query, values): + # record = [] + # try: + # cursor = self.dbconnection.cursor(cursor_factory=psycopg2.extras.DictCursor) + # cursor.execute(query, values) + # print(cursor.statusmessage) + # self.dbconnection.commit() + # record.append(cursor.fetchone()[0]) + # cursor.close() + # except psycopg2.DatabaseError as e: + # print(f'Error {e}') + # return record ## @brief Method to get the list of the repositories defined in the database. # Returns a dictionary with @@ -201,19 +213,25 @@ def get_repos(self): # returns the filename and the different locations for it def SearchFile(self, filename): result = [] - file = self.sqlalchemysession.query(self.tables()['file'], self.tables()['file_location'], self.tables()['repository'])\ - .join(self.tables()['file_location'], self.tables()['file_location'].id_file==self.tables()['file'].id_file) \ - .join(self.tables()['repository'],self.tables()['repository'].id_repository==self.tables()['file_location'].id_repository) \ - .filter(self.tables()['file'].filename == filename)\ - .order_by(self.tables()['repository'].id_repository)\ + file = self.sqlalchemysession.query(self.tables()['file'], self.tables()['file_location'], + self.tables()['repository']) \ + .join(self.tables()['file_location'], + self.tables()['file_location'].id_file == self.tables()['file'].id_file) \ + .join(self.tables()['repository'], + self.tables()['repository'].id_repository == self.tables()['file_location'].id_repository) \ + .filter(self.tables()['file'].filename == filename) \ + .order_by(self.tables()['repository'].id_repository) \ .all() if len(file) == 0: - file = self.sqlalchemysession.query(self.tables()['file'], self.tables()['file_location'], self.tables()['repository'])\ - .join(self.tables()['file_location'], self.tables()['file_location'].id_file==self.tables()['file'].id_file) \ - .join(self.tables()['repository'],self.tables()['repository'].id_repository==self.tables()['file_location'].id_repository) \ - .filter(self.tables()['file'].original_name == filename)\ - .order_by(self.tables()['repository'].id_repository)\ + file = self.sqlalchemysession.query(self.tables()['file'], self.tables()['file_location'], + self.tables()['repository']) \ + .join(self.tables()['file_location'], + self.tables()['file_location'].id_file == self.tables()['file'].id_file) \ + .join(self.tables()['repository'], + self.tables()['repository'].id_repository == self.tables()['file_location'].id_repository) \ + .filter(self.tables()['file'].original_name == filename) \ + .order_by(self.tables()['repository'].id_repository) \ .all() for record in file: @@ -296,7 +314,7 @@ def register_filename(self, filename, newfilename, id_repository, provider): file_exist = self.sqlalchemysession.query(self.tables()['file']).filter_by( filename=os.path.basename(newfilename)).first() if file_exist is not None: - #file_exist_here = self.sqlalchemysession.query(self.tables()['file_location']).filter_by( + # file_exist_here = self.sqlalchemysession.query(self.tables()['file_location']).filter_by( # id_repository=id_repository).first() file_exist_here = self.sqlalchemysession.query(self.tables()['file_location']).filter_by( id_repository=id_repository).first() @@ -313,44 +331,45 @@ def 
register_filename(self, filename, newfilename, id_repository, provider): if register_file: id_provider = self.get_or_create_key('provider', 'provider', provider) if isnewfile: - #rfile = ROOT.TFile(str(filename)) + # rfile = ROOT.TFile(str(filename)) rfile = rdb.RootFile(str(filename)) rfile.dataset_name() - #rfile.file().GetSize() + # rfile.file().GetSize() container = self.tables()['file'](filename=os.path.basename(newfilename), - description='autodesc', - original_name=os.path.basename(filename), - id_provider=id_provider, - file_size=rfile.file.GetSize() + description='autodesc', + original_name=os.path.basename(filename), + id_provider=id_provider, + file_size=rfile.file.GetSize() ) self.sqlalchemysession.add(container) self.sqlalchemysession.flush() idfile = container.id_file - #container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=os.path.dirname(newfilename)) - container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=newfilename, description="") + # container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=os.path.dirname(newfilename)) + container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=newfilename, + description="") self.sqlalchemysession.add(container) - #self.sqlalchemysession.flush() + # self.sqlalchemysession.flush() return idfile, isnewfile ## @brief Function to register (if necessary) the content of a file into the database. # It will first read the file and walk along datas to determine what has to be registered def register_filecontent(self, file, idfile): - #We store run_number-event_number list to avoid to record them twice in event table (and produce an error due to unicity). + # We store run_number-event_number list to avoid to record them twice in event table (and produce an error due to unicity). # Ugly but no other efficient way to do (checking in the DB before insertion is too time consuming). eventlist = [] # ttrees will be a dict of trees to add. key is the tree name and value is a dict with all values for the tree. 
ttrees = {} - #tables = {} + # tables = {} rfile = rdb.RootFile(str(file)) # We iterate over all trees for treename in rfile.TreeList: logger.debug(f" Debug reading tree {treename}") treetype = treename.split('_', 1)[0] - #We register only known and identified trees defined in rootdblib + # We register only known and identified trees defined in rootdblib if hasattr(rfile, treetype + "ToDB"): table = getattr(rfile, treetype + "ToDB").get('table') - #table = getattr(rfile, treetype + "ToDB")['table'] + # table = getattr(rfile, treetype + "ToDB")['table'] ttrees[treename] = {} # Get metadata and add file_content record @@ -358,7 +377,7 @@ def register_filecontent(self, file, idfile): tablemeta = "file_content" metatree['id_file'] = idfile for meta, field in rfile.metaToDB.items(): - #try/except to avoid stopping when metadata is not present in root file + # try/except to avoid stopping when metadata is not present in root file try: value = casttodb(getattr(rfile.TreeList[treename], meta)) if field.find('id_') >= 0: @@ -366,25 +385,25 @@ def register_filecontent(self, file, idfile): if field == "comment": field = "comments" metatree[field] = value - #print(meta + "/" + field + " = " + str(getattr(rfile.TreeList[treename], meta)) + "/" + str(value)) + # print(meta + "/" + field + " = " + str(getattr(rfile.TreeList[treename], meta)) + "/" + str(value)) except: pass - #Trick to use "real" tree name (instead of meta _tree_name which is not always correct) + # Trick to use "real" tree name (instead of meta _tree_name which is not always correct) metatree['tree_name'] = treename container = self.tables()[tablemeta](**metatree) self.sqlalchemysession.add(container) - #self.sqlalchemysession.flush() + # self.sqlalchemysession.flush() # If table not defined in rootdblib for this tree then no content to record. st = time.time() if table is not None: if treetype in rfile.EventTrees: # For events we iterates over event_number and run_number for event, run in rfile.TreeList[treename].get_list_of_events(): - #MOVE TEST eventlist her to avoid reading for nothing + # MOVE TEST eventlist her to avoid reading for nothing - if ((table != "events") or ([run,event] not in eventlist)): + if ((table != "events") or ([run, event] not in eventlist)): if table == "events": - eventlist.append([run,event]) + eventlist.append([run, event]) if not (run, event) in ttrees[treename]: ttrees[treename][(run, event)] = {} @@ -393,7 +412,7 @@ def register_filecontent(self, file, idfile): if param != "table": value = casttodb(getattr(rfile.TreeList[treename], param)) # Il foreign key (i.e. 
starts with id_) then register value in foreign table and return the key instead of value - if field.startswith('id_') : + if field.startswith('id_'): value = self.get_or_create_fk(table, field, value) ttrees[treename][(run, event)][field] = value else: @@ -403,34 +422,34 @@ def register_filecontent(self, file, idfile): container = self.tables()[table](**ttrees[treename][(run, event)]) self.sqlalchemysession.add(container) - #if table =="events": + # if table =="events": # if [run,event] not in eventlist: # eventlist.append([run,event]) # self.sqlalchemysession.add(container) - #else: + # else: # self.sqlalchemysession.add(container) - #try: + # try: # self.sqlalchemysession.add(container) # self.sqlalchemysession.flush() - #except : + # except : # print("error 1") - #self.sqlalchemysession.add(container) - #self.sqlalchemysession.flush() - #filt = {} - #filt["run_number"] = str(casttodb(run)) - #filt["event_number"] = str(casttodb(event)) - #filt["id_file"] = str(casttodb(idfile)) - #ret = self.sqlalchemysession.query(self._tables[table]).filter_by(**filt).exists() - #if ret == 0 : + # self.sqlalchemysession.add(container) + # self.sqlalchemysession.flush() + # filt = {} + # filt["run_number"] = str(casttodb(run)) + # filt["event_number"] = str(casttodb(event)) + # filt["id_file"] = str(casttodb(idfile)) + # ret = self.sqlalchemysession.query(self._tables[table]).filter_by(**filt).exists() + # if ret == 0 : # self.sqlalchemysession.add(container) - #else: + # else: # print("UPDATE ?") - #self.sqlalchemysession.flush() - #print(container.id_treename) - #idtree = "id_"+treename + # self.sqlalchemysession.flush() + # print(container.id_treename) + # idtree = "id_"+treename # For runs we iterates over run_number elif treename in rfile.RunTrees: @@ -448,29 +467,29 @@ def register_filecontent(self, file, idfile): value = self.get_or_create_fk(table, field, value) ttrees[treename][run][field] = value except: - logger.warning(f"Error in getting {param} for {rfile.TreeList[treename].__class__.__name__}") + logger.warning( + f"Error in getting {param} for {rfile.TreeList[treename].__class__.__name__}") else: ttrees[treename][run]['id_file'] = idfile ttrees[treename][run]['tree_name'] = treename container = self.tables()[table](**ttrees[treename][run]) self.sqlalchemysession.add(container) - #self.sqlalchemysession.flush() + # self.sqlalchemysession.flush() - #print(container.id_treename) - #idtree = "id_"+treename + # print(container.id_treename) + # idtree = "id_"+treename et = time.time() elapsed_time = et - st - #print('Execution time:', elapsed_time, 'seconds') + # print('Execution time:', elapsed_time, 'seconds') logger.debug(f"execution time {elapsed_time} seconds") - def register_file(self, orgfilename, newfilename, id_repository, provider): idfile, read_file = self.register_filename(orgfilename, newfilename, id_repository, provider) if read_file: - #We read the localfile and not the remote one - self.register_filecontent(orgfilename,idfile) - #self.register_filecontent(newfilename,idfile) + # We read the localfile and not the remote one + self.register_filecontent(orgfilename, idfile) + # self.register_filecontent(newfilename,idfile) else: logger.info(f"file {orgfilename} already registered.") self.sqlalchemysession.commit() From 2e431902ff0a144caaa371e91ce4b7c966478351 Mon Sep 17 00:00:00 2001 From: fleg Date: Mon, 1 Apr 2024 12:05:11 +0200 Subject: [PATCH 27/85] Added refreshing for materialized views --- granddb/granddblib.py | 1 + granddb/refresh_mat_views.py | 15 +++++++++++++++ 
scripts/transfers/ccscript.bash | 15 ++++++++++++--- scripts/transfers/refresh_mat_views.bash | 7 +++++++ scripts/transfers/register_convert.py | 2 +- 5 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 granddb/refresh_mat_views.py create mode 100755 scripts/transfers/refresh_mat_views.bash diff --git a/granddb/granddblib.py b/granddb/granddblib.py index 7c1cd3c1..9f0911fa 100644 --- a/granddb/granddblib.py +++ b/granddb/granddblib.py @@ -161,6 +161,7 @@ def execute_sql(self, query): self.connect() cursor = self.dbconnection.cursor(cursor_factory=psycopg2.extras.DictCursor) cursor.execute(query) + self.dbconnection.commit() cursor.close() except psycopg2.DatabaseError as e: logger.error(f"Error {e}") diff --git a/granddb/refresh_mat_views.py b/granddb/refresh_mat_views.py new file mode 100644 index 00000000..f71de3c2 --- /dev/null +++ b/granddb/refresh_mat_views.py @@ -0,0 +1,15 @@ +from granddb.granddatalib import DataManager +import os +import argparse +import grand.manage_log as mlg +logger = mlg.get_logger_for_script(__name__) + +argParser = argparse.ArgumentParser() +argParser.add_argument("-c", "--config",default="config.ini", help="Config file to use") +args = argParser.parse_args() +dm = DataManager(os.path.dirname(__file__)+"/"+args.config) + +materialized_views = ['datamat'] +for view in materialized_views: + logger.info(f'refreshing {view}.') + dm.database().execute_sql(str('refresh materialized view '+view)) diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 5353189c..372d72a3 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -7,7 +7,7 @@ # path to bin2root file bin2root='/pbs/home/p/prod_grand/scripts/transfers/bintoroot.bash' register_transfers='/pbs/home/p/prod_grand/scripts/transfers/register_transfer.bash' - +refresh_mat_script='/pbs/home/p/prod_grand/scripts/transfers/refresh_mat_views.bash' # gtot options for convertion -g1 for gp13 -f2 for gaa gtot_option="-g1" @@ -100,6 +100,7 @@ do ((i++)) done +convjobs="" # Launch convertion of files (but after the registration has finished) for j in "${!listoffiles[@]}" do @@ -109,7 +110,15 @@ do echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile #submit script echo "submit $outfile" - sbatch --dependency=afterany:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G $outfile + jid=$(sbatch --dependency=afterany:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G ${outfile}) + jid=$(echo $jid |awk '{print $NF}') + convjobs=$convjobs":"$jid done - +if [ "$convjobs" -eq "" ]; then + dep="" +else + dep="--dependency=afterany${convjobs}" +fi +#finally refresh the materialized views in the database +sbatch ${dep} -t 0-01:00 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G ${refresh_mat_script} diff --git a/scripts/transfers/refresh_mat_views.bash b/scripts/transfers/refresh_mat_views.bash new file mode 100755 index 00000000..d0044f3c --- /dev/null +++ b/scripts/transfers/refresh_mat_views.bash @@ -0,0 +1,7 @@ +#!/bin/bash +cd /pbs/home/p/prod_grand/softs/grand +source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh +conda activate /sps/grand/software/conda/grandlib_2304 +source env/setup.sh +cd /pbs/home/p/prod_grand/scripts/transfers +python3 /pbs/home/p/prod_grand/softs/grand/granddb/refresh_mat_views.py \ No newline at end of file diff --git a/scripts/transfers/register_convert.py 
b/scripts/transfers/register_convert.py index 00db96d6..4d569e40 100644 --- a/scripts/transfers/register_convert.py +++ b/scripts/transfers/register_convert.py @@ -18,7 +18,7 @@ print(args.status) myfile = dm.database().sqlalchemysession.query(dm.database().tables()['rawfile']).filter_by(filename=args.file).first() if not myfile: - print("Error file not registerd") + print("Error file not registered") exit(0) else: id_raw_file = myfile.id_raw_file From c3c12e55aaf80860325e6418b17444aec4854c1d Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 4 Apr 2024 00:31:54 +0200 Subject: [PATCH 28/85] Corrected bug in fuser test (l121) --- scripts/transfers/transfer_from_obs.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 1ed0ffd5..5eb6b8c0 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -118,7 +118,7 @@ md5="0" for file in $(find $localdatadir -type f -newermt $last_transfer| grep /${site}_ |sort) do # skip opened files - if [ !$(fuser "$file" &> /dev/null) ]; then + if ! $(fuser "$file" &> /dev/null) ; then filename=$(basename $file) tmp=${filename#${site}_} dateobs=${tmp:0:8} From d9ff08dc46bbee9fb704fa5357ad945f6ebbb5d2 Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 4 Apr 2024 15:45:07 +0200 Subject: [PATCH 29/85] Corrected typos --- scripts/transfers/ccscript.bash | 4 ++-- scripts/transfers/transfer_from_obs.bash | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 372d72a3..4353f4ff 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -50,7 +50,7 @@ case $site in gp13) gtot_option="-g1";; gaa) - gtot_option="-v2";; + gtot_option="-f2";; ?) gtot_option="-g1";; esac @@ -115,7 +115,7 @@ do convjobs=$convjobs":"$jid done -if [ "$convjobs" -eq "" ]; then +if [ "$convjobs" = "" ]; then dep="" else dep="--dependency=afterany${convjobs}" diff --git a/scripts/transfers/transfer_from_obs.bash b/scripts/transfers/transfer_from_obs.bash index 5eb6b8c0..21201116 100755 --- a/scripts/transfers/transfer_from_obs.bash +++ b/scripts/transfers/transfer_from_obs.bash @@ -60,7 +60,7 @@ pre_run_script='' post_run_script='' # rsync_options : a to keep the creation time of files, z to compress if bandwidth is limited (but it's ~5 times slower). Please keep the "a" option ! 
-rsync_options="-az --mkpath" +rsync_options="-az --mkpath --chmod=go-w" ##### End of Configuration section (do not modify below) ##### From c78bc1c141d03767f857bfd2a09ab1dd61980a16 Mon Sep 17 00:00:00 2001 From: fleg Date: Sat, 6 Apr 2024 17:27:27 +0200 Subject: [PATCH 30/85] Added normalization of logfile path --- scripts/transfers/register_convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/transfers/register_convert.py b/scripts/transfers/register_convert.py index 4d569e40..004b92f5 100644 --- a/scripts/transfers/register_convert.py +++ b/scripts/transfers/register_convert.py @@ -13,7 +13,7 @@ args = argParser.parse_args() dm = DataManager(os.path.dirname(__file__)+"/"+args.config) - +logfile = os.path.normpath(args.logfile) print(args.file) print(args.status) myfile = dm.database().sqlalchemysession.query(dm.database().tables()['rawfile']).filter_by(filename=args.file).first() @@ -22,7 +22,7 @@ exit(0) else: id_raw_file = myfile.id_raw_file - converted = {'id_raw_file': id_raw_file, 'date_convertion': datetime.now(), 'logfile': args.logfile, 'root_filename': args.root, 'retcode': args.status} + converted = {'id_raw_file': id_raw_file, 'date_convertion': datetime.now(), 'logfile': logfile, 'root_filename': args.root, 'retcode': args.status} container = dm.database().tables()['convertion'](**converted) dm.database().sqlalchemysession.add(container) dm.database().sqlalchemysession.commit() From 9b915bbb207e00364cae88fe4c2800aa9381c117 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Tue, 23 Apr 2024 13:53:27 +0200 Subject: [PATCH 31/85] Added type selection to several arrays --- sim2root/Common/sim2root.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sim2root/Common/sim2root.py b/sim2root/Common/sim2root.py index 168dbce6..75220e55 100755 --- a/sim2root/Common/sim2root.py +++ b/sim2root/Common/sim2root.py @@ -292,7 +292,7 @@ def main(): rawmeta2grandroot(trawmeta, gt) # Change the trace lenght as specified in the comand line - trace = np.moveaxis(np.array([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z]), 0,1) + trace = np.moveaxis(np.array([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z]), 0,1).astype(np.float32) ext_t_0, trace=adjust_trace(trace, trawefield.t_0, OriginalTpre, OriginalTpost, DesiredTpre, DesiredTpost,trawefield.t_bin_size) # trawefield.trace_x=trace[:,0,:] @@ -355,12 +355,12 @@ def main(): # For star shapes, set the trun's du_id/xyz now and fill/write the tree if clargs.star_shape: gt.trun.du_id = tdu_ids - gt.trun.du_xyz = tdu_xyzs + gt.trun.du_xyz = np.array(tdu_xyzs) - gt.trun.du_tilt = np.zeros(shape=(len(du_ids), 2)) + gt.trun.du_tilt = np.zeros(shape=(len(du_ids), 2), dtype=np.float32) # For now (and for the forseable future) all DU will have the same bin size at the level of the efield simulator. - gt.trun.t_bin_size = [trawefield.t_bin_size] * len(du_ids) + gt.trun.t_bin_size = np.array([trawefield.t_bin_size] * len(du_ids)) gt.trun.site_layout = "star_shape" @@ -518,7 +518,7 @@ def get_tree_du_id_and_xyz(trawefield,shower_core): #trawefield has the antenna positions in array coordinates, cartesian. Origin is at the delcared latitude, longitude and altitude of the site. 
print("Warning: using flat earth approximation for coordinates!.Event:",trawefield.event_number," Core:",shower_core) count = trawefield.draw("du_id:du_x:du_y:du_z", "", "goff") - du_ids = np.array(np.frombuffer(trawefield.get_v1(), dtype=np.float64, count=count)).astype(int) + du_ids = np.array(np.frombuffer(trawefield.get_v1(), dtype=np.float64, count=count)).astype(np.int32) du_xs = np.array(np.frombuffer(trawefield.get_v2(), dtype=np.float64, count=count)).astype(np.float32) du_ys = np.array(np.frombuffer(trawefield.get_v3(), dtype=np.float64, count=count)).astype(np.float32) du_zs = np.array(np.frombuffer(trawefield.get_v4(), dtype=np.float64, count=count)).astype(np.float32) @@ -706,7 +706,7 @@ def rawefield2grandroot(trawefield, gt, ext_trace = None, ext_t_0 = None): if ext_trace is None: gt.tefield.trace = np.moveaxis(np.array([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z]), 0,1) else: - gt.tefield.trace=ext_trace + gt.tefield.trace = ext_trace # gt.tefield.trace_x=ext_trace[:,0,:] # gt.tefield.trace_y=ext_trace[:,1,:] # gt.tefield.trace_z=ext_trace[:,2,:] @@ -727,8 +727,8 @@ def rawefield2grandroot(trawefield, gt, ext_trace = None, ext_t_0 = None): tempseconds[maskplus]+=np.int64(1) tempnanoseconds[maskminus]+=np.int64(1e9) tempseconds[maskminus]-=np.int64(1) - gt.tefield.du_nanoseconds=tempnanoseconds - gt.tefield.du_seconds=tempseconds + gt.tefield.du_nanoseconds=tempnanoseconds.astype(np.uint32) + gt.tefield.du_seconds=tempseconds.astype(np.uint32) #store tpre in samples is the expected trigger position in sims. #This can be furter enforced with the --trigger_time_ns switch. All dus have the same value at the efield generator level From 34318aa2ac16124d6dae0c7c4b517272637f6e7f Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Tue, 23 Apr 2024 13:54:49 +0200 Subject: [PATCH 32/85] vector filling with += for pure np arrays now done with my C++ funciton to avoid memory leak --- grand/dataio/root_trees.py | 40 +++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index fc8387a8..19d8e559 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -17,6 +17,9 @@ from collections import defaultdict +# Load the C++ macros for vector filling from numpy arrays +ROOT.gROOT.LoadMacro(os.path.dirname(os.path.realpath(__file__))+"/vector_filling.C") + # Conversion between numpy dtype and array.array typecodes numpy_to_array_typecodes = {np.dtype('int8'): 'b', np.dtype('int16'): 'h', np.dtype('int32'): 'i', np.dtype('int64'): 'q', np.dtype('uint8'): 'B', np.dtype('uint16'): 'H', np.dtype('uint32'): 'I', np.dtype('uint64'): 'Q', np.dtype('float32'): 'f', np.dtype('float64'): 'd', np.dtype('complex64'): 'F', np.dtype('complex128'): 'D', np.dtype('int16'): 'h'} # numpy_to_array_typecodes = {np.int8: 'b', np.int16: 'h', np.int32: 'i', np.int64: 'q', np.uint8: 'B', np.uint16: 'H', np.uint32: 'I', np.uint64: 'Q', np.float32: 'f', np.float64: 'd', np.complex64: 'F', np.complex128: 'D', "int8": 'b', "int16": 'h', "int32": 'i', "int64": 'q', "uint8": 'B', "uint16": 'H', "uint32": 'I', "uint64": 'Q', "float32": 'f', "float64": 'd', "complex64": 'F', "complex128": 'D'} @@ -24,6 +27,8 @@ # Conversion between C++ type and array.array typecodes cpp_to_array_typecodes = {'char': 'b', 'short': 'h', 'int': 'i', 'long long': 'q', 'unsigned char': 'B', 'unsigned short': 'H', 'unsigned int': 'I', 'unsigned long long': 'Q', 'float': 'f', 'double': 'd', 'string': 'u'} 
+cpp_to_numpy_typecodes = {'char': np.dtype('int8'), 'short': np.dtype('int16'), 'int': np.dtype('int32'), 'long long': np.dtype('int64'), 'unsigned char': np.dtype('uint8'), 'unsigned short': np.dtype('uint16'), 'unsigned int': np.dtype('uint32'), 'unsigned long long': np.dtype('uint64'), 'float': np.dtype('float32'), 'double': np.dtype('float64'), 'string': np.dtype('U')} + # This import changes in Python 3.10 if sys.version_info.major >= 3 and sys.version_info.minor < 10: from collections import MutableSequence @@ -135,24 +140,33 @@ def __repr__(self): def __iadd__(self, value): # function modified by Jelena to fix the negative issue, use at own risk try: - if (isinstance(value, list) and self.basic_vec_type.split()[-1] == "float") or isinstance(value, np.ndarray): - if self.ndim == 1: value = array.array(cpp_to_array_typecodes[self.basic_vec_type], value) - if self.ndim == 2: value = [array.array(cpp_to_array_typecodes[self.basic_vec_type], el) for el in value] - if self.ndim == 3: value = [[array.array(cpp_to_array_typecodes[self.basic_vec_type], el1) for el1 in el] for el in value] + if isinstance(value, np.ndarray): + if self.ndim == 1: ROOT.fill_vec_1D[self.basic_vec_type](value, np.array(value.shape).astype(np.int32), self._vector) + if self.ndim == 2: ROOT.fill_vec_2D[self.basic_vec_type](value, np.array(value.shape).astype(np.int32), self._vector) + if self.ndim == 3: ROOT.fill_vec_3D[self.basic_vec_type](value, np.array(value.shape).astype(np.int32), self._vector) else: - value = list(value) - - # The list needs to have simple Python types - ROOT.vector does not accept numpy types - try: - self._vector += value - except TypeError: - # Slow conversion to simple types. No better idea for now - if self.basic_vec_type.split()[-1] in ["int", "long", "short", "char", "float"]: + if (isinstance(value, list) and self.basic_vec_type.split()[-1] == "float"): if self.ndim == 1: value = array.array(cpp_to_array_typecodes[self.basic_vec_type], value) if self.ndim == 2: value = [array.array(cpp_to_array_typecodes[self.basic_vec_type], el) for el in value] if self.ndim == 3: value = [[array.array(cpp_to_array_typecodes[self.basic_vec_type], el1) for el1 in el] for el in value] + elif not isinstance(value, StdVectorList): + value = list(value) - self._vector += value + # The list needs to have simple Python types - ROOT.vector does not accept numpy types + try: + if isinstance(value, StdVectorList): + # ToDo: Maybe faster than +=, but... to be checked + self._vector.assign(value._vector) + else: + self._vector += value + except TypeError: + # Slow conversion to simple types. No better idea for now + if self.basic_vec_type.split()[-1] in ["int", "long", "short", "char", "float"]: + if self.ndim == 1: value = array.array(cpp_to_array_typecodes[self.basic_vec_type], value) + if self.ndim == 2: value = [array.array(cpp_to_array_typecodes[self.basic_vec_type], el) for el in value] + if self.ndim == 3: value = [[array.array(cpp_to_array_typecodes[self.basic_vec_type], el1) for el1 in el] for el in value] + + self._vector += value except OverflowError: # Handle the OverflowError here, e.g., by logging a message or taking an appropriate action. 
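[Editor's aside between patches, not part of the series: the hunk above dispatches to
templated C++ helpers through PyROOT's template syntax, ROOT.fill_vec_1D[type](...). A
minimal self-contained sketch of that mechanism, assuming only ROOT with PyROOT and the
vector_filling.C macro introduced in the next patch; names and values are illustrative:

    import numpy as np
    import ROOT

    # Compile and load the macro so fill_vec_1D/2D/3D appear under the ROOT namespace
    ROOT.gROOT.LoadMacro("vector_filling.C")

    vec = ROOT.std.vector["float"]()
    arr = np.arange(6, dtype=np.float32)
    # Indexing with a C++ type name instantiates the template for that type; the
    # contiguous numpy buffer is passed as Type* and the shape array as int*
    ROOT.fill_vec_1D["float"](np.ascontiguousarray(arr),
                              np.array(arr.shape, dtype=np.int32), vec)
    assert list(vec) == arr.tolist()

This replaces the per-element conversion that "vector += list" performs in PyROOT, which
the commit message above says leaked memory.]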
From d2ca70ea42c9e18f0d9f28f057b9b52901ef82de Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Tue, 23 Apr 2024 13:55:23 +0200
Subject: [PATCH 33/85] vector filling procedures in C++

---
 grand/dataio/vector_filling.C | 43 +++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 grand/dataio/vector_filling.C

diff --git a/grand/dataio/vector_filling.C b/grand/dataio/vector_filling.C
new file mode 100644
index 00000000..50d55859
--- /dev/null
+++ b/grand/dataio/vector_filling.C
@@ -0,0 +1,43 @@
+// Methods for filling the vectors from NumPy arrays
+
+template <typename Type>
+int fill_vec_1D(Type *arr, int *shape, vector<Type> *v)
+{
+  v->resize(shape[0]);
+  for(int i=0; i<shape[0]; i++)
+  {
+    (*v)[i] = arr[i];
+  }
+  return 0;
+}
+
+template <typename Type>
+int fill_vec_2D(Type *arr, int *shape, vector<vector<Type>> *v)
+{
+  v->resize(shape[0], vector<Type>(shape[1]));
+  for(int i=0; i<shape[0]; i++)
+  {
+    for(int j=0; j<shape[1]; j++)
+    {
+      (*v)[i][j] = arr[i*shape[1]+j];
+    }
+  }
+  return 0;
+}
+
+template <typename Type>
+int fill_vec_3D(Type *arr, int *shape, vector<vector<vector<Type>>> *v)
+{
+  v->resize(shape[0], vector<vector<Type>>(shape[1], vector<Type>(shape[2])));
+  for(int i=0; i<shape[0]; i++)
+  {
+    for(int j=0; j<shape[1]; j++)
+    {
+      for(int k=0; k<shape[2]; k++)
+      {
+        (*v)[i][j][k] = arr[(i*shape[1]+j)*shape[2]+k];
+      }
+    }
+  }
+  return 0;
+}
From <unknown> Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Wed, 24 Apr 2024 14:34:23 +0200
Subject: [PATCH 34/85] Ensuring that numpy arrays += to a vector are contiguous

---
 grand/dataio/root_trees.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py
index 19d8e559..bb5c16c6 100644
--- a/grand/dataio/root_trees.py
+++ b/grand/dataio/root_trees.py
@@ -141,9 +141,9 @@ def __iadd__(self, value):
         # function modified by Jelena to fix the negative issue, use at own risk
         try:
             if isinstance(value, np.ndarray):
-                if self.ndim == 1: ROOT.fill_vec_1D[self.basic_vec_type](value, np.array(value.shape).astype(np.int32), self._vector)
-                if self.ndim == 2: ROOT.fill_vec_2D[self.basic_vec_type](value, np.array(value.shape).astype(np.int32), self._vector)
-                if self.ndim == 3: ROOT.fill_vec_3D[self.basic_vec_type](value, np.array(value.shape).astype(np.int32), self._vector)
+                if self.ndim == 1: ROOT.fill_vec_1D[self.basic_vec_type](np.ascontiguousarray(value), np.array(value.shape).astype(np.int32), self._vector)
+                if self.ndim == 2: ROOT.fill_vec_2D[self.basic_vec_type](np.ascontiguousarray(value), np.array(value.shape).astype(np.int32), self._vector)
+                if self.ndim == 3: ROOT.fill_vec_3D[self.basic_vec_type](np.ascontiguousarray(value), np.array(value.shape).astype(np.int32), self._vector)
             else:
                 if (isinstance(value, list) and self.basic_vec_type.split()[-1] == "float"):
                     if self.ndim == 1: value = array.array(cpp_to_array_typecodes[self.basic_vec_type], value)
                     if self.ndim == 2: value = [array.array(cpp_to_array_typecodes[self.basic_vec_type], el) for el in value]
                     if self.ndim == 3: value = [[array.array(cpp_to_array_typecodes[self.basic_vec_type], el1) for el1 in el] for el in value]
From 2a2ee0c641cb0d703f6d59eced88954a989a2b75 Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Wed, 24 Apr 2024 17:16:26 +0200
Subject: [PATCH 35/85] Much faster trace shifting, writing of trees after the loop for speed, other smal speedups and fixes

---
 sim2root/Common/sim2root.py | 132 +++++++++++++++++++++---------------
 1 file changed, 77 insertions(+), 55 deletions(-)

diff --git a/sim2root/Common/sim2root.py b/sim2root/Common/sim2root.py
index 75220e55..51fe6362 100755
--- a/sim2root/Common/sim2root.py
+++ b/sim2root/Common/sim2root.py
@@ -7,11 +7,15 @@
 from types import SimpleNamespace
 import time
 from pathlib import Path
+
+import numpy as np
+
 from grand.dataio.root_trees import * # this is home/grand/grand (at least in docker) or ../../grand
 import raw_root_trees as RawTrees # this is here in Common
 import grand.manage_log as mlg
 import matplotlib.pyplot as plt
-from scipy.ndimage.interpolation import shift #to shift the time trance for the trigger simulation
+# from scipy.ndimage.interpolation import shift #to shift the time trance for the trigger simulation
+# from scipy.ndimage import shift #to shift the
time trance for the trigger simulation # specific logger definition for script because __mane__ is "__main__" ! logger = mlg.get_logger_for_script(__file__) @@ -127,38 +131,40 @@ def adjust_trace_lenght(trace, DesiredTpre, DesiredTpost, CurrentTpre, CurrentTp def adjust_trigger(trace, CurrentT0s, TPre, TimeBinSize): - #now lets process a "trigger" algorithm that will modify where the trace is located. - #we asume trace is windowed between CurrentT0-Tpre and CurrentT0+tpost - #trace will have dim (du,3 or 4,tbins) - - #totl will have dim du,tbins - ttotal = np.linalg.norm(trace, axis=1) #make the modulus (the 1 is to remove the time) - #trigger_index will have dim du - trigger_index = np.argmax(ttotal,axis=1) #look where the maximum happens - - #this definition of the trigger times makes the trigger be at the begining of the bin where the maximum is, becouse the index starts at 0. This is compatible with the definition of the window that we give. - trigger_time=trigger_index*TimeBinSize - #If we need to shift the trigger time (the trigger time needs to be equal to tpre - DeltaT=TPre - trigger_time - ShiftBins=(DeltaT/TimeBinSize).astype(int,copy=False) - - #this is to assure that, if the maximum is found too late in the trace, we dont move outside of the original time window (normally, peaks are late in the time window, if you set the time window correctly). - mask=ShiftBins < -TPre/TimeBinSize - if mask.any(): - logger.error("some elements needed to be shifted only up to the limt, tpre was too small") - ShiftBins[mask]= int(-TPre/TimeBinSize) - - #we cannot use use np.roll, but roll makes re-appear the end of the trace at the begining if we roll to much - #we cannot use scipy shift, that lets you state what value to put for the places you roll, on a 3D array - - #TODO: There must be a better way to do this without the for loop, but i lost a morning to it and i dont have the time to develop it now. Search for strided_indexing_roll on the web for inspiration. - for du_idx in range(trace.shape[0]): - trace[du_idx]=shift(trace[du_idx],(0,ShiftBins[du_idx]),cval=0) - - #we get the correct t0 - T0s=CurrentT0s-ShiftBins*TimeBinSize + # now lets process a "trigger" algorithm that will modify where the trace is located. + # we asume trace is windowed between CurrentT0-Tpre and CurrentT0+tpost + # trace will have dim (du,3 or 4,tbins) + + # totl will have dim du,tbins + ttotal = np.linalg.norm(trace, axis=1) # make the modulus (the 1 is to remove the time) + # trigger_index will have dim du + trigger_index = np.argmax(ttotal, axis=1) # look where the maximum happens + + # this definition of the trigger times makes the trigger be at the begining of the bin where the maximum is, becouse the index starts at 0. This is compatible with the definition of the window that we give. + trigger_time = trigger_index * TimeBinSize + # If we need to shift the trigger time (the trigger time needs to be equal to tpre + DeltaT = TPre - trigger_time + ShiftBins = (DeltaT / TimeBinSize).astype(int, copy=False) + + # this is to assure that, if the maximum is found too late in the trace, we dont move outside of the original time window (normally, peaks are late in the time window, if you set the time window correctly). 
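#[Editor's example, not in the patch: with TPre = 1000 ns and TimeBinSize = 2 ns, a trace
#can be moved left by at most TPre/TimeBinSize = 500 bins; any ShiftBins value below -500
#is clamped to exactly -500 by the mask below, keeping the window inside the original trace.]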
+ mask = ShiftBins < -TPre / TimeBinSize + if mask.any(): + logger.error("some elements needed to be shifted only up to the limt, tpre was too small") + ShiftBins[mask] = int(-TPre / TimeBinSize) + + # we cannot use use np.roll, but roll makes re-appear the end of the trace at the begining if we roll to much + # we cannot use scipy shift, that lets you state what value to put for the places you roll, on a 3D array + + # TODO: There must be a better way to do this without the for loop, but i lost a morning to it and i dont have the time to develop it now. Search for strided_indexing_roll on the web for inspiration. + # for du_idx in range(trace.shape[0]): + # trace[du_idx] = shift(trace[du_idx], (0, ShiftBins[du_idx]), cval=0) + for du_idx in range(trace.shape[0]): + trace_shift(trace[du_idx], ShiftBins[du_idx]) + + # we get the correct t0 + T0s = CurrentT0s - ShiftBins * TimeBinSize - return T0s,trace + return T0s, trace def convert_date(date_str): # Convert input string to a struct_time object @@ -203,7 +209,7 @@ def main(): # for file_num, filename in enumerate(clargs.filename): for file_num, filename in enumerate(file_list): - logger.info(f"Working on input file {filename}, {file_num}/{len(file_list)}") + logger.info(f"Working on input file {filename}, {file_num+1}/{len(file_list)}") # Output filename for GRAND Trees # if clargs.output_filename is None: @@ -223,7 +229,7 @@ def main(): trawshower.get_entry(i) trawefield.get_entry(i) trawmeta.get_entry(i) - + OriginalTpre=trawefield.t_pre OriginalTpost=trawefield.t_post DesiredTpre=trawefield.t_pre @@ -240,7 +246,6 @@ def main(): #we modify this becouse it needs to be stored in the run file on the first event. trawefield.t_pre=DesiredTpre trawefield.t_post=DesiredTpost - # If the first entry on the first file or dealing with star shape sim if (file_num==0 and i==0) or clargs.star_shape: @@ -283,16 +288,16 @@ def main(): gt.trunshowersim.fill() gt.trunefieldsim.fill() # gt.trun.write() - gt.trunshowersim.write() - gt.trunefieldsim.write() # Convert the RawShowerTree entries rawshower2grandroot(trawshower, gt) # Convert the RawMetaTree entries - (this goes before the efield becouse the efield needs the info on the second and nanosecond) rawmeta2grandroot(trawmeta, gt) - - # Change the trace lenght as specified in the comand line - trace = np.moveaxis(np.array([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z]), 0,1).astype(np.float32) + + # Change the trace lenght as specified in the comand line + # trace = np.moveaxis(np.array([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z]), 0,1).astype(np.float32) + # Slightly faster than the above + trace = np.stack([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z], 1, dtype=np.float32) ext_t_0, trace=adjust_trace(trace, trawefield.t_0, OriginalTpre, OriginalTpost, DesiredTpre, DesiredTpost,trawefield.t_bin_size) # trawefield.trace_x=trace[:,0,:] @@ -301,7 +306,7 @@ def main(): # Convert the RawEfieldTree entries rawefield2grandroot(trawefield, gt, ext_trace=trace, ext_t_0=ext_t_0) - + # Overwrite the run number if specified on command line if ext_run_number is not None: gt.trun.run_number = ext_run_number @@ -340,8 +345,7 @@ def main(): gt.tshower.fill() gt.tshowersim.fill() gt.tefield.fill() - - + # For the first file, get all the file's events du ids and pos if file_num==0: du_ids, du_xyzs = get_tree_du_id_and_xyz(trawefield,trawshower.shower_core_pos) @@ -366,12 +370,7 @@ def main(): # Fill and write the TRun gt.trun.fill() - gt.trun.write() - # Write the event 
trees - gt.tshower.write() - gt.tshowersim.write() - gt.tefield.write() # gt.tshower.first_interaction = trawshower.first_interaction trawmeta.close_file() @@ -401,17 +400,26 @@ def main(): # Assign the du ids and positions to the trun tree gt.trun.du_id = du_ids gt.trun.du_xyz = du_xyzs - gt.trun.du_tilt = np.zeros(shape=(len(du_ids), 2)) + gt.trun.du_tilt = np.zeros(shape=(len(du_ids), 2), dtype=np.float32) #For now (and for the forseable future) all DU will have the same bin size at the level of the efield simulator. gt.trun.t_bin_size = [trawefield.t_bin_size]*len(du_ids) # Fill and write the TRun gt.trun.fill() - gt.trun.write() - - - # Rename the created files to appropriate names + # gt.trun.write() + # gt.trunshowersim.write() + # gt.trunefieldsim.write() + + # Write the event trees + gt.tshower.write() + gt.tshowersim.write() + gt.tefield.write() + gt.trun.write() + gt.trunshowersim.write() + gt.trunefieldsim.write() + + # Rename the created files to appropriate names print("Renaming files to proper file names") rename_files(clargs, out_dir_name, start_event_number, end_event_number, start_run_number) @@ -531,7 +539,7 @@ def get_tree_du_id_and_xyz(trawefield,shower_core): # Stack x/y/z together and leave only the ones for unique du_ids du_xyzs = np.column_stack([du_xs, du_ys, du_zs])[unique_dus_idx] - return np.asarray(du_ids), np.asarray(du_xyzs) + return np.asarray(du_ids, dtype=np.int32), np.asarray(du_xyzs, dtype=np.float32) # Convert the RawShowerTree entries @@ -704,7 +712,7 @@ def rawefield2grandroot(trawefield, gt, ext_trace = None, ext_t_0 = None): ## Efield trace in X,Y,Z direction if ext_trace is None: - gt.tefield.trace = np.moveaxis(np.array([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z]), 0,1) + gt.tefield.trace = np.moveaxis(np.array([trawefield.trace_x, trawefield.trace_y, trawefield.trace_z]), 0,1).astype(np.float32) else: gt.tefield.trace = ext_trace # gt.tefield.trace_x=ext_trace[:,0,:] @@ -809,6 +817,20 @@ def rename_files(clargs, path, start_event_number, end_event_number, run_number) print(f"Could not find a free filename for {fn_in} until serial number 1000. Please clean up some files!") exit(0) +# Simple shifting of a single x,y,z trace +def trace_shift(arr, shift): + # Shift the array right + if shift>0: + arr[:,shift:]=arr[:,:-shift] + arr[:,:shift]=0 + # Shift the array left + elif shift<0: + arr[:,:shift]=arr[:,-shift:] + arr[:,shift:]=0 + # No shift + else: + return arr + if __name__ == '__main__': main() From cab48e06909e96ca15e5925a60f74f2b3ae4229b Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Wed, 24 Apr 2024 17:21:59 +0200 Subject: [PATCH 36/85] Skipping files with no or empty trees --- sim2root/Common/sim2root.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sim2root/Common/sim2root.py b/sim2root/Common/sim2root.py index 51fe6362..bf123782 100755 --- a/sim2root/Common/sim2root.py +++ b/sim2root/Common/sim2root.py @@ -222,9 +222,15 @@ def main(): trawefield = RawTrees.RawEfieldTree(filename) trawmeta = RawTrees.RawMetaTree(filename) + nentries = trawshower.get_entries() + + # Skip files with no or empty trees + if nentries==0 or trawefield.get_entries()==0 or trawmeta.get_entries()==0: + logger.warning("No entries or one of the rawroot trees does not exist. Skipping this file.") + continue + # Loop through entries - assuming same number of entries in each tree # ToDo: this should be a tree iterator through one tree and getting the other through friends. Need to make friends working... 
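#[Editor's sketch, not in the patch: with bare ROOT TTrees, "friends" would let a single
#index drive all three trees, e.g. tshower_tree.AddFriend(tefield_tree) followed by
#tshower_tree.AddFriend(tmeta_tree); after that, one tshower_tree.GetEntry(i) also loads
#the matching entry of each friend, replacing the three parallel get_entry(i) calls below.
#The tree handle names here are hypothetical.]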
- nentries = trawshower.get_entries() for i in range(nentries): trawshower.get_entry(i) trawefield.get_entry(i) From 4dbb76724138700f7d55f44fcffadc8b7e02475a Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Thu, 2 May 2024 23:31:18 +0200 Subject: [PATCH 37/85] Skipping reinitialising trees for non-first iterations in EventList --- grand/grandlib_classes/grandlib_classes.py | 157 +++++++++++++-------- 1 file changed, 99 insertions(+), 58 deletions(-) diff --git a/grand/grandlib_classes/grandlib_classes.py b/grand/grandlib_classes/grandlib_classes.py index 3d766adf..70cb8b54 100644 --- a/grand/grandlib_classes/grandlib_classes.py +++ b/grand/grandlib_classes/grandlib_classes.py @@ -419,7 +419,7 @@ def origin_geoid(self, v): self._origin_geoid = CartesianRepresentation(x=v[0], y=v[1], z=v[2]) ## Fill this event from trees - def fill_event_from_trees(self, event_number=None, run_number=None, entry_number=None, simshower=False, use_trawvoltage=False, trawvoltage_channels=[0,1,2]): + def fill_event_from_trees(self, event_number=None, run_number=None, entry_number=None, simshower=False, use_trawvoltage=False, trawvoltage_channels=[0,1,2], init_trees=True): """Fill this event from trees :param simshower: how to treat the TShower existing in the file, as sim values or reconstructed values :type simshower: bool @@ -453,81 +453,119 @@ def fill_event_from_trees(self, event_number=None, run_number=None, entry_number # *** Check what TTrees are available and fill according to their availability - # Check the Run tree existence - if trun := self.file_trun.Get("trun"): - self.trun = TRun(_tree=trun) + # If initialising trees requested + if init_trees: + # Check the Run tree existence + if trun := self.file_trun.Get("trun"): + self.trun = TRun(_tree=trun) + else: + print("No Run tree. Run information will not be available.") + # Make trun really None + self.trun = None + + # If self.trun was successfully initialised + if self.trun is not None: # Fill part of the event from trun ret = self.fill_event_from_runtree(run_entry_number=run_entry_number) - if ret: print("Run information loaded.") - else: print("No Run tree. Run information will not be available.") - else: - print("No Run tree. Run information will not be available.") - # Make trun really None - self.trun = None + if ret: + print("Run information loaded.") + else: + print("No Run tree. Run information will not be available.") if self.file_tvoltage: # Use standard voltage tree if not use_trawvoltage: - # Check the Voltage tree existence - if tvoltage := self.file_tvoltage.Get("tvoltage"): - self.tvoltage = TVoltage(_tree=tvoltage) + # If initialising trees requested + if init_trees: + # Check the Voltage tree existence + if tvoltage := self.file_tvoltage.Get("tvoltage"): + self.tvoltage = TVoltage(_tree=tvoltage) + else: + print("No Voltage tree. Voltage information will not be available.") + # Make tvoltage really None + self.tvoltage = None + + # If self.tvoltage was successfully initialised + if self.tvoltage is not None: # Fill part of the event from tvoltage ret = self.fill_event_from_voltage_tree() - if ret: print("Voltage information loaded.") + if ret: + print("Voltage information loaded.") else: print("No Voltage tree. Voltage information will not be available.") # Make tvoltage really None self.tvoltage = None - else: - print("No Voltage tree. 
Voltage information will not be available.") - # Make tvoltage really None - self.tvoltage = None + # Use trawvoltage tree else: - # Check the Voltage tree existence - if tvoltage := self.file_tvoltage.Get("trawvoltage"): - self.tvoltage = TRawVoltage(_tree=tvoltage) + # If initialising trees requested + if init_trees: + # Check the Voltage tree existence + if tvoltage := self.file_tvoltage.Get("trawvoltage"): + self.tvoltage = TRawVoltage(_tree=tvoltage) + else: + print("No TRawVoltage tree. Voltage information will not be available.") + # Make tvoltage really None + self.tvoltage = None + + # If self.tvoltage was successfully initialised + if self.tvoltage is not None: # Fill part of the event from tvoltage ret = self.fill_event_from_voltage_tree(use_trawvoltage=use_trawvoltage, trawvoltage_channels=trawvoltage_channels) - if ret: print("Voltage information (from TRawVoltage) loaded.") + if ret: + print("Voltage information (from TRawVoltage) loaded.") else: print("No TRawVoltage tree. Voltage information will not be available.") # Make tvoltage really None self.tvoltage = None - else: - print("No TRawVoltage tree. Voltage information will not be available.") - # Make tvoltage really None - self.tvoltage = None - # Check the Efield file existence if self.file_tefield: - # Check the Efield tree existence - if tefield := self.file_tefield.Get("tefield"): - self.tefield = TEfield(_tree=tefield) + # If initialising trees requested + if init_trees: + # Check the Efield tree existence + if tefield := self.file_tefield.Get("tefield"): + self.tefield = TEfield(_tree=tefield) + else: + print("No Efield tree. Efield information will not be available.") + # Make tefield really None + self.tefield = None + + # If self.tefield was successfully initialised + if self.tefield is not None: # Fill part of the event from tefield ret = self.fill_event_from_efield_tree() - if ret: print("Efield information loaded.") + if ret: + print("Efield information loaded.") else: print("No Efield tree. Efield information will not be available.") # Make tefield really None self.tefield = None - else: - print("No Efield tree. Efield information will not be available.") - # Make tefield really None - self.tefield = None # Check the Shower file existence if self.file_tshower: - # Check the Shower tree existence - if tshower := self.file_tshower.Get("tshower"): - if simshower: - self.tsimshower = TShower(_tree=tshower) + # If initialising trees requested + if init_trees: + # Check the Shower tree existence + if tshower := self.file_tshower.Get("tshower"): + if simshower: + self.tsimshower = TShower(_tree=tshower) + else: + self.tshower = TShower(_tree=tshower) else: - self.tshower = TShower(_tree=tshower) + print("No Shower tree. Shower information will not be available.") + # Make tshower really None + if simshower: + self.tsimshower = None + else: + self.tshower = None + + # If self.t(sim)shower was successfully initialised + if (simshower and self.tsimshower is not None) or (not simshower and self.thower is not None): # Fill part of the event from tshower ret = self.fill_event_from_shower_tree(simshower) - if ret: print("Shower information loaded.") + if ret: + print("Shower information loaded.") else: print("No Shower tree. Shower information will not be available.") # Make tshower really None @@ -535,30 +573,29 @@ def fill_event_from_trees(self, event_number=None, run_number=None, entry_number self.tsimshower = None else: self.tshower = None - else: - print("No Shower tree. 
Shower information will not be available.") - # Make tshower really None - if simshower: - self.tsimshower = None - else: - self.tshower = None # Check the sim Shower file existence if self.file_tsimshower: - # Check the SimShower tree existence - if tsimshower := self.file_tsimshower.Get("tshower"): - self.tsimshower = TShower(_tree=tsimshower) + # If initialising trees requested + if init_trees: + # Check the SimShower tree existence + if tsimshower := self.file_tsimshower.Get("tshower"): + self.tsimshower = TShower(_tree=tsimshower) + else: + print("No Simulated Shower tree. Simulated Shower information will not be available.") + # Make tsimshower really None + self.tsimshower = None + + # If self.tsimshower was successfully initialised + if self.tsimshower is not None: # Fill part of the event from tshower ret = self.fill_event_from_shower_tree(True) - if ret: print("Simulated shower information loaded.") + if ret: + print("Simulated shower information loaded.") else: print("No Simulated shower tree. Simulated shower information will not be available.") # Make tsimshower really None self.tsimshower = None - else: - print("No Simulated Shower tree. Simulated Shower information will not be available.") - # Make tsimshower really None - self.tsimshower = None self.fill_antennas() @@ -1038,6 +1075,7 @@ def __init__(self, inp_name, **kwargs): self.init_kwargs = kwargs self.event = Event() + self.init_trees = True def get_event(self, event_number=None, run_number=None, entry_number=None, fill_event=True, **kwargs): """Get specified event from the event list""" @@ -1078,9 +1116,12 @@ def get_event(self, event_number=None, run_number=None, entry_number=None, fill_ if fill_event: # Overwrite the init kwargs with kwargs given here if len(kwargs)>0: - e.fill_event_from_trees(**kwargs) + e.fill_event_from_trees(init_trees=self.init_trees, **kwargs) else: - e.fill_event_from_trees(**self.init_kwargs) + e.fill_event_from_trees(init_trees=self.init_trees, **self.init_kwargs) + + # Don't init trees anymore + self.init_trees = False return e From bd91bb8030c943eee05ba176613ad1fe88b30fd9 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Tue, 14 May 2024 10:39:49 +0200 Subject: [PATCH 38/85] TEMPORARY: StdVectorList return np array of arrays if possible --- grand/dataio/root_trees.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index bb5c16c6..3af7628b 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -91,14 +91,17 @@ def __getitem__(self, index): # If this is a vector of vectors, convert a subvector to list for the return if len(self._vector) > 0: if "std.vector" in str(type(self._vector[index])): - if self.ndim == 2: - return list(self._vector[index]) - elif self.ndim == 3: - return [list(el) for el in self._vector[index]] - elif self.ndim == 4: - return [list(el) for el1 in self._vector[index] for el in el1] - else: - return self._vector[index] + try: + return np.array(self._vector[index]) + except: + if self.ndim == 2: + return list(self._vector[index]) + elif self.ndim == 3: + return [list(el) for el in self._vector[index]] + elif self.ndim == 4: + return [list(el) for el1 in self._vector[index] for el in el1] + else: + return self._vector[index] else: return self._vector[index] else: From 73e3fcf6fa579f81b8a23b4779145fff0b9617dc Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Fri, 31 May 2024 12:24:06 +0200 Subject: [PATCH 39/85] Fixed a typo "thower" --- 
grand/grandlib_classes/grandlib_classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grand/grandlib_classes/grandlib_classes.py b/grand/grandlib_classes/grandlib_classes.py index 70cb8b54..0250195b 100644 --- a/grand/grandlib_classes/grandlib_classes.py +++ b/grand/grandlib_classes/grandlib_classes.py @@ -561,7 +561,7 @@ def fill_event_from_trees(self, event_number=None, run_number=None, entry_number self.tshower = None # If self.t(sim)shower was successfully initialised - if (simshower and self.tsimshower is not None) or (not simshower and self.thower is not None): + if (simshower and self.tsimshower is not None) or (not simshower and self.tshower is not None): # Fill part of the event from tshower ret = self.fill_event_from_shower_tree(simshower) if ret: From 7c77b694b7d71aa644d36d17c126b9d36b5b7f8d Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Fri, 31 May 2024 14:28:16 +0200 Subject: [PATCH 40/85] Now automaticaly loading trawvoltage if tvoltage is not available --- grand/grandlib_classes/grandlib_classes.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/grand/grandlib_classes/grandlib_classes.py b/grand/grandlib_classes/grandlib_classes.py index 0250195b..c00fc88b 100644 --- a/grand/grandlib_classes/grandlib_classes.py +++ b/grand/grandlib_classes/grandlib_classes.py @@ -481,7 +481,7 @@ def fill_event_from_trees(self, event_number=None, run_number=None, entry_number if tvoltage := self.file_tvoltage.Get("tvoltage"): self.tvoltage = TVoltage(_tree=tvoltage) else: - print("No Voltage tree. Voltage information will not be available.") + # print("No Voltage tree. Voltage information will not be available.") # Make tvoltage really None self.tvoltage = None @@ -492,19 +492,20 @@ def fill_event_from_trees(self, event_number=None, run_number=None, entry_number if ret: print("Voltage information loaded.") else: - print("No Voltage tree. Voltage information will not be available.") + # print("No Voltage tree. Voltage information will not be available.") # Make tvoltage really None self.tvoltage = None - # Use trawvoltage tree - else: + # Use trawvoltage tree if requested or tvoltage tree not found + if use_trawvoltage or self.tvoltage==None: # If initialising trees requested if init_trees: # Check the Voltage tree existence if tvoltage := self.file_tvoltage.Get("trawvoltage"): self.tvoltage = TRawVoltage(_tree=tvoltage) + use_trawvoltage = True else: - print("No TRawVoltage tree. Voltage information will not be available.") + print("No Voltage or TRawVoltage tree. Voltage information will not be available.") # Make tvoltage really None self.tvoltage = None @@ -515,7 +516,7 @@ def fill_event_from_trees(self, event_number=None, run_number=None, entry_number if ret: print("Voltage information (from TRawVoltage) loaded.") else: - print("No TRawVoltage tree. Voltage information will not be available.") + print("No Voltage or TRawVoltage tree. 
Voltage information will not be available.") # Make tvoltage really None self.tvoltage = None From babd3e6ca94296b48758cfaf343892f13a1a9266 Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 6 Jun 2024 09:41:50 +0200 Subject: [PATCH 41/85] Conversion error notification --- scripts/transfers/bintoroot.bash | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index 6f2dca45..73c04f4b 100644 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -32,7 +32,7 @@ source env/setup.sh cd /pbs/home/p/prod_grand/scripts/transfers - +notify=0 for file in "$@" do echo "converting ${file} to GrandRoot" @@ -51,6 +51,9 @@ do # Convert file ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} conv_status=$? + if [ "$conv_status" -ne 0 ]; then + notify=1 + fi echo $conv_status >> ${logfile} # Register conversion result into the database python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} @@ -58,3 +61,6 @@ do python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root done +if [ "$notify" -ne "0" ]; then + echo "Error in files conversion : " $@ | mail -s "Grand conversion error" fleg@lpnhe.in2p3.fr +fi From e3cde0c9629c4b56d68c50018d534a180e55ffbb Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 6 Jun 2024 09:42:20 +0200 Subject: [PATCH 42/85] Updated with new tables --- granddb/docker/pgsync.yml | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/granddb/docker/pgsync.yml b/granddb/docker/pgsync.yml index 65241d25..4e28026e 100644 --- a/granddb/docker/pgsync.yml +++ b/granddb/docker/pgsync.yml @@ -12,9 +12,11 @@ groups: repository_access: "where id_repository < (select max_value::BIGINT / 2 from pg_sequences where sequencename='repository_id_repository_seq') and id_protocol < (select max_value::BIGINT / 2 from pg_sequences where sequencename='protocol_id_protocol_seq')" provider: "where id_provider < (select max_value::BIGINT / 2 from pg_sequences where sequencename='provider_id_provider_seq')" file: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq')" + file_content: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq')" + dataset: "where id_dataset < (select max_value::BIGINT / 2 from pg_sequences where sequencename='dataset_id_dataset_seq')" + dataset_location: "where id_dataset < (select max_value::BIGINT / 2 from pg_sequences where sequencename='dataset_id_dataset_seq')" file_location: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq') and id_repository < (select max_value::BIGINT / 2 from pg_sequences where sequencename='repository_id_repository_seq')" tree_type: "where id_tree_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tree_type_id_tree_type_seq')" - file_trees: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq') and id_tree_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tree_type_id_tree_type_seq')" modification_software: "where id_modification_software < (select max_value::BIGINT / 2 from pg_sequences where sequencename='modification_software_id_modification_software_seq')" data_source: "where id_data_source < (select max_value::BIGINT / 2 from pg_sequences where 
sequencename='data_source_id_data_source_seq')" data_generator: "where id_data_generator < (select max_value::BIGINT / 2 from pg_sequences where sequencename='data_generator_id_data_generator_seq')" @@ -29,17 +31,14 @@ groups: particule_type: "where id_particule_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='particule_type_id_particule_type_seq')" event_type: "where id_event_type < (select max_value::BIGINT / 2 from pg_sequences where sequencename='event_type_id_event_type_seq')" atmos_model: "where id_atmos_model < (select max_value::BIGINT / 2 from pg_sequences where sequencename='atmos_model_id_atmos_model_seq')" - trun: "where id_trun < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trun_seq')" - trunnoise: "where id_trunnoise < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trunnoise_seq')" - trunshowersim: "where id_trunshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trunshowersim_seq')" - trunefieldsim: "where id_trunefieldsim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trunefieldsim_seq')" - tshower: "where id_tshower < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tshower_seq')" - tshowersim: "where id_tshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tshowersim_seq')" - tvoltage: "where id_tvoltage < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tvoltage_seq')" - trawvoltage: "where id_trawvoltage < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_trawvoltage_seq')" - tadc: "where id_tadc < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tadc_seq')" - tefield: "where id_tefield < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_tefield_seq')" - #run: "where id_run < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_run_seq')" - #event: "where id_event < (select max_value::BIGINT / 2 from pg_sequences where sequencename='event_id_event_seq')" - #file_contains: "where id_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='file_id_file_seq') and id_run < (select max_value::BIGINT / 2 from pg_sequences where sequencename='run_id_run_seq') and id_event < (select max_value::BIGINT / 2 from pg_sequences where sequencename='event_id_event_seq')" - + trun: "where id_trun < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trun_id_trun_seq')" + trunnoise: "where id_trunnoise < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunnoise_id_trunnoise_seq')" + trunshowersim: "where id_trunshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunshowersim_id_trunshowersim_seq')" + trunefieldsim: "where id_trunefieldsim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunfieldsim_id_trunefieldsim_seq')" + trunvoltage: "where id_trunvoltage < (select max_value::BIGINT / 2 from pg_sequences where sequencename='trunvoltage_id_trunvoltage_seq')" + tshower: "where id_tshower < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tshower_id_tshower_seq')" + tshowersim: "where id_tshowersim < (select max_value::BIGINT / 2 from pg_sequences where sequencename='tshowersim_id_tshowersim_seq')" + events: "where id_event < (select max_value::BIGINT / 2 from pg_sequences where sequencename='events_id_event_seq')" + transfer: "where 
id_raw_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='rawfile_id_raw_file_seq')" + rawfile: "where id_raw_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='rawfile_id_raw_file_seq')" + convertion: "where id_raw_file < (select max_value::BIGINT / 2 from pg_sequences where sequencename='rawfile_id_raw_file_seq')" From 86df381a5bc7d8fdb5ae7877eb3a2821adbdaacf Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 6 Jun 2024 09:43:16 +0200 Subject: [PATCH 43/85] No change --- data/download_data_grand.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 data/download_data_grand.py diff --git a/data/download_data_grand.py b/data/download_data_grand.py old mode 100755 new mode 100644 From e87c5092dc3b112ada84ffe57011a79cac90e67b Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 6 Jun 2024 09:46:05 +0200 Subject: [PATCH 44/85] More infos in returned by SearchFile --- granddb/granddblib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/granddb/granddblib.py b/granddb/granddblib.py index 9f0911fa..45fb1050 100644 --- a/granddb/granddblib.py +++ b/granddb/granddblib.py @@ -236,8 +236,8 @@ def SearchFile(self, filename): .all() for record in file: - logger.debug(f"file {record.file.filename} found in repository {record.repository.repository}") - result.append([record.file.filename, record.repository.repository]) + logger.debug(f"file {record.file.filename} found in repository {record.repository.repository} at path {record.file_location.path}") + result.append([record.file.filename, record.repository.repository, record.file_location.path, record.file.id_file]) return result ## @brief For parameter of value in table this function will check if the param is a foreign key and if yes it will From 9302e47a53597cee9a4b248230ae3ba55a0860e6 Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 6 Jun 2024 09:51:45 +0200 Subject: [PATCH 45/85] Removed from git --- granddb/testssh.py | 70 ---------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 granddb/testssh.py diff --git a/granddb/testssh.py b/granddb/testssh.py deleted file mode 100644 index 32ff2ebc..00000000 --- a/granddb/testssh.py +++ /dev/null @@ -1,70 +0,0 @@ -print("toto") -import granddatalib -import psycopg2 -import psycopg2.extras - -import time -# file = 'memo_Runner_step_radio.txt' -#file = 'LyonAiresLibraryRun.ini' -#file = 'granddatalib.py' -#file = 'readme.md' -file = 'prolongation_login.pdf' -#file = 'xrx7830.ppd' - -print("\nStart") - -dm = granddatalib.DataManager('config.ini') - - -file = 'Coarse3.root' -file = 'GRAND.TEST-RAW.20230309203415.001.root' -print("\nGet " + file) -st = time.time() -print("RESULTAT : " + str(dm.get(file))) -et = time.time() -print((et-st)*1000) - - - -file = 'main.py' -print("\nGet in localdir incoming " + file) -st = time.time() -print("RESULTAT : " + str(dm.get(file, "localdir","./incoming",))) -et = time.time() -print((et-st)*1000) - - - - -file = 'td002015_f0003.root' -print("\nGet in localdir " + file) -st = time.time() -print("RESULTAT : " + str(dm.get(file, "localdir"))) -et = time.time() -print((et-st)*1000) - - -file = 'LyonAiresLibraryRun.ini' -print("\nGet in CC " + file) -st = time.time() -print("RESULTAT : " + str(dm.get(file, "CC"))) -et = time.time() -print((et-st)*1000) - -file = 'LyonAiresLibraryRun.ini' -print("\nGet in CC /sps/trend/fleg/" + file) -st = time.time() -print("RESULTAT : " + str(dm.get(file, "CC", "/sps/trend/fleg/"))) -et = time.time() 
-print((et-st)*1000) - - -file = 'Coarse3.root' -print("\nGet in WEB " + file) -st = time.time() -print("RESULTAT : " + str(dm.get(file, "WEB"))) -et = time.time() -print((et-st)*1000) - - - From 98f1255fbb6109e8127a128df01f8759a844e143 Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 6 Jun 2024 09:59:01 +0200 Subject: [PATCH 46/85] Template file for config.ini --- granddb/config.ini.example | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 granddb/config.ini.example diff --git a/granddb/config.ini.example b/granddb/config.ini.example new file mode 100644 index 00000000..580e5b9d --- /dev/null +++ b/granddb/config.ini.example @@ -0,0 +1,52 @@ +; Sample configuration file +; general section must contain a provider value. Files you will register in the database will be identified as provided by user "provider" (should be your name). +[general] +provider = "Fleg" +socket_timeout = 5 + +;Local directories where data should be found. +;If data is not found in local directories but found in a repository, +; it will be copied in the first localdir of the list (e.g. .incoming). +; At least one localdir (incoming) is needed. +; incoming directory must be an absolute path +[directories] +localdir = ["/home/fleg/incoming/"] + +; remote repositories to search for data if not present in local directories +; repositories are given as list : +; Name = [protocol, server, port, [paths]] +; where protocol is one of the supported protocols : local, ssh, http, https +; server and port are the server name and port to access the datas using the defined protocol +; and [paths] is a list of paths to search in the repository +; If credentials are required to access the repository, they should be given in the [credential] section using the same name +; repository CCIN2P3 is already defined in the database (so it's not necessary to define it here), but credentials for it have +; to be supplied in the [credentials] section below +[repositories] +CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]] + + +; Credentials for repositories given as : +; Name = [user, keyfile] +; where name is the name of the repository +; This section allows you to specify your login and optionally a key file to access repositories or connect database though an ssh tunnel etc... +; For security reasons you will not be allowed to provide sensitive information as password in this file. +; If password is required (e.g. to decrypt the key file) it will be asked interactively. +; For ssh protocol, it's highly encouraged to use an ssh-agent (to avoid to have to provide passwd interactively at each run) +; To run an ssh-agent just do : `eval $(ssh-agent)` and `ssh-add .ssh/id_rsa` +[credentials] +CCIN2P3 = ["john",""] +SSHTUNNEL = ["joe",""] + +; database to use (only one database can be defined) +; Name = [server, port, database, login, passwd, sshtunnel_server, sshtunnel_port, sshtunnel_credentials ] +[database] +database = ["localhost", "" ,"granddb", "grandadmin", "password","", "", ""] + + +; The following section is optional. +; it defines the repository where registered files need to go. 
+; repository_name = "path" +; if not provided, the files will go to the incoming section provided in section directories +; Useful only if you want to work on "localdir" but register files in a remote directory +#[registerer] +#CCIN2P3 = "/sps/grand/data/auger/GRANDfiles" From 53d509e0a55dc40c527ef8ef6ce9cfcd896bfb35 Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 6 Jun 2024 09:59:39 +0200 Subject: [PATCH 47/85] Removed config.ini --- granddb/config.ini | 57 ---------------------------------------------- 1 file changed, 57 deletions(-) delete mode 100644 granddb/config.ini diff --git a/granddb/config.ini b/granddb/config.ini deleted file mode 100644 index d009efcf..00000000 --- a/granddb/config.ini +++ /dev/null @@ -1,57 +0,0 @@ -; Sample configuration file -; general section must contain a provider value. Files you will register in the database will be identified as provided by user "provider" (should be your name). -[general] -provider = "Fleg" -socket_timeout = 5 - -;Local directories where data should be found. -;If data is not found in local directories but found in a repository, -; it will be copied in the first localdir of the list (e.g. .incoming). -; At least one localdir (incoming) is needed. -; incoming directory must be an absolute path -[directories] -localdir = ["/home/fleg/DEV/GRAND/incoming/"] - -; remote repositories to search for data if not present in local directories -; repositories are given as list : -; Name = [protocol, server, port, [paths]] -; where protocol is one of the supported protocols : local, ssh, http, https -; server and port are the server name and port to access the datas using the defined protocol -; and [paths] is a list of paths to search in the repository -; If credentials are required to access the repository, they should be given in the [credential] section using the same name -; repository CCIN2P3 is already defined in the database (so it's not necessary to define it here), but credentials for it have -; to be supplied in the [credentials] section below -[repositories] -#CC = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/auger/GRANDfiles","/sps/grand/pengxiong/GP81_interpolation/GP81_1000m/SignalTimeSeries/","/sps/grand/pengxiong/Proton_ROOT/","/sps/trend/fleg/INCOMING"]] -CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/sep2023/GRANDfiles"]] - -#WEB = [ "https", "github.com" , 443, ["/grand-mother/data_challenge1/raw/main/coarse_subei_traces_root/"]] - -; Credentials for repositories given as : -; Name = [user, keyfile] -; where name is the name of the repository -; This section allows you to specify your login and optionally a key file to access repositories or connect database though an ssh tunnel etc... -; For security reasons you will not be allowed to provide sensitive information as password in this file. -; If password is required (e.g. to decrypt the key file) it will be asked interactively. -; For ssh protocol, it's highly encouraged to use an ssh-agent (to avoid to have to provide passwd interactively at each run) -; To run an ssh-agent just do : `eval $(ssh-agent)` and `ssh-add .ssh/id_rsa` -[credentials] -CC = ["legrand",""] -CCIN2P3 = ["legrand",""] -SSHTUNNEL = ["fleg",""] - -; database to use (only one database can be defined) -; Name = [server, port, database, login, passwd, sshtunnel_server, sshtunnel_port, sshtunnel_credentials ] -[database] -#database = ["localhost", "" ,"granddb", "grandadmin", "password","", "", ""] - - -; The following section is optional. 
-; it defines the repository where registered files need to go. -; repository_name = "path" -; if not provided, the files will go to the incoming section provided in section directories -; Useful only if you want to work on "localdir" but register files in a remote directory -#[registerer] -#CCIN2P3 = "/sps/grand/fleg/INCOMING" -#CCIN2P3 = "/sps/grand/data/auger/GRANDfiles" -#CCIN2P3 = "/sps/grand/data/nancay/sep2023/GRANDfiles" \ No newline at end of file From 7c0bc509ea22b335dba1cca50569e647855ba9a1 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Thu, 13 Jun 2024 13:59:43 +0200 Subject: [PATCH 48/85] Added email notification on failed jobs --- scripts/transfers/bintoroot.bash | 10 ++++++++++ scripts/transfers/ccscript.bash | 15 ++++++++++----- scripts/transfers/refresh_mat_views.bash | 0 3 files changed, 20 insertions(+), 5 deletions(-) mode change 100644 => 100755 scripts/transfers/bintoroot.bash mode change 100644 => 100755 scripts/transfers/ccscript.bash mode change 100755 => 100644 scripts/transfers/refresh_mat_views.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash old mode 100644 new mode 100755 index 73c04f4b..ef6f7bc6 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -7,6 +7,9 @@ register_convertion='/pbs/home/p/prod_grand/scripts/transfers/register_convert.p # path to script to register root file into the DB register_root='/pbs/home/p/prod_grand/softs/grand/granddb/register_file_in_db.py' config_file='/pbs/home/p/prod_grand/softs/grand/scripts/transfers/config-prod.ini' +sps_path='/sps/grand/' +irods_path='/grand/home/trirods/' + # Get tag and database file to use while getopts ":d:g:" option; do case $option in @@ -55,6 +58,13 @@ do notify=1 fi echo $conv_status >> ${logfile} + # Put GrandRoot file into irods + sfile=${dest}/${filename%.*}.root + ifile=${sfile/$sps_path/$irods_path} + ipath=${ifile%/*} + imkdir -p "$ipath" + iput "$sfile" "$ifile" + # Register conversion result into the database python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} # Register root file into db diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash old mode 100644 new mode 100755 index 4353f4ff..93793c8c --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -14,6 +14,10 @@ gtot_option="-g1" # number of files to group in same submission nbfiles=3 +# Notification options +mail_user='fleg@lpnhe.in2p3.fr' +mail_type='FAIL,TIME_LIMIT,INVALID_DEPEND' + # manage call from remote restricted ssh command (extract opt parameters) # default args fullscriptpath=${BASH_SOURCE[0]} @@ -68,7 +72,7 @@ fi # Determine root_dir from database path root_dest=${db%/logs*}/GrandRoot/ submit_dir=$(dirname "${db}") -submit_base_name=submit_${tag} +submit_base_name=s${tag} if [ ! 
-d $root_dest ];then mkdir -p $root_dest >/dev/null 2>&1 @@ -81,7 +85,7 @@ fi outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" echo "#!/bin/bash" > $outfile echo "$register_transfers -d $db -t $tag" >> $outfile -jregid=$(sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 8G ${outfile}) +jregid=$(sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 8G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) jregid=$(echo $jregid |awk '{print $NF}') # List files to be converted and group them by bunchs of nbfiles @@ -110,7 +114,7 @@ do echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile #submit script echo "submit $outfile" - jid=$(sbatch --dependency=afterany:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G ${outfile}) + jid=$(sbatch --dependency=afterany:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) jid=$(echo $jid |awk '{print $NF}') convjobs=$convjobs":"$jid done @@ -119,6 +123,7 @@ if [ "$convjobs" = "" ]; then dep="" else dep="--dependency=afterany${convjobs}" + #finally refresh the materialized views in the database + sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G ${refresh_mat_script} --mail-user=${mail_user} --mail-type=${mail_type} fi -#finally refresh the materialized views in the database -sbatch ${dep} -t 0-01:00 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G ${refresh_mat_script} + diff --git a/scripts/transfers/refresh_mat_views.bash b/scripts/transfers/refresh_mat_views.bash old mode 100755 new mode 100644 From 6f71e72a5570dbd0ace162d9162b0a62ec2a86dc Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 18 Jun 2024 10:33:03 +0200 Subject: [PATCH 49/85] Added update of webmonitoring site and push to irods --- scripts/transfers/bintoroot.bash | 2 +- scripts/transfers/ccscript.bash | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index ef6f7bc6..43731bcb 100755 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -63,7 +63,7 @@ do ifile=${sfile/$sps_path/$irods_path} ipath=${ifile%/*} imkdir -p "$ipath" - iput "$sfile" "$ifile" + iput -f "$sfile" "$ifile" # Register conversion result into the database python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 93793c8c..de999a76 100755 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -8,6 +8,7 @@ bin2root='/pbs/home/p/prod_grand/scripts/transfers/bintoroot.bash' register_transfers='/pbs/home/p/prod_grand/scripts/transfers/register_transfer.bash' refresh_mat_script='/pbs/home/p/prod_grand/scripts/transfers/refresh_mat_views.bash' +update_web_script='/sps/grand/prod_grand/monitoring_page/launch_webmonitoring_update.bash' # gtot options for convertion -g1 for gp13 -f2 for gaa gtot_option="-g1" @@ -123,7 +124,8 @@ if [ "$convjobs" = "" ]; then dep="" else dep="--dependency=afterany${convjobs}" - #finally refresh the materialized views in the database + #finally refresh the materialized views in the database 
and the update of monitoring sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G ${refresh_mat_script} --mail-user=${mail_user} --mail-type=${mail_type} + sbatch ${dep} -t 0-00:30 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G ${update_web_script} -mail-user=${mail_user} --mail-type=${mail_type} fi From 789eb2d1ccf1f022c43e73b2bc35e6197ce37ab8 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Thu, 20 Jun 2024 18:29:52 +0200 Subject: [PATCH 50/85] Added env variable to make irods working --- scripts/transfers/bintoroot.bash | 10 ++++++++-- scripts/transfers/ccscript.bash | 14 +++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index 43731bcb..7dbf282f 100755 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -62,8 +62,14 @@ do sfile=${dest}/${filename%.*}.root ifile=${sfile/$sps_path/$irods_path} ipath=${ifile%/*} - imkdir -p "$ipath" - iput -f "$sfile" "$ifile" + echo "imkdir -p $ipath" >> ${logfile} + imkdir -p $ipath >> ${logfile} + echo "iput -f $sfile $ifile" >> ${logfile} + iput -f $sfile $ifile >> ${logfile} + iput_status=$? + if [ "iput_status" -ne 0 ]; then + notify=1 + fi # Register conversion result into the database python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index de999a76..1f9ee976 100755 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -19,6 +19,18 @@ nbfiles=3 mail_user='fleg@lpnhe.in2p3.fr' mail_type='FAIL,TIME_LIMIT,INVALID_DEPEND' +#Export some env to make irods works +export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/lib::/pbs/software/centos-7-x86_64/irods/4.3.1/lib:/pbs/software/centos-7-x86_64/irods/irods-externals/4.3.1/lib +export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/bin:/opt/software/rfio-hpss/prod/bin:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/bin:/pbs/software/centos-7-x86_64/fs4/prod/bin:/usr/lib64/qt-3.3/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/pbs/software/centos-7-x86_64/suptools/prod/bin:/opt/ccin2p3/bin:/pbs/software/centos-7-x86_64/irods/utils:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:. 
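+# [Editor's note, not part of the patch: these exports reproduce by hand the environment
+# that "module load DataManagement/irods" would set up (PATH, LD_LIBRARY_PATH, plugin and
+# module-system variables), because the Slurm jobs submitted by this script cannot rely
+# on that module being loaded.]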
+export _LMFILES_=/pbs/software/centos-7-x86_64/modules/modulefiles/DataManagement/irods/4.3.1 +export IRODS_PLUGINS_HOME=/pbs/software/centos-7-x86_64/irods/4.3.1/lib/plugins +export MODULEPATH=/pbs/software/centos-7-x86_64/modules/modulefiles:/etc/modulefiles +export LOADEDMODULES=DataManagement/irods/4.3.1 +export __MODULES_SHARE_PATH=/pbs/software/centos-7-x86_64/irods/utils:2:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:2 +export TRIRODS_DATA_DIR=/grand/home/trirods/data +export BASH_ENV=/usr/share/Modules/init/bash + + # manage call from remote restricted ssh command (extract opt parameters) # default args fullscriptpath=${BASH_SOURCE[0]} @@ -86,7 +98,7 @@ fi outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" echo "#!/bin/bash" > $outfile echo "$register_transfers -d $db -t $tag" >> $outfile -jregid=$(sbatch -t 0-01:00 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 8G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) +jregid=$(sbatch -t 0-00:10 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 1G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) jregid=$(echo $jregid |awk '{print $NF}') # List files to be converted and group them by bunchs of nbfiles From 516fc3f3f081abc2039d8fecafc7f1d324f0bcd2 Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 20 Jun 2024 23:27:35 +0200 Subject: [PATCH 51/85] typo correction --- scripts/transfers/bintoroot.bash | 69 ++++++++++++++++---------------- 1 file changed, 34 insertions(+), 35 deletions(-) mode change 100755 => 100644 scripts/transfers/bintoroot.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash old mode 100755 new mode 100644 index 7dbf282f..6f7cae26 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -38,43 +38,42 @@ cd /pbs/home/p/prod_grand/scripts/transfers notify=0 for file in "$@" do - echo "converting ${file} to GrandRoot" - filename=$(basename $file) - tmp=${filename#*_} - dateobs=${tmp:0:8} - dest="${root_dest}/${dateobs:0:4}/${dateobs:4:2}" - if [ ! -d $dest ];then - mkdir -p $dest >/dev/null 2>&1 - fi - dirlogs=${root_dest}/../logs - logfile=${dirlogs}/bin2root-${filename%.*} - if [ ! -d $dirlogs ];then - mkdir -p $dirlogs >/dev/null 2>&1 - fi - # Convert file - ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} - conv_status=$? - if [ "$conv_status" -ne 0 ]; then - notify=1 - fi - echo $conv_status >> ${logfile} - # Put GrandRoot file into irods - sfile=${dest}/${filename%.*}.root - ifile=${sfile/$sps_path/$irods_path} - ipath=${ifile%/*} - echo "imkdir -p $ipath" >> ${logfile} - imkdir -p $ipath >> ${logfile} + echo "converting ${file} to GrandRoot" + filename=$(basename $file) + tmp=${filename#*_} + dateobs=${tmp:0:8} + dest="${root_dest}/${dateobs:0:4}/${dateobs:4:2}" + if [ ! -d $dest ];then + mkdir -p $dest >/dev/null 2>&1 + fi + dirlogs=${root_dest}/../logs + logfile=${dirlogs}/bin2root-${filename%.*} + if [ ! -d $dirlogs ];then + mkdir -p $dirlogs >/dev/null 2>&1 + fi + # Convert file + ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} + conv_status=$? 
+ if [ "$conv_status" -ne 0 ]; then + notify=1 + fi + echo $conv_status >> ${logfile} + # Put GrandRoot file into irods + sfile=${dest}/${filename%.*}.root + ifile=${sfile/$sps_path/$irods_path} + ipath=${ifile%/*} + echo "imkdir -p $ipath" >> ${logfile} + imkdir -p $ipath >> ${logfile} 2>&1 echo "iput -f $sfile $ifile" >> ${logfile} - iput -f $sfile $ifile >> ${logfile} + iput -f $sfile $ifile >> ${logfile} 2>&1 iput_status=$? - if [ "iput_status" -ne 0 ]; then - notify=1 - fi - - # Register conversion result into the database - python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} - # Register root file into db - python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root + if [ "$iput_status" -ne 0 ]; then + notify=1 + fi + # Register conversion result into the database + python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} + # Register root file into db + python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root done if [ "$notify" -ne "0" ]; then From 673d5bea79c66f921b8d48984ae357139b9531b1 Mon Sep 17 00:00:00 2001 From: fleg Date: Sat, 22 Jun 2024 00:35:58 +0200 Subject: [PATCH 52/85] More accurate submission time and duration for bin2root (after profiling jobs) --- scripts/transfers/ccscript.bash | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) mode change 100755 => 100644 scripts/transfers/ccscript.bash diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash old mode 100755 new mode 100644 index 1f9ee976..027eb714 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -15,6 +15,9 @@ gtot_option="-g1" # number of files to group in same submission nbfiles=3 +#time required to run bin2root on one file +bin2rootduration=15 + # Notification options mail_user='fleg@lpnhe.in2p3.fr' mail_type='FAIL,TIME_LIMIT,INVALID_DEPEND' @@ -117,6 +120,7 @@ do ((i++)) done +jobtime=`date -d@$(($bin2rootduration*60*$nbfiles)) -u +%H:%M` convjobs="" # Launch convertion of files (but after the registration has finished) for j in "${!listoffiles[@]}" @@ -127,7 +131,7 @@ do echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile #submit script echo "submit $outfile" - jid=$(sbatch --dependency=afterany:${jregid} -t 0-01:00 -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 8G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) + jid=$(sbatch --dependency=afterany:${jregid} -t $jobtime -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 2G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) jid=$(echo $jid |awk '{print $NF}') convjobs=$convjobs":"$jid done From 179207f63f1f7dbcbd219e3f5e3a05a9b7ad7864 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Wed, 26 Jun 2024 17:08:46 +0200 Subject: [PATCH 53/85] Skip small files (<256 bytes) and move them to a crap directory --- scripts/transfers/ccscript.bash | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) mode change 100644 => 100755 scripts/transfers/ccscript.bash diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash old mode 100644 new mode 100755 index 027eb714..a5698029 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -89,6 +89,7 @@ fi root_dest=${db%/logs*}/GrandRoot/ submit_dir=$(dirname "${db}") submit_base_name=s${tag} +crap_dir=${db%/logs*}/raw/crap 
 if [ ! -d $root_dest ];then
 mkdir -p $root_dest >/dev/null 2>&1
@@ -96,7 +97,9 @@ fi
 if [ ! -d $submit_dir ];then
 mkdir -p $submit_dir >/dev/null 2>&1
 fi
-
+if [ ! -d $crap_dir ];then
+ mkdir -p $crap_dir >/dev/null 2>&1
+fi
 # First register raw files transfers into the DB and get the id of the registration job
 outfile="${submit_dir}/${submit_base_name}-register-transfer.bash"
 echo "#!/bin/bash" > $outfile
 echo "$register_transfers -d $db -t $tag" >> $outfile
@@ -110,14 +113,19 @@ j=0
 declare -A listoffiles
 for file in $(sqlite3 $db "select target from transfer,gfiles where gfiles.id=transfer.id and tag='${tag}' and transfer.success=1;")
 do
- if [ "$((i % nbfiles))" -eq "0" ]; then
- ((j++))
+ # We exclude small files (which are suposed to be crap)
+ fsize=$(stat -c%s "$file")
+ if [ "$fsize" -le "256" ];then
+ echo "$file too small ($fsize). Moved to $crap_dir/ and skipped."
+ mv "$file" "$crap_dir/"
+ else
+ if [ "$((i % nbfiles))" -eq "0" ]; then
+ ((j++))
+ fi
+ #add file to the list of files to be treated
+ listoffiles[$j]+=" ${file}"
+ ((i++))
 fi
-
- #add file to the list of files to be treated
- listoffiles[$j]+=" ${file}"
-
- ((i++))
 done

 jobtime=`date -d@$(($bin2rootduration*60*$nbfiles)) -u +%H:%M`
@@ -131,7 +139,7 @@ do
 echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile
 #submit script
 echo "submit $outfile"
- jid=$(sbatch --dependency=afterany:${jregid} -t $jobtime -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 2G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type})
+ jid=$(sbatch --dependency=afterany:${jregid} -t 0-${jobtime} -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 2G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type})
 jid=$(echo $jid |awk '{print $NF}')
 convjobs=$convjobs":"$jid
 done
@@ -142,6 +150,6 @@ else
 dep="--dependency=afterany${convjobs}"
 #finally refresh the materialized views in the database and the update of monitoring
 sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G ${refresh_mat_script} --mail-user=${mail_user} --mail-type=${mail_type}
- sbatch ${dep} -t 0-00:30 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G ${update_web_script} -mail-user=${mail_user} --mail-type=${mail_type}
+ sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G ${update_web_script} -mail-user=${mail_user} --mail-type=${mail_type}
 fi

From fefb9461c993292e46b4da6d8cd8ec93c0aacb1f Mon Sep 17 00:00:00 2001
From: LEGRAND Francois
Date: Tue, 2 Jul 2024 17:42:42 +0200
Subject: [PATCH 54/85] Error in slurm submission (options must be before script).
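
sbatch stops parsing its own options at the first non-option argument (the
batch script), so every option placed after the script path is handed to the
script as a positional argument instead of being applied to the job; that is
why the --mail-user/--mail-type flags move in front of ${outfile} below. A
minimal illustration (job.bash and the address are placeholders, not part of
this patch):

    # wrong: --mail-user/--mail-type end up as $1 and $2 of job.bash
    sbatch -t 0-00:10 -n 1 --mem 1G job.bash --mail-user=user@example.org --mail-type=FAIL
    # right: all sbatch options first, then the script (and its own arguments)
    sbatch -t 0-00:10 -n 1 --mem 1G --mail-user=user@example.org --mail-type=FAIL job.bash
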
--- scripts/transfers/bintoroot.bash | 6 ++++-- scripts/transfers/ccscript.bash | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) mode change 100644 => 100755 scripts/transfers/bintoroot.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash old mode 100644 new mode 100755 index 6f7cae26..77f37f27 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -71,9 +71,11 @@ do notify=1 fi # Register conversion result into the database - python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} + echo "Register convertion" >> ${logfile} + python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} >> ${logfile} 2>&1 # Register root file into db - python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root + echo "register file in database" >> ${logfile} + python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root >> ${logfile} 2>&1 done if [ "$notify" -ne "0" ]; then diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index a5698029..676b1c40 100755 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -104,7 +104,7 @@ fi outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" echo "#!/bin/bash" > $outfile echo "$register_transfers -d $db -t $tag" >> $outfile -jregid=$(sbatch -t 0-00:10 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 1G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) +jregid=$(sbatch -t 0-00:10 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) jregid=$(echo $jregid |awk '{print $NF}') # List files to be converted and group them by bunchs of nbfiles @@ -139,7 +139,7 @@ do echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile #submit script echo "submit $outfile" - jid=$(sbatch --dependency=afterany:${jregid} -t 0-${jobtime} -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 2G ${outfile} --mail-user=${mail_user} --mail-type=${mail_type}) + jid=$(sbatch --dependency=afterany:${jregid} -t 0-${jobtime} -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 2G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) jid=$(echo $jid |awk '{print $NF}') convjobs=$convjobs":"$jid done @@ -149,7 +149,7 @@ if [ "$convjobs" = "" ]; then else dep="--dependency=afterany${convjobs}" #finally refresh the materialized views in the database and the update of monitoring - sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G ${refresh_mat_script} --mail-user=${mail_user} --mail-type=${mail_type} - sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G ${update_web_script} -mail-user=${mail_user} --mail-type=${mail_type} + sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} + sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G -mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} fi From 43ebe61d20485d7fbd8f6ca7088b97af96b34b23 Mon Sep 17 00:00:00 2001 From: fleg Date: Wed, 
3 Jul 2024 11:08:10 +0200 Subject: [PATCH 55/85] Corrected typo in launching update_webmonitoring (missing -) --- scripts/transfers/ccscript.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 scripts/transfers/ccscript.bash diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash old mode 100755 new mode 100644 index 676b1c40..523daa80 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -150,6 +150,6 @@ else dep="--dependency=afterany${convjobs}" #finally refresh the materialized views in the database and the update of monitoring sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} - sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G -mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} + sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G --mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} fi From 50cb99d96e25c405b8a3a0c8ca3eda5672315b33 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Thu, 4 Jul 2024 21:23:01 +0200 Subject: [PATCH 56/85] - Enabled file splitting - Not fixing event ranges in file names anymore --- sim2root/Common/sim2root.py | 104 +++++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/sim2root/Common/sim2root.py b/sim2root/Common/sim2root.py index bf123782..4022597d 100755 --- a/sim2root/Common/sim2root.py +++ b/sim2root/Common/sim2root.py @@ -45,6 +45,7 @@ clparser.add_argument("--trigger_time_ns",type=float,default=None,help="Adujust the trace so that the maximum is at given ns from the begining of the trace") clparser.add_argument("--verbose", choices=["debug", "info", "warning", "error", "critical"],default="info", help="logger verbosity.") clparser.add_argument("-ss", "--star_shape", help="For star-shapes: create a separate run for every event", action='store_true') +clparser.add_argument("-ef", "--events_per_file", help="How many events to hold in a single file", type=int, default=None) clargs = clparser.parse_args() mlg.create_output_for_logger(clargs.verbose, log_stdout=True) @@ -152,7 +153,7 @@ def adjust_trigger(trace, CurrentT0s, TPre, TimeBinSize): logger.error("some elements needed to be shifted only up to the limt, tpre was too small") ShiftBins[mask] = int(-TPre / TimeBinSize) - # we cannot use use np.roll, but roll makes re-appear the end of the trace at the begining if we roll to much + # we cannot use np.roll, but roll makes re-appear the end of the trace at the begining if we roll to much # we cannot use scipy shift, that lets you state what value to put for the places you roll, on a 3D array # TODO: There must be a better way to do this without the for loop, but i lost a morning to it and i dont have the time to develop it now. Search for strided_indexing_roll on the web for inspiration. @@ -193,6 +194,7 @@ def main(): # Check if a directory was given as input if Path(clargs.file_dir_name[0]).is_dir(): + # ToDo: add console flag for file order randomisation file_list = sorted(glob.glob(clargs.file_dir_name[0]+"/*.rawroot")) # Check if the first file is a list (of files, hopefully) elif Path(clargs.file_dir_name[0]).is_file() and Path(clargs.file_dir_name[0]).suffix==".txt": @@ -205,6 +207,12 @@ def main(): print("No RawRoot files found in the input directory. 
Exiting.") exit(0) + # How many events were stored in current files + events_in_file = 1 + + # The name of the output directory + out_dir_name = "" + # Loop through the files specified on command line # for file_num, filename in enumerate(clargs.filename): for file_num, filename in enumerate(file_list): @@ -253,11 +261,14 @@ def main(): trawefield.t_pre=DesiredTpre trawefield.t_post=DesiredTpost - # If the first entry on the first file or dealing with star shape sim + if events_in_file==1: + start_event_number = trawshower.event_number + + # If the first entry on the first file, or dealing with star shape sim if (file_num==0 and i==0) or clargs.star_shape: # Overwrite the run number if specified on command line (only for the first event) - if (file_num==0 and i==0): + if file_num==0 and i==0: run_number = ext_run_number if ext_run_number is not None else trawshower.run_number start_run_number = run_number # or increase it by one for star shapes @@ -269,8 +280,8 @@ def main(): else: site = clargs.site_name - # Init output trees in the proper directory (only for the first event) - if file_num==0 and i==0: out_dir_name = init_trees(clargs, trawshower.unix_date, run_number, site, gt) + # Init output trees in the proper directory + if file_num==0 and i==0: out_dir_name = init_all_trees(clargs, trawshower.unix_date, run_number, site, gt) # Convert the RawShower entries rawshower2grandrootrun(trawshower, gt) @@ -327,7 +338,6 @@ def main(): gt.tshowersim.run_number = run_number gt.tefield.run_number = run_number - # Overwrite the event number if specified on command line if ext_event_number is not None: gt.tshower.event_number = ext_event_number @@ -338,12 +348,19 @@ def main(): if file_num==0 and i==0: start_event_number = gt.tshower.event_number - # Correct the first/last event number for file naming - if(gt.tshower.event_numberend_event_number): + # end_event_number = gt.tshower.event_number + # # ToDo: this should be the standard case + # else: - if(gt.tshower.event_number>end_event_number): - end_event_number = gt.tshower.event_number + end_event_number = gt.tshower.event_number gt.tshowersim.input_name = Path(filename).stem @@ -352,6 +369,33 @@ def main(): gt.tshowersim.fill() gt.tefield.fill() + # If filled max number of events in file + if events_in_file == clargs.events_per_file and not (file_num==0 and i==0): + + # tmp_start_event_number = gt.tshower.event_number + + # Save and close the current event files + logger.info("Writing event trees") + gt.tshower.write(force_close_file=True) + gt.tshowersim.write(force_close_file=True) + gt.tefield.write(force_close_file=True) + gt.tshower.stop_using() + gt.tshowersim.stop_using() + gt.tefield.stop_using() + + # Move the saved event files to proper filenames + logger.info("Renaming event files") + rename_event_files(clargs, out_dir_name, start_event_number, end_event_number) + + # Create the new event files + logger.info("Creating new event files") + init_event_trees(out_dir_name, gt) + + events_in_file=0 + # start_event_number = tmp_start_event_number + + events_in_file += 1 + # For the first file, get all the file's events du ids and pos if file_num==0: du_ids, du_xyzs = get_tree_du_id_and_xyz(trawefield,trawshower.shower_core_pos) @@ -418,19 +462,19 @@ def main(): # gt.trunefieldsim.write() # Write the event trees - gt.tshower.write() - gt.tshowersim.write() - gt.tefield.write() - gt.trun.write() - gt.trunshowersim.write() - gt.trunefieldsim.write() + gt.tshower.write(force_close_file=True) + gt.tshowersim.write(force_close_file=True) + 
gt.tefield.write(force_close_file=True) + gt.trun.write(force_close_file=True) + gt.trunshowersim.write(force_close_file=True) + gt.trunefieldsim.write(force_close_file=True) # Rename the created files to appropriate names - print("Renaming files to proper file names") - rename_files(clargs, out_dir_name, start_event_number, end_event_number, start_run_number) + logger.info("Renaming files to proper file names") + rename_all_files(clargs, out_dir_name, start_event_number, end_event_number, start_run_number) -# Initialise output trees and their directory -def init_trees(clargs, unix_date, run_number, site, gt): +# Initialise all output trees and their directory +def init_all_trees(clargs, unix_date, run_number, site, gt): # Use date/time from command line argument if specified, otherwise the unix time date, time = datetime.datetime.utcfromtimestamp(unix_date).strftime('%Y%m%d_%H%M%S').split("_") @@ -442,7 +486,7 @@ def init_trees(clargs, unix_date, run_number, site, gt): # Create the appropriate output directory if clargs.forced_output_directory is None: out_dir_name = form_directory_name(clargs, date, time, run_number, site) - print("Storing files in directory ", out_dir_name) + logger.info(f"Storing files in directory {out_dir_name}") out_dir_name.mkdir() # If another directory was forced as the output directory, create it else: @@ -450,15 +494,19 @@ def init_trees(clargs, unix_date, run_number, site, gt): out_dir_name.mkdir(exist_ok=True) # Create appropriate GRANDROOT trees in temporary file names (event range not known until the end of the loop) + # Init run trees only if requested gt.trun = TRun((out_dir_name / "run.root").as_posix()) gt.trunshowersim = TRunShowerSim((out_dir_name / "runshowersim.root").as_posix()) gt.trunefieldsim = TRunEfieldSim((out_dir_name / "runefieldsim.root").as_posix()) - gt.tshower = TShower((out_dir_name / "shower.root").as_posix()) - gt.tshowersim = TShowerSim((out_dir_name / "showersim.root").as_posix()) - gt.tefield = TEfield((out_dir_name / "efield.root").as_posix()) + init_event_trees(out_dir_name, gt) return out_dir_name +# Initialise event output trees +def init_event_trees(out_dir_name, gt): + gt.tshower = TShower((out_dir_name / "shower.root").as_posix()) + gt.tshowersim = TShowerSim((out_dir_name / "showersim.root").as_posix()) + gt.tefield = TEfield((out_dir_name / "efield.root").as_posix()) # Convert the RawShowerTree first entry to run values def rawshower2grandrootrun(trawshower, gt): @@ -790,7 +838,7 @@ def form_directory_name(clargs, date, time, run_number, site): return dir_name # Rename the created files to appropriate names -def rename_files(clargs, path, start_event_number, end_event_number, run_number): +def rename_all_files(clargs, path, start_event_number, end_event_number, run_number): # Go through run output files for fn_start in ["run", "runshowersim", "runefieldsim"]: @@ -807,6 +855,10 @@ def rename_files(clargs, path, start_event_number, end_event_number, run_number) print(f"Could not find a free filename for {fn_in} until serial number 1000. Please clean up some files!") exit(0) + # Rename the event files + rename_event_files(clargs, path, start_event_number, end_event_number) + +def rename_event_files(clargs, path, start_event_number, end_event_number): # Go through event output files for fn_start in ["shower", "showersim", "efield"]: From 0ab1b08fc247b02b0165e8f0ee4c655a4c146bde Mon Sep 17 00:00:00 2001 From: fleg Date: Thu, 11 Jul 2024 19:11:52 +0200 Subject: [PATCH 57/85] New functions to register directory (aka dataset). 
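
A dataset here is a directory of GrandRoot files handled as one unit:
register_dataset() locates the directory (locally or over ssh via
get_dataset()), walks it with grand.dataio.root_trees.DataDirectory and calls
register_file() on each file, filling the dataset/dataset_location and
file/file_location tables. A rough usage sketch (only the method names are
taken from this patch; the DataManager entry-point name and the paths are
assumptions):

    from granddb.granddatalib import DataManager  # class name assumed
    dm = DataManager("config.ini")
    # pull a dataset from whatever repository holds it into the incoming dir
    local_copy = dm.get_dataset("sim_gp13_run42")  # dataset name is a placeholder
    # register every file of a dataset already present at CCIN2P3
    dm.register_dataset("/sps/grand/data/sim_gp13_run42", repository="CCIN2P3")
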
Modification of the files registration process. --- granddb/config.ini.example | 4 +- granddb/granddatalib.py | 299 ++++++++++++++++++++++++++++++++----- granddb/granddblib.py | 81 ++++++---- granddb/rootdblib.py | 35 +++-- 4 files changed, 337 insertions(+), 82 deletions(-) diff --git a/granddb/config.ini.example b/granddb/config.ini.example index 580e5b9d..c1a3fedb 100644 --- a/granddb/config.ini.example +++ b/granddb/config.ini.example @@ -21,8 +21,9 @@ localdir = ["/home/fleg/incoming/"] ; If credentials are required to access the repository, they should be given in the [credential] section using the same name ; repository CCIN2P3 is already defined in the database (so it's not necessary to define it here), but credentials for it have ; to be supplied in the [credentials] section below +; THIS DEFINITIONS OVERRIDE THE ONES FROM THE DATABASE [repositories] -CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]] +CC = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]] ; Credentials for repositories given as : @@ -35,6 +36,7 @@ CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]] ; To run an ssh-agent just do : `eval $(ssh-agent)` and `ssh-add .ssh/id_rsa` [credentials] CCIN2P3 = ["john",""] +CC = ["jim",""] SSHTUNNEL = ["joe",""] ; database to use (only one database can be defined) diff --git a/granddb/granddatalib.py b/granddb/granddatalib.py index 4cfde33e..83cd2c7f 100644 --- a/granddb/granddatalib.py +++ b/granddb/granddatalib.py @@ -128,6 +128,7 @@ def __init__(self, file=os.path.join(os.path.dirname(__file__), 'config.ini')): self._repositories[repo["repository"]] = ds # Add remote repositories + # TODO: If repo exists from database just add path ? if configur.has_section('repositories'): for name in configur['repositories']: repo = json.loads(configur.get('repositories', name)) @@ -181,10 +182,16 @@ def referer(self): def SearchFileInDB(self, filename): return self.database().SearchFile(filename) + def exists(self, file, repository=None, path=None): + if self.get(file, repository, path, grab=False) is None: + return False + else: + return True + ## Get a file from the repositories. # If repo or path given, then directly search there. # If not, search first in localdirs and then in remote repositories. First match is returned. - def get(self, file, repository=None, path=None): + def get(self, file, repository=None, path=None, grab=True): res = None # Check if file is a simple name or full path name if (os.path.dirname(file) != ""): @@ -198,17 +205,41 @@ def get(self, file, repository=None, path=None): rep = self.getrepo(repository) if not (rep is None): logger.debug(f"search in repository {rep.name()} {path}") - res = rep.get(file, path) + res = rep.get(file, path, grab=grab) # if no repo specified, we search everywhere else: for name, rep in self.repositories().items(): logger.debug(f"search in repository {rep.name()} {path}") - res = rep.get(file, path) + res = rep.get(file, path, grab=grab) + logger.debug(f"res is {res}") if not (res is None): break - return res + def get_dataset(self, directory, repository=None, path=None): + res = None + # Check if directory is a simple name or full path name + if (os.path.dirname(directory) != ""): + if (not (path is None) and (path != os.path.dirname(directory))): + logger.warning(f"path given in dataset ({os.path.dirname(directory)}) and in repository path ({path}) are different ! 
The path {os.path.dirname(directory)} from dataset will be used !") + path = os.path.dirname(os.path.normpath(directory)) + directory = os.path.basename(os.path.normpath(directory)) + + # if repository is given we get directory directly from this repo + if not (repository is None): + rep = self.getrepo(repository) + if not (rep is None): + logger.debug(f"Search in repository {rep.name()} {path}") + res = rep.get_dataset(directory, path) + # if no repo specified, we search everywhere + else: + for name, rep in self.repositories().items(): + logger.debug(f"Search in repository {rep.name()} {path}") + res = rep.get_dataset(directory, path) + logger.debug(f"res is {res}") + if not (res is None): + break + return res def copy_to_incoming(self, pathfile): newname = self.incoming() + uniquename(pathfile) @@ -228,24 +259,98 @@ def getrepo(self, repo): break return res + ##Function to register a dataset (i.e directory) into the database. + def register_dataset(self, directory, repository=None, targetdir = None): + import grand.dataio.root_trees + if repository is None: + repository = self.referer() + else: + repository = self.getrepo(repository) - ##Function to register a file into the database. Returns the path to the file in the repository where the file was registered. - def register_file(self,filename, repository=None, path=None): + if targetdir is None: + targetdir = directory + + if repository is not None: + # For registering the full path of the dataset must be provided + path = os.path.dirname(directory) + if len(path) == 0: + logger.error(f"For registering, dataset ({directory}) must be a full path ") + else: + # And the dir must be already present in the target repository + # If so, we need to get it locally and use this local copy (to be able to read the files) + #localdir = self.get_dataset(directory, repository.name()) + localdir = self.get_dataset(directory) + #TODO: Check that target dir exists + if localdir is not None: + Tdir = grand.dataio.root_trees.DataDirectory(localdir) + for f in Tdir.get_list_of_files(): + self.register_file(localfile=f, dataset=Tdir.dir_name, repository=repository.name(), targetdir=targetdir) + else: + logger.error(f"Dataset {directory} was not found in repository {repository.name()} thus cannot be registered") + else: + logger.error(f"No repository found to register file {file}") + return directory + + ##Function to register a file into the database. + # The file MUST be present in the target repository and the full path must be given. + # Returns the path to the file in the repository where the file was registered. 
+ def register_file(self, localfile, dataset=None, repository=None, targetdir=None): newfilename = None - file = self.get(filename,repository,path) - if file is not None: - # If filename in referer repository then keep it - #print(os.path.basename(filename)+" "+self.referer().name()+" "+os.path.dirname(filename)) - newfilename = self.get(os.path.basename(filename),self.referer().name()) - - if newfilename is None: - newfilename = self.referer().copy(file) + if targetdir is None or os.path.dirname(targetdir) == os.path.dirname(localfile): + targetdir = localfile + else: + # Target file is made of target dir + dataset name + filename + targetdir = targetdir + "/" + os.path.basename(dataset) + "/" + os.path.basename(localfile) + targetdir=os.path.normpath(targetdir) + # If repository not given then use the referer + if repository is None: + repository = self.referer() + else: + repository = self.getrepo(repository) + + if repository is not None: + # For registering the full path of the file must be provided + localpath = os.path.dirname(localfile) + + if len(localpath) == 0: + logger.error(f"For registering, local filename ({localfile}) must be a full path ") else: - newfilename = str(newfilename) + # And the file must be already present in the target repository and in the local directory + fileexists = self.get(targetdir, repository.name(), grab=False) + #TODO: Check file exists in + if fileexists : + #if fileexists is not None: + newfilename = localfile + self.database().register_file(localfile, newfilename, dataset, repository.id_repository, self.provider(), targetdir=targetdir) + else: + logger.error(f"File {targetdir} was not found in repository {repository.name()} thus cannot be registered") + newfilename = None + else: + logger.error(f"No repository found to register file {localfile}") + return newfilename - #print("newfilename = "+str(newfilename)) + def old_register_file(self,filename, dataset=None, repository=None, path=None): + newfilename = None - self.database().register_file(file, newfilename, self.referer().id_repository, self.provider()) + # For registering the full path of the file must be provided + path = os.path.dirname(filename) + if len(path) == 0: + logger.error(f"For registering, filename ({filename}) must be a full path ") + else: + file = self.get(filename,repository,path) + #file = filename + if file is not None: + # If filename in referer repository then keep it + newfilename = self.get(filename,self.referer().name(),path) + + if newfilename is None: + newfilename = self.referer().copy(file) + else: + newfilename = str(newfilename) + + #print("newfilename = "+str(newfilename)) + + self.database().register_file(file, newfilename, dataset, self.referer().id_repository, self.provider()) return newfilename @@ -365,6 +470,10 @@ def get(self, file, path=None): logger.warning(f"get method for protocol {self.protocol()} not implemented for repository {self.name()}") return None + def get_dataset(self, file, path=None): + logger.warning(f"get_dataset method for protocol {self.protocol()} not implemented for repository {self.name()}") + return None + def copy(self, pathfile): logger.warning(f"copy method for protocol {self.protocol()} not implemented for repository {self.name()}") return None @@ -376,7 +485,7 @@ def copy(self, pathfile): # @date Sept 2022 class DatasourceLocal(Datasource): ## Search for file in local directories and return the path to the first corresponding file found. 
- def get(self, file, path=None): + def get(self, file, path=None, grab=True): # TODO : Check that path is in self.paths(), if not then copy in incoming ? found_file = None # Path is given : we only search in that path @@ -393,7 +502,7 @@ def get(self, file, path=None): break if my_file is None: - logger.debug(f"file {file} not found in localdir {path}") + logger.debug(f"File {file} not found in localdir {path}") #my_file = Path(path + file) #if my_file.is_file(): @@ -403,7 +512,7 @@ def get(self, file, path=None): else: # No path given : we recursively search in all dirs and subdirs for path in self.paths(): - logger.debug(f"search in localdir {path} for file {file}") + logger.debug(f"Search in localdir {path} for file {file}") #my_file = Path(path + file) my_file = None @@ -418,19 +527,68 @@ def get(self, file, path=None): # found_file = path + file # break else: - logger.debug(f"file {file} not found in localdir {path}") + logger.debug(f"File {file} not found in localdir {path}") + + if not found_file is None: + logger.debug(f"File found in localdir {found_file}") + + return found_file + #return str(found_file) + + def get_dataset(self, file, path=None): + # TODO : Check that path is in self.paths(), if not then copy in incoming ? + found_file = None + # Path is given : we only search in that path + if not (path is None): + my_path = Path(path) + if not my_path.exists(): + logger.warning(f"path {path} not found (seems not exists) ! Check that it is mounted if you run in docker !") + + my_file = None + print(f'path {path} file {file} - {(Path(path))}') + liste = list(Path(path).rglob(file)) + print(f'list {liste}') + for my_file in liste: + if my_file.is_dir(): + found_file = my_file + break + + if my_file is None: + logger.debug(f"Dataset {file} not found in localdir {path}") + else: + # No path given : we recursively search in all dirs and subdirs + for path in self.paths(): + logger.debug(f"search in localdir {path} for dataset {file}") + + #my_file = Path(path + file) + my_file = None + liste = list(Path(path).rglob(file)) + for my_file in liste: + if my_file.is_dir(): + found_file = my_file + break + if not my_file is None and my_file.is_dir(): + break + else: + logger.debug(f"dataset {file} not found in localdir {path}") if not found_file is None: - logger.debug(f"file found in localdir {found_file}") + logger.debug(f"Dataset found in localdir {found_file}") return str(found_file) - def copy(self, pathfile): - newname = self.incoming() + uniquename(pathfile) - if os.path.join(os.path.dirname(pathfile), "") == self.incoming(): - os.rename(pathfile, newname) + def copy(self, pathfile, destfile = None): + if destfile is None: + newname = self.incoming() + uniquename(pathfile) + if os.path.join(os.path.dirname(pathfile), "") == self.incoming(): + os.rename(pathfile, newname) + else: + shutil.copy2(pathfile, newname) else: - shutil.copy2(pathfile, newname) + newname = destfile + if pathfile != newname: + shutil.copy2(pathfile, newname) + return newname @@ -469,20 +627,20 @@ def set_client(self, recurse=True): client = None return client - def get(self, file, path=None): + def get(self, file, path=None, grab=True): import getpass localfile = None client = self.set_client() if not(client is None): if not (path is None): logger.debug(f"search {file} in {path} @ {self.name()}") - localfile = self.get_file(client, path, file) + localfile = self.get_file(client, path, file, grab=grab) if (localfile is None): logger.debug(f"file {file} not found in {path} @ {self.name()}") else: for path in 
self.paths(): logger.debug(f"search {file} in {path}@ {self.name()}") - localfile = self.get_file(client, path, file) + localfile = self.get_file(client, path, file,grab=grab) if not (localfile is None): break else: @@ -493,11 +651,35 @@ def get(self, file, path=None): return localfile + + def get_dataset(self, file, path=None): + import getpass + localfile = None + client = self.set_client() + if not(client is None): + if not (path is None): + logger.debug(f"search {file} in {path} @ {self.name()}") + localfile = self.get_dir(client, path, file) + if (localfile is None): + logger.debug(f"Dataset {file} not found in {path} @ {self.name()}") + else: + for path in self.paths(): + logger.debug(f"search {file} in {path}@ {self.name()}") + localfile = self.get_dir(client, path, file) + if not (localfile is None): + break + else: + logger.debug(f"Dataset {file} not found in {path} @ {self.name()}") + else: + logger.debug(f"Search in repository {self.name()} is skipped") + + return localfile + ## Search for files in remote location accessed through ssh. # If file is found, it will be copied in the incoming local directory and the path to the local file is returned. # If file is not found, then None is returned. - def get_file(self, client, path, file): + def get_file(self, client, path, file, grab=True): localfile = None #stdin, stdout, stderr = client.exec_command('ls ' + path + file) #lines = list(map(lambda s: s.strip(), stdout.readlines())) @@ -508,20 +690,54 @@ def get_file(self, client, path, file): #if len(lines) == 1: logger.debug(f"file found in repository {self.name()} @ " + lines[0].strip('\n')) logger.debug(f"copy to {self.incoming()}{file}") - scpp = scp.SCPClient(client.get_transport()) - scpp.get(lines[0].strip('\n'), self.incoming() + file) - localfile = self.incoming() + file + if grab: + scpp = scp.SCPClient(client.get_transport()) + scpp.get(lines[0].strip('\n'), self.incoming() + file) + localfile = self.incoming() + file + else: + localfile = file return localfile - def copy(self, pathfile): - newname = self.incoming() + uniquename(pathfile) + def get_dir(self, client, path, dataset): + localfile = None + # Search directory on remote server + stdin, stdout, stderr = client.exec_command('find ' + path + " -type d -name " + dataset) + lines = sorted(list(map(lambda s: s.strip(), stdout.readlines())), key=len) + if len(lines) >= 1: + logger.debug(f"directory found in repository {self.name()} @ " + lines[0].strip('\n')) + # Create local directory if needed + if not os.path.exists(self.incoming() + dataset): + logger.debug(f"create local dir {self.incoming()}/{dataset}") + os.mkdir(self.incoming() + dataset) + # Search all files in dataset on remote server + stdin, stdout, stderr = client.exec_command('find ' + path + "/" + dataset + " -type f") + files = sorted(list(map(lambda s: s.strip(), stdout.readlines())), key=len) + scpp = scp.SCPClient(client.get_transport(), sanitize=lambda x: x) + # Get all files if not already present + for file in files: + filename = os.path.basename(file) + if not os.path.exists((self.incoming() + dataset + "/" + filename)): + logger.debug(f"copy {filename} to {self.incoming()}{dataset}") + scpp.get(file, self.incoming() + dataset ) + else: + logger.debug(f"File {filename} already exists in {self.incoming()}{dataset}") + #scpp.get(lines[0].strip('\n') + '/*', self.incoming() + file ) + localfile = self.incoming() + dataset + return localfile + + def copy(self, pathfile, destfile = None): + if destfile is None: + newname = self.incoming() + 
uniquename(pathfile) + else: + newname = destfile client = self.set_client() # search if original file exists remotely stdin, stdout, stderr = client.exec_command('ls ' + self.incoming() + os.path.basename(pathfile)) lines = list(map(lambda s: s.strip(), stdout.readlines())) if len(lines) == 1: - # original file exists... we rename it. + #original file exists... we rename it. client.exec_command('mv ' + self.incoming() + os.path.basename(pathfile) + ' ' + newname) + else: # search if dest files already there stdin, stdout, stderr = client.exec_command('ls ' + newname) @@ -542,28 +758,29 @@ class DatasourceHttp(Datasource): # If file is found, it will be copied in the incoming local directory and the path to the local file is returned. # If file is not found, then None is returned. # TODO: implement authentification - def get(self, file, path=None): + def get(self, file, path=None, grab=True): localfile = None if not (path is None): url = self._protocol + '://' + self.server() + '/' + path + '/' + file - localfile = self.get_file(url, file) + localfile = self.get_file(url, file,grab) else: for path in self.paths(): url = self._protocol + '://' + self.server() + '/' + path + '/' + file - localfile = self.get_file(url, file) + localfile = self.get_file(url, file, grab) if not (localfile is None): break return localfile - def get_file(self, url, file): + def get_file(self, url, file, grab=True): #import socket localfile = None try: #socket.setdefaulttimeout(10) + #TODO check grab and test url urllib.request.urlretrieve(url, self.incoming() + file) logger.debug(f"file found in repository {url}") localfile = self.incoming() + file diff --git a/granddb/granddblib.py b/granddb/granddblib.py index 45fb1050..26666d34 100644 --- a/granddb/granddblib.py +++ b/granddb/granddblib.py @@ -22,22 +22,33 @@ def casttodb(value): + #print(f'{type(value)} - {value}') if isinstance(value, numpy.uint32): - value = int(value) - if isinstance(value, numpy.float32): - value = float(value) - if isinstance(value, numpy.ndarray): + val = int(value) + elif isinstance(value, numpy.float32): + val = float(value) + elif isinstance(value, numpy.ndarray): if value.size == 0: - value = None + val = None elif value.size == 1: - value = value.item() + val = value.item() else: - value = value.tolist() - if isinstance(value, grand.dataio.root_trees.StdVectorList): - value = [i for i in value] - if isinstance(value, str): - value = value.strip().strip('\t').strip('\n') - return value + val = value.tolist() + elif isinstance(value, grand.dataio.root_trees.StdVectorList): + val =[] + #postgres cannot store arrays of arrays... so we split (not sure if really correct)! + for i in value: + if isinstance(i,numpy.ndarray) or isinstance(i, grand.dataio.root_trees.StdVectorList): + val.append(casttodb(i)) + else: + val.append(i) + + #value = [i for i in value] + elif isinstance(value, str): + val = value.strip().strip('\t').strip('\n') + else: + val = value + return val ## @brief Class to handle the Grand database. 
@@ -94,7 +105,8 @@ def __init__(self, host, port, dbname, user, passwd, sshserv="", sshport=22, cre Base = automap_base() Base.prepare(engine, reflect=True) - self.sqlalchemysession = Session(engine) + self.sqlalchemysession = Session(engine,autoflush=False) + #self.sqlalchemysession.no_autoflush = True inspection = inspect(engine) for table in inspection.get_table_names(): # for table in engine.table_names(): #this is obsolete @@ -306,21 +318,37 @@ def register_repository(self, name, protocol, port, server, path, description="" # Returns the id_file for the file and a boolean True if the file was not previously in the DB (i.e it's a new file) # and false if the file was already registered. This is usefull to know if the metadata of the file needs to be read # or not - def register_filename(self, filename, newfilename, id_repository, provider): + def register_filename(self, filename, newfilename, dataset, id_repository, provider, targetfile=None): import os register_file = False isnewfile = False idfile = None + id_dataset = None + if targetfile is None: + targetfile = newfilename + if dataset is not None: + id_dataset = self.get_or_create_key('dataset', 'name', os.path.basename(dataset)) + filt = {} + filt['id_dataset'] = str(casttodb(id_dataset)) + filt['id_repository'] = str(casttodb(id_repository)) + ret = self.sqlalchemysession.query(getattr(self._tables['dataset_location'], 'id_dataset')).filter_by( + **filt).all() + if len(ret) == 0: + container = self.tables()['dataset_location'](id_dataset=id_dataset, id_repository=id_repository, + description="") + self.sqlalchemysession.add(container) + self.sqlalchemysession.flush() + ## Check if file not already registered IN THIS REPO : IF YES, ABORT, IF NO REGISTER + #First see if file is registered elsewhere file_exist = self.sqlalchemysession.query(self.tables()['file']).filter_by( - filename=os.path.basename(newfilename)).first() + filename=os.path.basename(targetfile),id_dataset=id_dataset).first() if file_exist is not None: - # file_exist_here = self.sqlalchemysession.query(self.tables()['file_location']).filter_by( - # id_repository=id_repository).first() + #File exists somewhere... see if in the repository we want file_exist_here = self.sqlalchemysession.query(self.tables()['file_location']).filter_by( - id_repository=id_repository).first() + id_repository=id_repository, id_file=file_exist.id_file).first() if file_exist_here is None: - # file exists in different repo. We only need to register it in the current repo + # file exists but in a different repo. 
We only need to register it in the current repo register_file = True idfile = file_exist.id_file else: @@ -332,11 +360,11 @@ def register_filename(self, filename, newfilename, id_repository, provider): if register_file: id_provider = self.get_or_create_key('provider', 'provider', provider) if isnewfile: - # rfile = ROOT.TFile(str(filename)) rfile = rdb.RootFile(str(filename)) - rfile.dataset_name() + #rfile.dataset_name() # rfile.file().GetSize() - container = self.tables()['file'](filename=os.path.basename(newfilename), + container = self.tables()['file'](id_dataset=id_dataset, + filename=os.path.basename(targetfile), description='autodesc', original_name=os.path.basename(filename), id_provider=id_provider, @@ -346,9 +374,10 @@ def register_filename(self, filename, newfilename, id_repository, provider): self.sqlalchemysession.flush() idfile = container.id_file # container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=os.path.dirname(newfilename)) - container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=newfilename, + container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=targetfile, description="") self.sqlalchemysession.add(container) + logger.debug(f"File name {filename} registered") # self.sqlalchemysession.flush() return idfile, isnewfile @@ -485,8 +514,8 @@ def register_filecontent(self, file, idfile): # print('Execution time:', elapsed_time, 'seconds') logger.debug(f"execution time {elapsed_time} seconds") - def register_file(self, orgfilename, newfilename, id_repository, provider): - idfile, read_file = self.register_filename(orgfilename, newfilename, id_repository, provider) + def register_file(self, orgfilename, newfilename, dataset, id_repository, provider, targetdir=None): + idfile, read_file = self.register_filename(orgfilename, newfilename, dataset, id_repository, provider, targetdir) if read_file: # We read the localfile and not the remote one self.register_filecontent(orgfilename, idfile) @@ -494,3 +523,5 @@ def register_file(self, orgfilename, newfilename, id_repository, provider): else: logger.info(f"file {orgfilename} already registered.") self.sqlalchemysession.commit() + + diff --git a/granddb/rootdblib.py b/granddb/rootdblib.py index fe2da570..65946ded 100644 --- a/granddb/rootdblib.py +++ b/granddb/rootdblib.py @@ -1,6 +1,7 @@ import ROOT import grand.dataio.root_trees as groot import grand.manage_log as mlg +import os logger = mlg.get_logger_for_script(__name__) #mlg.create_output_for_logger("debug", log_stdout=True) @@ -136,21 +137,21 @@ class RootFile: 'hadronic_model': 'id_hadronic_model', 'low_energy_model': 'id_low_energy_model', 'cpu_time': 'cpu_time', - # 'long_pd_depth': 'long_pd_depth', - # 'long_pd_eminus': 'long_pd_eminus', - # 'long_pd_eplus': 'long_pd_eplus', - # 'long_pd_muminus': 'long_pd_muminus', - # 'long_pd_muplus': 'long_pd_muplus', - # 'long_pd_gamma': 'long_pd_gamma', - # 'long_pd_hadron': 'long_pd_hadron', - # 'long_gamma_elow': 'long_gamma_elow', - # 'long_e_elow': 'long_e_elow', - # 'long_e_edep': 'long_e_edep', - # 'long_mu_edep': 'long_mu_edep', - # 'long_mu_elow': 'long_mu_elow', - # 'long_hadron_edep': 'long_hadron_edep', - # 'long_hadron_elow': 'long_hadron_elow', - # 'long_neutrino': 'long_neutrino', + #'long_pd_depth': 'long_pd_depth', + #'long_pd_eminus': 'long_pd_eminus', + #'long_pd_eplus': 'long_pd_eplus', + #'long_pd_muminus': 'long_pd_muminus', + #'long_pd_muplus': 'long_pd_muplus', + #'long_pd_gamma': 
'long_pd_gamma', + #'long_pd_hadron': 'long_pd_hadron', + #'long_gamma_elow': 'long_gamma_elow', + #'long_e_elow': 'long_e_elow', + #'long_e_edep': 'long_e_edep', + #'long_mu_edep': 'long_mu_edep', + #'long_mu_elow': 'long_mu_elow', + #'long_hadron_edep': 'long_hadron_edep', + #'long_hadron_elow': 'long_hadron_elow', + #'long_neutrino': 'long_neutrino', 'event_weight': 'event_weight' } tadcToDB = { @@ -307,9 +308,11 @@ class RootFile: #TreeList is a dict with name of the trees as key and the class corresponding to it's type as value TreeList = {} file = None + filename = None ## We retreive the list of Ttrees in the file and store them as the corresponding class from root_files.py in the dict TreeList def __init__(self, f_name): + self.filename = f_name self.TreeList.clear() self.file = ROOT.TFile(f_name) for key in self.file.GetListOfKeys(): @@ -341,6 +344,8 @@ def copy_content_to(self, file): # [extra]-> given by user (metadata ?) # serial -> automatically incremented in case of new version (how to do that ?) def dataset_name(self): + name = os.path.basename(os.path.dirname(self.filename)) + return name treename = 'trun' name = "noname" for run in self.TreeList[treename].get_list_of_runs(): From 09d655d6d890b2d4d7269d0b3372e9d04c7eba37 Mon Sep 17 00:00:00 2001 From: fleg Date: Fri, 12 Jul 2024 17:36:06 +0200 Subject: [PATCH 58/85] Added path in dataset_location and changed name by dataset_name --- granddb/granddblib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/granddb/granddblib.py b/granddb/granddblib.py index 26666d34..3f0bfd29 100644 --- a/granddb/granddblib.py +++ b/granddb/granddblib.py @@ -327,14 +327,14 @@ def register_filename(self, filename, newfilename, dataset, id_repository, provi if targetfile is None: targetfile = newfilename if dataset is not None: - id_dataset = self.get_or_create_key('dataset', 'name', os.path.basename(dataset)) + id_dataset = self.get_or_create_key('dataset', 'dataset_name', os.path.basename(dataset)) filt = {} filt['id_dataset'] = str(casttodb(id_dataset)) filt['id_repository'] = str(casttodb(id_repository)) ret = self.sqlalchemysession.query(getattr(self._tables['dataset_location'], 'id_dataset')).filter_by( **filt).all() if len(ret) == 0: - container = self.tables()['dataset_location'](id_dataset=id_dataset, id_repository=id_repository, + container = self.tables()['dataset_location'](id_dataset=id_dataset, id_repository=id_repository, path=dataset, description="") self.sqlalchemysession.add(container) self.sqlalchemysession.flush() From 66f3b8eb13429fc7a6e60600f2f50ab878493876 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Fri, 2 Aug 2024 15:53:50 +0200 Subject: [PATCH 59/85] Added initial version of DataFileChain --- grand/dataio/root_trees.py | 60 ++++++++++++-------------------------- 1 file changed, 19 insertions(+), 41 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index 3af7628b..c6a73a0f 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -2523,47 +2523,6 @@ def init_sim2root_structure(self): # Assign the tree with the highest or requested analysis level as default to the class instance setattr(self, f"{flistname[1:-1]}", getattr(f, f"{flistname[1:-1]}")) - # tree_types = set() - # # Loop through the list of file handles - # for i, f in enumerate(self.file_handle_list): - # - # - # # Collect the tree types - # tree_types.update(*f.tree_types.keys()) - # - # # Select the highest analysis level trees for each class and store these trees as main attributes - # 
for key in f.tree_types: - # if key == "run": - # setattr(self, "trun", f.dict_of_trees["trun"]) - # else: - # setattr(self, key + "_" + str(el["analysis_level"]), f.dict_of_trees[el["name"]]) - # if self.analysis_level>-1: - # - # - # - # max_analysis_level = -1 - # for key1 in f.tree_types[key].keys(): - # el = f.tree_types[key][key1] - # chain_name = el["name"] - # if "analysis_level" in el: - # if el["analysis_level"] > max_analysis_level or el["analysis_level"] == 0: - # max_analysis_level = el["analysis_level"] - # max_anal_chain_name = chain_name - # - # setattr(self, key + "_" + str(el["analysis_level"]), f.dict_of_trees[el["name"]]) - # - # if chain_name not in chains_dict: - # chains_dict[chain_name] = ROOT.TChain(chain_name) - # chains_dict[chain_name].Add(self.file_list[i]) - # - # # In case there is no analysis level info in the tree (old trees), just take the last one - # if max_analysis_level == -1: - # max_anal_chain_name = el["name"] - # - # tree_class = getattr(thismodule, el["type"]) - # setattr(self, tree_class.get_default_tree_name(), chains_dict[max_anal_chain_name]) - - def create_chains(self): chains_dict = {} tree_types = set() @@ -2825,3 +2784,22 @@ def close(self): for t in self.tree_instances: t.stop_using() self.f.Close() + +class DataFileChain: + """Class holding a number of DataFiles with the same TTree type, TChaining the trees together""" + + self.list_of_files = [] + """The list of DataFiles in the chain""" + + self.chain = None + """The main TChain""" + + def __init__(self, files, tree_type): + + # Create the ROOT TChain and the list of files + self.chain = ROOT.TChain(tree_type) + for f in files: + self.list_of_files.append(f) + self.chain.Add(f.filename) + + From ee699e8d176c55ea518a00b82b06b8a128fd41a7 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Sat, 3 Aug 2024 13:40:57 +0200 Subject: [PATCH 60/85] Minor fixes to DataFileChain --- grand/dataio/root_trees.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index c6a73a0f..a592679e 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -2788,10 +2788,10 @@ def close(self): class DataFileChain: """Class holding a number of DataFiles with the same TTree type, TChaining the trees together""" - self.list_of_files = [] + list_of_files = [] """The list of DataFiles in the chain""" - self.chain = None + chain = None """The main TChain""" def __init__(self, files, tree_type): From 2d685d643e14ee76570e3659585e41dd1856a8d7 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Tue, 13 Aug 2024 11:52:55 +0200 Subject: [PATCH 61/85] DataTree can now have underlying TChain --- grand/dataio/root_trees.py | 61 ++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index a592679e..040b7933 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -14,9 +14,13 @@ import numpy as np import glob import array +import warnings from collections import defaultdict +# ToDo: Ignore the warning about branches (and all the other ROOT errors :( ) for TChain until an answer in the ROOT forum +ROOT.gErrorIgnoreLevel = ROOT.kFatal + # Load the C++ macros for vector filling from numpy arrays ROOT.gROOT.LoadMacro(os.path.dirname(os.path.realpath(__file__))+"/vector_filling.C") @@ -394,6 +398,10 @@ class DataTree: _analysis_level: int = 0 """The analysis level of this tree""" + ## Is the tree read from TChain + is_tchain: bool = 
False + """Is the tree read from TChain""" + ## Fields that are not branches _nonbranch_fields = [ @@ -413,7 +421,8 @@ class DataTree: "_source_datetime", "_analysis_level", "_modification_history", - "__setattr__" + "__setattr__", + "is_tchain" ] """Fields that are not branches""" @@ -662,27 +671,45 @@ def _set_file(self, f): if isinstance(f, ROOT.TFile): self._file = f self._file_name = self._file.GetName() - # If the filename string is given, open/create the ROOT file with this name - else: - self._file_name = f - # print(self._file_name) - # If the file with that filename is already opened, use it (do not reopen) - if f := ROOT.gROOT.GetListOfFiles().FindObject(self._file_name): - self._file = f - # If not opened, open + # If the filename string is given, check if chain, if not open/create the ROOT file with this name + elif isinstance(f, str): + # Check if a chain - filename string resolves to a list longer than 1 (due to wildcards) + flist = glob.glob(f) + if len(flist) > 1: + f = flist + # Otherwise, it was a single file else: - # If file exists, initially open in the read-only mode (changed during write()) - if os.path.isfile(self._file_name): - self._file = ROOT.TFile(self._file_name, "read") - # If the file does not exist, create it + self._file_name = f + # print(self._file_name) + # If the file with that filename is already opened, use it (do not reopen) + if f := ROOT.gROOT.GetListOfFiles().FindObject(self._file_name): + self._file = f + # If not opened, open else: - self._file = ROOT.TFile(self._file_name, "create") + # If file exists, initially open in the read-only mode (changed during write()) + if os.path.isfile(self._file_name): + self._file = ROOT.TFile(self._file_name, "read") + # If the file does not exist, create it + else: + self._file = ROOT.TFile(self._file_name, "create") + else: + raise ValueError(f"Unsupported filename {f}. Can't open/create a file with a tree.") + + # If a list is given, it's a Chain + if isinstance(f, list): + self.is_tchain = True + # Create the TChain + self._tree = ROOT.TChain(self._tree_name, self._tree_name) + # Assign files to the chain + for el in f: + self._tree.Add(el) + ## Init/readout the tree from a file def _set_tree(self, t): """Init/readout the tree from a file""" # If the ROOT TTree is given, just use it - if isinstance(t, ROOT.TTree): + if isinstance(t, ROOT.TTree) or isinstance(t, ROOT.TChain): self._tree = t self._tree_name = t.GetName() # If the tree name string is given, open/create the ROOT TTree with this name @@ -958,7 +985,7 @@ def create_branch_from_field(self, value, set_branches=False, value_name=""): # self._tree.SetBranchAddress(value.name[1:], getattr(self, value.name).string) self._tree.SetBranchAddress(branch_name, getattr(self, value_name)) else: - raise ValueError(f"Unsupported type {type(value)}. Can't create a branch.") + raise ValueError(f"Unsupported type {type(value)}. 
Can't create a branch {branch_name}.") ## Assign branches to the instance - without calling it, the instance does not show the values read to the TTree def assign_branches(self): @@ -1196,6 +1223,8 @@ def __post_init__(self): if self._tree.GetTitle() == "": self._tree.SetTitle(self._tree_name) + print(self._tree, type(self._tree)) + self.create_branches() # ## Create metadata for the tree From f2cf9434b9c5a1d21f9a55378c19cd4a714fe165 Mon Sep 17 00:00:00 2001 From: fleg Date: Fri, 16 Aug 2024 16:12:39 +0200 Subject: [PATCH 62/85] New scripts to archive raw data into irods --- scripts/archiving/config.properties.gaa | 4 ++ scripts/archiving/config.properties.gp13 | 4 ++ scripts/archiving/create_archive.bash | 76 ++++++++++++++++++++++++ scripts/archiving/dc_gaa.xml | 32 ++++++++++ scripts/archiving/dc_gp13.xml | 32 ++++++++++ 5 files changed, 148 insertions(+) create mode 100644 scripts/archiving/config.properties.gaa create mode 100644 scripts/archiving/config.properties.gp13 create mode 100644 scripts/archiving/create_archive.bash create mode 100644 scripts/archiving/dc_gaa.xml create mode 100644 scripts/archiving/dc_gp13.xml diff --git a/scripts/archiving/config.properties.gaa b/scripts/archiving/config.properties.gaa new file mode 100644 index 00000000..8e58b386 --- /dev/null +++ b/scripts/archiving/config.properties.gaa @@ -0,0 +1,4 @@ +aipTempDirectory=/sps/grand/prod_grand/tests/archivage/archs/gaa/ +configMetadataDescriptiveDC=dc_gaa.xml +configDocumentation=GRAND_DMP_2024.pdf +representationID_1=representation1 \ No newline at end of file diff --git a/scripts/archiving/config.properties.gp13 b/scripts/archiving/config.properties.gp13 new file mode 100644 index 00000000..fa8ce9b0 --- /dev/null +++ b/scripts/archiving/config.properties.gp13 @@ -0,0 +1,4 @@ +aipTempDirectory=/sps/grand/prod_grand/tests/archivage/archs/gp13/ +configMetadataDescriptiveDC=dc_gp13.xml +configDocumentation=GRAND_DMP_2024.pdf +representationID_1=representation1 diff --git a/scripts/archiving/create_archive.bash b/scripts/archiving/create_archive.bash new file mode 100644 index 00000000..c93526ef --- /dev/null +++ b/scripts/archiving/create_archive.bash @@ -0,0 +1,76 @@ +#!/bin/bash +datadir="/sps/grand/data" +archive_root_name="doi+10.25520+in2p3.archive.grand" +irods_path='/grand/home/trirods/data/archives/' + +usage="$(basename "$0") [-d DATE] [-s SITE] [ +Archive some Grand raw files into irods : + -s site (gaa, gp13) + -d YYYY-MM to be archived + " + +while getopts "d:s:" option ${args}; do + case $option in + d) + if [[ ${OPTARG} =~ ^([0-9]{4})-([0][1-9]|[1][0-2]|[1-9])$ ]]; then + date=$(date --date="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-01" "+%Y_%m") + dir=$(date --date="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-01" "+%Y/%m") + else + echo "Date ${OPTARG} should be in format YYYY-MM" + exit 1 + fi + ;; + s) + if [[ ${OPTARG} =~ gp13|gaa ]] ; then + site=${OPTARG} + else + echo "Site should be gp13 or gaa" + exit 1 + fi + ;; + :) + printf "option -${OPTARG} need an argument\n" + exit 1;; + ?) # Invalid option + printf "Error: Invalid option -${OPTARG}\n" + exit 1;; + esac +done + +if [ ! "$date" ] || [ ! 
"$site" ]; then + echo "arguments -d and -s must be provided" + echo "$usage" >&2; exit 1 +fi + +outfile="${archive_root_name}.${site}.${date}" +logfile=archs/${site}/${outfile}--$(date "+%Y_%m_%d_%H%M%S").log + +find $datadir/$site/raw/$dir/ -name "*.bin" >list_files_${site} +echo "List of files to archive :" >> ${logfile} +cat list_files_${site} >> ${logfile} + +java -jar createAIP.jar --configfile=config.properties.${site} --listobjects=list_files_${site} -i ${outfile} + +echo "Archive ready to tar" >> ${logfile} + +tar -cvf archs/${site}/${outfile}.tar archs/${site}/${outfile} + +echo "Archive tared" >> ${logfile} + +echo "Push archs/${site}/${outfile}.tar to irods" >> ${logfile} +# Put file into irods + sfile=archs/${site}/${outfile}.tar + ipath="${irods_path}${site}/raw" + ifile="${ipath}/${outfile}.tar" + echo "imkdir -p $ipath" >> ${logfile} + imkdir -p $ipath >> ${logfile} 2>&1 + echo "iput -f $sfile $ifile" >> ${logfile} + #iput -f $sfile $ifile >> ${logfile} 2>&1 + #iput_status=$? + #if [ "$iput_status" -ne 0 ]; then + # notify=1 + #fi + +rm -rf archs/${site}/${outfile} +rm $sfile +echo "Month archived.">> ${logfile} diff --git a/scripts/archiving/dc_gaa.xml b/scripts/archiving/dc_gaa.xml new file mode 100644 index 00000000..102adbea --- /dev/null +++ b/scripts/archiving/dc_gaa.xml @@ -0,0 +1,32 @@ + + + + doi+10.25520+in2p3.archive+grand+gaa + + + Grand Raw Files from GAA + + + + Grand Observatory + Grand Observatory + + +Grand Observatory +2024 +Grand raw files + + Grand Raw Data from GAA Observatory + + + radio astronomy cosmics rays neutrinos + + + 2024 + + + + Argentina + + + diff --git a/scripts/archiving/dc_gp13.xml b/scripts/archiving/dc_gp13.xml new file mode 100644 index 00000000..b4470580 --- /dev/null +++ b/scripts/archiving/dc_gp13.xml @@ -0,0 +1,32 @@ + + + + doi+10.25520+in2p3.archive+grand+gp13 + + + Grand Raw Files from GP13 + + + + Grand Observatory + Grand Observatory + + +Grand Observatory +2024 +Grand raw files + + Grand Raw Data from GP13 Observatory + + + radio astronomy cosmics rays neutrinos + + + 2024 + + + + China + + + From fb2d230e077b7cb384ddf5d83246bb685d995eb3 Mon Sep 17 00:00:00 2001 From: fleg Date: Mon, 26 Aug 2024 17:20:27 +0200 Subject: [PATCH 63/85] Improved log names. --- scripts/transfers/bintoroot.bash | 8 +++++--- scripts/transfers/ccscript.bash | 14 +++++++------- scripts/transfers/refresh_mat_views.bash | 0 3 files changed, 12 insertions(+), 10 deletions(-) mode change 100755 => 100644 scripts/transfers/bintoroot.bash mode change 100644 => 100755 scripts/transfers/refresh_mat_views.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash old mode 100755 new mode 100644 index 77f37f27..64cdfafa --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -9,14 +9,16 @@ register_root='/pbs/home/p/prod_grand/softs/grand/granddb/register_file_in_db.py config_file='/pbs/home/p/prod_grand/softs/grand/scripts/transfers/config-prod.ini' sps_path='/sps/grand/' irods_path='/grand/home/trirods/' - +submit_base_name='' # Get tag and database file to use -while getopts ":d:g:" option; do +while getopts ":d:g:n:" option; do case $option in d) root_dest=${OPTARG};; g) gtot_options=${OPTARG};; + n) + submit_base_name=${OPTARG};; :) printf "option -${OPTARG} need an argument\n" exit 1;; @@ -47,7 +49,7 @@ do mkdir -p $dest >/dev/null 2>&1 fi dirlogs=${root_dest}/../logs - logfile=${dirlogs}/bin2root-${filename%.*} + logfile=${dirlogs}/${submit_base_name}-bin2root-${filename%.*} if [ ! 
-d $dirlogs ];then mkdir -p $dirlogs >/dev/null 2>&1 fi diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 523daa80..3a38944b 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -18,7 +18,7 @@ nbfiles=3 #time required to run bin2root on one file bin2rootduration=15 -# Notification options q +# Notification options mail_user='fleg@lpnhe.in2p3.fr' mail_type='FAIL,TIME_LIMIT,INVALID_DEPEND' @@ -88,7 +88,7 @@ fi # Determine root_dir from database path root_dest=${db%/logs*}/GrandRoot/ submit_dir=$(dirname "${db}") -submit_base_name=s${tag} +submit_base_name=${site}_${tag} crap_dir=${db%/logs*}/raw/crap if [ ! -d $root_dest ];then @@ -104,7 +104,7 @@ fi outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" echo "#!/bin/bash" > $outfile echo "$register_transfers -d $db -t $tag" >> $outfile -jregid=$(sbatch -t 0-00:10 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/slurm-${submit_base_name}-register-transfer --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) +jregid=$(sbatch -t 0-00:10 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/${submit_base_name}-register-transfer.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) jregid=$(echo $jregid |awk '{print $NF}') # List files to be converted and group them by bunches of nbfiles @@ -136,10 +136,10 @@ do outfile="${submit_dir}/${submit_base_name}-${j}.bash" logfile="${submit_dir}/${submit_base_name}-${j}.log" echo "#!/bin/bash" > $outfile - echo "$bin2root -g '$gtot_option' -d $root_dest ${listoffiles[$j]}" >> $outfile + echo "$bin2root -g '$gtot_option' -n $submit_base_name -d $root_dest ${listoffiles[$j]}" >> $outfile #submit script echo "submit $outfile" - jid=$(sbatch --dependency=afterany:${jregid} -t 0-${jobtime} -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/slurm-${submit_base_name}-${j} --mem 2G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) + jid=$(sbatch --dependency=afterany:${jregid} -t 0-${jobtime} -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/${submit_base_name}-${j}.log --mem 2G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) jid=$(echo $jid |awk '{print $NF}') convjobs=$convjobs":"$jid done @@ -149,7 +149,7 @@ if [ "$convjobs" = "" ]; then else dep="--dependency=afterany${convjobs}" #finally refresh the materialized views in the database and the update of monitoring - sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat -o ${submit_dir}/slurm-refresh_mat --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} - sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring -o ${submit_dir}/slurm-update_webmonitoring --mem 12G --mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} + sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat_${tag} -o ${submit_dir}/refresh_mat_${tag}.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} + sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring_${tag} -o ${submit_dir}/update_webmonitoring_${tag}.log --mem 12G --mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} fi diff --git a/scripts/transfers/refresh_mat_views.bash b/scripts/transfers/refresh_mat_views.bash old mode 100644 new mode 100755 From d508583dd8de9be100be313f42f08b3f4ba5d9d5 Mon Sep 17 00:00:00 2001 From: fleg Date: Fri, 30 Aug 2024 12:38:14 +0200 Subject: [PATCH 64/85] Added tar of logs older than 2 months.
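A minimal sketch of the month-selection logic used by tar_logs.bash below (assuming GNU date; the site and depth values are illustrative): the window runs from the first day of the month N months back up to, but excluding, the first day of the following month.

    #!/bin/bash
    # Sketch: select the calendar month "monthbefore" months back (GNU date assumed)
    site='gp13'
    monthbefore=2
    monthstart="$(date -d "$(date +%Y-%m-1) - ${monthbefore} month")"
    monthend="$(date -d "$(date +%Y-%m-1) - $((monthbefore-1)) month")"
    # Logs last modified in [monthstart, monthend) are appended to that month's tar
    find "/sps/grand/data/${site}/logs/" -type f -newermt "${monthstart}" -not -newermt "${monthend}"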
--- scripts/transfers/ccscript.bash | 2 ++ scripts/transfers/tar_logs.bash | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100755 scripts/transfers/tar_logs.bash diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 3a38944b..fa25d8d9 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -9,6 +9,7 @@ bin2root='/pbs/home/p/prod_grand/scripts/transfers/bintoroot.bash' register_transfers='/pbs/home/p/prod_grand/scripts/transfers/register_transfer.bash' refresh_mat_script='/pbs/home/p/prod_grand/scripts/transfers/refresh_mat_views.bash' update_web_script='/sps/grand/prod_grand/monitoring_page/launch_webmonitoring_update.bash' +tar_logs_script='/pbs/home/p/prod_grand/scripts/transfers/tar_logs.bash' # gtot options for conversion -g1 for gp13 -f2 for gaa gtot_option="-g1" @@ -151,5 +152,6 @@ else #finally refresh the materialized views in the database and the update of monitoring sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat_${tag} -o ${submit_dir}/refresh_mat_${tag}.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring_${tag} -o ${submit_dir}/update_webmonitoring_${tag}.log --mem 12G --mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} + sbatch -t 0-00:05 -n 1 -J tar_logs_${tag} -o ${submit_dir}/tar_logs_${tag}.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} --wrap="${tar_logs_script} -s ${site} -d 2" fi diff --git a/scripts/transfers/tar_logs.bash b/scripts/transfers/tar_logs.bash new file mode 100755 index 00000000..7e1112d0 --- /dev/null +++ b/scripts/transfers/tar_logs.bash @@ -0,0 +1,28 @@ +#!/bin/bash +# Script to tar all logs older than -d months for site -s site. +data_path='/sps/grand/data' + +while getopts ":d:s:" option ${args}; do + case $option in + d) + monthbefore=${OPTARG};; + s) + site=${OPTARG};; + :) + printf "option -${OPTARG} needs an argument\n" + exit 1;; + ?) # Invalid option + printf "Error: Invalid option -${OPTARG}\n" + exit 1;; + esac +done + +if [ -z "$site" ] || [ -z "$monthbefore" ];then + printf "Missing option -s or -d\n" + exit 1 +fi + +monthstart="$(date -d "$(date +%y-%m-1) - ${monthbefore} month")" +monthend=$(date -d "$(date +%y-%m-1) - $((${monthbefore}-1)) month") +datetag="$(date -d "$(date +%y-%m-1) - ${monthbefore} month" +%Y-%m)" +find /sps/grand/data/${site}/logs/ -type f -newermt "${monthstart}" -and -not -newermt "${monthend}" -and -not -name '*.tgz' -and -not -name '*.tar' -and -not -name '*.gz' |xargs tar --remove-files -uvf /sps/grand/data/${site}/logs/logs_${datetag}.tar From 41b41b094d84818d11122be5205681beab00f4d5 Mon Sep 17 00:00:00 2001 From: fleg Date: Mon, 9 Sep 2024 14:58:55 +0200 Subject: [PATCH 65/85] Modification to use new env for rhel9 platform --- scripts/transfers/bintoroot.bash | 11 +++++++-- scripts/transfers/ccscript.bash | 31 ++++++++++++++++-------- scripts/transfers/refresh_mat_views.bash | 9 ++++++- scripts/transfers/register_transfer.bash | 8 +++++- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index 64cdfafa..bede4322 100644 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -30,10 +30,17 @@ done shift $(($OPTIND - 1)) +uname -r |grep el9 >/dev/null +el9=$?
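+# el9 holds the exit status of the grep above: 0 when uname -r contains "el9"
+# (a RHEL9 kernel), non-zero otherwise; the "$el9" -ne 0 tests below keep the
+# old grandlib_2304 environment on CentOS 7 nodes and use grandlib_2409 on RHEL9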
+ cd /pbs/home/p/prod_grand/softs/grand source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh -conda activate /sps/grand/software/conda/grandlib_2304 -source env/setup.sh +if [ "$el9" -ne 0 ]; then + conda activate /sps/grand/software/conda/grandlib_2304 +else + conda activate /sps/grand/software/conda/grandlib_2409 +fi +source env/setup.sh cd /pbs/home/p/prod_grand/scripts/transfers diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index fa25d8d9..cf5910c4 100644 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -23,17 +23,28 @@ bin2rootduration=15 mail_user='fleg@lpnhe.in2p3.fr' mail_type='FAIL,TIME_LIMIT,INVALID_DEPEND' +uname -r |grep el9 >/dev/null +el9=$? + #Export some env to make irods works -export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/lib::/pbs/software/centos-7-x86_64/irods/4.3.1/lib:/pbs/software/centos-7-x86_64/irods/irods-externals/4.3.1/lib -export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/bin:/opt/software/rfio-hpss/prod/bin:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/bin:/pbs/software/centos-7-x86_64/fs4/prod/bin:/usr/lib64/qt-3.3/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/pbs/software/centos-7-x86_64/suptools/prod/bin:/opt/ccin2p3/bin:/pbs/software/centos-7-x86_64/irods/utils:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:. -export _LMFILES_=/pbs/software/centos-7-x86_64/modules/modulefiles/DataManagement/irods/4.3.1 -export IRODS_PLUGINS_HOME=/pbs/software/centos-7-x86_64/irods/4.3.1/lib/plugins -export MODULEPATH=/pbs/software/centos-7-x86_64/modules/modulefiles:/etc/modulefiles export LOADEDMODULES=DataManagement/irods/4.3.1 -export __MODULES_SHARE_PATH=/pbs/software/centos-7-x86_64/irods/utils:2:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:2 export TRIRODS_DATA_DIR=/grand/home/trirods/data export BASH_ENV=/usr/share/Modules/init/bash +if [ "$el9" -ne 0 ]; then + export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/lib::/pbs/software/centos-7-x86_64/irods/4.3.1/lib:/pbs/software/centos-7-x86_64/irods/irods-externals/4.3.1/lib + export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/bin:/opt/software/rfio-hpss/prod/bin:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/bin:/pbs/software/centos-7-x86_64/fs4/prod/bin:/usr/lib64/qt-3.3/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/pbs/software/centos-7-x86_64/suptools/prod/bin:/opt/ccin2p3/bin:/pbs/software/centos-7-x86_64/irods/utils:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:. 
+ export _LMFILES_=/pbs/software/centos-7-x86_64/modules/modulefiles/DataManagement/irods/4.3.1 + export IRODS_PLUGINS_HOME=/pbs/software/centos-7-x86_64/irods/4.3.1/lib/plugins + export MODULEPATH=/pbs/software/centos-7-x86_64/modules/modulefiles:/etc/modulefiles + export __MODULES_SHARE_PATH=/pbs/software/centos-7-x86_64/irods/utils:2:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:2 +else + export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/redhat-9-x86_64/irods/4.3.1/lib:/pbs/software/redhat-9-x86_64/irods/irods-externals/4.3.1/lib + export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/software/rfio-hpss/prod/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/opt/ccin2p3/bin:/pbs/software/redhat-9-x86_64/irods/utils:/pbs/software/redhat-9-x86_64/irods/4.3.1/bin:. + export _LMFILES_=/pbs/software/modulefiles/redhat-9-x86_64/DataManagement/irods/4.3.1 + export IRODS_PLUGINS_HOME=/pbs/software/redhat-9-x86_64/irods/4.3.1/lib/plugins + export MODULEPATH=/etc/scl/modulefiles:/pbs/software/modulefiles/redhat-9-x86_64:/etc/modulefiles +fi # manage call from remote restricted ssh command (extract opt parameters) # default args @@ -105,7 +116,7 @@ fi outfile="${submit_dir}/${submit_base_name}-register-transfer.bash" echo "#!/bin/bash" > $outfile echo "$register_transfers -d $db -t $tag" >> $outfile -jregid=$(sbatch -t 0-00:10 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/${submit_base_name}-register-transfer.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) +jregid=$(sbatch -t 0-00:10 -n 1 -J ${submit_base_name}-register-transfer -o ${submit_dir}/${submit_base_name}-register-transfer.log --mem 1G --constraint el9 --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) jregid=$(echo $jregid |awk '{print $NF}') # List files to be converted and group them by bunchs of nbfiles @@ -140,7 +151,7 @@ do echo "$bin2root -g '$gtot_option' -n $submit_base_name -d $root_dest ${listoffiles[$j]}" >> $outfile #submit script echo "submit $outfile" - jid=$(sbatch --dependency=afterany:${jregid} -t 0-${jobtime} -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/${submit_base_name}-${j}.log --mem 2G --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) + jid=$(sbatch --dependency=afterany:${jregid} -t 0-${jobtime} -n 1 -J ${submit_base_name}-${j} -o ${submit_dir}/${submit_base_name}-${j}.log --mem 2G --constraint el9 --mail-user=${mail_user} --mail-type=${mail_type} ${outfile} ) jid=$(echo $jid |awk '{print $NF}') convjobs=$convjobs":"$jid done @@ -150,8 +161,8 @@ if [ "$convjobs" = "" ]; then else dep="--dependency=afterany${convjobs}" #finally refresh the materialized views in the database and the update of monitoring - sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat_${tag} -o ${submit_dir}/refresh_mat_${tag}.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} + sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat_${tag} -o ${submit_dir}/refresh_mat_${tag}.log --mem 1G --constraint el9 --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring_${tag} -o ${submit_dir}/update_webmonitoring_${tag}.log --mem 12G --mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} - sbatch -t 0-00:05 -n 1 -J tar_logs_${tag} -o ${submit_dir}/tar_logs_${tag}.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} --wrap="${tar_logs_script} -s ${site} 
-d 2" + sbatch -t 0-00:15 -n 1 -J tar_logs_${tag} -o ${submit_dir}/tar_logs_${tag}.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} --wrap="${tar_logs_script} -s ${site,,} -d 2" fi diff --git a/scripts/transfers/refresh_mat_views.bash b/scripts/transfers/refresh_mat_views.bash index d0044f3c..f2e716a6 100755 --- a/scripts/transfers/refresh_mat_views.bash +++ b/scripts/transfers/refresh_mat_views.bash @@ -1,7 +1,14 @@ #!/bin/bash +uname -r |grep el9 >/dev/null +el9=$? + cd /pbs/home/p/prod_grand/softs/grand source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh -conda activate /sps/grand/software/conda/grandlib_2304 +if [ "$el9" -ne 0 ]; then + conda activate /sps/grand/software/conda/grandlib_2304 +else + conda activate /sps/grand/software/conda/grandlib_2409 +fi source env/setup.sh cd /pbs/home/p/prod_grand/scripts/transfers python3 /pbs/home/p/prod_grand/softs/grand/granddb/refresh_mat_views.py \ No newline at end of file diff --git a/scripts/transfers/register_transfer.bash b/scripts/transfers/register_transfer.bash index 5e4f9d39..39987a45 100644 --- a/scripts/transfers/register_transfer.bash +++ b/scripts/transfers/register_transfer.bash @@ -19,10 +19,16 @@ while getopts ":d:t:" option; do esac done +uname -r |grep el9 >/dev/null +el9=$? cd /pbs/home/p/prod_grand/softs/grand source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh -conda activate /sps/grand/software/conda/grandlib_2304 +if [ "$el9" -ne 0 ]; then + conda activate /sps/grand/software/conda/grandlib_2304 +else + conda activate /sps/grand/software/conda/grandlib_2409 +fi source env/setup.sh cd /pbs/home/p/prod_grand/scripts/transfers From c00f9454abd0f919dd86095a50081f8902aa26f8 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 10 Sep 2024 11:12:01 +0200 Subject: [PATCH 66/85] Modification to use new env for rhel9 plateform --- scripts/transfers/bintoroot.bash | 23 ++++++++++++++++++++--- scripts/transfers/ccscript.bash | 23 ----------------------- 2 files changed, 20 insertions(+), 26 deletions(-) mode change 100644 => 100755 scripts/transfers/bintoroot.bash mode change 100644 => 100755 scripts/transfers/ccscript.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash old mode 100644 new mode 100755 index bede4322..2b6a5e30 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -30,16 +30,33 @@ done shift $(($OPTIND - 1)) +cd /pbs/home/p/prod_grand/softs/grand +source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh uname -r |grep el9 >/dev/null el9=$? 
-cd /pbs/home/p/prod_grand/softs/grand -source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh +#Export some env to make irods works +export LOADEDMODULES=DataManagement/irods/4.3.1 +export TRIRODS_DATA_DIR=/grand/home/trirods/data +export BASH_ENV=/usr/share/Modules/init/bash + if [ "$el9" -ne 0 ]; then conda activate /sps/grand/software/conda/grandlib_2304 + export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/lib::/pbs/software/centos-7-x86_64/irods/4.3.1/lib:/pbs/software/centos-7-x86_64/irods/irods-externals/4.3.1/lib + export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/bin:/opt/software/rfio-hpss/prod/bin:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/bin:/pbs/software/centos-7-x86_64/fs4/prod/bin:/usr/lib64/qt-3.3/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/pbs/software/centos-7-x86_64/suptools/prod/bin:/opt/ccin2p3/bin:/pbs/software/centos-7-x86_64/irods/utils:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:. + export _LMFILES_=/pbs/software/centos-7-x86_64/modules/modulefiles/DataManagement/irods/4.3.1 + export IRODS_PLUGINS_HOME=/pbs/software/centos-7-x86_64/irods/4.3.1/lib/plugins + export MODULEPATH=/pbs/software/centos-7-x86_64/modules/modulefiles:/etc/modulefiles + export __MODULES_SHARE_PATH=/pbs/software/centos-7-x86_64/irods/utils:2:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:2 else - conda activate /sps/grand/software/conda/grandlib_2409 + conda activate /sps/grand/software/conda/grandlib_2409 + export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/redhat-9-x86_64/irods/4.3.1/lib:/pbs/software/redhat-9-x86_64/irods/irods-externals/4.3.1/lib + export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/software/rfio-hpss/prod/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/opt/ccin2p3/bin:/pbs/software/redhat-9-x86_64/irods/utils:/pbs/software/redhat-9-x86_64/irods/4.3.1/bin:. + export _LMFILES_=/pbs/software/modulefiles/redhat-9-x86_64/DataManagement/irods/4.3.1 + export IRODS_PLUGINS_HOME=/pbs/software/redhat-9-x86_64/irods/4.3.1/lib/plugins + export MODULEPATH=/etc/scl/modulefiles:/pbs/software/modulefiles/redhat-9-x86_64:/etc/modulefiles fi + source env/setup.sh cd /pbs/home/p/prod_grand/scripts/transfers diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash old mode 100644 new mode 100755 index cf5910c4..4ce79805 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -23,29 +23,6 @@ bin2rootduration=15 mail_user='fleg@lpnhe.in2p3.fr' mail_type='FAIL,TIME_LIMIT,INVALID_DEPEND' -uname -r |grep el9 >/dev/null -el9=$? 
- -#Export some env to make irods works -export LOADEDMODULES=DataManagement/irods/4.3.1 -export TRIRODS_DATA_DIR=/grand/home/trirods/data -export BASH_ENV=/usr/share/Modules/init/bash - -if [ "$el9" -ne 0 ]; then - export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/lib::/pbs/software/centos-7-x86_64/irods/4.3.1/lib:/pbs/software/centos-7-x86_64/irods/irods-externals/4.3.1/lib - export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/bin:/opt/software/rfio-hpss/prod/bin:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/bin:/pbs/software/centos-7-x86_64/fs4/prod/bin:/usr/lib64/qt-3.3/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/pbs/software/centos-7-x86_64/suptools/prod/bin:/opt/ccin2p3/bin:/pbs/software/centos-7-x86_64/irods/utils:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:. - export _LMFILES_=/pbs/software/centos-7-x86_64/modules/modulefiles/DataManagement/irods/4.3.1 - export IRODS_PLUGINS_HOME=/pbs/software/centos-7-x86_64/irods/4.3.1/lib/plugins - export MODULEPATH=/pbs/software/centos-7-x86_64/modules/modulefiles:/etc/modulefiles - export __MODULES_SHARE_PATH=/pbs/software/centos-7-x86_64/irods/utils:2:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:2 -else - export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/redhat-9-x86_64/irods/4.3.1/lib:/pbs/software/redhat-9-x86_64/irods/irods-externals/4.3.1/lib - export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/software/rfio-hpss/prod/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/opt/ccin2p3/bin:/pbs/software/redhat-9-x86_64/irods/utils:/pbs/software/redhat-9-x86_64/irods/4.3.1/bin:. 
- export _LMFILES_=/pbs/software/modulefiles/redhat-9-x86_64/DataManagement/irods/4.3.1 - export IRODS_PLUGINS_HOME=/pbs/software/redhat-9-x86_64/irods/4.3.1/lib/plugins - export MODULEPATH=/etc/scl/modulefiles:/pbs/software/modulefiles/redhat-9-x86_64:/etc/modulefiles -fi - # manage call from remote restricted ssh command (extract opt parameters) # default args fullscriptpath=${BASH_SOURCE[0]} From 7d41ab8c37390706af20cc33ce0e21fe8689ffe6 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 10 Sep 2024 11:13:16 +0200 Subject: [PATCH 67/85] Modification to use new env for rhel9 platform --- scripts/transfers/ccscript.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 4ce79805..91976b4a 100755 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -139,7 +139,7 @@ else dep="--dependency=afterany${convjobs}" #finally refresh the materialized views in the database and the update of monitoring sbatch ${dep} -t 0-00:10 -n 1 -J refresh_mat_${tag} -o ${submit_dir}/refresh_mat_${tag}.log --mem 1G --constraint el9 --mail-user=${mail_user} --mail-type=${mail_type} ${refresh_mat_script} - sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring_${tag} -o ${submit_dir}/update_webmonitoring_${tag}.log --mem 12G --mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} + sbatch ${dep} -t 0-01:00 -n 1 -J update_webmonitoring_${tag} -o ${submit_dir}/update_webmonitoring_${tag}.log --mem 12G --constraint el9 --mail-user=${mail_user} --mail-type=${mail_type} ${update_web_script} sbatch -t 0-00:15 -n 1 -J tar_logs_${tag} -o ${submit_dir}/tar_logs_${tag}.log --mem 1G --mail-user=${mail_user} --mail-type=${mail_type} --wrap="${tar_logs_script} -s ${site,,} -d 2" fi From 89263f94654ead5c31b4a11516e70e6950c3a298 Mon Sep 17 00:00:00 2001 From: LEGRAND Francois Date: Tue, 17 Sep 2024 18:44:53 +0200 Subject: [PATCH 68/85] Modification to use new env for rhel9 platform @ccin2p3. Exclusion of TR files from GrandRoot conversion. --- scripts/transfers/bintoroot.bash | 111 +++++++++++------------ scripts/transfers/ccscript.bash | 1 + scripts/transfers/refresh_mat_views.bash | 12 +-- 3 files changed, 58 insertions(+), 66 deletions(-) mode change 100755 => 100644 scripts/transfers/refresh_mat_views.bash diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash index 2b6a5e30..bb5a184c 100755 --- a/scripts/transfers/bintoroot.bash +++ b/scripts/transfers/bintoroot.bash @@ -30,33 +30,21 @@ done shift $(($OPTIND - 1)) +export PLATFORM=redhat-9-x86_64 cd /pbs/home/p/prod_grand/softs/grand source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh -uname -r |grep el9 >/dev/null -el9=$?
+ #Export some env to make irods works export LOADEDMODULES=DataManagement/irods/4.3.1 export TRIRODS_DATA_DIR=/grand/home/trirods/data export BASH_ENV=/usr/share/Modules/init/bash - -if [ "$el9" -ne 0 ]; then - conda activate /sps/grand/software/conda/grandlib_2304 - export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/lib::/pbs/software/centos-7-x86_64/irods/4.3.1/lib:/pbs/software/centos-7-x86_64/irods/irods-externals/4.3.1/lib - export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/bin:/opt/software/rfio-hpss/prod/bin:/pbs/software/centos-7-x86_64/oracle/12.2.0/instantclient/bin:/pbs/software/centos-7-x86_64/fs4/prod/bin:/usr/lib64/qt-3.3/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/pbs/software/centos-7-x86_64/suptools/prod/bin:/opt/ccin2p3/bin:/pbs/software/centos-7-x86_64/irods/utils:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:. - export _LMFILES_=/pbs/software/centos-7-x86_64/modules/modulefiles/DataManagement/irods/4.3.1 - export IRODS_PLUGINS_HOME=/pbs/software/centos-7-x86_64/irods/4.3.1/lib/plugins - export MODULEPATH=/pbs/software/centos-7-x86_64/modules/modulefiles:/etc/modulefiles - export __MODULES_SHARE_PATH=/pbs/software/centos-7-x86_64/irods/utils:2:/pbs/software/centos-7-x86_64/irods/4.3.1/bin:2 -else - conda activate /sps/grand/software/conda/grandlib_2409 - export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/redhat-9-x86_64/irods/4.3.1/lib:/pbs/software/redhat-9-x86_64/irods/irods-externals/4.3.1/lib - export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/software/rfio-hpss/prod/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/opt/ccin2p3/bin:/pbs/software/redhat-9-x86_64/irods/utils:/pbs/software/redhat-9-x86_64/irods/4.3.1/bin:. - export _LMFILES_=/pbs/software/modulefiles/redhat-9-x86_64/DataManagement/irods/4.3.1 - export IRODS_PLUGINS_HOME=/pbs/software/redhat-9-x86_64/irods/4.3.1/lib/plugins - export MODULEPATH=/etc/scl/modulefiles:/pbs/software/modulefiles/redhat-9-x86_64:/etc/modulefiles -fi - +export LD_LIBRARY_PATH=/pbs/throng/grand/soft/lib/:/pbs/software/redhat-9-x86_64/irods/4.3.1/lib:/pbs/software/redhat-9-x86_64/irods/irods-externals/4.3.1/lib +export PATH=/pbs/throng/grand/soft/miniconda3/condabin:/pbs/throng/grand/soft/bin/:/pbs/throng/grand/bin/:/opt/software/rfio-hpss/prod/bin:/usr/share/Modules/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/puppetlabs/bin:/opt/ccin2p3/bin:/pbs/software/redhat-9-x86_64/irods/utils:/pbs/software/redhat-9-x86_64/irods/4.3.1/bin:. +export _LMFILES_=/pbs/software/modulefiles/redhat-9-x86_64/DataManagement/irods/4.3.1 +export IRODS_PLUGINS_HOME=/pbs/software/redhat-9-x86_64/irods/4.3.1/lib/plugins +export MODULEPATH=/etc/scl/modulefiles:/pbs/software/modulefiles/redhat-9-x86_64:/etc/modulefiles +conda activate /sps/grand/software/conda/grandlib_2409 source env/setup.sh cd /pbs/home/p/prod_grand/scripts/transfers @@ -64,44 +52,53 @@ cd /pbs/home/p/prod_grand/scripts/transfers notify=0 for file in "$@" do - echo "converting ${file} to GrandRoot" - filename=$(basename $file) - tmp=${filename#*_} - dateobs=${tmp:0:8} - dest="${root_dest}/${dateobs:0:4}/${dateobs:4:2}" - if [ ! 
-d $dest ];then - mkdir -p $dest >/dev/null 2>&1 - fi - dirlogs=${root_dest}/../logs - logfile=${dirlogs}/${submit_base_name}-bin2root-${filename%.*} - if [ ! -d $dirlogs ];then - mkdir -p $dirlogs >/dev/null 2>&1 - fi - # Convert file - ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} - conv_status=$? - if [ "$conv_status" -ne 0 ]; then - notify=1 - fi - echo $conv_status >> ${logfile} - # Put GrandRoot file into irods - sfile=${dest}/${filename%.*}.root - ifile=${sfile/$sps_path/$irods_path} - ipath=${ifile%/*} - echo "imkdir -p $ipath" >> ${logfile} - imkdir -p $ipath >> ${logfile} 2>&1 - echo "iput -f $sfile $ifile" >> ${logfile} - iput -f $sfile $ifile >> ${logfile} 2>&1 - iput_status=$? - if [ "$iput_status" -ne 0 ]; then - notify=1 + if [ -f $file ]; then + echo "converting ${file} to GrandRoot" + filename=$(basename $file) + tmp=${filename#*_} + dateobs=${tmp:0:8} + dest="${root_dest}/${dateobs:0:4}/${dateobs:4:2}" + if [ ! -d $dest ];then + mkdir -p $dest >/dev/null 2>&1 + fi + dirlogs=${root_dest}/../logs + logfile=${dirlogs}/${submit_base_name}-bin2root-${filename%.*}.log + if [ ! -d $dirlogs ];then + mkdir -p $dirlogs >/dev/null 2>&1 + fi + #Determine if file is TR (so no conversion) + tr=$(basename ${file} | awk -F_ '{print $5}') + if [ $tr == "TR" ]; then + cp ${file} ${dest}/${filename%.*}.root + conv_status=0 + else + # Convert file + ${gtot_path} ${gtot_options} -i ${file} -o ${dest}/${filename%.*}.root >> ${logfile} + conv_status=$? + fi + if [ "$conv_status" -ne 0 ]; then + notify=1 + fi + echo $conv_status >> ${logfile} + # Put GrandRoot file into irods + sfile=${dest}/${filename%.*}.root + ifile=${sfile/$sps_path/$irods_path} + ipath=${ifile%/*} + echo "imkdir -p $ipath" >> ${logfile} + imkdir -p $ipath >> ${logfile} 2>&1 + echo "iput -f $sfile $ifile" >> ${logfile} + iput -f $sfile $ifile >> ${logfile} 2>&1 + iput_status=$? + if [ "$iput_status" -ne 0 ]; then + notify=1 + fi + # Register conversion result into the database + echo "Register conversion" >> ${logfile} + python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} >> ${logfile} 2>&1 + # Register root file into db + echo "register file in database" >> ${logfile} + python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root >> ${logfile} 2>&1 fi - # Register conversion result into the database - echo "Register convertion" >> ${logfile} - python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} >> ${logfile} 2>&1 - # Register root file into db - echo "register file in database" >> ${logfile} - python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root >> ${logfile} 2>&1 done if [ "$notify" -ne "0" ]; then diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash index 91976b4a..f46222ae 100755 --- a/scripts/transfers/ccscript.bash +++ b/scripts/transfers/ccscript.bash @@ -64,6 +64,7 @@ case $site in gtot_option="-g1";; esac +export PLATFORM=redhat-9-x86_64 #test dbfile exists and tag is set if [ -z "$tag" ] || [ -z "$db" ];then diff --git a/scripts/transfers/refresh_mat_views.bash b/scripts/transfers/refresh_mat_views.bash old mode 100755 new mode 100644 index f2e716a6..d2b2b390 --- a/scripts/transfers/refresh_mat_views.bash +++ b/scripts/transfers/refresh_mat_views.bash @@ -1,14 +1,8 @@ #!/bin/bash -uname -r |grep el9 >/dev/null -el9=$?
- cd /pbs/home/p/prod_grand/softs/grand +export PLATFORM=redhat-9-x86_64 source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh -if [ "$el9" -ne 0 ]; then - conda activate /sps/grand/software/conda/grandlib_2304 -else - conda activate /sps/grand/software/conda/grandlib_2409 -fi +conda activate /sps/grand/software/conda/grandlib_2409 source env/setup.sh -cd /pbs/home/p/prod_grand/scripts/transfers +#cd /pbs/home/p/prod_grand/scripts/transfers python3 /pbs/home/p/prod_grand/softs/grand/granddb/refresh_mat_views.py \ No newline at end of file From 455f69e674b73b4c5aa73bd95d6bcaab7b243032 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Wed, 18 Sep 2024 15:53:59 +0200 Subject: [PATCH 69/85] Added TChain handling to DataFile. Moved getting the branch in get_list_of_dus() after the draw() as a bug workaround (branch dereferencing after draw). --- grand/dataio/root_trees.py | 43 +++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index 040b7933..7e54b316 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -1223,8 +1223,6 @@ def __post_init__(self): if self._tree.GetTitle() == "": self._tree.SetTitle(self._tree_name) - print(self._tree, type(self._tree)) - self.create_branches() # ## Create metadata for the tree @@ -1459,12 +1457,13 @@ def get_list_of_dus(self): except: current_entry = None - # Get the detector units branch - du_br = self._tree.GetBranch("du_id") - count = self.draw("du_id", "", "goff") detector_units = np.unique(np.array(np.frombuffer(self.get_v1(), dtype=np.float64, count=count)).astype(int)) + # Get the detector units branch + # It has to be here, not before the draw(), due to a bug in PyROOT + du_br = self._tree.GetBranch("du_id") + # If there was an entry read before this action, come back to this entry if current_entry is not None: du_br.GetEntry(current_entry) @@ -2613,6 +2612,15 @@ class DataFile: tree_types = defaultdict(dict) """Holds dict of tree types, each containing a dict of tree names with tree meta-data as values""" + ## Does this instance hold a chain of files + is_tchain = False + """Does this instance hold a chain of files""" + + ## File list in case this is a chain + flist = [] + """File list in case this is a chain""" + + def __init__(self, filename): """filename can be either a string or a ROOT.TFile""" @@ -2620,12 +2628,24 @@ def __init__(self, filename): self.dict_of_trees = {} self.list_of_trees = [] self.tree_types = defaultdict(dict) + self.is_tchain = False + self.flist = [] # If a string given, open the file if type(filename) is str: - f = ROOT.TFile(filename) - self.f = f - self.filename = filename + # Check if a chain - filename string resolves to a list longer than 1 (due to wildcards) + flist = glob.glob(filename) + if len(flist) > 1: + f = ROOT.TFile(flist[0]) + self.f = f + self.filename = flist[0] + self.is_tchain = True + self.flist = flist + # Single file + else: + f = ROOT.TFile(filename) + self.f = f + self.filename = filename elif type(filename) is ROOT.TFile: self.f = filename self.filename = self.f.GetName() @@ -2642,6 +2662,13 @@ def __init__(self, filename): # Get the basic information about the tree tree_info = self.get_tree_info(t) + # If we want a TChain + if self.is_tchain: + t = ROOT.TChain(t.GetName(), t.GetName()) + # Assign files to the chain + for el in self.flist: + t.Add(el) + # Add the tree to a dict for this tree class self.tree_types[tree_info["type"]][tree_info["name"]] = tree_info From
400d5cc43396ec29627c968e3b758acd47a7d834 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Wed, 18 Sep 2024 16:41:40 +0200 Subject: [PATCH 70/85] Improved print() of DataFile for TChains --- grand/dataio/root_trees.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index 7e54b316..9d41ebab 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -1071,6 +1071,8 @@ def get_metadata_as_dict(tree): metadata = {} + # ToDo: this should create some lists of values for TChains + for el in tree.GetUserInfo(): try: val = el.GetVal() @@ -2669,6 +2671,9 @@ def __init__(self, filename): for el in self.flist: t.Add(el) + # Modify the number of events for this TChain + tree_info["evt_cnt"] = t.GetEntries() + # Add the tree to a dict for this tree class self.tree_types[tree_info["type"]][tree_info["name"]] = tree_info @@ -2730,7 +2735,15 @@ def __exit__(self, exc_type, exc_val, exc_tb): def print(self): """Prints the information about the TTrees in the file""" - print(f"File size: {self.f.GetSize():40}") + # If this file is a chain + if self.is_tchain: + print("This DataFile is a chain of the following files:") + print(self.flist) + print(f"The first file size: {self.f.GetSize():40}") + print("Most of the information below is based on the tree in the first file") + else: + print(f"File size: {self.f.GetSize():40}") + print(f"Tree classes found in the file: {str([el for el in self.tree_types.keys()]):40}") for key in self.tree_types: From 2ade679e53d6afb39f9a93ee3f953dfe0845f3a6 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Wed, 18 Sep 2024 17:02:18 +0200 Subject: [PATCH 71/85] Added handling of a list of filenames to DataFile --- grand/dataio/root_trees.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index 9d41ebab..2fef9648 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -2648,6 +2648,19 @@ def __init__(self, filename): f = ROOT.TFile(filename) self.f = f self.filename = filename + # If list of files is given, make a TChain + elif type(filename) is list: + if len(filename) > 1: + f = ROOT.TFile(filename[0]) + self.f = f + self.filename = filename[0] + self.is_tchain = True + self.flist = filename + # Single file + else: + f = ROOT.TFile(filename[0]) + self.f = f + self.filename = filename[0] elif type(filename) is ROOT.TFile: self.f = filename self.filename = self.f.GetName() From d918ccb66a00a78bc9a61e581781510b539feec8 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Wed, 18 Sep 2024 20:38:38 +0200 Subject: [PATCH 72/85] Added handling of directories with split files to DataDirectory --- grand/dataio/root_trees.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index 2fef9648..38a92b9a 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -2527,8 +2527,16 @@ def get_list_of_files_handles(self): """Go through the list of files in the directory and open all of them""" file_handle_list = [] - for filename in self.file_list: - file_handle_list.append(DataFile(filename)) + # Function returning the tree type and analysis level from the filename. That's how we want to group files.
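+ # For example, an (illustrative) name like 'efield_290_L1_0000.root' splits on '_' into
+ # ['efield', '290', 'L1', '0000.root'], giving the key ('efield', 'L1'), so all L1 efield
+ # file chunks end up grouped into one chained DataFile (assumed sim2root naming convention)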
+ def split_filenames(x): + el = Path(x).name.split("_") + return el[0], el[2] + + # for filename in self.file_list: + from itertools import groupby + for key, filenames in groupby(sorted(self.file_list, key=split_filenames), split_filenames): + filenames = list(filenames) + file_handle_list.append(DataFile(filenames)) return file_handle_list From eb2125da7c78e9a1a8a59e173757c5396f1b42d5 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Wed, 18 Sep 2024 20:40:07 +0200 Subject: [PATCH 73/85] Removed unnecessary DataFileChain class and create_chains() from DataDirectory --- grand/dataio/root_trees.py | 57 -------------------------------------- 1 file changed, 57 deletions(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index 38a92b9a..3affda05 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -2508,9 +2508,6 @@ def __init__(self, dir_name: str, recursive: bool = False, analysis_level: int = else: logger.warning("Sorry, non sim2root directories not supported yet") - # Create chains and set them as attributes - # self.create_chains() - def __getattr__(self, name): """For non-existing tree files or tree parameters, return None instead of rising an exception""" trees_to_check = ["trun", "trunvoltage", "trawvoltage", "tadc", "tvoltage", "tefield", "tshower", "trunefieldsim", "trunshowersim", "tshowersim", "trunnoise"] @@ -2561,41 +2558,6 @@ def init_sim2root_structure(self): # Assign the tree with the highest or requested analysis level as default to the class instance setattr(self, f"{flistname[1:-1]}", getattr(f, f"{flistname[1:-1]}")) - def create_chains(self): - chains_dict = {} - tree_types = set() - # Loop through the list of file handles - for i, f in enumerate(self.file_handle_list): - # Collect the tree types - tree_types.update(*f.tree_types.keys()) - - # Select the highest analysis level trees for each class and store these trees as main attributes - for key in f.tree_types: - if key == "run": - setattr(self, "trun", f.dict_of_trees["trun"]) - else: - max_analysis_level = -1 - for key1 in f.tree_types[key].keys(): - el = f.tree_types[key][key1] - chain_name = el["name"] - if "analysis_level" in el: - if el["analysis_level"] > max_analysis_level or el["analysis_level"] == 0: - max_analysis_level = el["analysis_level"] - max_anal_chain_name = chain_name - - setattr(self, key + "_" + str(el["analysis_level"]), f.dict_of_trees[el["name"]]) - - if chain_name not in chains_dict: - chains_dict[chain_name] = ROOT.TChain(chain_name) - chains_dict[chain_name].Add(self.file_list[i]) - - # In case there is no analysis level info in the tree (old trees), just take the last one - if max_analysis_level == -1: - max_anal_chain_name = el["name"] - - tree_class = getattr(thismodule, el["type"]) - setattr(self, tree_class.get_default_tree_name(), chains_dict[max_anal_chain_name]) - def print(self, recursive=True): """Prints all the information about all the data""" pass From 4b7d711f804d67a3354d7152e5cfa58d2b9f6b8b Mon Sep 17
00:00:00 2001 From: lwpiotr Date: Wed, 18 Sep 2024 21:37:50 +0200 Subject: [PATCH 74/85] Event: Initialising trees from the DataDirectory if given EventList: Do not force init trees if DataDirectory used --- grand/grandlib_classes/grandlib_classes.py | 23 +++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/grand/grandlib_classes/grandlib_classes.py b/grand/grandlib_classes/grandlib_classes.py index c00fc88b..1fb826e9 100644 --- a/grand/grandlib_classes/grandlib_classes.py +++ b/grand/grandlib_classes/grandlib_classes.py @@ -402,12 +402,21 @@ def directory(self, value): else: self._directory = value - # Set all the tree files as this file + # Set all the tree files as this file and trees as file's trees self.file_trun = self.directory.ftrun.f - if self.directory.ftvoltages: self.file_tvoltage = self.directory.ftvoltage.f - if self.directory.ftefield: self.file_tefield = self.directory.ftefield.f - if self.directory.ftshower_l1: self.file_tshower = self.directory.ftshower_l1.f - if self.directory.ftshower_l0: self.file_tsimshower = self.directory.ftshower_l0.f + self.trun = self.directory.trun + if self.directory.ftvoltages: + self.file_tvoltage = self.directory.ftvoltage.f + self.tvoltage = self.directory.tvoltage + if self.directory.ftefield: + self.file_tefield = self.directory.ftefield.f + self.tefield = self.directory.tefield + if self.directory.ftshower_l1: + self.file_tshower = self.directory.ftshower_l1.f + self.tshower = self.directory.tshower_l1 + if self.directory.ftshower_l0: + self.tsimshower = self.directory.tshower_l0 + @property def origin_geoid(self): @@ -1078,6 +1087,10 @@ def __init__(self, inp_name, **kwargs): self.event = Event() self.init_trees = True + # No need to init trees if using a DataDirectory (which inits the trees) + if self.directory: + self.init_trees = False + def get_event(self, event_number=None, run_number=None, entry_number=None, fill_event=True, **kwargs): """Get specified event from the event list""" From 747e4d8870de979e54e681d8421f15f0a00d025b Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Thu, 19 Sep 2024 14:47:25 +0200 Subject: [PATCH 75/85] Removed the unnecessary and not working file close --- examples/grandlib_classes/data_play.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/grandlib_classes/data_play.py b/examples/grandlib_classes/data_play.py index 1a154f39..37b1f644 100755 --- a/examples/grandlib_classes/data_play.py +++ b/examples/grandlib_classes/data_play.py @@ -25,7 +25,6 @@ if ROOT.gROOT.GetVersionInt() < 62602: import os os._exit(1) - e.file.Close() else: print("Please provide a ROOT filename with the trees") From 064fe522cabcc909e7a16de5ad1f3dcd78b7f8e6 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Thu, 19 Sep 2024 14:48:22 +0200 Subject: [PATCH 76/85] Fixed coordinates to (3,) dim and traces to proper ordering when filling the trees --- grand/grandlib_classes/grandlib_classes.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/grand/grandlib_classes/grandlib_classes.py b/grand/grandlib_classes/grandlib_classes.py index 1fb826e9..30953505 100644 --- a/grand/grandlib_classes/grandlib_classes.py +++ b/grand/grandlib_classes/grandlib_classes.py @@ -907,7 +907,7 @@ def fill_run_tree(self, overwrite=False, filename=None): self.trun.site = self.site # self.trun.site_long = self.site_long # self.trun.site_lat = self.site_lat - self.trun.origin_geoid = self.origin_geoid + self.trun.origin_geoid = self.origin_geoid[:,0] self.trun.t_bin_size = self._t_bin_size # Fill the 
tree with values @@ -943,7 +943,8 @@ def fill_voltage_tree(self, overwrite=False, filename=None): self.tvoltage.du_id = [v.du_id for v in self.voltages] # Remark: best to set list. Append will append to the previous event, since it is not cleared automatically - self.tvoltage.trace = [[np.array(v.trace.x).astype(np.float32), np.array(v.trace.y).astype(np.float32), np.array(v.trace.z).astype(np.float32)] for v in self.voltages] + # self.tvoltage.trace = [[np.array(v.trace.x).astype(np.float32), np.array(v.trace.y).astype(np.float32), np.array(v.trace.z).astype(np.float32)] for v in self.voltages] + self.tvoltage.trace = [v.trace for v in self.voltages] # self.tvoltage.trace_x = [np.array(v.trace.y).astype(np.float32) for v in self.voltages] # self.tvoltage.trace_y = [np.array(v.trace.y).astype(np.float32) for v in self.voltages] # self.tvoltage.trace_z = [np.array(v.trace.z).astype(np.float32) for v in self.voltages] @@ -990,7 +991,8 @@ def fill_efield_tree(self, overwrite=False, filename=None): self.tefield.du_id = [v.du_id for v in self.voltages] # Remark: best to set list. Append will append to the previous event, since it is not cleared automatically - self.tefield.trace = [[np.array(v.trace.x).astype(np.float32) for v in self.efields], [np.array(v.trace.y).astype(np.float32) for v in self.efields], [np.array(v.trace.z).astype(np.float32) for v in self.efields]] + # self.tefield.trace = [[np.array(v.trace.x).astype(np.float32) for v in self.efields], [np.array(v.trace.y).astype(np.float32) for v in self.efields], [np.array(v.trace.z).astype(np.float32) for v in self.efields]] + self.tefield.trace = [v.trace for v in self.efields] # self.tefield.trace_x = [np.array(v.trace.x).astype(np.float32) for v in self.efields] # self.tefield.trace_y = [np.array(v.trace.y).astype(np.float32) for v in self.efields] # self.tefield.trace_z = [np.array(v.trace.z).astype(np.float32) for v in self.efields] @@ -1029,13 +1031,13 @@ def fill_shower_tree(self, overwrite=False, filename=None, tree_name="tshower"): ## Shower Xmax [g/cm2] self.tshower.xmax_grams = self.shower.Xmax ## Shower position in the site's reference frame - self.tshower.xmax_pos = self.shower.Xmaxpos + self.tshower.xmax_pos = self.shower.Xmaxpos[:,0] ## Shower azimuth self.tshower.azimuth = self.shower.azimuth ## Shower zenith self.tshower.zenith = self.shower.zenith ## Position of the core on the ground in the site's reference frame - self.tshower.shower_core_pos = self.shower.core_ground_pos + self.tshower.shower_core_pos = self.shower.core_ground_pos[:,0] self.tshower.fill() From e8e82d5b2edda657d1dc57a38c6a92dbf9933574 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Thu, 19 Sep 2024 16:19:42 +0200 Subject: [PATCH 77/85] Added scripts for opening an (I)Python prompt with GRAND ROOT file/directory opened inside --- scripts/open_grand_directory.sh | 21 +++++++++++++++++++ scripts/open_grand_file.sh | 21 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100755 scripts/open_grand_directory.sh create mode 100755 scripts/open_grand_file.sh diff --git a/scripts/open_grand_directory.sh b/scripts/open_grand_directory.sh new file mode 100755 index 00000000..d3662640 --- /dev/null +++ b/scripts/open_grand_directory.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Check if a filename argument is provided +if [ -z "$1" ]; then + echo "Usage: $0 [-p] " + echo "-p run standard Python shell instead of IPython" + exit 1 +fi + +# Check for the -p option +if [ "$1" == "-p" ]; then + # Assign the filename argument to a variable + DIRNAME=$2 + #
Start the Python shell with the specified import and command + python3 -i -c "from grand.dataio.root_trees import *; d = DataDirectory('$DIRNAME'); print('\n\033[0;31mOpened directory $DIRNAME as d\033[0m\n'); d.print()" +else + # Assign the filename argument to a variable + DIRNAME=$1 + # Start IPython with the specified import and command + ipython -i -c "from grand.dataio.root_trees import *; d = DataDirectory('$DIRNAME'); print('\n\033[0;31mOpened directory $DIRNAME as d\033[0m\n'); d.print()" +fi \ No newline at end of file diff --git a/scripts/open_grand_file.sh b/scripts/open_grand_file.sh new file mode 100755 index 00000000..3e53f1b2 --- /dev/null +++ b/scripts/open_grand_file.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Check if a filename argument is provided +if [ -z "$1" ]; then + echo "Usage: $0 [-p] " + echo "-p run standard Python shell instead of IPython" + exit 1 +fi + +# Check for the -p option +if [ "$1" == "-p" ]; then + # Assign the filename argument to a variable + FILENAME=$2 + # Start the Python shell with the specified import and command + python3 -i -c "from grand.dataio.root_trees import *; f = DataFile('$FILENAME'); print('\n\033[0;31mOpened file $FILENAME as f\033[0m\n'); f.print()" +else + # Assign the filename argument to a variable + FILENAME=$1 + # Start IPython with the specified import and command + ipython -i -c "from grand.dataio.root_trees import *; f = DataFile('$FILENAME'); print('\n\033[0;31mOpened file $FILENAME as f\033[0m\n'); f.print()" +fi \ No newline at end of file From 707be190018b99910e3f97c9e82c522208412ee1 Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Thu, 19 Sep 2024 18:05:17 +0200 Subject: [PATCH 78/85] DataFile: filling flist also with single files --- grand/dataio/root_trees.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py index 3affda05..e1ee8f25 100644 --- a/grand/dataio/root_trees.py +++ b/grand/dataio/root_trees.py @@ -2607,12 +2607,12 @@ def __init__(self, filename): if type(filename) is str: # Check if a chain - filename string resolves to a list longer than 1 (due to wildcards) flist = glob.glob(filename) + self.flist = flist if len(flist) > 1: f = ROOT.TFile(flist[0]) self.f = f self.filename = flist[0] self.is_tchain = True - self.flist = flist # Single file else: f = ROOT.TFile(filename) @@ -2631,6 +2631,7 @@ def __init__(self, filename): f = ROOT.TFile(filename[0]) self.f = f self.filename = filename[0] + self.flist = filename elif type(filename) is ROOT.TFile: self.f = filename self.filename = self.f.GetName() From 134e08ca7fb701641f08e3bd98e580ddfdd6e01d Mon Sep 17 00:00:00 2001 From: lwpiotr Date: Thu, 19 Sep 2024 21:04:46 +0200 Subject: [PATCH 79/85] Replaced the bash version of file/directory opening scripts with a Python version --- scripts/open_grand_directory.py | 37 +++++++++++++++++++++++++++++++++ scripts/open_grand_directory.sh | 21 ------------------- scripts/open_grand_file.py | 30 ++++++++++++++++++++++++++ scripts/open_grand_file.sh | 21 ------------------- 4 files changed, 67 insertions(+), 42 deletions(-) create mode 100755 scripts/open_grand_directory.py delete mode 100755 scripts/open_grand_directory.sh create mode 100755 scripts/open_grand_file.py delete mode 100755 scripts/open_grand_file.sh diff --git a/scripts/open_grand_directory.py b/scripts/open_grand_directory.py new file mode 100755 index 00000000..6f64645c --- /dev/null +++ b/scripts/open_grand_directory.py @@ -0,0 +1,37 @@ +#! 
/usr/bin/env python3 +# Opens the GRAND ROOT directory with a DataDirectory class and leaves the prompt open, so the user can work with the opened directory + +import argparse +import os +import sys + +# Create the argument parser +parser = argparse.ArgumentParser(description='Open a GRAND directory in an IPython or Python shell.') + +# Add the command-line options +parser.add_argument('-p', action='store_true', help='Use Python instead of IPython') +parser.add_argument('-s', action='store_true', help='Do not print any initial output') +parser.add_argument('-nv', action='store_true', help='Do not print verbose output') +parser.add_argument('dirname', metavar='', type=str, help='The GRAND ROOT directory to load') + +# Parse the arguments +args = parser.parse_args() + +interp = "ipython" + +# Prepare to run in the standard Python shell if requested +if args.p: + interp = "python" + +if args.nv: + verbose=False +else: + verbose=True + +# Construct the command based on the arguments +command = f"from grand.dataio.root_trees import *; d = DataDirectory('{args.dirname}');" +if not args.s: + command+=f" print(f'\\n\\033[0;31mOpened directory {args.dirname} as d\\033[0m\\n'); d.print(verbose={verbose})" + +os.execlp(interp, interp, '-i', '-c', command) + diff --git a/scripts/open_grand_directory.sh b/scripts/open_grand_directory.sh deleted file mode 100755 index d3662640..00000000 --- a/scripts/open_grand_directory.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Check if a filename argument is provided -if [ -z "$1" ]; then - echo "Usage: $0 [-p] " - echo "-p run standard Python shell instead of IPython" - exit 1 -fi - -# Check for the -p option -if [ "$1" == "-p" ]; then - # Assign the filename argument to a variable - DIRNAME=$2 - # Start the Python shell with the specified import and command - python3 -i -c "from grand.dataio.root_trees import *; d = DataDirectory('$DIRNAME'); print('\n\033[0;31mOpened directory $DIRNAME as d\033[0m\n'); d.print()" -else - # Assign the filename argument to a variable - DIRNAME=$1 - # Start IPython with the specified import and command - ipython -i -c "from grand.dataio.root_trees import *; d = DataDirectory('$DIRNAME'); print('\n\033[0;31mOpened directory $DIRNAME as d\033[0m\n'); d.print()" -fi \ No newline at end of file diff --git a/scripts/open_grand_file.py b/scripts/open_grand_file.py new file mode 100755 index 00000000..592074dc --- /dev/null +++ b/scripts/open_grand_file.py @@ -0,0 +1,30 @@ +#! 
From e9adbdb0ae74831af8c59a7e903547d734ecfe6d Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Thu, 19 Sep 2024 21:06:36 +0200
Subject: [PATCH 80/85] Added DataDirectory print()

---
 grand/dataio/root_trees.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py
index e1ee8f25..d82e2a25 100644
--- a/grand/dataio/root_trees.py
+++ b/grand/dataio/root_trees.py
@@ -2539,7 +2539,7 @@ def split_filenames(x):
 
     # Init the instance with sim2root structure files
     def init_sim2root_structure(self):
-
+        self.file_attrs = []
         # Loop through groups of files with tree types expected in the directory
         for flistname in ["ftruns", "ftrunshowersims", "ftrunefieldsims", "ftefields", "ftshowers", "ftshowersims", "ftvoltages", "ftadcs", "ftrawvoltages", "ftrunnoises"]:
             # Assign the list of files with specific tree type to the class instance
@@ -2548,6 +2548,7 @@ def init_sim2root_structure(self):
             for (l, f) in getattr(self, flistname).items():
                 # Assign the file with the tree with the specific analysis level to the class instance
                 setattr(self, f"{flistname[:-1]}_l{l}", f)
+                self.file_attrs.append(f"{flistname[:-1]}_l{l}")
                 # Assign the tree with the specific analysis level to the class instance
                 setattr(self, f"{flistname[1:-1]}_l{l}", getattr(f, f"{flistname[1:-1]}_l{l}"))
                 if (l>max_level and self.analysis_level==-1) or l==self.analysis_level:
@@ -2558,9 +2559,19 @@ def init_sim2root_structure(self):
             # Assign the tree with the highest or requested analysis level as default to the class instance
             setattr(self, f"{flistname[1:-1]}", getattr(f, f"{flistname[1:-1]}"))
 
-    def print(self, recursive=True):
+    def print(self, verbose=True):
         """Prints all the information about all the data"""
-        pass
+        print(f"This DataDirectory instance has:")
+        print(f"  {len(self.file_attrs):<3} file/tree attributes")
+        print(f"  {len(self.file_list):<3} files")
+        print(f"  {len(self.file_handle_list):<3} file chains")
+
+        if verbose:
+            print("\n\033[95;40mProperties of each file attribute:\033[0m")
+            for attr in self.file_attrs:
+                f = getattr(self, attr)
+                print(f"\n\033[34;40m{attr}\033[0m\n{f.flist}\n")
+                f.print()
 
     def get_list_of_chains(self):
         """Gets list of TTree chains of specific type from the directory"""
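A short usage sketch of the new print() (directory name invented):

    from grand.dataio.root_trees import DataDirectory

    d = DataDirectory("sim_Xiaodushan_20221026_RUN0_CD_0000/")
    d.print(verbose=False)  # only counts of attributes, files and chains
    d.print()               # additionally prints flist and calls print()
                            # on every file attribute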
From 96155d2432ffe0f523efccc9d1c300e9a6bcc63a Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Thu, 19 Sep 2024 21:12:45 +0200
Subject: [PATCH 81/85] Improved DataFile and DataDirectory prints()

---
 grand/dataio/root_trees.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py
index d82e2a25..9b6d5119 100644
--- a/grand/dataio/root_trees.py
+++ b/grand/dataio/root_trees.py
@@ -2570,7 +2570,7 @@ def print(self, verbose=True):
             print("\n\033[95;40mProperties of each file attribute:\033[0m")
             for attr in self.file_attrs:
                 f = getattr(self, attr)
-                print(f"\n\033[34;40m{attr}\033[0m\n{f.flist}\n")
+                print(f"\n\n\033[34;40m{attr}:\033[0m\n")
                 f.print()
 
     def get_list_of_chains(self):
@@ -2737,6 +2737,8 @@ def print(self):
             print(f"The first file size: {self.f.GetSize():40}")
             print("Most of the information below are based on the tree in the first file")
         else:
+            print("This DataFile refers to the following file:")
+            print(self.flist)
             print(f"File size: {self.f.GetSize():40}")
 
         print(f"Tree classes found in the file: {str([el for el in self.tree_types.keys()]):40}")

From d32acf58cd6ded43e32f6f4369e54e2a5393c9f2 Mon Sep 17 00:00:00 2001
From: fleg
Date: Mon, 14 Oct 2024 11:59:03 +0200
Subject: [PATCH 82/85] Added -l to bash scripts to use the new env for the
 rhel9 platform @ccin2p3 (and get all env variables set correctly). Exclude
 TR files from GrandRoot registration into the DB.

---
 scripts/transfers/bintoroot.bash         | 8 +++++---
 scripts/transfers/ccscript.bash          | 2 +-
 scripts/transfers/refresh_mat_views.bash | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)
 mode change 100755 => 100644 scripts/transfers/bintoroot.bash
 mode change 100755 => 100644 scripts/transfers/ccscript.bash
 mode change 100644 => 100755 scripts/transfers/refresh_mat_views.bash

diff --git a/scripts/transfers/bintoroot.bash b/scripts/transfers/bintoroot.bash
old mode 100755
new mode 100644
index bb5a184c..edb8b7b0
--- a/scripts/transfers/bintoroot.bash
+++ b/scripts/transfers/bintoroot.bash
@@ -67,7 +67,7 @@ do
       mkdir -p $dirlogs >/dev/null 2>&1
     fi
     #Determine if file is TR (so no conversion)
-    tr=$(echo basename ${file} |awk -F_ '{print $5} ')
+    tr=$(echo basename ${file} |awk -F_ '{print $5}')
     if [ $tr == "TR" ]; then
       cp ${file} ${dest}/${filename%.*}.root
       conv_status=0
@@ -96,8 +96,10 @@ do
       echo "Register convertion" >> ${logfile}
       python3 ${register_convertion} -i ${filename} -o ${filename%.*}.root -s ${conv_status} -l ${logfile} >> ${logfile} 2>&1
       # Register root file into db
-      echo "register file in database" >> ${logfile}
-      python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root >> ${logfile} 2>&1
+      if [ $tr != "TR" ]; then
+        echo "register file in database" >> ${logfile}
+        python3 ${register_root} -c ${config_file} -r "CCIN2P3" ${dest}/${filename%.*}.root >> ${logfile} 2>&1
+      fi
     fi
 done

diff --git a/scripts/transfers/ccscript.bash b/scripts/transfers/ccscript.bash
old mode 100755
new mode 100644
index f46222ae..e8e62b28
--- a/scripts/transfers/ccscript.bash
+++ b/scripts/transfers/ccscript.bash
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash -l
 # Script triggered after transfering data from a GRAND observatory to CCIN2P3 (or to any site)
 # It will launch the jobs to convert binary files into GrandRoot and register the results of the transfers and convertions into the database
 # Fleg & Fred: 03/2024
diff --git a/scripts/transfers/refresh_mat_views.bash b/scripts/transfers/refresh_mat_views.bash
old mode 100644
new mode 100755
index d2b2b390..f570d07f
--- a/scripts/transfers/refresh_mat_views.bash
+++ b/scripts/transfers/refresh_mat_views.bash
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash -l
 cd /pbs/home/p/prod_grand/softs/grand
 export PLATFORM=redhat-9-x86_64
 source /pbs/throng/grand/soft/miniconda3/etc/profile.d/conda.sh
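The TR check above takes the fifth underscore-separated field; note that
`echo basename ${file}` prints the literal word `basename` followed by the
full path rather than running basename, so the field count covers the whole
echoed line. A Python rendering of the intended test (file layout assumed,
path invented):

    import os

    file = "/sps/grand/data/gp13/raw/2024/GP13_20240302_093000_RUN10_TR_0001.bin"
    # Intended check: fifth underscore-separated field of the file name
    token = os.path.basename(file).split("_")[4]  # awk -F_ '{print $5}'
    if token == "TR":
        pass  # copied as-is and, after this patch, not registered in the DB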
From eee4be93297315670c434bd3c7c9fbb96925fdfc Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Sat, 2 Nov 2024 21:45:50 +0100
Subject: [PATCH 83/85] Added handling of exp directories in DataDirectory

---
 grand/dataio/root_trees.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py
index 9b6d5119..d2926fe3 100644
--- a/grand/dataio/root_trees.py
+++ b/grand/dataio/root_trees.py
@@ -2503,10 +2503,20 @@ def __init__(self, dir_name: str, recursive: bool = False, analysis_level: int =
         # Get the file handle list
         self.file_handle_list = self.get_list_of_files_handles()
 
+        # Set the structure type depending on the dir name
+        exp_structure = False
+        if dir_name[:4]=="sim_":
+            sim2root_structure = True
+        elif dir_name[:4]=="exp_":
+            sim2root_structure = False
+            exp_structure = True
+
         if sim2root_structure:
             self.init_sim2root_structure()
+        elif exp_structure:
+            self.init_exp_structure()
         else:
-            logger.warning("Sorry, non sim2root directories not supported yet")
+            logger.warning("Sorry, non exp or sim2root directories are not supported yet")
 
     def __getattr__(self, name):
         """For non-existing tree files or tree parameters, return None instead of rising an exception"""
@@ -2559,6 +2569,30 @@ def init_sim2root_structure(self):
             # Assign the tree with the highest or requested analysis level as default to the class instance
             setattr(self, f"{flistname[1:-1]}", getattr(f, f"{flistname[1:-1]}"))
 
+    # Init the instance with exp (gtot) structure files
+    # ToDo: It should be the same as sim2root, but at the moment sim2root has different naming convention
+    def init_exp_structure(self):
+        self.file_attrs = []
+        # Loop through groups of files with tree types expected in the directory
+        for flistname in ["ftruns", "ftadcs", "ftrawvoltages"]:
+        # for flistname in ["ftruns", "ftrunshowersims", "ftrunefieldsims", "ftefields", "ftshowers", "ftshowersims", "ftvoltages", "ftadcs", "ftrawvoltages", "ftrunnoises"]:
+            # Assign the list of files with specific tree type to the class instance
+            setattr(self, flistname, {int(Path(el.filename).name.split("_")[-2][1:]): el for el in self.file_handle_list if Path(el.filename).name.startswith(flistname[2:-1]+"_")})
+            max_level = -1
+            for (l, f) in getattr(self, flistname).items():
+                # Assign the file with the tree with the specific analysis level to the class instance
+                setattr(self, f"{flistname[:-1]}_l{l}", f)
+                self.file_attrs.append(f"{flistname[:-1]}_l{l}")
+                # Assign the tree with the specific analysis level to the class instance
+                setattr(self, f"{flistname[1:-1]}_l{l}", getattr(f, f"{flistname[1:-1]}_l{l}"))
+                if (l>max_level and self.analysis_level==-1) or l==self.analysis_level:
+                    max_level = l
+                    # Assign the file with the highest or requested analysis level as default to the class instance
+                    # ToDo: This may assign all files until it goes to the max level. Probably could be avoided
+                    setattr(self, f"{flistname[:-1]}", f)
+                    # Assign the tree with the highest or requested analysis level as default to the class instance
+                    setattr(self, f"{flistname[1:-1]}", getattr(f, f"{flistname[1:-1]}"))
+
     def print(self, verbose=True):
         """Prints all the information about all the data"""
         print(f"This DataDirectory instance has:")
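A hedged usage sketch of the new layout switch (directory name invented; note
the prefix test is applied to dir_name exactly as passed, so a path such as
"/data/exp_gp13" would not match "exp_"):

    from grand.dataio.root_trees import DataDirectory

    d = DataDirectory("exp_gp13_20240302/")  # "exp_*" -> init_exp_structure()
    print(d.ftadcs)  # {analysis_level: DataFile} mapping for tadc_* files
    d.tadc           # tree at the highest (or requested) analysis level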
From bec0b9039a54cf458716e8cc35171f28e1db1eaa Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Tue, 26 Nov 2024 16:19:57 +0100
Subject: [PATCH 84/85] Added TRunRawVoltage (but still with bad fields)

---
 grand/dataio/root_trees.py | 72 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py
index d2926fe3..f9819a1b 100644
--- a/grand/dataio/root_trees.py
+++ b/grand/dataio/root_trees.py
@@ -1645,6 +1645,74 @@ def __post_init__(self):
 
         self.create_branches()
 
 
+## General info on the raw voltage common to all events.
+@dataclass
+class TRunRawVoltage(MotherRunTree):
+    """General info on the voltage common to all events."""
+
+    _type: str = "runrawvoltage"
+
+    _tree_name: str = "trunrawvoltage"
+
+    ## Control parameters - the list of general parameters that can set the mode of operation, select trigger sources and preset the common coincidence read out time window (Digitizer mode parameters in the manual).
+    digi_ctrl: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Control parameters - the list of general parameters that can set the mode of operation, select trigger sources and preset the common coincidence read out time window (Digitizer mode parameters in the manual)."""
+    ## Firmware version
+    firmware_version: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    """Firmware version"""
+    ## Nominal trace length in units of samples
+    trace_length: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Nominal trace length in units of samples"""
+    ## ADC sampling frequency in MHz
+    adc_sampling_frequency: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    """ADC sampling frequency in MHz"""
+    ## ADC sampling resolution in bits
+    adc_sampling_resolution: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    """ADC sampling resolution in bits"""
+    ## ADC input channels - > 16 BIT WORD (4*4 BITS) LOWEST IS CHANNEL 1, HIGHEST CHANNEL 4. FOR EACH CHANNEL IN THE EVENT WE HAVE: 0: ADC1, 1: ADC2, 2:ADC3, 3:ADC4 4:FILTERED ADC1, 5:FILTERED ADC 2, 6:FILTERED ADC3, 7:FILTERED ADC4. ToDo: decode this?
+    adc_input_channels: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    """ADC input channels - > 16 BIT WORD (4*4 BITS) LOWEST IS CHANNEL 1, HIGHEST CHANNEL 4. FOR EACH CHANNEL IN THE EVENT WE HAVE: 0: ADC1, 1: ADC2, 2:ADC3, 3:ADC4 4:FILTERED ADC1, 5:FILTERED ADC 2, 6:FILTERED ADC3, 7:FILTERED ADC4. ToDo: decode this?"""
+    ## ADC enabled channels - LOWEST 4 BITS STATE WHICH CHANNEL IS READ OUT ToDo: Decode this?
+    adc_enabled_channels: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    """ADC enabled channels - LOWEST 4 BITS STATE WHICH CHANNEL IS READ OUT ToDo: Decode this?"""
+    ## Value of the Variable gain amplification on the board
+    gain: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Value of the Variable gain amplification on the board"""
+    ## Conversion factor from bits to V for ADC
+    adc_conversion: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Conversion factor from bits to V for ADC"""
+    ## Window parameters - describe Pre Coincidence, Coincidence and Post Coincidence readout windows (Digitizer window parameters in the manual). ToDo: Decode?
+    digi_prepost_trig_windows: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Window parameters - describe Pre Coincidence, Coincidence and Post Coincidence readout windows (Digitizer window parameters in the manual). ToDo: Decode?"""
+    ## Channel x properties - described in Channel property parameters in the manual. ToDo: Decode?
+    channel_properties_x: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Channel x properties - described in Channel property parameters in the manual. ToDo: Decode?"""
+    ## Channel y properties - described in Channel property parameters in the manual. ToDo: Decode?
+    channel_properties_y: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Channel y properties - described in Channel property parameters in the manual. ToDo: Decode?"""
+    ## Channel z properties - described in Channel property parameters in the manual. ToDo: Decode?
+    channel_properties_z: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Channel z properties - described in Channel property parameters in the manual. ToDo: Decode?"""
+    ## Channel x trigger settings - described in Channel trigger parameters in the manual. ToDo: Decode?
+    channel_trig_settings_x: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Channel x trigger settings - described in Channel trigger parameters in the manual. ToDo: Decode?"""
+    ## Channel y trigger settings - described in Channel trigger parameters in the manual. ToDo: Decode?
+    channel_trig_settings_y: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Channel y trigger settings - described in Channel trigger parameters in the manual. ToDo: Decode?"""
+    ## Channel z trigger settings - described in Channel trigger parameters in the manual. ToDo: Decode?
+    channel_trig_settings_z: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
+    """Channel z trigger settings - described in Channel trigger parameters in the manual. ToDo: Decode?"""
+
+    def __post_init__(self):
+        super().__post_init__()
+
+        if self._tree.GetName() == "":
+            self._tree.SetName(self._tree_name)
+        if self._tree.GetTitle() == "":
+            self._tree.SetTitle(self._tree_name)
+
+        self.create_branches()
+
 
 @dataclass
 ## The class for storing ADC traces and associated values for each event
@@ -2520,7 +2588,7 @@ def __getattr__(self, name):
         """For non-existing tree files or tree parameters, return None instead of rising an exception"""
-        trees_to_check = ["trun", "trunvoltage", "trawvoltage", "tadc", "tvoltage", "tefield", "tshower", "trunefieldsim", "trunshowersim", "tshowersim", "trunnoise"]
+        trees_to_check = ["trun", "trunvoltage", "trunrawvoltage", "trawvoltage", "tadc", "tvoltage", "tefield", "tshower", "trunefieldsim", "trunshowersim", "tshowersim", "trunnoise"]
         if any(s in name for s in trees_to_check):
             return None
         else:
@@ -2574,7 +2642,7 @@ def init_sim2root_structure(self):
     def init_exp_structure(self):
         self.file_attrs = []
         # Loop through groups of files with tree types expected in the directory
-        for flistname in ["ftruns", "ftadcs", "ftrawvoltages"]:
+        for flistname in ["ftruns", "ftrunrawvoltages", "ftadcs", "ftrawvoltages"]:
         # for flistname in ["ftruns", "ftrunshowersims", "ftrunefieldsims", "ftefields", "ftshowers", "ftshowersims", "ftvoltages", "ftadcs", "ftrawvoltages", "ftrunnoises"]:
             # Assign the list of files with specific tree type to the class instance
             setattr(self, flistname, {int(Path(el.filename).name.split("_")[-2][1:]): el for el in self.file_handle_list if Path(el.filename).name.startswith(flistname[2:-1]+"_")})
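A minimal sketch of filling the new tree, assuming TRunRawVoltage follows the
same fill()/write() API as the other run trees (field values invented, and the
commit message itself warns the fields are still provisional):

    from grand.dataio.root_trees import TRunRawVoltage

    trrv = TRunRawVoltage()
    trrv.run_number = 10
    trrv.firmware_version = [2]          # one entry per DU
    trrv.adc_sampling_frequency = [500]  # MHz
    trrv.fill()
    trrv.write("trunrawvoltage_10_L0_0000.root")  # hypothetical filename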
From 8be035140425eebeee3470211cf3632bbac81eb0 Mon Sep 17 00:00:00 2001
From: lwpiotr
Date: Wed, 4 Dec 2024 10:46:43 +0100
Subject: [PATCH 85/85] Accepting unsigned int du_id

---
 grand/dataio/root_trees.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/grand/dataio/root_trees.py b/grand/dataio/root_trees.py
index f9819a1b..a788090a 100644
--- a/grand/dataio/root_trees.py
+++ b/grand/dataio/root_trees.py
@@ -1539,7 +1539,7 @@ class TRun(MotherRunTree):
     """Origin of the coordinate system used for the array"""
 
     ## Detector unit (antenna) ID
-    du_id: StdVectorListDesc = field(default=StdVectorListDesc("int"))
+    du_id: StdVectorListDesc = field(default=StdVectorListDesc("int", "unsigned int"))
     """Detector unit (antenna) ID"""
     ## Detector unit (antenna) (lat,lon,alt) position
     du_geoid: StdVectorListDesc = field(default=StdVectorListDesc("vector"))
@@ -1754,7 +1754,7 @@ class TADC(MotherEventTree):
     event_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
     """The T3 trigger number"""
     ## Detector unit (antenna) ID
-    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short", "unsigned int"))
     """Detector unit (antenna) ID"""
     ## Unix time of the trigger for this DU
     du_seconds: StdVectorListDesc = field(default=StdVectorListDesc("unsigned int"))
@@ -2031,7 +2031,7 @@ class TRawVoltage(MotherEventTree):
     ## Specific for each Detector Unit
 
     ## Detector unit (antenna) ID
-    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short", "unsigned int"))
     """Detector unit (antenna) ID"""
     ## Unix time of the trigger for this DU
     du_seconds: StdVectorListDesc = field(default=StdVectorListDesc("unsigned int"))
@@ -2140,7 +2140,7 @@ class TVoltage(MotherEventTree):
     ## Specific for each Detector Unit
 
     ## Detector unit (antenna) ID
-    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short", "unsigned int"))
     """Detector unit (antenna) ID"""
     ## Unix time of the trigger for this DU
     du_seconds: StdVectorListDesc = field(default=StdVectorListDesc("unsigned int"))
@@ -2199,7 +2199,7 @@ class TEfield(MotherEventTree):
     ## Specific for each Detector Unit
 
     ## Detector unit (antenna) ID
-    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short"))
+    du_id: StdVectorListDesc = field(default=StdVectorListDesc("unsigned short", "unsigned int"))
     """Detector unit (antenna) ID"""
     ## Unix time of the trigger for this DU
     du_seconds: StdVectorListDesc = field(default=StdVectorListDesc("unsigned int"))
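A sketch of what the widened du_id accepts, assuming the extra constructor
argument of StdVectorListDesc registers a second permitted vector type
(DU numbers invented):

    import ROOT
    from grand.dataio.root_trees import TADC

    tadc = TADC()
    v = ROOT.std.vector("unsigned int")()
    for du in (1059, 1076):
        v.push_back(du)
    tadc.du_id = v  # previously only vector("unsigned short") was accepted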