-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: merge slurm support #40
base: main
Are you sure you want to change the base?
Changes from all commits
6011615
737d27d
cad18db
208a69b
332f735
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
*.out | ||
*.err | ||
*.log |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,11 @@ else | |
CSV_FILE=$($(dirname $0)/csv_to_chunks.sh ${FILE} ${TARGET}) | ||
fi | ||
|
||
# number of jobs (zero-based) | ||
NUM_JOBS=$(wc -l ${CSV_FILE} | cut -f1 -d\ ) | ||
let NUM_JOBS=${NUM_JOBS}-1 | ||
echo ${NUM_JOBS} | ||
|
||
# create command line | ||
EXECUTABLE="$(dirname $0)/run.sh" | ||
ARGUMENTS="EVGEN/\$(file) \$(ext) \$(nevents) \$(ichunk)" | ||
|
@@ -72,11 +77,28 @@ sed " | |
" templates/${TEMPLATE}.submit.in > ${SUBMIT_FILE} | ||
|
||
# submit job | ||
condor_submit -verbose -file ${SUBMIT_FILE} | ||
if [[ ${TEMPLATE} =~ slurm ]] ; then | ||
|
||
# create log dir | ||
if [ $? -eq 0 ] ; then | ||
for i in `condor_q --batch | grep ^${USER} | tail -n1 | awk '{print($NF)}' | cut -d. -f1` ; do | ||
mkdir -p LOG/CONDOR/osg_$i/ | ||
# slurm | ||
max_array_size=2000 | ||
while [ ${NUM_JOBS} -ge 0 ] ; do | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Most of our submissions need multiple jobs at max array size to get through. |
||
let taskN=${NUM_JOBS} | ||
let task0=${NUM_JOBS}-${max_array_size}+1 | ||
if [ ${task0} -lt 0 ] ; then task0=0 ; fi | ||
sbatch --array=${task0}-${taskN} ${SUBMIT_FILE} | ||
let NUM_JOBS=${NUM_JOBS}-${max_array_size} | ||
done | ||
|
||
else | ||
|
||
# condor | ||
condor_submit -verbose -file ${SUBMIT_FILE} | ||
|
||
# create log dir | ||
if [ $? -eq 0 ] ; then | ||
for i in `condor_q --batch | grep ^${USER} | tail -n1 | awk '{print($NF)}' | cut -d. -f1` ; do | ||
mkdir -p LOG/CONDOR/osg_$i/ | ||
done | ||
fi | ||
|
||
fi |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
export COPYRECO=%COPYRECO% | ||
export COPYFULL=%COPYFULL% | ||
export COPYLOG=%COPYLOG% | ||
export DETECTOR_VERSION=%DETECTOR_VERSION% | ||
export DETECTOR_CONFIG=%DETECTOR_CONFIG% | ||
export EBEAM=%EBEAM% | ||
export PBEAM=%PBEAM% |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/bin/bash | ||
#SBATCH --job-name=%DETECTOR_VERSION%/%DETECTOR_CONFIG%/%CSV_FILE% | ||
#SBATCH --error=LOG/SLURM/grex_%A/grex_%A_%4a.err | ||
#SBATCH --output=LOG/SLURM/grex_%A/grex_%A_%4a.out | ||
#SBATCH --mail-type=FAIL,TIME_LIMIT,TIME_LIMIT_90 | ||
#SBATCH --cpus-per-task=1 | ||
#SBATCH --mem=3G | ||
Comment on lines
+6
to
+7
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Missing compared to osg_csv.submit: the 5G disk space request, since that is not supported on slurm (there is tmp dir space, but that is apparently different). |
||
#SBATCH --time=24:00:00 | ||
|
||
# Load singularity module | ||
module load singularity | ||
|
||
# Get CSV_FILE entry $a | ||
mapfile array < %CSV_FILE% | ||
IFS=, read file ext nevents ichunk <<< ${array[${SLURM_ARRAY_TASK_ID}]} | ||
|
||
# Start singularity instance | ||
SingularityImage="/cvmfs/singularity.opensciencegrid.org/eicweb/eic_xl:%JUG_XL_TAG%" | ||
instance=${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID} | ||
singularity instance start --bind /cvmfs --bind $(realpath $PWD) ${SingularityImage} ${instance} | ||
|
||
# Create aliases for condor syntax | ||
set -a # export the following funcs | ||
file() { echo $file; } | ||
ext() { echo $ext; } | ||
nevents() { echo $nevents; } | ||
ichunk() { echo $ichunk; } | ||
set +a # stop exporting | ||
|
||
# Run command | ||
singularity exec instance://${instance} /bin/bash << EOF | ||
cd $(realpath $PWD) | ||
source $(basename $0 .submit).sh | ||
%EXECUTABLE% %ARGUMENTS% | ||
EOF | ||
|
||
# Stop singularity instance | ||
singularity instance stop ${instance} |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Site dependent. There is no general way to query this on slurm clusters. The value is assumed here, but it can also be grepped from /etc/slurm/slurm.conf, which is the typical location, or we can fall back to the default of 1000.