run_benchmark.slurm
#!/bin/bash
#SBATCH --job-name=bench-h100
#SBATCH --partition=kempner_requeue
#SBATCH --account=kempner_sham_lab
#SBATCH --nodes=1
#SBATCH --cpus-per-task=16
#SBATCH --gres=gpu:nvidia_h100_80gb_hbm3:1
#SBATCH --time=0-01:00:00
#SBATCH --mem=64gb
#SBATCH --array=0-52
#SBATCH --output=/n/holyscratch01/idreos_lab/Users/spurandare/mem-run-estimator/job_logs/bench_%x_%j.out
#SBATCH --error=/n/holyscratch01/idreos_lab/Users/spurandare/mem-run-estimator/job_logs/bench_%x_%j.err
#SBATCH --open-mode=append
#SBATCH --chdir=/n/holyscratch01/idreos_lab/Users/spurandare/mem-run-estimator/
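# NOTE: the job_logs/ directory referenced above must exist before submission;
# Slurm does not create missing output directories.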
OUT_DIR=/n/holyscratch01/idreos_lab/Users/spurandare/mem-run-estimator/outputs
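# OUT_DIR is defined but not exported or passed to driver.py below; presumably
# it marks the intended results directory.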
# Per-model preset config counts (sum = 53, matching --array=0-52 above)
declare -A model_configs
model_configs[gemma_2b]=7
model_configs[hf_T5]=7
model_configs[timm_convnext_v2]=8
model_configs[llama_v3_1b]=9
model_configs[hf_clip]=7
model_configs[timm_vit]=7
model_configs[hf_GPT2]=8
# Calculate total configs and model offsets
total_configs=0
declare -A model_offsets
for model in "${!model_configs[@]}"; do
    model_offsets[$model]=$total_configs
    (( total_configs += ${model_configs[$model]} ))
done
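# Each model gets a contiguous block of array indices of size
# model_configs[model], starting at model_offsets[model]; the counts sum to 53,
# matching --array=0-52. Bash does not define a fixed iteration order for
# associative arrays, but in practice the order is stable across tasks running
# the same bash version with the same keys, so every array task computes the
# same index-to-(model, config) mapping.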
# Determine model and config index
for model in "${!model_configs[@]}"; do
    if (( SLURM_ARRAY_TASK_ID >= ${model_offsets[$model]} && SLURM_ARRAY_TASK_ID < ${model_offsets[$model]} + ${model_configs[$model]} )); then
        MODEL_NAME=$model
        CONFIG_IDX=$(( SLURM_ARRAY_TASK_ID - ${model_offsets[$model]} ))
        break
    fi
done
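# Sanity check (optional): abort if SLURM_ARRAY_TASK_ID did not fall into any
# model's range, so driver.py is never launched with an empty --model_name.
if [[ -z "${MODEL_NAME:-}" ]]; then
    echo "Error: SLURM_ARRAY_TASK_ID=${SLURM_ARRAY_TASK_ID} is outside 0-$(( total_configs - 1 ))" >&2
    exit 1
fi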
# For a single-model run, uncomment the block below (and set --array to that
# model's config count):
# MODEL_NAME=hf_GPT2
# CONFIG_IDX=$SLURM_ARRAY_TASK_ID
# srun python driver.py \
#     --real_execution \
#     --model_name "$MODEL_NAME" \
#     --preset_config \
#     --config_idx "$CONFIG_IDX"
srun python driver.py \
    --runtime_estimation \
    --runtime_estimation_mode operator-level-benchmark \
    --model_name "$MODEL_NAME" \
    --preset_config \
    --config_idx "$CONFIG_IDX"
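# Submit with: sbatch run_benchmark.slurm
# (adjust --array above if the per-model config counts change)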