-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathtrain_combo.sh
36 lines (31 loc) · 1.03 KB
/
train_combo.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Two-stage launcher: runs `training.main` under torchrun twice against the
# same data -- first with CLS_CONFIG, then with LIT_CONFIG.
#
# Usage: train_combo.sh CLS_CONFIG LIT_CONFIG [DIR]
#   CLS_CONFIG  value passed as --config to the first run
#   LIT_CONFIG  value passed as --config to the second run
#   DIR         directory to cd into before launching (default: opencls)
#
# Environment overrides (defaults in parentheses):
#   GPUS        processes per node (number of GPUs listed by nvidia-smi)
#   NNODES      number of nodes (1)
#   NODE_RANK   rank of this node (0)
#   PORT        rendezvous port (55565)
#   MASTER_ADDR rendezvous host (127.0.0.1)
#
# Exit status: non-zero on a usage/setup error or if the first run fails;
# otherwise the exit status of the second run.

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Launch one training run. $1 is the value handed to --config; every other
# setting comes from the variables prepared below.
run_stage() {
  torchrun --nproc_per_node="$GPUS" \
    --rdzv_endpoint="$HOST_NODE_ADDR" \
    --nnodes="$NNODES" --node_rank="$NODE_RANK" \
    -m training.main \
    --config="$1" \
    --train-data "$DATA_PATH" \
    --imagenet-val "$VAL_DATA_PATH"
}

# Fail fast instead of launching a job with an empty --config value.
if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  die "usage: train_combo.sh CLS_CONFIG LIT_CONFIG [DIR]"
fi

CLS_CONFIG=$1
LIT_CONFIG=$2
DIR=${3:-"opencls"}

# Only query the driver when the caller did not pin GPUS; this keeps the
# script usable where nvidia-smi is not on PATH but GPUS is given.
if [ -z "${GPUS:-}" ]; then
  command -v nvidia-smi >/dev/null 2>&1 \
    || die "nvidia-smi not found; set GPUS explicitly"
  gpu_names=$(nvidia-smi --query-gpu=name --format=csv,noheader) \
    || die "nvidia-smi failed; set GPUS explicitly"
  # One GPU name per line; count the non-empty lines.
  GPUS=$(printf '%s\n' "$gpu_names" | grep -c .)
  [ "$GPUS" -gt 0 ] || die "no GPUs detected; set GPUS explicitly"
fi

NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-55565}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
HOST_NODE_ADDR=${MASTER_ADDR}:${PORT}

# Exported so the processes spawned by torchrun inherit it.
export TOKENIZERS_PARALLELISM=true

# Placeholders: replace the {your_path_to_...} parts before running.
# The {000000..140146} range is passed through literally (the shell does not
# brace-expand inside quotes or in expansion results); presumably the data
# loader expands it into shard names -- confirm against training.main.
DATA_PATH="{your_path_to_datacomp-1b-webdataset}/{000000..140146}.tar"
VAL_DATA_PATH="{your_path_to_imagenet1k}/ILSVRC/Data/CLS-LOC/val"

printf '%s\n' "$DIR"
cd "$DIR" || die "cannot cd to '$DIR'"

# Stop if the first run fails rather than starting the second one anyway,
# and propagate the failing run's exit status.
run_stage "$CLS_CONFIG" || {
  status=$?
  printf 'first stage (%s) failed with status %s; skipping second stage\n' \
    "$CLS_CONFIG" "$status" >&2
  exit "$status"
}

run_stage "$LIT_CONFIG"