#!/bin/bash
# This script creates scenarios for the Texas-7k power grid system for the
# entire year (2018) for which all input datasets are available. It first
# generates scenarios for five randomly chosen days; using the runtimes
# measured for these days, it estimates the number of days that can be run
# on a single compute node within the given time limit. Based on this
# estimate, the days of the year are partitioned into blocks, with each block
# running in parallel on its own compute node.
# Note that this script can be run directly on the command line (ideally using
# an interactive compute node instead of a head node) or submitted as a Slurm
# task in its own right — see the example usages listed below.
#
# Arguments:
# -o  The directory where output files should be stored. This directory must
#     already exist; any existing scenario files within it will NOT be
#     overwritten.
#
# -n  The number of scenarios to generate.
#
# -m  The maximum runtime for each Slurm job spawned by this script, in
#     minutes. Use smaller maximum runtimes to generate scenarios faster
#     at the expense of having to use more cluster jobs. Maximum runtimes
#     of 100-200 are reasonable if there are a lot of idle nodes and you
#     want scenarios generated quickly, whereas runtimes of 500-800 are
#     more suitable for having this pipeline run overnight.
#
# -a  An optional string specifying additional options to be passed to the
#     Slurm scheduler.
#
# -j  Generate load and solar scenarios together using a joint model instead
#     of the default behaviour in which they are modeled separately.
#
# -c  Optional flag which turns on saving output scenarios in the original
#     PGscen .csv output format. By default, scenarios are saved as compressed
#     pickle objects containing the output for all assets for each day; with
#     -c, a directory is instead created for each day containing a .csv file
#     for each asset under the corresponding "load", "wind", or "solar"
#     subdirectory.
#
# -p  Use models based on principal components analysis instead of conditional
#     models for solar scenarios.
#
# Example usages:
# sh create_t7k_scenarios.sh -o <scratch-dir>/t7k_scens -n 1000 -m 150
# sh create_t7k_scenarios.sh -o <scratch-dir>/t7k_scens -n 500 -m 400 -a '--partition=orfeus'
#
# sbatch --output=<scratch-dir>/slurm-logs/scen-pipeline.out \
#        --error=<scratch-dir>/slurm-logs/scen-pipeline.err \
#        repos/PGscen/create_t7k_scenarios.sh \
#        -o <scratch-dir>/t7k-scens_4k -n 4000 -m 800 -j
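#
# A hypothetical invocation combining the optional flags (illustrative only;
# adjust the paths and the partition name to your cluster):
# sh create_t7k_scenarios.sh -o <scratch-dir>/t7k_scens -n 1000 -m 300 -c -p \
#                            -a '--partition=<your-partition>'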
#SBATCH --job-name=create_t7k-scens
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --time=200
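
# (these #SBATCH defaults apply only when this script is itself submitted via
#  sbatch; they size the driver job that runs the time trials and submits the
#  per-block jobs, not the spawned jobs, which request their own resources below)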
# default command line argument values
opt_str=""
joint_opt=""
pkl_str="-p"
pgscen_cmd="pgscen"
# collect command line arguments
while getopts :o:n:m:jcpa: var
do
case "$var" in
o) out_dir=$OPTARG;;
n) scen_count=$OPTARG;;
m) min_limit=$OPTARG;;
j) joint_opt="--joint";;
c) pkl_str="";;
p) pgscen_cmd="pgscen-pca";;
a) opt_str=$OPTARG;;
[?]) echo "Usage: $0 " \
"[-o] output directory" \
"[-n] how many scenarios to generate" \
"[-m] maximum time to run the pipeline, in minutes" \
"[-j] generate load and solar scenarios jointly?" \
"[-c] use .csv output format instead of pickled dataframes?" \
"[-p] use PCA models for solar scenarios?" \
"[-a] additional Slurm scheduler options" \
exit 1;;
esac
done
if [ ! -d "$out_dir" ];
then
echo "given output directory does not exist, create it before running this pipeline!"
exit 1
fi
# create the logs subdirectory; load licensed software and the conda environment
mkdir -p "$out_dir/logs"
module purge
module load anaconda3/2021.11
conda activate pgscen
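# (assumes a conda environment named "pgscen" has already been created; the
#  anaconda module name and version are specific to the cluster this pipeline
#  was written for and may need to be adjusted)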
# run time trials using five randomly chosen days
run_times=()
for rand in $( shuf -i 0-362 -n 5 );
do
    use_date=$( date -d "2018-01-02 + $rand day" '+%Y-%m-%d' )

    start_time=$(date +%s)
    $pgscen_cmd $use_date 1 -o $out_dir -n $scen_count $joint_opt $pkl_str -v
    end_time=$(date +%s)

    run_times+=($( echo "$end_time - $start_time" | bc ))
done
# sort the time trial results and get the range of runtimes
IFS=$'\n'
sort_times=$( echo "${run_times[*]}" | sort -n )
unset IFS
min_time=$( echo "$sort_times" | head -n1 )
max_time=$( echo "$sort_times" | tail -n1 )
# calculate a conservative estimate of the worst-case runtime of a single day
# and use that to decide how many days we can run on a single node
day_time=$(( max_time + (max_time - min_time) ))
task_size=$( printf %.0f $( bc <<< "$min_limit * 60 / ($day_time * 1.17)" ))
ntasks=$(( 363 / task_size + 1 ))
task_days=$(( 363 / ntasks + 1 ))
use_time=$( printf %.0f $( bc <<< "$task_days * $day_time * 1.13 / 60" ))
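
# Illustrative sizing with assumed (not measured) trial times: if the five time
# trials each take between roughly 300 and 360 seconds and -m 400 was given,
# then day_time = 360 + (360 - 300) = 420 seconds, a single job is sized to
# hold about 400 * 60 / (420 * 1.17), i.e. roughly 48 days, the year is split
# into 363 / 48 + 1 = 8 jobs covering 363 / 8 + 1 = 46 days each, and each job
# requests about 46 * 420 * 1.13 / 60, i.e. roughly 364 minutes of walltime.
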
# make sure we don't end up on the testing queue
if [ "$use_time" -le 61 ];
then
    use_time=62
fi
# break the year into evenly-sized chunks and generate scenarios for each
# chunk using its own Slurm job
day_jobs=()
fmt_str='+%Y-%m-%d'
echo "Submitting $ntasks scenario generation jobs..."
for i in $( seq 1 $ntasks );
do
    day_str=$( date -d "2018-01-02 + $(( (i - 1) * task_days )) day" $fmt_str )

    # make sure we don't try to generate scenarios for days past 2018-12-31
    max_days=$(( ($( date +%s -d "2018-12-31" ) - $( date +%s -d "$day_str" )) / 86400 ))
    use_days=$(( task_days < max_days ? task_days : max_days ))

    day_jobs+=($( sbatch --job-name=t7k_scens --time=$use_time $opt_str --mem-per-cpu=16G \
                         --wrap=" $pgscen_cmd $day_str $use_days -o $out_dir \
                                  -n $scen_count $joint_opt \
                                  $pkl_str --skip-existing -v " \
                         --parsable \
                         --output=$out_dir/logs/slurm_${day_str}.out \
                         --error=$out_dir/logs/slurm_${day_str}.err ))
done
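
# the collected job IDs can be used to monitor the submitted jobs by hand,
# e.g. (illustrative):
#   squeue --jobs=$( IFS=, ; echo "${day_jobs[*]}" )
echo "Submitted Slurm job IDs: ${day_jobs[*]}"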