Plumb through disk and memory overrides for ExtractTask [VS-1124] #8582

Merged: 11 commits, Nov 16, 2023
2 changes: 2 additions & 0 deletions .dockstore.yml
@@ -166,6 +166,7 @@ workflows:
branches:
- master
- ah_var_store
+ - rsa_vs_1124
tags:
- /.*/
- name: GvsImportGenomes
@@ -235,6 +236,7 @@ workflows:
branches:
- master
- ah_var_store
+ - rsa_vs_1124
tags:
- /.*/
- name: GvsWithdrawSamples
10 changes: 8 additions & 2 deletions scripts/variantstore/wdl/GvsExtractCallset.wdl
@@ -18,6 +18,8 @@ workflow GvsExtractCallset {
String query_project = project_id
# This is optional now since the workflow will choose an appropriate value below if this is unspecified.
Int? scatter_count
+ Int? memory_override
+ Int? disk_override
Boolean zero_pad_output_vcf_filenames = true

# set to "NONE" if all the reference data was loaded into GVS in GvsImportGenomes
@@ -211,6 +213,8 @@ Int effective_split_intervals_disk_size_override = select_first([split_intervals
max_last_modified_timestamp = GetBQTablesMaxLastModifiedTimestamp.max_last_modified_timestamp,
extract_preemptible_override = extract_preemptible_override,
extract_maxretries_override = extract_maxretries_override,
+ disk_override = disk_override,
+ memory_override = memory_override,
emit_pls = emit_pls,
emit_ads = emit_ads,
write_cost_to_db = write_cost_to_db,
@@ -304,6 +308,8 @@ task ExtractTask {
File? gatk_override
Int? extract_preemptible_override
Int? extract_maxretries_override
+ Int? disk_override
+ Int? memory_override

Int? local_sort_max_records_in_ram = 10000000

@@ -389,8 +395,8 @@ task ExtractTask {
>>>
runtime {
docker: gatk_docker
memory: "12 GB"
disks: "local-disk 150 HDD"
memory: select_first([memory_override, 12]) + " GB"
disks: "local-disk " + select_first([disk_override, 150]) + " HDD"
bootDiskSizeGb: 15
preemptible: select_first([extract_preemptible_override, "2"])
maxRetries: select_first([extract_maxretries_override, "3"])
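For context on the change above: WDL's select_first returns the first defined element of its array, so each runtime attribute now resolves to the caller-supplied override when one is given and otherwise falls back to the previous hardcoded default (12 GB of memory, a 150 GB HDD local disk). A minimal self-contained sketch of the same pattern; the task name, docker image, and command are illustrative, not part of this PR:

    version 1.0

    # Hypothetical task illustrating the override pattern used in ExtractTask.
    task RuntimeOverrideDemo {
        input {
            Int? memory_override  # optional; undefined when the caller omits it
            Int? disk_override    # optional; undefined when the caller omits it
        }
        command <<<
            echo "runtime override demo"
        >>>
        runtime {
            docker: "ubuntu:22.04"
            # select_first([override, default]) yields the override if defined,
            # else the default; Int + String concatenates to a String in WDL.
            memory: select_first([memory_override, 12]) + " GB"
            disks: "local-disk " + select_first([disk_override, 150]) + " HDD"
        }
    }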
14 changes: 10 additions & 4 deletions scripts/variantstore/wdl/GvsExtractCohortFromSampleNames.wdl
@@ -26,14 +26,16 @@ workflow GvsExtractCohortFromSampleNames {
String extraction_uuid
String filter_set_name
String output_file_base_name
- Int? scatter_count

String? output_gcs_dir
# set to "NONE" if all the reference data was loaded into GVS in GvsImportGenomes
String drop_state = "NONE"

Int? extract_preemptible_override
Int? extract_maxretries_override
+ Int? extract_scatter_count_override
+ Int? extract_memory_override
+ Int? extract_disk_override
Int? split_intervals_disk_size_override
Int? split_intervals_mem_override

@@ -56,17 +58,19 @@
call Utils.GetBQTableLastModifiedDatetime as SamplesTableDatetimeCheck {
input:
project_id = query_project,
fq_table = "~{gvs_project}.~{gvs_dataset}.sample_info"
fq_table = "~{gvs_project}.~{gvs_dataset}.sample_info",
cloud_sdk_docker = effective_cloud_sdk_docker
}

call Utils.GetNumSamplesLoaded {
input:
fq_sample_table = "~{gvs_project}.~{gvs_dataset}.sample_info",
project_id = gvs_project,
- sample_table_timestamp = SamplesTableDatetimeCheck.last_modified_timestamp
+ sample_table_timestamp = SamplesTableDatetimeCheck.last_modified_timestamp,
+ cloud_sdk_docker = effective_cloud_sdk_docker
}

- Int effective_scatter_count = if defined(scatter_count) then select_first([scatter_count])
+ Int effective_scatter_count = if defined(extract_scatter_count_override) then select_first([extract_scatter_count_override])
else if GetNumSamplesLoaded.num_samples < 100 then 50 # Quickstart
else if GetNumSamplesLoaded.num_samples < 1000 then 250
else if GetNumSamplesLoaded.num_samples < 5000 then 500
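Read concretely: a 3,000-sample cohort with no override falls into the < 5000 tier and gets 500 shards, while a supplied extract_scatter_count_override wins unconditionally through the defined()/select_first guard. A hedged sketch of that selection logic as a standalone workflow; num_samples and the final fallback value are placeholders, since the remaining tiers are truncated in this view:

    version 1.0

    # Hypothetical workflow showing how the tiered scatter-count default resolves.
    workflow ScatterCountDemo {
        input {
            Int num_samples = 3000               # example cohort size
            Int? extract_scatter_count_override  # omit to exercise the tiers
        }
        # The tiers below 5000 match the visible diff; the final fallback (1000)
        # stands in for the tail of the chain that is truncated in this view.
        Int effective_scatter_count =
            if defined(extract_scatter_count_override) then select_first([extract_scatter_count_override])
            else if num_samples < 100 then 50    # Quickstart
            else if num_samples < 1000 then 250
            else if num_samples < 5000 then 500  # 3,000 samples -> 500 shards
            else 1000                            # placeholder
        output {
            Int scatter_count = effective_scatter_count
        }
    }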
@@ -123,6 +127,8 @@ workflow GvsExtractCohortFromSampleNames {
extract_maxretries_override = extract_maxretries_override,
split_intervals_disk_size_override = split_intervals_disk_size_override,
split_intervals_mem_override = split_intervals_mem_override,
+ memory_override = extract_memory_override,
+ disk_override = extract_disk_override,

gatk_override = gatk_override,
write_cost_to_db = write_cost_to_db
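With this plumbing, a caller sets the extract_* overrides on this workflow and they are forwarded to ExtractTask under the shorter memory_override/disk_override names. A hedged sketch of a call site; the wrapper, its values, and the elision of this workflow's other required inputs are all illustrative, so this stub will not validate as-is:

    version 1.0

    import "GvsExtractCohortFromSampleNames.wdl" as Extract

    # Hypothetical wrapper exercising the new caller-facing knobs.
    workflow ExtractWithBiggerFootprint {
        call Extract.GvsExtractCohortFromSampleNames {
            input:
                extraction_uuid = "demo-uuid",          # placeholder
                filter_set_name = "demo-filter-set",    # placeholder
                output_file_base_name = "demo_cohort",  # placeholder
                # other required inputs omitted for brevity
                extract_memory_override = 24,           # ExtractTask memory, in GB
                extract_disk_override = 500             # ExtractTask local disk, in GB
        }
    }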