From daf5151e02faa1e5c2292c19cfaba368bdcab3e7 Mon Sep 17 00:00:00 2001 From: lhmoncla Date: Wed, 17 Apr 2024 12:40:20 -0400 Subject: [PATCH 1/4] adding in include files for h5nx and h5n1 to capture recent cattle outbreak sequences --- Snakefile | 14 +++-- clade-labeling/h5n1-clades.tsv | 82 +++++++++++++++++++++++++++++ clade-labeling/h5nx-clades.tsv | 92 +++++++++++++++++++++++++++++++++ config/include_strains_h5n1.txt | 14 ++++- config/include_strains_h5nx.txt | 14 ++++- 5 files changed, 210 insertions(+), 6 deletions(-) diff --git a/Snakefile b/Snakefile index 118ac80..b2e4416 100644 --- a/Snakefile +++ b/Snakefile @@ -1,15 +1,19 @@ -SUBTYPES = ["h5nx","h5n1","h9n2","h7n9"] +SUBTYPES = ["h5nx","h5n1"]#["h5nx","h5n1","h9n2","h7n9"] SEGMENTS = ["pb2", "pb1", "pa", "ha","np", "na", "mp", "ns"] path_to_fauna = '../fauna' rule all: input: - auspice_json = expand("auspice/flu_avian_{subtype}_{segment}.json", subtype=SUBTYPES, segment=SEGMENTS) + #auspice_json = expand("auspice/flu_avian_{subtype}_{segment}.json", subtype=SUBTYPES, segment=SEGMENTS) + auspice_json = expand("auspice/avian-flu_{subtype}_{segment}.json", subtype=SUBTYPES, segment=SEGMENTS) + #sequences = expand("results/sequences_{subtype}_{segment}.fasta", subtype=SUBTYPES, segment=SEGMENTS), + #metadata = expand("results/metadata_{subtype}_{segment}.tsv", subtype=SUBTYPES, segment=SEGMENTS) rule files: params: dropped_strains = "config/dropped_strains_{subtype}.txt", + include_strains = "config/include_strains_{subtype}.txt", reference = "config/reference_{subtype}_{segment}.gb", colors = "config/colors_{subtype}.tsv", lat_longs = "config/lat_longs_{subtype}.tsv", @@ -113,7 +117,8 @@ rule filter: input: sequences = rules.parse.output.sequences, metadata = metadata_by_wildcards, - exclude = files.dropped_strains + exclude = files.dropped_strains, + include = files.include_strains output: sequences = "results/filtered_{subtype}_{segment}.fasta" params: @@ -129,6 +134,7 @@ rule filter: --sequences {input.sequences} \ --metadata {input.metadata} \ --exclude {input.exclude} \ + --include {input.include}\ --output {output.sequences} \ --group-by {params.group_by} \ --sequences-per-group {params.sequences_per_group} \ @@ -297,7 +303,7 @@ rule export: auspice_config = files.auspice_config, description = files.description output: - auspice_json = "auspice/flu_avian_{subtype}_{segment}.json" + auspice_json = "auspice/avian-flu_{subtype}_{segment}.json" shell: """ augur export v2 \ diff --git a/clade-labeling/h5n1-clades.tsv b/clade-labeling/h5n1-clades.tsv index ee8dad6..89cb9f5 100755 --- a/clade-labeling/h5n1-clades.tsv +++ b/clade-labeling/h5n1-clades.tsv @@ -17811,3 +17811,85 @@ A/caspiantern/Washington/23024996001original/2023 2.3.4.4b A/Chicken/Netherlands/24001946006010/2024 EA-nonGsGD A/Tuftedduck/Netherlands/1/2023 EA-nonGsGD A/chicken/Iraq/KVCL016/2015 ? + A/Ph/ST/44/2004 2.3.2 +A/chicken/Vietnam/Raho77232263/2023 2.3.2.1e +A/duck/Vietnam/Raho723S2874/2023 2.3.2.1e +A/duck/Vietnam/Raho723S2490/2023 2.3.2.1e +A/Vietnam/KhanhhoaRV1005/2024 2.3.2.1e +A/duck/Vietnam/Raho723S2875/2023 2.3.2.1e +A/muteswan/Poland/MB055L2/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A045/2024 2.3.4.4b +A/Fox/Bayern/i193/2023 2.3.4.4b +A/feline/USA/24008764001original/2024 2.3.4.4b +A/henharrier/Parnu/TA2126003/2021 2.3.4.4b +A/chicken/CzechRepublic/3529/2024 2.3.4.4b +A/domesticduck/Poland/H69T2/2024 2.3.4.4b +A/goose/Bayern/wv196/2023 2.3.4.4b +A/feline/USA/24009116005original/2024 2.3.4.4b +A/muteswan/Poland/MB085N/2024 2.3.4.4b +A/feline/USA/24009311006original/2024 2.3.4.4b +A/chicken/CzechRepublic/47202/2024 2.3.4.4b +A/chicken/Poland/H79T2/2024 2.3.4.4b +A/muteswan/Poland/MB055L1/2024 2.3.4.4b +A/dairycattle/Texas/24008749002v/2024 2.3.4.4b +A/feline/USA/24009116004original/2024 2.3.4.4b +A/domesticduck/Poland/H52T1K2/2024 2.3.4.4b +A/chicken/CzechRepublic/35494/2024 2.3.4.4b +A/chicken/CzechRepublic/31374/2024 2.3.4.4b +A/feline/USA/24009116002original/2024 2.3.4.4b +A/chicken/CzechRepublic/3744orig/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A031/2024 2.3.4.4b +A/muteswan/Poland/MB055L4/2024 2.3.4.4b +A/domesticduck/Poland/H57T1/2024 2.3.4.4b +A/turkey/Poland/H40T2/2024 2.3.4.4b +A/goose/Bayern/wv351/2023 2.3.4.4b +A/domesticgoose/Poland/H49W/2024 2.3.4.4b +A/turkey/Poland/H47T4/2024 2.3.4.4b +A/turkey/Poland/H80T3/2024 2.3.4.4b +A/Texas/37/2024 2.3.4.4b +A/chicken/Poland/H45NM/2024 2.3.4.4b +A/turkey/Poland/H75T1/2024 2.3.4.4b +A/CommonBuzzard/GermanyBB/2024AI01490/2024 2.3.4.4b +A/feline/USA/23037332001original/2023 2.3.4.4b +A/feline/USA/24008850001original/2024 2.3.4.4b +A/turkey/Poland/H68T2/2024 2.3.4.4b +A/domesticduck/Poland/H66T1/2024 2.3.4.4b +A/feline/USA/24009311004original/2024 2.3.4.4b +A/turkey/Poland/H43T2/2024 2.3.4.4b +A/chicken/CzechRepublic/354910/2024 2.3.4.4b +A/chicken/CzechRepublic/34582/2024 2.3.4.4b +A/NorthernFulmar/Netherlands/4/2024 2.3.4.4b +A/largebilledcrow/Ishikawa/1702A010/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A044/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A033/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A032/2024 2.3.4.4b +A/chicken/CzechRepublic/37442/2024 2.3.4.4b +A/domesticduck/Poland/H60T4/2024 2.3.4.4b +A/feline/USA/24008850002original/2024 2.3.4.4b +A/Seagull/Parnu/TA21132845/2021 2.3.4.4b +A/chicken/CzechRepublic/37443/2024 2.3.4.4b +A/muteswan/Poland/MB070L1/2024 2.3.4.4b +A/buzzard/Poland/MB098N/2024 2.3.4.4b +A/chicken/CzechRepublic/31371/2024 2.3.4.4b +A/turkey/Poland/H81T1/2024 2.3.4.4b +A/domesticgoose/Poland/H48D1/2024 2.3.4.4b +A/chicken/CzechRepublic/37441/2024 2.3.4.4b +A/muteswan/CzechRepublic/4316/2024 2.3.4.4b +A/buzzard/Poland/MB103N/2024 2.3.4.4b +A/turkey/Poland/H63N/2024 2.3.4.4b +A/chicken/CzechRepublic/35499/2024 2.3.4.4b +A/largebilledcrow/Hokkaido/B114/2024 2.3.4.4b +A/chicken/CzechRepublic/34583/2024 2.3.4.4b +A/feline/USA/24008764002original/2024 2.3.4.4b +A/eagle/Parnu/TA21118641/2021 2.3.4.4b +A/turkey/Poland/H38T4/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A043/2024 2.3.4.4b +A/chicken/CzechRepublic/34584/2024 2.3.4.4b +A/chicken/CzechRepublic/31375/2024 2.3.4.4b +A/Northernshoveler/Jeju/D60/2023 2.3.4.4b +A/turkey/Poland/H543NM/2023 2.3.4.4b +A/largebilledcrow/Osaka/2702A030/2024 2.3.4.4b +A/CommonBuzzard/GermanyHH/2024AI01435/2024 EA-nonGsGD +A/BarnacleGoose/GermanySH/2024AI01487/2024 EA-nonGsGD +A/Dunlin/GermanySH/2024AI01484/2024 EA-nonGsGD +A/Fox/Bayern/WS113/2022 ? diff --git a/clade-labeling/h5nx-clades.tsv b/clade-labeling/h5nx-clades.tsv index e476a37..8776292 100755 --- a/clade-labeling/h5nx-clades.tsv +++ b/clade-labeling/h5nx-clades.tsv @@ -26824,3 +26824,95 @@ A/Eurasianteal/Netherlands/2/2023 EA-nonGsGD A/Mallard/Netherlands/12/2023 EA-nonGsGD A/mallard/Poland/P292w10/2023 EA-nonGsGD A/chicken/Iraq/KVCL016/2015 ? + A/Ph/ST/44/2004 2.3.2 +A/Vietnam/KhanhhoaRV1005/2024 2.3.2.1e +A/duck/Vietnam/Raho723S2875/2023 2.3.2.1e +A/chicken/Vietnam/Raho77232263/2023 2.3.2.1e +A/duck/Vietnam/Raho723S2490/2023 2.3.2.1e +A/duck/Vietnam/Raho723S2874/2023 2.3.2.1e +A/largebilledcrow/Hokkaido/B114/2024 2.3.4.4b +A/feline/USA/24008850002original/2024 2.3.4.4b +A/chicken/CzechRepublic/47202/2024 2.3.4.4b +A/chicken/CzechRepublic/34584/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A031/2024 2.3.4.4b +A/henharrier/Parnu/TA2126003/2021 2.3.4.4b +A/chicken/CzechRepublic/31375/2024 2.3.4.4b +A/chicken/CzechRepublic/31371/2024 2.3.4.4b +A/chicken/CzechRepublic/35499/2024 2.3.4.4b +A/turkey/Poland/H75T1/2024 2.3.4.4b +A/largebilledcrow/Hokkaido/B112/2024 2.3.4.4b +A/chicken/Poland/H45NM/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A032/2024 2.3.4.4b +A/buzzard/Poland/MB098N/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A033/2024 2.3.4.4b +A/goose/Bayern/wv351/2023 2.3.4.4b +A/feline/USA/24009116002original/2024 2.3.4.4b +A/NorthernFulmar/Netherlands/3/2024 2.3.4.4b +A/chicken/CzechRepublic/31374/2024 2.3.4.4b +A/feline/USA/24008850001original/2024 2.3.4.4b +A/feline/USA/24009311006original/2024 2.3.4.4b +A/domesticduck/Poland/H60T4/2024 2.3.4.4b +A/chicken/CzechRepublic/354910/2024 2.3.4.4b +A/domesticgoose/Poland/H48D1/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A045/2024 2.3.4.4b +A/domesticgoose/Poland/H49W/2024 2.3.4.4b +A/turkey/Poland/H63N/2024 2.3.4.4b +A/Texas/37/2024 2.3.4.4b +A/buzzard/Poland/MB103N/2024 2.3.4.4b +A/largebilledcrow/Ishikawa/1702A010/2024 2.3.4.4b +A/NorthernFulmar/Netherlands/1/2024 2.3.4.4b +A/domesticduck/Poland/H57T1/2024 2.3.4.4b +A/chicken/CzechRepublic/37443/2024 2.3.4.4b +A/turkey/Poland/H43T2/2024 2.3.4.4b +A/CommonBuzzard/GermanyBB/2024AI01490/2024 2.3.4.4b +A/Fox/Bayern/i193/2023 2.3.4.4b +A/turkey/Poland/H38T4/2024 2.3.4.4b +A/chicken/CzechRepublic/34583/2024 2.3.4.4b +A/feline/USA/24009311004original/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A030/2024 2.3.4.4b +A/NorthernFulmar/Netherlands/4/2024 2.3.4.4b +A/chicken/CzechRepublic/34582/2024 2.3.4.4b +A/Seagull/Parnu/TA21132845/2021 2.3.4.4b +A/turkey/Poland/H68T2/2024 2.3.4.4b +A/feline/USA/24009116004original/2024 2.3.4.4b +A/muteswan/Poland/MB055L2/2024 2.3.4.4b +A/largebilledcrow/Hokkaido/B104/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A044/2024 2.3.4.4b +A/goose/Bayern/wv196/2023 2.3.4.4b +A/feline/USA/24009116005original/2024 2.3.4.4b +A/muteswan/CzechRepublic/4316/2024 2.3.4.4b +A/turkey/Poland/H543NM/2023 2.3.4.4b +A/chicken/CzechRepublic/37441/2024 2.3.4.4b +A/muteswan/Poland/MB055L1/2024 2.3.4.4b +A/Sparrowhawk/England/133485/2024 2.3.4.4b +A/chicken/Poland/H79T2/2024 2.3.4.4b +A/largebilledcrow/Osaka/2702A043/2024 2.3.4.4b +A/muteswan/Poland/MB085N/2024 2.3.4.4b +A/dairycattle/Texas/24008749002v/2024 2.3.4.4b +A/feline/USA/23037332001original/2023 2.3.4.4b +A/chicken/CzechRepublic/35494/2024 2.3.4.4b +A/CommonBuzzard/England/133477/2024 2.3.4.4b +A/chicken/CzechRepublic/3744orig/2024 2.3.4.4b +A/feline/USA/24008764002original/2024 2.3.4.4b +A/Northernshoveler/Jeju/D60/2023 2.3.4.4b +A/Sparrowhawk/England/133522/2024 2.3.4.4b +A/turkey/Poland/H81T1/2024 2.3.4.4b +A/domesticduck/Poland/H52T1K2/2024 2.3.4.4b +A/largebilledcrow/Hokkaido/B106/2024 2.3.4.4b +A/chicken/CzechRepublic/3529/2024 2.3.4.4b +A/eagle/Parnu/TA21118641/2021 2.3.4.4b +A/muteswan/Poland/MB055L4/2024 2.3.4.4b +A/feline/USA/24008764001original/2024 2.3.4.4b +A/domesticduck/Poland/H69T2/2024 2.3.4.4b +A/domesticduck/Poland/H66T1/2024 2.3.4.4b +A/NorthernFulmar/Netherlands/2/2024 2.3.4.4b +A/turkey/Poland/H47T4/2024 2.3.4.4b +A/turkey/Poland/H80T3/2024 2.3.4.4b +A/muteswan/Poland/MB070L1/2024 2.3.4.4b +A/chicken/CzechRepublic/37442/2024 2.3.4.4b +A/turkey/Poland/H40T2/2024 2.3.4.4b +A/CommonBuzzard/GermanyHH/2024AI01435/2024 EA-nonGsGD +A/RedKnot/GermanySH/2024AI01485/2024 EA-nonGsGD +A/Dunlin/GermanySH/2024AI01484/2024 EA-nonGsGD +A/BarnacleGoose/GermanySH/2024AI01487/2024 EA-nonGsGD +A/Fox/Bayern/WS113/2022 ? diff --git a/config/include_strains_h5n1.txt b/config/include_strains_h5n1.txt index a196efa..138f404 100755 --- a/config/include_strains_h5n1.txt +++ b/config/include_strains_h5n1.txt @@ -22,6 +22,18 @@ A/dairycattle/Texas/24008749004original/2024 A/dairycattle/Texas/24008749006original/2024 A/dairycattle/Texas/24008749007original/2024 A/dairycattle/Texas/24008749005original/2024 +A/dairycattle/Texas/24008749002v/2024 A/goat/Minnesota/24007234006original/2024 A/goat/Minnesota/24007234009original/2024 -A/goat/Minnesota/24007234003original/2024 \ No newline at end of file +A/goat/Minnesota/24007234003original/2024 +A/Texas/37/2024 +A/feline/USA/24009116005original/2024 +A/feline/USA/24008850001original/2024 +A/feline/USA/24009116004original/2024 +A/feline/USA/24009116002original/2024 +A/feline/USA/24008764001original/2024 +A/feline/USA/24009311006original/2024 +A/feline/USA/24008764002original/2024 +A/feline/USA/23037332001original/2023 +A/feline/USA/24008850002original/2024 +A/feline/USA/24009311004original/2024 \ No newline at end of file diff --git a/config/include_strains_h5nx.txt b/config/include_strains_h5nx.txt index a196efa..138f404 100755 --- a/config/include_strains_h5nx.txt +++ b/config/include_strains_h5nx.txt @@ -22,6 +22,18 @@ A/dairycattle/Texas/24008749004original/2024 A/dairycattle/Texas/24008749006original/2024 A/dairycattle/Texas/24008749007original/2024 A/dairycattle/Texas/24008749005original/2024 +A/dairycattle/Texas/24008749002v/2024 A/goat/Minnesota/24007234006original/2024 A/goat/Minnesota/24007234009original/2024 -A/goat/Minnesota/24007234003original/2024 \ No newline at end of file +A/goat/Minnesota/24007234003original/2024 +A/Texas/37/2024 +A/feline/USA/24009116005original/2024 +A/feline/USA/24008850001original/2024 +A/feline/USA/24009116004original/2024 +A/feline/USA/24009116002original/2024 +A/feline/USA/24008764001original/2024 +A/feline/USA/24009311006original/2024 +A/feline/USA/24008764002original/2024 +A/feline/USA/23037332001original/2023 +A/feline/USA/24008850002original/2024 +A/feline/USA/24009311004original/2024 \ No newline at end of file From a9ce08000edd6fb1a7abb6d2a81d68b55106df59 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:01:59 -0700 Subject: [PATCH 2/4] Reference new top-level name in CI build target --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1af597d..a8bd5c6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,4 +8,4 @@ jobs: ci: uses: nextstrain/.github/.github/workflows/pathogen-repo-ci.yaml@master with: - build-args: auspice/flu_avian_h5n1_ha.json + build-args: auspice/avian-flu_h5n1_ha.json From f8df5fff2cd2eedc7dcea44b8fc971865b1b02ce Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:02:58 -0700 Subject: [PATCH 3/4] Reference new top-level name in AWS Batch script --- batch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batch.py b/batch.py index bccabf7..825ba3e 100644 --- a/batch.py +++ b/batch.py @@ -30,8 +30,8 @@ for segment in params.segments: call = ['nextstrain', 'build', '--aws-batch', '.', '-j 1'] targets = [] - targets.append('auspice/flu_avian_%s_%s_tree.json'%(subtype, segment)) - targets.append('auspice/flu_avian_%s_%s_meta.json'%(subtype, segment)) + targets.append('auspice/avian-flu_%s_%s_tree.json'%(subtype, segment)) + targets.append('auspice/avian-flu_%s_%s_meta.json'%(subtype, segment)) call.extend(targets) print(' '.join(call)) log = open('logs/%s_%s.txt'%(subtype, segment), 'w') From cb887f6fe69831005607af6ae0f05fbacf332aa1 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 19 Apr 2024 11:05:43 -0700 Subject: [PATCH 4/4] Reference new top-level name in quickstart build --- quickstart-build/Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quickstart-build/Snakefile b/quickstart-build/Snakefile index 3c9cf6f..bb18d24 100755 --- a/quickstart-build/Snakefile +++ b/quickstart-build/Snakefile @@ -15,7 +15,7 @@ SEGMENTS = ["ha"] generated JSON files in the auspice folder for each subtype and segment.""" rule all: input: - auspice_json = expand("auspice/flu_avian_{subtype}_{segment}.json", subtype=SUBTYPES, segment=SEGMENTS) + auspice_json = expand("auspice/avian-flu_{subtype}_{segment}.json", subtype=SUBTYPES, segment=SEGMENTS) """Specify all input files here. For this build, you'll start with input sequences from the example_data folder, which contain metadata information in the @@ -305,7 +305,7 @@ rule export: node_data = node_data_by_wildcards, auspice_config = files.auspice_config output: - auspice_json = "auspice/flu_avian_{subtype}_{segment}.json" + auspice_json = "auspice/avian-flu_{subtype}_{segment}.json" shell: """ augur export v2 \