diff --git a/HPC_analysis/scripts/lcWGS/F0_all_juveniles_OM_angsd_dupsquery.sh b/HPC_analysis/scripts/lcWGS/F0_all_juveniles_OM_angsd_dupsquery.sh deleted file mode 100644 index 0eabc2a..0000000 --- a/HPC_analysis/scripts/lcWGS/F0_all_juveniles_OM_angsd_dupsquery.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -#SBATCH --job-name="angsdF0alljuvOM" -#SBATCH -t 500:00:00 -#SBATCH --mail-type=ALL -#SBATCH --mem=180GB -#SBATCH -c 20 -#SBATCH -p medmem -#SBATCH --mail-user=samuel.gurr@noaa.gov -#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_out.%j" -#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_err.%j" - -# Set-up - -# output dir -mkdir -p angsd/output - -## load module -module load bio/angsd/0.940 -module load bio/samtools/1.19 - -## dir shortcuts -DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files -OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output # Path to the base directory where output files will be written -REFDIR=~/refs # Path to reference genome - -# angsd parameters -DOMAJORMINOR=4 # 1 is for either seq data like bam files or genotpe liklihood data where the maj minor allele is inferred from likli$ -# 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the ge$ -DOMAF=1 # 1 assumed known major and minor allele 2 assumed known major and unknown minor allele -DOGENO=5 # 1 write major and minor allele 2 print the called genotype and count of minor 4 print the called genotype -DOPOST=1 # 1 estimate the posterior genotype probability based on the allele frequency as a prior 2 estimat eth posterier genotpe pr$ -GENOLIKE=1 # SAMtools -DOGLF=2 # beagle genotype liklihood format -DOCOUNTS=1 # provide freq of bases - output as pos.gz -DODEPTH=1 # default 0 output distribution of seq depths sites aove maxDepth will be binnes out files in depthSample and depthGlobal -MAXDEPTH=100 # default 100 -SETMINDEPTHIND=5 # min depth, below which the individual is omitted from analysis -SETMAXDEPTHIND=150 # max depth - above which the indivisual is omitted from analysis -DUMPCOUNTS=2 # 1 is the total seq depth for site as .pos.gz, 2 is the seq deth per sample as .pos.gz and .counts.gz; look up allele $ -MINQ=30 # Minimum quality filter -MINMAPQ=30 # Minmm mapping quality -MINMAF=0.05 # Minimum minor allele frequency filter -# C -50 is flag to adjust for excessive mismatches as recomennded when BWA was used for alignment -# -uniqueOnly; removed reads that have multiple best hits 0 default, 1 remove -# -remove_bads = reads flagged as bad -# -only_proper_pairs = read that did not have a mate pair - -# ANGSD for data within generation (F1 - F2 - F3, broodstock and juveniles separately) -# for loop - run ansd for the total bam list within time/generation (i.e. all F1 Broodstock, F2_Juveniles, etc.) -# objective to input a strata / metadta to identify treatments associated with ids downstream.. - - -mkdir -p $OUTDIR/all_juveniles # make director for ansd output files - -# nav to dir -cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! 
-#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams - -# cal number of indiv -nIND=$(wc -l $DATDIR/Master_query_dups_removed/F0_all_juveniles_OM_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id -minIND=$(echo "(${nIND} * 0.95)" | bc) # min individual is 90% of the total count -minINDRd=$(printf "%.0f" ${minIND}) # round that - -# set min and max depth for minuum of 5X and max of 20X coerage per loci to acoid potential bias arising from seq errors and recommendations in O'Leary (etal 2018) -MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the minimum num of individuals for genotype calls as 90% and assuming per indiv per loci -MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci - -# call unique outbase -OUTBASE='F0_OM_all_juveniles_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files - -# run angsd -angsd -b $DATDIR/Master_query_dups_removed/F0_all_juveniles_OM_bamlist.txt \ - -ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \ - -GL $GENOLIKE \ - -doGlf $DOGLF \ - -doMaf $DOMAF \ - -doMajorMinor $DOMAJORMINOR \ - -dogeno $DOGENO \ - -doPOST $DOPOST \ - -doCounts $DOCOUNTS \ - -dumpCounts $DUMPCOUNTS \ - -doDepth $DODEPTH \ - -maxDepth $MAXDEPTH \ - -setMinDepth $MINDP \ - -setMaxDepth $MAXDP \ - -setMaxDepthInd $SETMAXDEPTHIND \ - -setMinDepthInd $SETMINDEPTHIND \ - -doIBS 1 \ - -makematrix 1 \ - -doCov 1 \ - -minInd $minINDRd \ - -dobcf 1 \ - --ignore-RG 0 \ - -minQ $MINQ \ - -minMapQ $MINMAPQ \ - -minMaf $MINMAF \ - -SNP_pval 1e-6 \ - -P 2 \ - -remove_bads 1 \ - -only_proper_pairs 1 \ - -uniqueOnly 1 \ - -C 50 \ - -out $OUTDIR/F0_all_juveniles/$OUTBASE \ - >& $OUTDIR/F0_all_juveniles/$OUTBASE'.log'; -#done diff --git a/HPC_analysis/scripts/lcWGS/F0_all_juveniles_angsd_dupsquery.sh b/HPC_analysis/scripts/lcWGS/F0_all_juveniles_angsd_dupsquery.sh deleted file mode 100644 index 1ee2231..0000000 --- a/HPC_analysis/scripts/lcWGS/F0_all_juveniles_angsd_dupsquery.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -#SBATCH --job-name="angsdF0alljuv" -#SBATCH -t 500:00:00 -#SBATCH --mail-type=ALL -#SBATCH --mem=180GB -#SBATCH -c 20 -#SBATCH -p medmem -#SBATCH --mail-user=samuel.gurr@noaa.gov -#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_out.%j" -#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_all_juveniles/"%x_err.%j" - -# Set-up - -# output dir -mkdir -p angsd/output - -## load module -module load bio/angsd/0.940 -module load bio/samtools/1.19 - -## dir shortcuts -DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files -OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output # Path to the base directory where output files will be written -REFDIR=~/refs # Path to reference genome - -# angsd parameters -DOMAJORMINOR=4 # 1 is for either seq data like bam files or genotpe liklihood data where the maj minor allele is inferred from likli$ -# 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the ge$ -DOMAF=1 # 1 assumed known major and minor allele 2 assumed known major and unknown minor allele -DOGENO=5 # 1 write major and minor allele 2 print the 
called genotype and count of minor 4 print the called genotype -DOPOST=1 # 1 estimate the posterior genotype probability based on the allele frequency as a prior 2 estimat eth posterier genotpe pr$ -GENOLIKE=1 # SAMtools -DOGLF=2 # beagle genotype liklihood format -DOCOUNTS=1 # provide freq of bases - output as pos.gz -DODEPTH=1 # default 0 output distribution of seq depths sites aove maxDepth will be binnes out files in depthSample and depthGlobal -MAXDEPTH=100 # default 100 -SETMINDEPTHIND=5 # min depth, below which the individual is omitted from analysis -SETMAXDEPTHIND=150 # max depth - above which the indivisual is omitted from analysis -DUMPCOUNTS=2 # 1 is the total seq depth for site as .pos.gz, 2 is the seq deth per sample as .pos.gz and .counts.gz; look up allele $ -MINQ=30 # Minimum quality filter -MINMAPQ=30 # Minmm mapping quality -MINMAF=0.05 # Minimum minor allele frequency filter -# C -50 is flag to adjust for excessive mismatches as recomennded when BWA was used for alignment -# -uniqueOnly; removed reads that have multiple best hits 0 default, 1 remove -# -remove_bads = reads flagged as bad -# -only_proper_pairs = read that did not have a mate pair - -# ANGSD for data within generation (F1 - F2 - F3, broodstock and juveniles separately) -# for loop - run ansd for the total bam list within time/generation (i.e. all F1 Broodstock, F2_Juveniles, etc.) -# objective to input a strata / metadta to identify treatments associated with ids downstream.. - - -mkdir -p $OUTDIR/all_juveniles # make director for ansd output files - -# nav to dir -cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! -#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams - -# cal number of indiv -nIND=$(wc -l $DATDIR/Master_query_dups_removed/F0_all_juveniles_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id -minIND=$(echo "(${nIND} * 0.95)" | bc) # min individual is 90% of the total count -minINDRd=$(printf "%.0f" ${minIND}) # round that - -# set min and max depth for minuum of 5X and max of 20X coerage per loci to acoid potential bias arising from seq errors and recommendations in O'Leary (etal 2018) -MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the minimum num of individuals for genotype calls as 90% and assuming per indiv per loci -MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci - -# call unique outbase -OUTBASE='F0_all_juveniles_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files - -# run angsd -angsd -b $DATDIR/Master_query_dups_removed/F0_all_juveniles_bamlist.txt \ - -ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \ - -GL $GENOLIKE \ - -doGlf $DOGLF \ - -doMaf $DOMAF \ - -doMajorMinor $DOMAJORMINOR \ - -dogeno $DOGENO \ - -doPOST $DOPOST \ - -doCounts $DOCOUNTS \ - -dumpCounts $DUMPCOUNTS \ - -doDepth $DODEPTH \ - -maxDepth $MAXDEPTH \ - -setMinDepth $MINDP \ - -setMaxDepth $MAXDP \ - -setMaxDepthInd $SETMAXDEPTHIND \ - -setMinDepthInd $SETMINDEPTHIND \ - -doIBS 1 \ - -makematrix 1 \ - -doCov 1 \ - -minInd $minINDRd \ - -dobcf 1 \ - --ignore-RG 0 \ - -minQ $MINQ \ - -minMapQ $MINMAPQ \ - -minMaf $MINMAF \ - -SNP_pval 1e-6 \ - -P 20 \ - -remove_bads 1 \ - -only_proper_pairs 1 \ - -uniqueOnly 1 
\ - -C 50 \ - -out $OUTDIR/F0_all_juveniles/$OUTBASE \ - >& $OUTDIR/F0_all_juveniles/$OUTBASE'.log'; -#done diff --git a/HPC_analysis/scripts/lcWGS/Snakefile b/HPC_analysis/scripts/lcWGS/Snakefile deleted file mode 100644 index 6758e6c..0000000 --- a/HPC_analysis/scripts/lcWGS/Snakefile +++ /dev/null @@ -1,846 +0,0 @@ -# input your common thread(s) here -#subfolders={'F1_Broodstock', 'F1_Juveniles', 'F2_Juveniles', 'F2_Broodstock', 'F3_Juveniles'} # omitted F0_Broodstock bcus this was already run when I made this change -subfolders={'F1_Juveniles', 'F2_Juveniles', 'F3_Juveniles'} # omitted F0_Broodstock bcus this was already run when I made this change - -# ALL: one rule to rule them all.. -# Important!: to run this entire make of snake, the input to 'all' should be equivalent to the **final** filename at the end of the pipe -# Note: if you want to run a partial pipeline you can add an input for 'all' here that is equivalent to the output of a specific chunk (e.g. mapping) -rule all: - # remember - 4 spaces via python syntax! - input: - # -remember - 8 spaces via pyton syntax! DO NOT LEAVE EMPTY LINES FOR INPUT OR OUTPUT - # rawQC - check for upload succes via md5checksum - "md5checksum/Nov2023/Nov2023_BAD_UPLOADS.txt", - # trim - currently by (i) adapter fasta and (ii) by quality score - "trim_QC/trim_complete", - # mutliQC - manually scp out this report to view - "trim_QC/multiQC_complete", - # map_SortSam - QC-d reads to the reference genome (note: this is where we will change to the new genome!) - "hisat2/mapping_complete", - # bams_linked - link bam files to angsd/data by generation and life stage - "angsd/data/bams_linked", - # merge_bams - self explanatory.. use samtools here to merge seq lanes by ID (R1 and R2 to a master bam) - "angsd/data/merge_complete", - # sortbams_removedups - use picard to SortSam by queryname and then remove duplicates - the output file here is used for SNP calling (finally!) - #"angsd/data/sortbams_removedups", - expand("angsd/data/{generation}/sortremovedups_complete", generation=subfolders), - # generate_bamlists - angsd required a list of bam files to call, generate these lists here - "angsd/data/bamlists_generated" - # run angsd for F0 - based on base counts, no prior - #"angsd/output/F0_angsd", - # run angsd - #"angsd/output/All_angsd" -# RAW QC: load the reads, initial symbolic directory, etc. -rule rawQC: - #input: - # insert here - output: - # insert here - "md5checksum/Nov2023/Nov2023_BAD_UPLOADS.txt" - #"md5checksum_complete" - shell: - """ - echo 'start' $(date) - - # Symbolic links to raw data, md5sum & check - - ## notes: each raw upload folder under 'share' space has a subdir 'md5_files' with the .md5 reports from azenta - - ## setup: - mkdir -p ./md5checksum # start folder, if already exists it wont throw the error 'C. POWERS!' 
- April2023='April2023' # string, equal to foldername containing raw data in 'share' - Oct2022='Oct2022' # string, equal to foldername containing raw data in 'share' - July2023='July2023' # string, equal to foldername containing raw data in 'share' - Nov2023='Nov2023' - # IDS='Oct2022 April2023 July2023 Nov2023' # loop strings - IDS='Nov2023' # had to quickly just redo the Nov2023 data - omit this line and run the previous for the full pipeline - - ## loop symbolic links from the 'share' directory to our current snakemake_pipeline folder for this project - - for m in ${{IDS}}; do # run first loop to add raw data to symbolic directories - mkdir -p ./md5checksum/${{m}} # make subfolders in md5checksum, avoid that error - cd ./md5checksum/${{m}} # go to the dir where you want to make the symbolic link - this avoids the link from 'breaking' - ln -sf ../../../../../../share/nefsc/mcfarland_sequecenes/lcWGS_scallops_${{m}}/*.gz ./ # SYMBOLIC LINKS ADDED - cd ../../ # go back two folders for simplicity - now all commands from the main snakemake_pipeline directory - zgrep -c '@' ./md5checksum/${{m}}/*.gz > ./md5checksum/${{m}}/${{m}}_rawread_count.txt # RAW READ COUNTS (zgrep reads the gzipped fastq directly) - cat ../../../../share/nefsc/mcfarland_sequecenes/lcWGS_scallops_${{m}}/md5_files/*md5 > ./md5checksum/${{m}}/${{m}}_reference.md5 # REFERENCE md5sum - cat ./md5checksum/${{m}}/${{m}}_reference.md5 | awk -F[./] '{{print $1 $3}}' > ./md5checksum/${{m}}/${{m}}_refsummary.txt # REFERENCE REFORMATTED md5sum - md5sum ./md5checksum/${{m}}/*.gz > ./md5checksum/${{m}}/${{m}}_check.md5 # CHECK md5sum - cat ./md5checksum/${{m}}/${{m}}_check.md5 | awk -F[../] '{{print $1 $5}}' > ./md5checksum/${{m}}/${{m}}_checksummary.txt # CHECK REFORMATTED md5sum - # comm -23 checks for differences between files; here we compare the known and calculated (and sorted!) md5 checksums, so ANY output to 'BAD_UPLOADS' means that file - # was uploaded to the server with error/incomplete - comm -23 <(sort ./md5checksum/${{m}}/${{m}}_refsummary.txt | uniq) <(sort ./md5checksum/${{m}}/${{m}}_checksummary.txt | uniq) > ./md5checksum/${{m}}/${{m}}_BAD_UPLOADS.txt; - done - - #touch md5checksum_complete - - - echo 'raw data symbolically linked & md5sum & read counts complete!' $(date) - - """ - -# TRIM -rule trim: - input: - # insert here - "md5checksum/Nov2023/Nov2023_BAD_UPLOADS.txt" - output: - # insert here - #"trim_QC/July2023/July2023_trim_complete.txt" - "trim_QC/trim_complete" - shell: - """ - echo 'start' $(date) - - # setup: - ## load modules - module load bio/fastp/0.23.2 - module load bio/fastqc/0.11.9 - - ## start new dir for output files - mkdir -p ./trim_QC # start folder, use -p so if it already exists it won't throw the error 'C. POWERS!' - - - ## loop vars - April2023='April2023' # string, equal to foldername containing raw data - Oct2022='Oct2022' # string, equal to foldername containing raw data - July2023='July2023' # string, equal to foldername containing raw data - Nov2023='Nov2023' - IDS='Oct2022 April2023 July2023 Nov2023' # loop strings - #IDS='Nov2023' # just run these new data..
omit this line if running the full pipe - - ## adapter.fasta file - echo -e ">Nextera_mate_pair1 \nAGATGTGTATAAGAGACAG \n>Nextera_mate_pair2 \nCTGTCTCTTATACACATCT \n>Nextera_transposase_1 \nTCGTCGGCAGCGTCAGATGTGTATAAGAGACAG \n>Nextera_transposase_2 \nGTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" > ./trim_QC/adapter.fasta - - - # run the loop: - for m in ${{IDS}}; do # run first loop to add raw data to symbolic directories - mkdir -p ./trim_QC/${{m}} # make subfolder - array_m=($(ls ./md5checksum/${{m}}/*.fastq.gz)); # an array of sequences to trim - for i in ${{array_m[@]}}; do # inside for loop for each loop var - filename=$(echo "${{i}}" | awk -F/ '{{print $NF}}') # filename ensures that the output name of the .gz file does not include the preceeding dir - solely filename.fasta.gz - fastp --in1 ${{i}} --out1 ./trim_QC/${{m}}/adapter_trim.${{filename}} --adapter_sequence=CTGTCTCTTATACACATCT --adapter_fasta ./trim_QC/adapter.fasta - fastqc ./trim_QC/${{m}}/adapter_trim.${{filename}} --outdir ./trim_QC/${{m}}/ # call the output files from fastp in previous line and output fastqc in the same folder with adapter_trim filename head - done; - done - - #echo 'trim_complete' > ./trim_QC/trim_complete - touch trim_QC/trim_complete - - echo 'end yay!' $(date) - - """ - -# MultiQC report -rule multiQC: - input: - # insert here - "trim_QC/trim_complete" - output: - # insert here - "trim_QC/multiQC_complete" - shell: - """ - echo 'start' $(date) - - # setup - ## vars - PYTHONENV=~/python_venv/bin # python environment - ## loop vars by seqence run - April2023='April2023' # string, equal to foldername containing raw data - Oct2022='Oct2022' # string, equal to foldername containing raw data - July2023='July2023' # string, equal to foldername containing raw data - Nov2023='Nov2023' - IDS='Oct2022 April2023 July2023 Nov2023' # loop strings - #IDS='Nov2023' # just run these new data.. omit this line if running the full pipe - - # nav to the python env from the current snake lair (..directory) - source $PYTHONENV/activate # from the current directory, activates the bin of installed python packages, including multiqc - - # for loop mutliqc by sequence run - for i in ${{IDS}}; do # loopty loop, note {{}} calls as a 'string' - mkdir -p ./trim_QC/${{i}}/mutliQC_report # make dir in each trim folder - multiqc ./trim_QC/${{i}}/ -o ./trim_QC/${{i}}/mutliQC_report; # compile MultiQC report from FastQC files - output .html in mutliQC_report dir - done - - # deactivate python - #deactivate - - # output to tether together snakes - touch trim_QC/multiQC_complete - #echo 'multiQC compelte' > ./trim_QC/multiQC_complete - - echo 'Cleaned MultiQC reports generated.' 
$(date) - - """ - -# Mapping using HISAT2 -rule map_SortSam: - input: - # insert here - "trim_QC/multiQC_complete" - output: - # insert here - "hisat2/mapping_complete" - shell: - """ - echo 'start' $(date) # checkpoint output - - # setup - - ## load modules, requires hisat2 and samtools - module load bio/hisat2/2.2.1 - module load bio/samtools/1.11 - module load bio/picard/2.23.9 - module load bio/gatk/4.2.0.0 - - ## vars - note: base dir is ~/Airradians_lcWGS/snakemake_pipeline - REFDIR=~/refs # the reference folder - PYTHONENV=~/python_venv/bin # python environment - DATDIR=~/Airradians_lcWGS/snakemake_pipeline/trim_QC/ # directory of trimmed and filtered fastq.gz files - - ## loop vars by seqence run - April2023='April2023' # string, equal to foldername containing raw data - Oct2022='Oct2022' # string, equal to foldername containing raw data - July2023='July2023' # string, equal to foldername containing raw data - Nov2023='Nov2023' - IDS='Oct2022 April2023 July2023 Nov2023' # loop strings - #IDS='Nov2023' # just run these new data.. omit this line if running the full pipe - - ## make dirs - mkdir -p hisat2 - mkdir -p hisat2/Airradians_ref - - # index the ref genome for mapping - hashed out below becasue we already did this and repeated wth duplicate marking - #source $PYTHONENV/activate # activate python virtual envriomment to call python and run hisat2-build - #hisat2-build -f $REFDIR/Argopecten_irradians_irradians_genome.fasta hisat2/Airradians_ref/Airradians_ref # previous reference, outdated! - #hisat2-build -f $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna hisat2/Airradians_ref/Airradians_ref - #deactivate # exit python virtual envrionment - NOTE: deactivate causes the job to cancel out, I just hashed this out after building the reference index and it runs fine.. - - #echo "Referece genome indexed." $(date) # checkpoint output - - # for loop hisat2 map + samtools covertion to bam + marked duplcates by seq date and filename - for i in ${{IDS}}; do # loopty loop, note {{}} calls as a 'string' - mkdir -p hisat2/${{i}} - mkdir -p hisat2/${{i}}/dup_metrics - ln -s $DATDIR/${{i}}/*.fastq.gz hisat2/${{i}} # symb dir from trim_QC to hisat2 by seq run - array=($(ls hisat2/${{i}}/*.fastq.gz)); # call the symbolically linked trim seqs - as array to align - for m in ${{array[@]}}; do - filename=$(echo "${{m}}" | awk -F/ '{{print $NF}}') # solely string 'filename.fasta.gz' no root dirs - hisat2 -p 8 --dta -x hisat2/Airradians_ref/Airradians_ref -U ${{m}} -S hisat2/${{i}}/${{filename}}.sam # input m, output sam file - samtools sort -@ 8 -o hisat2/${{i}}/${{filename}}.bam hisat2/${{i}}/${{filename}}.sam # convert sam to bam - echo "${{i}} bam-ified!" # boomshakalacka - rm hisat2/${{i}}/${{filename}}.sam # no longer need sam.. - java -jar $PICARD SortSam I=hisat2/${{i}}/${{filename}}.bam O=hisat2/${{i}}/${{filename}}.sorted.bam SORT_ORDER=coordinate - rm hisat2/${{i}}/${{filename}}.bam # no longer need the unsorted bam file.. - echo "${{i}} sort-ified!" # sorted using picard - done; - done - - echo 'Mapping complete' $(date) # checkpoint output - - # output to tether together snakes - touch hisat2/mapping_complete - - """ - -# Sort BAMs -rule bams_linked: - input: - # insert here - "hisat2/mapping_complete" - output: - # insert here - "angsd/data/bams_linked" - shell: - """ - # objective: start angsd dirs and make symbolic dirs for bams output from hisat2 - # sorted by generation and life stage (broodstock or juvenile) - in prep for SNP calling - # IMPORTNAT! 
at this step we can effectively truncate samples from downstream analysis - echo 'start' $(date) # checkpoint output - - # setup - ## vars - - ## make dirs - mkdir -p angsd - mkdir -p angsd/data - mkdir -p angsd/data/F0_Broodstock - mkdir -p angsd/data/F1_Broodstock - mkdir -p angsd/data/F2_Broodstock - mkdir -p angsd/data/F1_Juveniles - mkdir -p angsd/data/F2_Juveniles - mkdir -p angsd/data/F3_Juveniles - - # symbolic dirs by life stage and generation - - ## F0_Broodstock - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/April2023/*trim.F0*sorted.bam ./angsd/data/F0_Broodstock/ - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/July2023/*trim.F0*sorted.bam ./angsd/data/F0_Broodstock/ # 12, 22, and 26 - - ## F1_Broodstock - # three files in April that had 0 data! omit these , duplicated in July - #rm ./hisat2/April2023/adapter_trim.F1-B2-pH8* ./hisat2/April2023/adapter_trim.F1-B22-pH75* ./hisat2/April2023/adapter_trim.F1-B30-pH75* # SEEMS TEY WERE ALREADY OMITTED? - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/April2023/*trim.F1-B*sorted.bam ./angsd/data/F1_Broodstock/ - rm ./angsd/data/F1_Broodstock/*F1-B22-pH75* # omit bad samplesymb dir from April 2023, ID 22 pH 75 - rm ./angsd/data/F1_Broodstock/*F1-B2-pH8* # omit bad samplesymb dir from April 2023, ID 2 pH 8 - rm ./angsd/data/F1_Broodstock/*F1-B30-pH75* # omit bad samplesymb dir from April 2023, ID 30 pH 75 - - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/July2023/*trim.F1-B*sorted.bam ./angsd/data/F1_Broodstock/ # new 2_ph8 + 30 and 22 ph 7.5 - - ## F2_Broodstock - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/Nov2023/*trim.F2-B*sorted.bam ./angsd/data/F2_Broodstock/ - - ## F1_Juveniles - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/Oct2022/*sorted.bam ./angsd/data/F1_Juveniles/ - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/April2023/*trim.F1-J*sorted.bam ./angsd/data/F1_Juveniles/ - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/July2023/*trim.F1-J*sorted.bam ./angsd/data/F1_Juveniles/ - - ## F2_Juveniles - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/April2023/*trim.F2-J*sorted.bam ./angsd/data/F2_Juveniles/ - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/July2023/*trim.F2-J*sorted.bam ./angsd/data/F2_Juveniles/ - - ## F3_Juveniles - ln -s ~/Airradians_lcWGS/snakemake_pipeline/hisat2/Nov2023/*trim.F3-J*sorted.bam ./angsd/data/F3_Juveniles/ - - echo 'BAMS symbolic dirs in angsd/data' $(date) # checkpoint output - - # output to tether together snakes - touch angsd/data/bams_linked - - """ - -# Merge BAMs -rule merge_bams: - input: - # insert here - "angsd/data/bams_linked" - output: - # insert here - "angsd/data/merge_complete" - shell: - """ - # Objective: use samtool to merge seq lanes within sample ID - echo 'start' $(date) # checkpoint output - - # setup - - ## load modules, requires hisat2 and samtools - module load bio/samtools/1.11 - module load bio/picard/2.23.9 - - ## loop vars by generation and life stage - NOTE - one more lane of sequencing to finalize! 
- F0_Broodstock='F0_Broodstock' # string, equal to foldername containing symb linked bam files - F1_Broodstock='F1_Broodstock' # string, equal to foldername containing symb linked bam files - F2_Broodstock='F2_Broodstock' # string, equal to foldername containing symb linked bam files - F1_Juveniles='F1_Juveniles' # string, equal to foldername containing symb linked bam files - F2_Juveniles='F2_Juveniles' # string, equal to foldername containing symb linked bam files - F3_Juveniles='F3_Juveniles' # string, equal to foldername containing symb linked bam files - #IDS='F0_Broodstock F1_Broodstock F1_Juveniles F2_Juveniles F2_Broodstock F3_Juveniles' # loop strings - IDS='F2_Juveniles F2_Broodstock F3_Juveniles' # loop strings - #IDS='F2_Juveniles' - - - # for loop mutliqc by sequence run - for i in ${{IDS[@]}}; do # loopty loop, note {{}} calls as a 'string' - mkdir -p angsd/data/${{i}}/merged_bams; - # print the filename BEFORE '_R1...gz" and "_R2...gz" as string for next loop - ls angsd/data/${{i}}/*R1_001*.bam | awk -F '[_./]' '{{print $5"_"$6"."$7}}' | sort | uniq > angsd/data/${{i}}/ID; - for m in `cat angsd/data/${{i}}/ID`; do - #for m in `cat angsd/data/${{i}}/ID_diffs`; do # I used grep to make an ID _diffs manually bcus the job quite unexpectedly - samtools merge angsd/data/${{i}}/merged_bams/${{m}}\.bam angsd/data/${{i}}/${{m}}\_R1* angsd/data/${{i}}/${{m}}\_R2*; - done; - done - - echo 'end' $(date) # checkpoint output - - touch angsd/data/merge_complete - - """ -# A more efficient version, within wildcard 'generation' assigned using expand in the next rule -# a new sort bames remove/mark duplicates and sort by coord for angsd -rule sortbams_removedups_effic: - input: - # insert here - "angsd/data/merge_complete" - output: - # insert here - "angsd/data/{generation}/sortremovedups_complete" - shell: - """ - echo 'start' $(date) # checkpoint output - - # load libraries - module load bio/samtools/1.11 - module load bio/picard/2.23.9 - - ls angsd/data/{wildcards.generation}/merged_bams/*.bam | awk -F '[_./]' '{{print $7"_"$8"."$9}}' | sort | uniq > angsd/data/{wildcards.generation}/merged_bams/ID; - - mkdir -p angsd/data/{wildcards.generation}/merged_bams/query_dups_removed - mkdir -p angsd/data/{wildcards.generation}/merged_bams/query_dups_removed/stats # make directory to input the summary text files from marking duplicates and removing them - - for filename in `cat angsd/data/{wildcards.generation}/merged_bams/ID`; do - # sort by queryname - java -jar $PICARD SortSam I=angsd/data/{wildcards.generation}/merged_bams/${{filename}}.bam O=angsd/data/{wildcards.generation}/merged_bams/${{filename}}_querysorted.bam SORT_ORDER=queryname - # call and omit duplicates - queryname sorted, then re sort by coord and delete the previous - java -jar $PICARD MarkDuplicates I=angsd/data/{wildcards.generation}/merged_bams/${{filename}}_querysorted.bam O=angsd/data/{wildcards.generation}/merged_bams/query_dups_removed/${{filename}}_querydupsrem.bam M=angsd/data/{wildcards.generation}/merged_bams/query_dups_removed/stats/${{filename}}.txt ASSUME_SORT_ORDER=queryname REMOVE_DUPLICATES=true - java -jar $PICARD SortSam I=angsd/data/{wildcards.generation}/merged_bams/query_dups_removed/${{filename}}_querydupsrem.bam O=angsd/data/{wildcards.generation}/merged_bams/query_dups_removed/${{filename}}.bam SORT_ORDER=coordinate - rm angsd/data/{wildcards.generation}/merged_bams/query_dups_removed/${{filename}}_querydupsrem.bam - # delete the sorted files w/o removed duplicates - rm 
angsd/data/{wildcards.generation}/merged_bams/${{filename}}_querysorted.bam # remove the query sorted bam file - now all we have left is 'final' with removed duplicates! - rm angsd/data/{wildcards.generation}/merged_bams/${{filename}}.bam # remove the merged file from the previous chunk to save disk space - done; - - echo 'end' $(date) # checkpoint output - - touch angsd/data/{wildcards.generation}/sortremovedups_complete - - """ - -# SortSam by queryname and Picard MarkDuplicates to remove duplicates -rule sortbams_removedups: - input: - # insert here - "angsd/data/merge_complete" - output: - # insert here - "angsd/data/sortbams_removedups" - shell: - """ - # Objective: Using the merged reads from the previous chunk (sorted by coordinate), use Picard SortSam to sort by queryname, - # then Picard MarkDuplicates to remove duplicates - output duplicate-removed bam files - - echo 'start' $(date) # checkpoint output - - # setup - ## load modules, requires samtools and picard - module load bio/samtools/1.11 - module load bio/picard/2.23.9 - - ## loop vars by generation and life stage - NOTE - one more lane of sequencing to finalize! - F0_Broodstock='F0_Broodstock' # string, equal to foldername containing symb linked bam files - F1_Broodstock='F1_Broodstock' # string, equal to foldername containing symb linked bam files - F2_Broodstock='F2_Broodstock' # string, equal to foldername containing symb linked bam files - F1_Juveniles='F1_Juveniles' # string, equal to foldername containing symb linked bam files - F2_Juveniles='F2_Juveniles' # string, equal to foldername containing symb linked bam files - F3_Juveniles='F3_Juveniles' # string, equal to foldername containing symb linked bam files - IDS='F0_Broodstock F1_Broodstock F1_Juveniles F2_Juveniles F2_Broodstock F3_Juveniles' # loop strings - #IDS='F0_Broodstock' # loop strings - #IDS='F1_Broodstock F1_Juveniles F2_Broodstock' # completed!
- #IDS='F2_Juveniles F3_Juveniles' - - # to refer to generation (from subfolers), use {wildcards.generation} with a single bracket - # for loop mutliqc by sequence run - for i in ${{IDS[@]}}; do # loopty loop, note {{}} calls as a 'string' - # print the filename BEFORE '_R1...gz" and "_R2...gz" as string for next loop - ls angsd/data/${{i}}/merged_bams/*.bam | awk -F '[_./]' '{{print $7"_"$8"."$9}}' | sort | uniq > angsd/data/${{i}}/merged_bams/ID; - #mkdir -p angsd/data/${{i}}/merged_bams/coord_dups_removed - mkdir -p angsd/data/${{i}}/merged_bams/query_dups_removed - #mkdir -p angsd/data/${{i}}/merged_bams/coord_dups_removed/stats - mkdir -p angsd/data/${{i}}/merged_bams/query_dups_removed/stats # make directory to input the summary text files from marking duplicates and removing them - for filename in `cat angsd/data/${{i}}/merged_bams/ID`; do - # sort by coord - #java -jar $PICARD SortSam I=angsd/data/${{i}}/merged_bams/${{filename}}.bam O=angsd/data/${{i}}/merged_bams/${{filename}}_coordsorted.bam SORT_ORDER=coordinate - # sort by queryname - java -jar $PICARD SortSam I=angsd/data/${{i}}/merged_bams/${{filename}}.bam O=angsd/data/${{i}}/merged_bams/${{filename}}_querysorted.bam SORT_ORDER=queryname - # delete the original merge file - #rm angsd/data/${{i}}/merged_bams/${{filename}}.bam - # call and omit duplicates - coordinate sorted - #java -jar $PICARD MarkDuplicates I=angsd/data/${{i}}/merged_bams/${{filename}}_coordsorted.bam O=angsd/data/${{i}}/merged_bams/coord_dups_removed/${{filename}}.bam M=angsd/data/${{i}}/merged_bams/coord_dups_removed/stats/${{filename}}.txt ASSUME_SORT_ORDER=coordinate REMOVE_DUPLICATES=true - # call and omit duplicates - queryname sorted, then re sort by coord and delete the previous - java -jar $PICARD MarkDuplicates I=angsd/data/${{i}}/merged_bams/${{filename}}_querysorted.bam O=angsd/data/${{i}}/merged_bams/query_dups_removed/${{filename}}_querydupsrem.bam M=angsd/data/${{i}}/merged_bams/query_dups_removed/stats/${{filename}}.txt ASSUME_SORT_ORDER=queryname REMOVE_DUPLICATES=true - java -jar $PICARD SortSam I=angsd/data/${{i}}/merged_bams/query_dups_removed/${{filename}}_querydupsrem.bam O=angsd/data/${{i}}/merged_bams/query_dups_removed/${{filename}}.bam SORT_ORDER=coordinate - rm angsd/data/${{i}}/merged_bams/query_dups_removed/${{filename}}_querydupsrem.bam - # delete the sorted files w/o removed duplicates - #rm angsd/data/${{i}}/merged_bams/${{filename}}_coordsorted.bam # remove the coord sorted bam file - now we have left is 'fina' with removed duplicates! - rm angsd/data/${{i}}/merged_bams/${{filename}}_querysorted.bam # remove the query sorted bam file - now all we have left is 'final' with removed duplicates! 
- done; - done - - - echo 'end' $(date) # checkpoint output - - touch angsd/data/sortbams_removedups - """ - -# Generate txt list bam files to call SNPs -rule generate_bamlists: - input: - #"angsd/data/sortbams_removedups" # OLD JOB INPUT - expand("angsd/data/{generation}/sortremovedups_complete", generation=subfolders) - output: - "angsd/data/bamlists_generated" - shell: - """ - # Objective: generate - echo 'start' $(date) #checkpoint output - - ## dir shortcuts - DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files - - ## make dir for bamlists - mkdir -p $DATDIR/bamlists - mkdir -p $DATDIR/bamlists/all_juveniles - mkdir -p $DATDIR/bamlists/all_juveniles_and_F0 - mkdir -p $DATADIR/bamlists/parentage_F0B_F1J - - # FOR LOOPS - IDS=('F0_Broodstock' 'F1_Broodstock' 'F1_Juveniles' 'F2_Juveniles' 'F2_Broodstock' 'F3_Juveniles') # loop strings - #IDS=('F0_Broodstock' 'F1_Broodstock' 'F1_Juveniles' 'F2_Juveniles' 'F2_Broodstock') # loop strings - #IDS=('F2_Broodstock') - - ## (1) for loop i - call bam lists (merged and dup removed!) for all data and within pCO2 treatment, store in the data folder - for i in ${{IDS[@]}}; do # update to include F2 brood and F3 juv once we have them - LOOPDIR=$DATDIR/${{i}}/merged_bams/query_dups_removed - if [ "${{i}}" == "F1_Broodstock" ]; then - ls $LOOPDIR/*.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/merged_bamlist.txt - awk '!/DUP/' $LOOPDIR/merged_bamlist.txt > $LOOPDIR/merged_bamlist2.txt # omit duplicate in list, name 2 - rm $LOOPDIR/merged_bamlist.txt # remove the previous list - mv $LOOPDIR/merged_bamlist2.txt $DATDIR/${{i}}/merged_bams/merged_bamlist.txt # rename 2 to original - ls $LOOPDIR/*pH8.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/LOWpCO2_bamlist.txt - ls $LOOPDIR/*pH75.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/MODpCO2_bamlist.txt - elif [ "${{i}}" == "F1_Juveniles" ]; then - ls $LOOPDIR/*.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/merged_bamlist.txt - awk '!/dup/' $LOOPDIR/merged_bamlist.txt > $LOOPDIR/merged_bamlist2.txt # omit duplicate in list, name 2 - rm $LOOPDIR/merged_bamlist.txt # remove the previous list - mv $LOOPDIR/merged_bamlist2.txt $LOOPDIR/merged_bamlist.txt # rename 2 to original - #ls $LOOPDIR/*pH8.bam | awk -F '[/]' '{{print $10}}' | sort | uniq > $LOOPDIR/LOWpCO2merged_bamlist.txt - ls $LOOPDIR/*pH8.bam $LOOPDIR/*trim.1*.bam $LOOPDIR/*trim.3.bam $LOOPDIR/*trim.4.bam $LOOPDIR/*trim.5.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/LOWpCO2_bamlist.txt - ls $LOOPDIR/*pH75.bam $LOOPDIR/*trim.2*.bam $LOOPDIR/*trim.3*.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/MODpCO2_bamlist.txt - awk '!/adapter_trim.3.bam|dup/' $LOOPDIR/MODpCO2_bamlist.txt > $LOOPDIR/MODpCO2_bamlist2.txt # omit 3 as a pH8, and follow same step to omit the dup sample - rm $LOOPDIR/MODpCO2_bamlist.txt - mv $LOOPDIR/MODpCO2_bamlist2.txt $LOOPDIR/MODpCO2_bamlist.txt - elif [ "${{i}}" == "F2_Juveniles" ] || [ "${{i}}" == "F2_Broodstock" ] || [ "${{i}}" == "F3_Juveniles" ]; then - ls $LOOPDIR/*.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/merged_bamlist.txt - ls $LOOPDIR/*pH8.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/LOWpCO2merged_bamlist.txt - ls $LOOPDIR/*pH75.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/MODpCO2merged_bamlist.txt - ls $LOOPDIR/*pH7.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/HIGHpCO2merged_bamlist.txt - else # just F0_Broodstock since there were 
no cohort / treatment - ls $LOOPDIR/*.bam | awk -F '[/]' '{{print $11}}' | sort | uniq > $LOOPDIR/merged_bamlist.txt - fi; - done - - # remove duplicate sequence files from the merged bams dirs - there are two - # F1_Broodstock as adapter_trim.F1-B6-pH75DUP.bam and F1_Juveniles as adapter_trim.353dup.bam - - # new directory to call all merged bams( as a symbolic link) and a master merged.bam list for the one popgen run to rule them all..! - # Important! if you want to rerun the all angsd call, you need to omit these symbolic links or else the script wont run - #mkdir -p $DATDIR/all_merged_bams # make dir - then call all symbolic links for merged bam file - #ln -s $DATDIR/F0_Broodstock/merged_bams/*.bam ./angsd/data/all_merged_bams - #ln -s $DATDIR/F1_Broodstock/merged_bams/*.bam ./angsd/data/all_merged_bams - #ln -s $DATDIR/F2_Broodstock/merged_bams/*.bam ./angsd/data/all_merged_bams - #ln -s $DATDIR/F1_Juveniles/merged_bams/*.bam ./angsd/data/all_merged_bams - #ln -s $DATDIR/F3_Juveniles/merged_bams/*.bam ./angsd/data/all_merged_bams - #rm $DATDIR/all_merged_bams/*DUP* $DATDIR/all_merged_bams/*dup* # omit the duplicate files sent as sanity check - #ls $DATDIR/all_merged_bams/*.bam | awk -F '[/]' '{{print $9}}' | sort | uniq > $DATDIR/all_merged_bams/merged_bamlist.txt - - - echo 'end' $(date) # checkpoint output - - touch angsd/data/bamlists_generated - - """ - -# Run angsd on F0 Broodstock allele freq based on counts -rule angsd_F0: - input: - "angsd/data/bamlists_generated" - output: - "angsd/output/F0_angsd" - shell: - """ - # About: use doMaf=8 to call allele frequencies without use of the reference and on base on base counts - # Set-up - - # output dir - mkdir -p angsd/output - mkdir -p angsd/output/F0_Broodstock - - ## load module - module load bio/angsd/0.933 - module load bio/samtools/1.15.1 - - ## dir shortcuts - DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data/F0_Broodstock # Path to generation and life stage bam files - OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output/F0_Broodstock # Path to the base directory where output files will be written - REFDIR=~/refs # Path to reference genome - - # angsd parameters - #DOMAF=8 # frequency based on directly base counts - does not rely on genotypy probabilities or liklihoods (no reference necessary) - DOMAJORMINOR=1 # 1 is for either seq data like bam files or genotpe liklihood data where the maj minor allele is inferred from liklihoods, max liklihood approach 2 is from counts, most freq observed alleles - # 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the genotype liklihood - DOMAF=1 # 1 assumed known major and minor allele 2 assumed known major and unknown minor allele - DOGENO=1 # 1 write major and minor allele 2 print the called genotype and count of minor 4 print the called genotype - DOPOST=1 # 1 estimate the posterior genotype probability based on the allele frequency as a prior 2 estimat eth posterier genotpe prob assuming a uniform prior (if domajorminor == 4) - GENOLIKE=1 # SAMtools - DOGLF=2 # beagle genotype liklihood format - DOCOUNTS=1 # provide freq of bases - output as 'pos.gz' - DODEPTH=1 # default 0 output distribution of seq depths sites aove maxDepth will be binnes out files in depthSample and depthGlobal - MAXDEPTH=1000 # default 100 - DUMPCOUNTS=2 # 1 is the total seq depth for site as .pos.gz, 2 is the seq deth per sample as .pos.gz and .counts.gz; look up allele coutns in angsd for more info to diagnose depth cutoffs - 
MINQ=30 # Minimum quality filter - MINMAPQ=30 # Minmm mapping quality - MINMAF=0.05 # Minimum minor allele frequency filter - # C -50 is flag to adjust for excessive mismatches as recomennded when BWA was used for alignment - # -uniqueOnly; removed reads that have multiple best hits 0 default, 1 remove - # -remove_bads = reads flagged as bad - # -only_proper_pairs = read that did not have a mate pair - - # make directory in output and call all necessary data to run angsd - cd $DATDIR/merged_bams - nIND=$(wc -l $DATDIR/merged_bams/merged_bamlist.txt | awk '{{print $1}}') # call and count lines of the bamlist with delimiter id - minIND=$(echo "(${{nIND}} * 0.9)" | bc) # min individual is 90% of the total count - minINDRd=$(printf "%.0f" ${{minIND}}) # round that - - # set min and max depth for minuum of 5X and max of 20X coerage per loci to acoid potential bias arising from seq errors and recommendations in O'Leary (etal 2018) - MINDP=$(echo "(${{minINDRd}} * 5)" | bc) # min coverage of 5X accoutning for the minimum num of individuals for genotype calls as 90% and assuming per indiv per loci - MAXDP=$(echo "(${{minINDRd}} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci - - # output file start - OUTBASE='F0Broodstock_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD'$MINDP'_maxD'$MAXDP'_minq'$MINQ'_minmapQ'$MINMAPQ # Build base name of output files - # run angsd for all samples - angsd -b $DATDIR/merged_bams/merged_bamlist.txt \ - -ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \ - -GL $GENOLIKE \ - -doGlf $DOGLF \ - -doMaf $DOMAF \ - -doMajorMinor $DOMAJORMINOR \ - -dogeno $DOGENO \ - -doPOST $DOPOST \ - -doCounts $DOCOUNTS \ - -dumpCounts $DUMPCOUNTS \ - -doDepth $DODEPTH \ - -maxDepth $MAXDEPTH \ - -setMinDepth $MINDP \ - -setMaxDepth $MAXDP \ - -doIBS 1 \ - -makematrix 1 \ - -doCov 1 \ - -minInd $minINDRd \ - -dobcf 1 \ - --ignore-RG 0 \ - -minQ $MINQ \ - -minMapQ $MINMAPQ \ - -minMaf $MINMAF \ - -SNP_pval 1e-6 \ - -P -20 \ - -remove_bads 1 \ - -only_proper_pairs 1 \ - -uniqueOnly 1 \ - -C 50 \ - -out $OUTDIR/$OUTBASE \ - >& $OUTDIR/$OUTBASE'.log'; - - echo 'end' $(date) # checkpoint output - - touch angsd/output/F0_angsd - """ -# Run angsd -rule angsd_run: - input: - "angsd/output/F0_angsd" - output: - "angsd/output/All_angsd" - shell: - """ - # Set-up - - # output dir - mkdir -p angsd/output - mkdir -p angsd/output/all - - ## load module - module load bio/angsd/0.933 - module load bio/samtools/1.15.1 - - IDS=('F1_Broodstock' 'F1_Juveniles' 'F2_Juveniles' 'F2_Broodstock' 'F3_Juveniles') # loop strings - #IDLIST='F1_Broodstock F1_Juveniles F2_Juveniles F2_Broodstock F3_Juveniles' # loop strings - - ## loop vars by treatment (for loop #2) - TREATMENT_ALL=('LOWpCO2' 'MODpCO2' 'HIGHpCO2') # a list of the common str on file names and outputs in the for loop below - #TREATMENT_ALL=($TREATLIST_ALL) # as an array - we can call specific strings in the for loops - - TREATMENT_SUB=('LOWpCO2' 'MODpCO2') - #TREATMENT_SUB=($TREATLIST_SUB) - - ## dir shortcuts - DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files - OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output/all # Path to the base directory where output files will be written - REFDIR=~/refs # Path to reference genome - - # angsd parameters - #DOMAF=8 # frequency based on directly base counts - does not rely on genotypy probabilities or liklihoods (no reference 
necessary) - DOMAJORMINOR=1 # 1 is for either seq data like bam files or genotpe liklihood data where the maj minor allele is inferred from liklihoods, max liklihood approach 2 is from counts, most freq $ - # 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the genotype liklihood - DOMAF=1 # 1 assumed known major and minor allele 2 assumed known major and unknown minor allele - DOGENO=1 # 1 write major and minor allele 2 print the called genotype and count of minor 4 print the called genotype - DOPOST=1 # 1 estimate the posterior genotype probability based on the allele frequency as a prior 2 estimat eth posterier genotpe prob assuming a uniform prior (if domajorminor == 4) - GENOLIKE=1 # SAMtools - DOGLF=2 # beagle genotype liklihood format - DOCOUNTS=1 # provide freq of bases - output as 'pos.gz' - DODEPTH=1 # default 0 output distribution of seq depths sites aove maxDepth will be binnes out files in depthSample and depthGlobal - MAXDEPTH=1000 # default 100 - DUMPCOUNTS=2 # 1 is the total seq depth for site as .pos.gz, 2 is the seq deth per sample as .pos.gz and .counts.gz; look up allele coutns in angsd for more info to diagnose depth cutoffs - MINQ=30 # Minimum quality filter - MINMAPQ=30 # Minmm mapping quality - MINMAF=0.05 # Minimum minor allele frequency filter - # C -50 is flag to adjust for excessive mismatches as recomennded when BWA was used for alignment - # -uniqueOnly; removed reads that have multiple best hits 0 default, 1 remove - # -remove_bads = reads flagged as bad - # -only_proper_pairs = read that did not have a mate pair - - # ANGSD for all data - - # make directory in output and call all necessary data to run angsd - cd $DATDIR/all_merged_bams/ - nIND_all=$(wc -l $DATDIR/all_merged_bams/merged_bamlist.txt | awk '{{print $1}}') # call and count lines of the bamlist with delimiter id - minIND_all=$(echo "(${{nIND_all}} * 0.9)" | bc) # min individual is 90% of the total count - minINDRd_all=$(printf "%.0f" ${{minIND_all}}) # round that - # set min and max depth for minuum of 5X and max of 20X coerage per loci to acoid potential bias arising from seq errors and recommendations in O'Leary (etal 2018) - MINDP_all=$(echo "(${{minINDRd_all}} * 5)" | bc) # min coverage of 5X accoutning for the minimum num of individuals for genotype calls as 90% and assuming per indiv per loci - MAXDP_all=$(echo "(${{minINDRd_all}} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci - # outbase - OUTBASE='AllSamples_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd_all'_minD'$MINDP_all'_maxD'$MAXDP_all'_minq'$MINQ'_minmapQ'$MINMAPQ # Build base name of output files - # run angsd for all samples - angsd -b $DATDIR/all_merged_bams/merged_bamlist.txt \ - -ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \ - -GL $GENOLIKE \ - -doGlf $DOGLF \ - -doMaf $DOMAF \ - -doMajorMinor $DOMAJORMINOR \ - -dogeno $DOGENO \ - -doPOST $DOPOST \ - -doCounts $DOCOUNTS \ - -dumpCounts $DUMPCOUNTS \ - -doDepth $DODEPTH \ - -maxDepth $MAXDEPTH \ - -setMinDepth $MINDP_all \ - -setMaxDepth $MAXDP_all \ - -doIBS 1 \ - -makematrix 1 \ - -doCov 1 \ - -minInd $minINDRd_all \ - -dobcf 1 \ - --ignore-RG 0 \ - -minQ $MINQ \ - -minMapQ $MINMAPQ \ - -minMaf $MINMAF \ - -SNP_pval 1e-6 \ - -P -20 \ - -remove_bads 1 \ - -only_proper_pairs 1 \ - -uniqueOnly 1 \ - -C 50 \ - -out $OUTDIR/$OUTBASE \ - >& $OUTDIR/$OUTBASE'.log' - - # ANGSD for 
data within generation (F1 - F2 - F3, broodstock and juveniles separately) - # for loop - run ansd for the total bam list within time/generation (i.e. all F1 Broodstock, F2_Juveniles, etc.) - # objective to input a strata / metadta to identify treatments associated with ids downstream.. - - #for i in ${{IDS[@]:0:5}}; # From f1 to f2 and so on.. - #do - # mkdir -p $OUTDIR/${{i}} # make director for ansd output files - # nav to dir - # cd $DATDIR/${{i}}/merged_bams/ - # cal number of indiv - # nIND=$(wc -l $DATDIR/${{i}}/merged_bams/merged_bamlist.txt | awk '{{print $1}}') # call and count lines of the bamlist with delimiter id - # minIND=$(echo "(${{nIND}} * 0.9)" | bc) # min individual is 90% of the total count - # minINDRd=$(printf "%.0f" ${{minIND}}) # round that - # set min and max depth for minuum of 5X and max of 20X coerage per loci to acoid potential bias arising from seq errors and recommendations in O'Leary (etal 2018) - # MINDP=$(echo "(${{minINDRd}} * 5)" | bc) # min coverage of 5X accoutning for the minimum num of individuals for genotype calls as 90% and assuming per indiv per loci - # MAXDP=$(echo "(${{minINDRd}} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci - # call unique outbase - # OUTBASE=${{i}}'_mindp'$MINDP'_maxdp'$MAXDP'_minind'$minINDRd'_minq'$MINQ'_minmapQ'$MINMAPQ # Build base name of output files - # run angsd - # angsd -b $DATDIR/${{i}}/merged_bams/merged_bamlist.txt \ - # -ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \ - # -GL $GENOLIKE \ - # -doGlf $DOGLF \ - # -doMaf $DOMAF \ - # -doMajorMinor $DOMAJORMINOR \ - # -dogeno $DOGENO \ - # -doPOST $DOPOST \ - # -doCounts $DOCOUNTS \ - # -dumpCounts $DUMPCOUNTS \ - # -doDepth $DODEPTH \ - # -maxDepth $MAXDEPTH \ - # -setMinDepth $MINDP \ - # -setMaxDepth $MAXDP \ - # -doIBS 1 \ - # -makematrix 1 \ - # -doCov 1 \ - # -minInd $minINDRd \ - # -dobcf 1 \ - # --ignore-RG 0 \ - # -minQ $MINQ \ - # -minMapQ $MINMAPQ \ - # -minMaf $MINMAF \ - # -SNP_pval 1e-6 \ - # -P -20 \ - # -remove_bads 1 \ - # -only_proper_pairs 1 \ - # -uniqueOnly 1 \ - # -C 50 \ - # -out $OUTDIR/${{i}}/$OUTBASE \ - # >& $OUTDIR/${{i}}/$OUTBASE'.log' - - - #done - - ## (3) for loop - run angsd for SNP calls within treatment - # insert if statment bcus F0 uses 'merged_bamlist.txt' whereas the rest are by treatment - #for i in ${{IDS[@]:1:5}}; do - # mkdir -p $OUTDIR/${{i}}/within_treatment # make directory for angsd output files - # cd $DATDIR/${{i}}/merged_bams/ - # if [ "${{i}}" == "F2_Juveniles" ] || [ "${{i}}" == "F2_Broodstock" ] || [ "${{i}}" == "F3_Juveniles" ]; then # generation and life stages when there are three pCO2 treatments - # for x in ${{TREATMENT_ALL[@]}}; do # run angsd separately for each treatment-based bam list (m), grouped by generation/lifestage (i) - # # NOTE: a mistake here, we names teh LOW MOD and HIGH bamlist as i.e. 
LOWpCO2merged_bamlist.txt, need to make a change below, we wont run successfully past F1s - # nIND=$(wc -l $DATDIR/${{i}}/merged_bams/${{x}}merged_bamlist.txt | awk '{{print $1}}') # call the bamlist with delimiter id - # minIND=$(echo "(${{nIND}} * 0.9)" | bc) # min individual is 90% of the total count - # minINDRd=$(printf "%.0f" ${{minIND}}) # round that - # # call unique outbase - # OUTBASE=${{i}}${{x}}'_mindp'$MINDP'_maxdp'$MAXDP'_minind'$minINDRd'_minq'$MINQ'_minmapQ'$MINMAPQ # Build base name of output files - # ## Call SNPs - # angsd -b $DATDIR/${{i}}/merged_bams/${{x}}merged_bamlist.txt \ - # -ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \ - # -out $OUTDIR/${{i}}/within_treatment/$OUTBASE \ - # -GL 1 -doGlf 2 -doMaf 1 -doMajorMinor 1 -doCounts 1 -doDepth 1 -maxDepth 10000 -dumpCounts 1 -doIBS 1 -makematrix 1 -doCov 1 \ - # -setMinDepth $MINDP -setMaxDepth $MAXDP \ - # -minInd $minINDRd \ - # -dobcf 1 -dopost 1 --ignore-RG 0 -dogeno 1 \ - # -minQ $MINQ -minMapQ $MINMAPQ -minMaf $MINMAF \ - # -SNP_pval 1e-6 \ - # -P -8 \ - # -remove_bads 1 -only_proper_pairs 1 -C 50 \ - # >& $OUTDIR/${{i}}/within_treatment/$OUTBASE'.log'; - # done; # for loop x in the elif statement - LOWpCO2, MODpCO2 and HIGHpCO2 - # else # all other cases where there are only 2 treatments - loop m - # for m in ${{TREATMENT_SUB[@]}}; do # run angsd separately for each treatment - ONLY LOWpCO2 and MODpCO2 - # nIND=$(wc -l $DATDIR/${{i}}/merged_bams/${{m}}_bamlist.txt | awk '{{print $1}}') # call the bamlist with delimiter id - # minIND=$(echo "(${{nIND}} * 0.9)" | bc) # min individual is 90% of the total count - # minINDRd=$(printf "%.0f" ${{minIND}}) # round that - # # call unique outbase - # OUTBASE=${{i}}${{m}}'_mindp'$MINDP'_maxdp'$MAXDP'_minind'$minINDRd'_minq'$MINQ'_minmapQ'$MINMAPQ # Build base name of output files - # ## Call SNPs - # angsd -b $DATDIR/${{i}}/merged_bams/${{m}}_bamlist.txt \ - # -ref $REFDIR/Argopecten_irradians_irradians_genome.fasta \ - # -out $OUTDIR/${{i}}/within_treatment/$OUTBASE \ - # -GL 1 -doGlf 2 -doMaf 1 -doMajorMinor 1 -doCounts 1 -doDepth 1 -maxDepth 10000 -dumpCounts 1 -doIBS 1 -makematrix 1 -doCov 1 \ - # -setMinDepth $MINDP -setMaxDepth $MAXDP \ - # -minInd $minINDRd \ - # -dobcf 1 -dopost 1 --ignore-RG 0 -dogeno 1 \ - # -minQ $MINQ -minMapQ $MINMAPQ -minMaf $MINMAF \ - # -SNP_pval 1e-6 \ - # -P -8 \ - # -remove_bads 1 -only_proper_pairs 1 -C 50 \ - # >& $OUTDIR/${{i}}/within_treatment/$OUTBASE'.log'; - # done; # for loop m - in the else statement - LOWpCO2 and MODpCO2 - # fi; # if else statement to run angsd based on bam list requirements - #done # for loop i by generation and list stage - - echo 'end' $(date) # checkpoint output - - touch angsd/output/All_angsd - - """ diff --git a/HPC_analysis/scripts/lcWGS/everyone_angsd_dupsquery.sh b/HPC_analysis/scripts/lcWGS/everyone_angsd_dupsquery.sh deleted file mode 100644 index 7c9c736..0000000 --- a/HPC_analysis/scripts/lcWGS/everyone_angsd_dupsquery.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -#SBATCH --job-name="angsdevery" -#SBATCH -t 500:00:00 -#SBATCH --mail-type=ALL -#SBATCH --mem=180GB -#SBATCH -p medmem -#SBATCH -c 20 -#SBATCH --mail-user=samuel.gurr@noaa.gov -#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_out.%j" -#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_err.%j" - -# Set-up - -# output dir -mkdir -p angsd/output - -## load module -module load bio/angsd/0.940 -module load bio/samtools/1.19 - -## dir shortcuts 
-DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files -OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output # Path to the base directory where output files will be written -REFDIR=~/refs # Path to reference genome - -# angsd parameters -DOMAJORMINOR=4 # 1 is for either seq data like bam files or genotpe liklihood data where the maj minor allele is inferred from likli$ -# 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the ge$ -DOMAF=1 # 1 assumed known major and minor allele 2 assumed known major and unknown minor allele -DOGENO=5 # 1 write major and minor allele 2 print the called genotype and count of minor 4 print the called genotype -DOPOST=1 # 1 estimate the posterior genotype probability based on the allele frequency as a prior 2 estimat eth posterier genotpe pr$ -GENOLIKE=1 # SAMtools -DOGLF=2 # beagle genotype liklihood format -DOCOUNTS=1 # provide freq of bases - output as pos.gz -DODEPTH=1 # default 0 output distribution of seq depths sites aove maxDepth will be binnes out files in depthSample and depthGlobal -MAXDEPTH=100 # default 100 -SETMINDEPTHIND=5 # min depth, below which the individual is omitted from analysis -SETMAXDEPTHIND=150 # max depth - above which the indivisual is omitted from analysis -DUMPCOUNTS=2 # 1 is the total seq depth for site as .pos.gz, 2 is the seq deth per sample as .pos.gz and .counts.gz; look up allele $ -MINQ=30 # Minimum quality filter -MINMAPQ=30 # Minmm mapping quality -MINMAF=0.05 # Minimum minor allele frequency filter -# C -50 is flag to adjust for excessive mismatches as recomennded when BWA was used for alignment -# -uniqueOnly; removed reads that have multiple best hits 0 default, 1 remove -# -remove_bads = reads flagged as bad -# -only_proper_pairs = read that did not have a mate pair - -# ANGSD for data within generation (F1 - F2 - F3, broodstock and juveniles separately) -# for loop - run ansd for the total bam list within time/generation (i.e. all F1 Broodstock, F2_Juveniles, etc.) -# objective to input a strata / metadta to identify treatments associated with ids downstream.. - - -mkdir -p $OUTDIR/everyone # make director for ansd output files - -# nav to dir -cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! 
-#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams - -# cal number of indiv -nIND=$(wc -l $DATDIR/Master_query_dups_removed/everyone_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id -minIND=$(echo "(${nIND} * 0.95)" | bc) # min individual is 90% of the total count -minINDRd=$(printf "%.0f" ${minIND}) # round that - -# set min and max depth for minuum of 5X and max of 20X coerage per loci to acoid potential bias arising from seq errors and recommendations in O'Leary (etal 2018) -MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the minimum num of individuals for genotype calls as 90% and assuming per indiv per loci -MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci - -# call unique outbase -OUTBASE='Everyone_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files - -# run angsd -angsd -b $DATDIR/Master_query_dups_removed/everyone_bamlist.txt \ - -ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \ - -GL $GENOLIKE \ - -doGlf $DOGLF \ - -doMaf $DOMAF \ - -doMajorMinor $DOMAJORMINOR \ - -dogeno $DOGENO \ - -doPOST $DOPOST \ - -doCounts $DOCOUNTS \ - -dumpCounts $DUMPCOUNTS \ - -doDepth $DODEPTH \ - -maxDepth $MAXDEPTH \ - -setMinDepth $MINDP \ - -setMaxDepth $MAXDP \ - -setMaxDepthInd $SETMAXDEPTHIND \ - -setMinDepthInd $SETMINDEPTHIND \ - -doIBS 1 \ - -makematrix 1 \ - -doCov 1 \ - -minInd $minINDRd \ - -dobcf 1 \ - --ignore-RG 0 \ - -minQ $MINQ \ - -minMapQ $MINMAPQ \ - -minMaf $MINMAF \ - -SNP_pval 1e-6 \ - -P 20 \ - -remove_bads 1 \ - -only_proper_pairs 1 \ - -uniqueOnly 1 \ - -C 50 \ - -out $OUTDIR/everyone/$OUTBASE \ - >& $OUTDIR/everyone/$OUTBASE'.log'; -#done diff --git a/HPC_analysis/scripts/lcWGS/everyone_omit_angsd_dupsquery.sh b/HPC_analysis/scripts/lcWGS/everyone_omit_angsd_dupsquery.sh deleted file mode 100644 index 11c3316..0000000 --- a/HPC_analysis/scripts/lcWGS/everyone_omit_angsd_dupsquery.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -#SBATCH --job-name="angsdevery_OM" -#SBATCH -t 500:00:00 -#SBATCH --mail-type=ALL -#SBATCH --mem=180GB -#SBATCH -p medmem -#SBATCH -c 20 -#SBATCH --mail-user=samuel.gurr@noaa.gov -#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_out.%j" -#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_err.%j" - -# Set-up - -# output dir -mkdir -p angsd/output - -## load module -module load bio/angsd/0.940 -module load bio/samtools/1.19 - -## dir shortcuts -DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files -OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output # Path to the base directory where output files will be written -REFDIR=~/refs # Path to reference genome - -# angsd parameters -DOMAJORMINOR=4 # 1 is for either seq data like bam files or genotpe liklihood data where the maj minor allele is inferred from likli$ -# 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the ge$ -DOMAF=1 # 1 assumed known major and minor allele 2 assumed known major and unknown minor allele -DOGENO=5 # 1 write major and minor allele 2 print the called genotype and count of minor 4 print the called genotype -DOPOST=1 # 1 
diff --git a/HPC_analysis/scripts/lcWGS/everyone_omit_angsd_dupsquery.sh b/HPC_analysis/scripts/lcWGS/everyone_omit_angsd_dupsquery.sh
deleted file mode 100644
index 11c3316..0000000
--- a/HPC_analysis/scripts/lcWGS/everyone_omit_angsd_dupsquery.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name="angsdevery_OM"
-#SBATCH -t 500:00:00
-#SBATCH --mail-type=ALL
-#SBATCH --mem=180GB
-#SBATCH -p medmem
-#SBATCH -c 20
-#SBATCH --mail-user=samuel.gurr@noaa.gov
-#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_out.%j"
-#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_err.%j"
-
-# Set-up
-
-# output dir
-mkdir -p angsd/output
-
-## load module
-module load bio/angsd/0.940
-module load bio/samtools/1.19
-
-## dir shortcuts
-DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files
-OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output # Path to the base directory where output files will be written
-REFDIR=~/refs # Path to reference genome
-
-# angsd parameters
-DOMAJORMINOR=4 # 1 is for either seq data like bam files or genotype likelihood data where the major/minor allele is inferred from likli$
-# 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the ge$
-DOMAF=1 # 1 assumes known major and minor allele, 2 assumes known major and unknown minor allele
-DOGENO=5 # 1 write major and minor allele, 2 print the called genotype and count of minor, 4 print the called genotype
-DOPOST=1 # 1 estimate the posterior genotype probability based on the allele frequency as a prior, 2 estimate the posterior genotype pr$
-GENOLIKE=1 # SAMtools genotype likelihood model
-DOGLF=2 # beagle genotype likelihood format
-DOCOUNTS=1 # provide frequency of bases - output as pos.gz
-DODEPTH=1 # default 0; output distribution of seq depths; sites above maxDepth will be binned; output files are depthSample and depthGlobal
-MAXDEPTH=100 # default 100
-SETMINDEPTHIND=5 # min depth, below which the individual is omitted from analysis
-SETMAXDEPTHIND=150 # max depth, above which the individual is omitted from analysis
-DUMPCOUNTS=2 # 1 is the total seq depth per site as .pos.gz, 2 is the seq depth per sample as .pos.gz and .counts.gz; look up allele $
-MINQ=30 # Minimum base quality filter
-MINMAPQ=30 # Minimum mapping quality
-MINMAF=0.05 # Minimum minor allele frequency filter
-# -C 50 adjusts mapping quality for excessive mismatches, as recommended when BWA was used for alignment
-# -uniqueOnly: remove reads that have multiple best hits; 0 default, 1 remove
-# -remove_bads = remove reads flagged as bad
-# -only_proper_pairs = remove reads that did not have a properly paired mate
-
-# ANGSD for data within generation (F1 - F2 - F3, broodstock and juveniles separately)
-# for loop - run angsd for the total bam list within time/generation (i.e. all F1 Broodstock, F2 Juveniles, etc.)
-# objective: input a strata / metadata file to identify treatments associated with IDs downstream
-
-
-mkdir -p $OUTDIR/everyone # make directory for angsd output files
-
-# nav to dir
-cd $DATDIR/Master_query_dups_removed # NAV TO THE BAMS WITH DUPLICATES REMOVED BY QUERYNAME!
-#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams
-
-# count the number of individuals
-nIND=$(wc -l $DATDIR/Master_query_dups_removed/everyone_omit_bamlist.txt | awk '{print $1}') # count the lines (one bam per individual) in the bamlist
-minIND=$(echo "(${nIND} * 0.95)" | bc) # minimum number of individuals is 95% of the total count
-minINDRd=$(printf "%.0f" ${minIND}) # round to the nearest integer
-
-# set min and max depth for a minimum of 5X and a maximum of 20X coverage per locus, to avoid potential bias arising from sequencing errors, following recommendations in O'Leary et al. (2018)
-MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X, accounting for the minimum number of individuals (95%) required for genotype calls, assumed per individual per locus
-MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max coverage of 20X, accounting for the minimum number of individuals (95%) required for genotype calls, assumed per individual per locus
-
-# build unique outbase name
-OUTBASE='Everyone_OM_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'_minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ'$MINMAPQ # Build base name of output files
-
-# run angsd
-angsd -b $DATDIR/Master_query_dups_removed/everyone_omit_bamlist.txt \
- -ref $REFDIR/GCA_041381155.1_Ai_NY_genomic.fna \
- -GL $GENOLIKE \
- -doGlf $DOGLF \
- -doMaf $DOMAF \
- -doMajorMinor $DOMAJORMINOR \
- -dogeno $DOGENO \
- -doPOST $DOPOST \
- -doCounts $DOCOUNTS \
- -dumpCounts $DUMPCOUNTS \
- -doDepth $DODEPTH \
- -maxDepth $MAXDEPTH \
- -setMinDepth $MINDP \
- -setMaxDepth $MAXDP \
- -setMaxDepthInd $SETMAXDEPTHIND \
- -setMinDepthInd $SETMINDEPTHIND \
- -doIBS 1 \
- -makematrix 1 \
- -doCov 1 \
- -minInd $minINDRd \
- -dobcf 1 \
- --ignore-RG 0 \
- -minQ $MINQ \
- -minMapQ $MINMAPQ \
- -minMaf $MINMAF \
- -SNP_pval 1e-6 \
- -P 2 \
- -remove_bads 1 \
- -only_proper_pairs 1 \
- -uniqueOnly 1 \
- -C 50 \
- -out $OUTDIR/everyone/$OUTBASE \
- >& $OUTDIR/everyone/$OUTBASE'.log';
-#done
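A quick post-run sanity check can confirm the SNP filters behaved as intended. The following is a minimal sketch, assuming the standard ANGSD output suffixes (.mafs.gz from -doMaf, .beagle.gz from -doGlf 2), each carrying a single header line, and the OUTDIR and OUTBASE variables defined in the script above:

# hypothetical post-run check; assumes the angsd run above completed and wrote its outputs
nSNP=$(zcat ${OUTDIR}/everyone/${OUTBASE}.mafs.gz | tail -n +2 | wc -l)   # SNP sites passing -SNP_pval and -minMaf
nGL=$(zcat ${OUTDIR}/everyone/${OUTBASE}.beagle.gz | tail -n +2 | wc -l)  # beagle rows, one per retained site
echo "SNPs retained: ${nSNP}; beagle rows: ${nGL}"                        # the two counts should match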
diff --git a/HPC_analysis/output/Popgen/angsd/F0Broodstock_parentage.csv b/RAnalysis/Data/Popgen/Parentages/F1_parentage.csv
similarity index 100%
rename from HPC_analysis/output/Popgen/angsd/F0Broodstock_parentage.csv
rename to RAnalysis/Data/Popgen/Parentages/F1_parentage.csv
diff --git a/RAnalysis/Data/Popgen/Parentages/F1_Parentage_Spawn_7.26.21.xlsx b/RAnalysis/Data/Popgen/Parentages/excel_workbooks/F1_Parentage_Spawn_7.26.21.xlsx
similarity index 100%
rename from RAnalysis/Data/Popgen/Parentages/F1_Parentage_Spawn_7.26.21.xlsx
rename to RAnalysis/Data/Popgen/Parentages/excel_workbooks/F1_Parentage_Spawn_7.26.21.xlsx
diff --git a/RAnalysis/Data/Popgen/Parentages/F2_Parentage_Spawn_March_2023.xlsx b/RAnalysis/Data/Popgen/Parentages/excel_workbooks/F2_Parentage_Spawn_March_2023.xlsx
similarity index 100%
rename from RAnalysis/Data/Popgen/Parentages/F2_Parentage_Spawn_March_2023.xlsx
rename to RAnalysis/Data/Popgen/Parentages/excel_workbooks/F2_Parentage_Spawn_March_2023.xlsx
diff --git a/HPC_analysis/output/Popgen/angsd/README.md b/RAnalysis/Data/Popgen/README.md
similarity index 100%
rename from HPC_analysis/output/Popgen/angsd/README.md
rename to RAnalysis/Data/Popgen/README.md