Globally circulating clonal complexes as of 2020-06-17 (18298 genomes)

ahmedmagds · Jul 2, 2020 · f574597
1 parent 791a21d
commit f574597
Show file tree

Hide file tree

Showing 10 changed files with 88 additions and 52 deletions.
diff --git a/...9_acknowledgement_table_2020_05_17_00.xls → ...9_acknowledgement_table_2020_06_17_00.xls b/...9_acknowledgement_table_2020_05_17_00.xls → ...9_acknowledgement_table_2020_06_17_00.xls
diff --git a/README.md b/README.md
diff --git a/bin/Extract_fasta_sequence_blast_report.py b/bin/Extract_fasta_sequence_blast_report.py
@@ -11,7 +11,7 @@
     "--version",
     help="print version and exit",
     action="version",
-    version="%(prog)s 1.1",
+    version="%(prog)s 1.2",
 )
 PARSER.add_argument(
     "output_folder", type=str, help="output folder name & quality report prefix"

diff --git a/bin/GNUVID.py b/bin/GNUVID.py
@@ -58,7 +58,7 @@
 
 PARSER = argparse.ArgumentParser(
     prog="GNUVID.py",
-    description="GNUVID v1.1 utilizes the natural\
+    description="GNUVID v1.2 utilizes the natural\
  variation in public genomes of SARS-CoV-2 to rank gene sequences based on the number of observed exact \
  matches (the GNU score) in all known genomes of SARS-CoV-2. It types the genomes based on their unique \
  gene allele sequences. It types (using a whole genome MLST) your query genome in seconds.",
@@ -119,7 +119,7 @@
     "--version",
     help="print version and exit",
     action="version",
-    version="%(prog)s 1.1",
+    version="%(prog)s 1.2",
 )
 PARSER.add_argument(
     "reference",
@@ -1013,7 +1013,7 @@
     QUERYFILE_OBJECT.close()
 logging.info("Typed the query isolate/s and wrote Query_isolates_GNUVID_ST_Report.txt")
 logging.info("Done in --- {:.3f} seconds ---".format(time.time() - START_TIME))
-logging.info("""Thanks for using GNUVID1.1, I hope you found it useful.
+logging.info("""Thanks for using GNUVID v1.2, I hope you found it useful.
 Please cite WhatsGNU 'Moustafa AM and Planet PJ 2020, Genome Biology;21:58'.
 Please also cite BLAST+ 'Camacho et al. 2009, BMC Bioinformatics;10:421' if you use GNUVID.
 Please also cite GISAID 'Shu Y. and McCauley J. 2017, EuroSurveillance; 22:13'

diff --git a/bin/GNUVID_CCs_summary.py b/bin/GNUVID_CCs_summary.py
@@ -55,7 +55,7 @@
     help="an inactive date cutoff, usually 1 month before release date, in this format (2020-06-03) to assign status"
 )
 PARSER.add_argument(
-    "silent_date", type=str, help="a silent date cutoff, usually 2 weeks before release date, in this format (2020-06-03) to assign status"
+    "quiet_date", type=str, help="a quiet date cutoff, usually 2 weeks before release date, in this format (2020-06-03) to assign status"
 )
 PARSER.add_argument(
     "DB_isolates_report", type=str, help="GNUVID_DB_isolates_report to analyze that has STs and CCs (.txt)"
@@ -66,8 +66,8 @@
 ARGS = PARSER.parse_args()
 #############################
 inactive_date = ARGS.inactive_date
-silent_date = ARGS.silent_date
-release_date = [inactive_date, silent_date]
+quiet_date = ARGS.quiet_date
+release_date = [inactive_date, quiet_date]
 ##############################
 output_report_file = (
           ARGS.DB_isolates_report.split("_DB_isolates_report.txt")[0]
@@ -133,15 +133,15 @@
 output_report_object.write(output_report_header)
 output_report_object2.write('| Clonal Complex            | Number of STs | Number of isolates | Most common 5 countries                             | Most common Region                             | Date range                 |   Status |\n|--------------------------|---------------|--------------------|----------------------------------------------------|-----------------------------------------------|---------------------------|---------|\n')
 for CC in sorted(CC_list):
-    release_date = [inactive_date, silent_date]
+    release_date = [inactive_date, quiet_date]
     STs_count = len(set(CC_STs_dict[CC]))
     isolates_count = len(CC_isolates_dict[CC])
     sorted_dates = sorted(CC_dates_dict[CC])
     dates_range = sorted_dates[0] + ' to ' + sorted_dates[-1]
     if sorted_dates[-1] in release_date:
         date_index = release_date.index(sorted_dates[-1])
         if date_index == 0:
-            CC_state = "Silent"
+            CC_state = "Quiet"
         else:
             CC_state = "Active"
     else:
@@ -150,7 +150,7 @@
         if state_date == 0:
             CC_state = "Inactive"
         elif state_date == 1:
-            CC_state = "Silent"
+            CC_state = "Quiet"
         else:
             CC_state = "Active"
 

diff --git a/bin/GNUVID_database_customizer.py b/bin/GNUVID_database_customizer.py
@@ -43,7 +43,7 @@
 from collections import OrderedDict
 
 PARSER = argparse.ArgumentParser(
-    prog="GNUVID_database_customizer.py", description="Database_customizer script for GNUVID v1.1.",)
+    prog="GNUVID_database_customizer.py", description="Database_customizer script for GNUVID v1.2.",)
 GROUP = PARSER.add_mutually_exclusive_group()
 GROUP.add_argument("-g", "--GenBank_RefSeq", help="fna files from GenBank or RefSeq", action="store_true",)
 GROUP.add_argument("-p", "--prokka", help="fna files from prokka", action="store_true",)

diff --git a/db/MST_05172020.pdf b/db/MST_05172020.pdf
diff --git a/db/MST_05172020.png b/db/MST_05172020.png
diff --git a/db/MST_06172020.pdf b/db/MST_06172020.pdf
diff --git a/db/MST_06172020.png b/db/MST_06172020.png