diff --git a/LiveBedfiles/Pan1159_LogFile.txt b/LiveBedfiles/Pan1159_LogFile.txt index 1a4e8b33..027f6fd1 100644 --- a/LiveBedfiles/Pan1159_LogFile.txt +++ b/LiveBedfiles/Pan1159_LogFile.txt @@ -48,4 +48,11 @@ sort -k1,1V -k2,2n -o Pan1159dataSambamba.bed Pan1159dataSambamba.bed 3. Pan1159dataRefSeqFormat.txt A line for each SNV was created in Pan1159_rsID_dataRefSeqFormat.bed -These were copied and pasted in the same order as the above bed files \ No newline at end of file +These were copied and pasted in the same order as the above bed files + +# ============== Missing gene +It was noticed that due to the missing header in the input file the first gene (ACVRL1) was missing from the output. +The header line was added to the input file and mokabed re-run. +The newly created bed files have the suffix _temporary (these were committed in commit https://github.com/woook/mokabed/pull/82/commits/3651f3210cc8e3c9e061cfff3b8b8f83d621260b). +The lines for ACVRL1 were copied and pasted to the correct position in the bed file to avoid having to repeat the steps described above. +The difference in https://github.com/woook/mokabed/pull/82/commits/c20b49cd93187411a6ab7411fd110d5cdfdf54c9 shows that only lines relating to ACVRL1 were affected. 
\ No newline at end of file diff --git a/LiveBedfiles/Pan1159data.bed b/LiveBedfiles/Pan1159data.bed index 36f09596..8a399fe0 100644 --- a/LiveBedfiles/Pan1159data.bed +++ b/LiveBedfiles/Pan1159data.bed @@ -501,6 +501,15 @@ 11 71155028 71155271 1717 DHCR7;NM_001360.2 11 71155890 71156008 1717 DHCR7;NM_001360.2 12 5153303 5155165 3741 KCNA5;NM_002234.3 +12 52306248 52306329 94 ACVRL1;NM_000020.2 +12 52306872 52307144 94 ACVRL1;NM_000020.2 +12 52307332 52307564 94 ACVRL1;NM_000020.2 +12 52307747 52307867 94 ACVRL1;NM_000020.2 +12 52308212 52308379 94 ACVRL1;NM_000020.2 +12 52308998 52309294 94 ACVRL1;NM_000020.2 +12 52309809 52310027 94 ACVRL1;NM_000020.2 +12 52312758 52312909 94 ACVRL1;NM_000020.2 +12 52314532 52314687 94 ACVRL1;NM_000020.2 12 57944044 57944193 3798 KIF5A;NM_004984.3 12 57957211 57957319 3798 KIF5A;NM_004984.3 12 57957392 57957486 3798 KIF5A;NM_004984.3 diff --git a/LiveBedfiles/Pan1159dataRefSeqFormat.txt b/LiveBedfiles/Pan1159dataRefSeqFormat.txt index fa26ce83..78ede2d6 100644 --- a/LiveBedfiles/Pan1159dataRefSeqFormat.txt +++ b/LiveBedfiles/Pan1159dataRefSeqFormat.txt @@ -41,6 +41,7 @@ rs111033640 rs111033640 9 + 34646575 34646596 34646575 34646596 1 34646575 34646 5091 PC;NM_001040716.1 11 - 66616359 66639640 66616359 66639640 20 66616359,66616690,66617071,66617397,66617680,66618134,66618500,66619250,66619899,66620207,66620699,66631234,66633647,66636306,66637762,66638235,66638512,66638775,66639147,66639484 66616628,66616851,66617340,66617597,66617945,66618404,66618761,66619427,66620141,66620317,66620864,66631437,66633830,66636445,66637934,66638373,66638678,66638961,66639352,66639640 nan PC nan nan 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 1717 DHCR7;NM_001360.2 11 - 71146410 71156008 71146410 71156008 7 71146410,71148847,71149914,71152262,71153298,71155028,71155890 71146895,71148999,71150139,71152496,71153409,71155271,71156008 nan DHCR7 nan nan 0,0,0,0,0,0,0 3741 KCNA5;NM_002234.3 12 + 5153303 5155165 5153303 5155165 1 5153303 5155165 nan KCNA5 
nan nan 0 +94 ACVRL1;NM_000020.2 12 + 52306248 52314687 52306248 52314687 9 52306248,52306872,52307332,52307747,52308212,52308998,52309809,52312758,52314532 52306329,52307144,52307564,52307867,52308379,52309294,52310027,52312909,52314687 nan ACVRL1 nan nan 0,0,0,0,0,0,0,0,0 3798 KIF5A;NM_004984.3 12 + 57944044 57976972 57944044 57976972 28 57944044,57957211,57957392,57957880,57958232,57958690,57960898,57961266,57962735,57963028,57963307,57963759,57965093,57965833,57966352,57968856,57969412,57969859,57970041,57970533,57971485,57971781,57972010,57974728,57975187,57975642,57976374,57976873 57944193,57957319,57957486,57958005,57958301,57958766,57961006,57961411,57962860,57963197,57963476,57963955,57965182,57966060,57966519,57969065,57969550,57969944,57970171,57970655,57971566,57971873,57972135,57974965,57975361,57975745,57976422,57976972 nan KIF5A nan nan 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 29110 TBK1;NM_013254.3 12 + 64849640 64895171 64849640 64895171 20 64849640,64853958,64858103,64860670,64867999,64873781,64875611,64878072,64879224,64879695,64882256,64883810,64889252,64889468,64890136,64890718,64890932,64891417,64891737,64895099 64849747,64854119,64858253,64860872,64868180,64873912,64875811,64878289,64879303,64879807,64882378,64883909,64889394,64889565,64890196,64890840,64891049,64891544,64891829,64895171 nan TBK1 nan nan 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 1610 DAO;NM_001917.4 12 + 109278772 109294321 109278772 109294321 10 109278772,109281215,109283234,109283973,109286747,109288028,109290771,109292444,109293142,109294169 109278986,109281350,109283331,109284059,109286822,109288153,109290874,109292582,109293261,109294321 nan DAO nan nan 0,0,0,0,0,0,0,0,0,0 diff --git a/LiveBedfiles/Pan1159dataSambamba.bed b/LiveBedfiles/Pan1159dataSambamba.bed index 4b6149c5..cc0b24ab 100644 --- a/LiveBedfiles/Pan1159dataSambamba.bed +++ b/LiveBedfiles/Pan1159dataSambamba.bed @@ -499,6 +499,15 @@ 11 71155028 71155271 11-71155028-71155271 0 + 
DHCR7;NM_001360.2 1717 11 71155890 71156008 11-71155890-71156008 0 + DHCR7;NM_001360.2 1717 12 5153303 5155165 12-5153303-5155165 0 + KCNA5;NM_002234.3 3741 +12 52306248 52306329 12-52306248-52306329 0 + ACVRL1;NM_000020.2 94 +12 52306872 52307144 12-52306872-52307144 0 + ACVRL1;NM_000020.2 94 +12 52307332 52307564 12-52307332-52307564 0 + ACVRL1;NM_000020.2 94 +12 52307747 52307867 12-52307747-52307867 0 + ACVRL1;NM_000020.2 94 +12 52308212 52308379 12-52308212-52308379 0 + ACVRL1;NM_000020.2 94 +12 52308998 52309294 12-52308998-52309294 0 + ACVRL1;NM_000020.2 94 +12 52309809 52310027 12-52309809-52310027 0 + ACVRL1;NM_000020.2 94 +12 52312758 52312909 12-52312758-52312909 0 + ACVRL1;NM_000020.2 94 +12 52314532 52314687 12-52314532-52314687 0 + ACVRL1;NM_000020.2 94 12 57944044 57944193 12-57944044-57944193 0 + KIF5A;NM_004984.3 3798 12 57957211 57957319 12-57957211-57957319 0 + KIF5A;NM_004984.3 3798 12 57957392 57957486 12-57957392-57957486 0 + KIF5A;NM_004984.3 3798 diff --git a/LiveBedfiles/Pan1457_LogFile.txt b/LiveBedfiles/Pan1457_LogFile.txt index 2b533478..2b81b61b 100644 --- a/LiveBedfiles/Pan1457_LogFile.txt +++ b/LiveBedfiles/Pan1457_LogFile.txt @@ -1,4 +1,4 @@ -Time Stamp:2017-11-29 17:11:17.400293 +Time Stamp:2017-12-11 10:52:44.876015 Command arguments executed: RefSeq table format version generated as /home/dnanexus/out/Output_files/Pan1457dataRefSeqFormat.txt /home/dnanexus/mokabed/LiveBedfiles/TestArea_for_bed_generation_script/OOBed7_uses_mirrored_database_.py --codingup 50 --codingdown 50 --useaccessions --transcripts /home/dnanexus/Pan1457.txt --minuschr --outputfile /home/dnanexus/out/Output_files/Pan1457data.bed --logfile /home/dnanexus/out/Output_files/Pan1457_LogFile.txt @@ -14,4 +14,4 @@ RefSeq table format version generated as /home/dnanexus/out/Output_files/Pan1457 cruzdb module file path: /home/dnanexus/anaconda2/lib/python2.7/site-packages/cruzdb/__init__.py -version as defined by git tag = v1.0-204-g5ab2be3 \ No newline at end of file 
+version as defined by git tag = v1.0-248-g8ac22d9 \ No newline at end of file diff --git a/LiveBedfiles/Pan1457_RPKM.bed b/LiveBedfiles/Pan1457_RPKM.bed index 8c1b71d2..96cce53c 100644 --- a/LiveBedfiles/Pan1457_RPKM.bed +++ b/LiveBedfiles/Pan1457_RPKM.bed @@ -498,6 +498,15 @@ 11 71154988 71155311 1717 11 71155850 71156048 1717 12 5153263 5155205 3741 +12 52306208 52306369 94 +12 52306832 52307184 94 +12 52307292 52307604 94 +12 52307707 52307907 94 +12 52308172 52308419 94 +12 52308958 52309334 94 +12 52309769 52310067 94 +12 52312718 52312949 94 +12 52314492 52314727 94 12 57944004 57944233 3798 12 57957171 57957359 3798 12 57957352 57957526 3798 diff --git a/LiveBedfiles/Pan1457data.bed b/LiveBedfiles/Pan1457data.bed index 4dcebc9d..56493877 100644 --- a/LiveBedfiles/Pan1457data.bed +++ b/LiveBedfiles/Pan1457data.bed @@ -1,4 +1,4 @@ -#2017-11-29 17:11:37.853845 +#2017-12-11 10:53:06.106674 #Chr Start Stop EntrezID Gene_Accession 1 11073734 11074072 23435 TARDBP;NM_007375.3 1 11076850 11077114 23435 TARDBP;NM_007375.3 @@ -500,6 +500,15 @@ 11 71154988 71155311 1717 DHCR7;NM_001360.2 11 71155850 71156048 1717 DHCR7;NM_001360.2 12 5153263 5155205 3741 KCNA5;NM_002234.3 +12 52306208 52306369 94 ACVRL1;NM_000020.2 +12 52306832 52307184 94 ACVRL1;NM_000020.2 +12 52307292 52307604 94 ACVRL1;NM_000020.2 +12 52307707 52307907 94 ACVRL1;NM_000020.2 +12 52308172 52308419 94 ACVRL1;NM_000020.2 +12 52308958 52309334 94 ACVRL1;NM_000020.2 +12 52309769 52310067 94 ACVRL1;NM_000020.2 +12 52312718 52312949 94 ACVRL1;NM_000020.2 +12 52314492 52314727 94 ACVRL1;NM_000020.2 12 57944004 57944233 3798 KIF5A;NM_004984.3 12 57957171 57957359 3798 KIF5A;NM_004984.3 12 57957352 57957526 3798 KIF5A;NM_004984.3 diff --git a/LiveBedfiles/Pan1457dataRefSeqFormat.txt b/LiveBedfiles/Pan1457dataRefSeqFormat.txt index 113a5238..515bb6a9 100644 --- a/LiveBedfiles/Pan1457dataRefSeqFormat.txt +++ b/LiveBedfiles/Pan1457dataRefSeqFormat.txt @@ -40,6 +40,7 @@ 5091 PC;NM_001040716.1 11 - 66616319 
66639680 66616319 66639680 20 66616319,66616650,66617031,66617357,66617640,66618094,66618460,66619210,66619859,66620167,66620659,66631194,66633607,66636266,66637722,66638195,66638472,66638735,66639107,66639444 66616668,66616891,66617380,66617637,66617985,66618444,66618801,66619467,66620181,66620357,66620904,66631477,66633870,66636485,66637974,66638413,66638718,66639001,66639392,66639680 nan PC nan nan 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 1717 DHCR7;NM_001360.2 11 - 71146370 71156048 71146370 71156048 7 71146370,71148807,71149874,71152222,71153258,71154988,71155850 71146935,71149039,71150179,71152536,71153449,71155311,71156048 nan DHCR7 nan nan 0,0,0,0,0,0,0 3741 KCNA5;NM_002234.3 12 + 5153263 5155205 5153263 5155205 1 5153263 5155205 nan KCNA5 nan nan 0 +94 ACVRL1;NM_000020.2 12 + 52306208 52314727 52306208 52314727 9 52306208,52306832,52307292,52307707,52308172,52308958,52309769,52312718,52314492 52306369,52307184,52307604,52307907,52308419,52309334,52310067,52312949,52314727 nan ACVRL1 nan nan 0,0,0,0,0,0,0,0,0 3798 KIF5A;NM_004984.3 12 + 57944004 57977012 57944004 57977012 28 57944004,57957171,57957352,57957840,57958192,57958650,57960858,57961226,57962695,57962988,57963267,57963719,57965053,57965793,57966312,57968816,57969372,57969819,57970001,57970493,57971445,57971741,57971970,57974688,57975147,57975602,57976334,57976833 57944233,57957359,57957526,57958045,57958341,57958806,57961046,57961451,57962900,57963237,57963516,57963995,57965222,57966100,57966559,57969105,57969590,57969984,57970211,57970695,57971606,57971913,57972175,57975005,57975401,57975785,57976462,57977012 nan KIF5A nan nan 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 29110 TBK1;NM_013254.3 12 + 64849600 64895211 64849600 64895211 20 64849600,64853918,64858063,64860630,64867959,64873741,64875571,64878032,64879184,64879655,64882216,64883770,64889212,64889428,64890096,64890678,64890892,64891377,64891697,64895059 
64849787,64854159,64858293,64860912,64868220,64873952,64875851,64878329,64879343,64879847,64882418,64883949,64889434,64889605,64890236,64890880,64891089,64891584,64891869,64895211 nan TBK1 nan nan 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 1610 DAO;NM_001917.4 12 + 109278732 109294361 109278732 109294361 10 109278732,109281175,109283194,109283933,109286707,109287988,109290731,109292404,109293102,109294129 109279026,109281390,109283371,109284099,109286862,109288193,109290914,109292622,109293301,109294361 nan DAO nan nan 0,0,0,0,0,0,0,0,0,0 diff --git a/LiveBedfiles/Pan1457dataSambamba.bed b/LiveBedfiles/Pan1457dataSambamba.bed index 2b35c9f5..4532532a 100644 --- a/LiveBedfiles/Pan1457dataSambamba.bed +++ b/LiveBedfiles/Pan1457dataSambamba.bed @@ -498,6 +498,15 @@ 11 71154988 71155311 11-71154988-71155311 0 + DHCR7;NM_001360.2 1717 11 71155850 71156048 11-71155850-71156048 0 + DHCR7;NM_001360.2 1717 12 5153263 5155205 12-5153263-5155205 0 + KCNA5;NM_002234.3 3741 +12 52306208 52306369 12-52306208-52306369 0 + ACVRL1;NM_000020.2 94 +12 52306832 52307184 12-52306832-52307184 0 + ACVRL1;NM_000020.2 94 +12 52307292 52307604 12-52307292-52307604 0 + ACVRL1;NM_000020.2 94 +12 52307707 52307907 12-52307707-52307907 0 + ACVRL1;NM_000020.2 94 +12 52308172 52308419 12-52308172-52308419 0 + ACVRL1;NM_000020.2 94 +12 52308958 52309334 12-52308958-52309334 0 + ACVRL1;NM_000020.2 94 +12 52309769 52310067 12-52309769-52310067 0 + ACVRL1;NM_000020.2 94 +12 52312718 52312949 12-52312718-52312949 0 + ACVRL1;NM_000020.2 94 +12 52314492 52314727 12-52314492-52314727 0 + ACVRL1;NM_000020.2 94 12 57944004 57944233 12-57944004-57944233 0 + KIF5A;NM_004984.3 3798 12 57957171 57957359 12-57957171-57957359 0 + KIF5A;NM_004984.3 3798 12 57957352 57957526 12-57957352-57957526 0 + KIF5A;NM_004984.3 3798 diff --git a/LiveBedfiles/TestArea_for_bed_generation_script/OOBed7_uses_mirrored_database_.py b/LiveBedfiles/TestArea_for_bed_generation_script/OOBed7_uses_mirrored_database_.py index 
cbd987a6..c6909932 100644 --- a/LiveBedfiles/TestArea_for_bed_generation_script/OOBed7_uses_mirrored_database_.py +++ b/LiveBedfiles/TestArea_for_bed_generation_script/OOBed7_uses_mirrored_database_.py @@ -213,7 +213,17 @@ def filereader(self): if self.genes: self.bed = pd.read_table(self.genes, header= 0) if self.transcripts: - self.bed = pd.read_table(self.transcripts, header= 0) + # check the transcript file has a header line + with open(self.transcripts, 'r') as f: + # read the first line only + first_line = f.readline() + # check the first line doesn't contain "NM_" and the header contains a string that should be present in at least one of the column headers (header is GuysAccession approvedsymbol GuysAccessionVersion) + if "Accession" in first_line and "NM_" not in first_line: + # read the file into a pandas table + self.bed = pd.read_table(self.transcripts, header= 0) + else: + # if no header report the missing header and exit + sys.exit("missing header in transcript file") def coordfile(self): #Set up bed file using a bed file where the start positions for each exon are base 0 diff --git a/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1159.txt b/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1159.txt index 1496ab6f..d66a7b6c 100644 --- a/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1159.txt +++ b/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1159.txt @@ -1,3 +1,4 @@ +Accession ApprovedSymbol GuysAccessionVersion NM_000020 ACVRL1 0 NM_020919 ALS2 0 NM_001145 ANG 0 diff --git a/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1457.txt b/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1457.txt index 1496ab6f..d6745618 100644 --- a/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1457.txt +++ b/LiveBedfiles/Transcripts/Pantranscriptfiles/Pan1457.txt @@ -1,3 +1,4 @@ +GuysAccession Symbol GuysAccessionVersion NM_000020 ACVRL1 0 NM_020919 ALS2 0 NM_001145 ANG 0