-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcontingency_tables.R
111 lines (87 loc) · 5.68 KB
/
contingency_tables.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env Rscript
library(data.table)
library(ggplot2)
args <- commandArgs(trailingOnly = TRUE)
# args <- c("/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86726_B18_r1.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86737_B18_AIDKO_s2.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86727_B18_r2.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86742_B18_AIDER_e1.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86728_B18_AIDKO_r1.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86743_B18_AIDER_e2.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86729_B18_AIDKO_r2.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86744_B18_AIDER_r1.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86734_B18_s1.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86745_B18_AIDER_s1.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86735_B18_s2.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86746_B18_AIDER_r2.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86736_B18_AIDKO_s1.aln.point.stats.RDS", "/groups/pavri/Kimon/ursi/mutPEseq/round5/process/in-vitro/wt/pileup/86747_B18_AIDER_s2.aln.point.stats.RDS")
for (sf in args) {
# sf <- args[1] # .stats.RDS post-processed mpileup summary
message(sf)
rds <- readRDS(sf)
# Deconvolve mutation type
rds$posdata[grepl('>', type) & !grepl('N>', type), c('was', 'became') := list(substr(type, start=1, stop=1), substr(type, start=3, stop=3))]
####################
# Overall mutations
####################
contingency1 <- data.frame('mut'=c('to_A', 'T', 'G', 'C'),
from_A=c(0,
sum(rds$posdata[was == 'A' & became == 'T' & freq >= 0, count]),
sum(rds$posdata[was == 'A' & became == 'G' & freq >= 0, count]),
sum(rds$posdata[was == 'A' & became == 'C' & freq >= 0, count])),
T=c(sum(rds$posdata[was == 'T' & became == 'A' & freq >= 0, count]),
0,
sum(rds$posdata[was == 'T' & became == 'G' & freq >= 0, count]),
sum(rds$posdata[was == 'T' & became == 'C' & freq >= 0, count])),
G=c(sum(rds$posdata[was == 'G' & became == 'A' & freq >= 0, count]),
sum(rds$posdata[was == 'G' & became == 'T' & freq >= 0, count]),
0,
sum(rds$posdata[was == 'G' & became == 'C' & freq >= 0, count])),
C=c(sum(rds$posdata[was == 'C' & became == 'A' & freq >= 0, count]),
sum(rds$posdata[was == 'C' & became == 'T' & freq >= 0, count]),
sum(rds$posdata[was == 'C' & became == 'G' & freq >= 0, count]),
0) )
# Fix number, order and colour of categories for point mutations and combined files.
if(!any('mutlev' == names(rds))){
rds$mutlev <- c("C>G", "C>A", "C>T", "C>N",
"G>C", "G>A", "G>T", "G>N",
"A>T", "A>C", "A>G", "A>N",
"T>A", "T>C", "T>G", "T>N",
"N>C", "N>G", "N>A", "N>T")
}
if(!any('mutcol' == names(rds))){
rds$mutcol <- c("C>G"='black', "C>A"='black', "C>T"='black', "C>N"='black',
"G>C"='black', "G>A"='black', "G>T"='black', "G>N"='black',
"A>T"='grey30', "A>C"='grey30', "A>G"='grey30', "A>N"='grey30',
"T>A"='grey30', "T>C"='grey30', "T>G"='grey30', "T>N"='grey30',
"N>C"='grey60', "N>G"='grey60', "N>A"='grey60', "N>T"='grey60')
}
##########################
# Potential AIDER targets
##########################
# contingency2 <- data.frame(hot_CG = sum(rds$posdata[sweetspot & !sourspot, count]),
# cold_CG = sum(rds$posdata[sourspot & !sweetspot, count]),
# ambiguous_CG = sum(rds$posdata[sourspot & sweetspot, count]),
# other_CG = sum(rds$posdata[!sweetspot & !sourspot & muttype=='G:C', count]),
# AT = sum(rds$posdata[muttype == 'A:T', count]) )
contingency2 <- data.frame(hot_CG = sum(rds$posdata[sweetspot & aggrfreq >= 0, count]),
other_CG = sum(rds$posdata[(!sweetspot) & aggrfreq >= 0 & muttype=='G:C', count]),
AT = sum(rds$posdata[muttype == 'A:T' & aggrfreq >= 0, count]) )
# Fix number, order and colour of categories for point mutations and combined files.
if(!any('catlev' == names(rds))){
rds$catlev <- c("C:G in WRCH/DGYW",
# "C:G in WRCH/DGYW and SYC/GRS",
# "C:G in SYC/GRS",
"other C:G",
"A:T")
}
if(!any('catcol' == names(rds))){
rds$catcol <- c("C:G in WRCH/DGYW"="#DD0000",
# "C:G in WRCH/DGYW and SYC/GRS"="black",
# "C:G in SYC/GRS"="black",
"other C:G"="black",
"A:T"="grey50")
}
#######
# Done
#######
# Write out contingency tables
mtxf <- sub('.RDS', '.mtx.txt', sf, fixed=TRUE)
cat(c(basename(sf), "\n\n"), file=mtxf, append=FALSE)
suppressWarnings( write.table(contingency2, file=mtxf, sep="\t", row.names=FALSE, quote=FALSE, append=TRUE) ) # appending with headers triggers warning
cat("\n", file=mtxf, append=TRUE)
suppressWarnings( write.table(contingency1, file=mtxf, sep="\t", row.names=FALSE, quote=FALSE, append=TRUE) )
cat("\n", file=mtxf, append=TRUE)
# Update the R object
rds[['mutcntg']] <- contingency1
rds[['patcntg']] <- contingency2
saveRDS(rds, file=sf)
}