-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathPatternSimilarity.R
181 lines (136 loc) · 6.45 KB
/
PatternSimilarity.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
install.packages("vioplot")
library(vioplot)
####### Select DEGs from RNA seq Data
setwd("Path")
Gene_RPKM=read.csv("GBM_gene_RPKM.csv")
Gene_RPKM=na.omit(Gene_RPKM)
Gene_S=Gene_RPKM[,2:6] ## Sensitive cell line
Gene_R=Gene_RPKM[,7:11] ## Resistant cell line
Gene_U=Gene_RPKM[,12:16] ## test cell line
Gene_S=matrix(as.numeric(as.matrix(Gene_S)),dim(Gene_S)[1],dim(Gene_S)[2])
Gene_R=matrix(as.numeric(as.matrix(Gene_R)),dim(Gene_R)[1],dim(Gene_R)[2])
Gene_U=matrix(as.numeric(as.matrix(Gene_U)),dim(Gene_U)[1],dim(Gene_U)[2])
rownames(Gene_S)=Gene_RPKM[,1]
rownames(Gene_R)=Gene_RPKM[,1]
rownames(Gene_U)=Gene_RPKM[,1]
###### DEG of sensitive cells
timestart<-Sys.time()
DEG_S=c()
RNames_DEG_S=c()
for (i in 1:dim(Gene_S)[1])
{ for (j in 1:5)
{ for (k in 1:max(j-1,1))
{if ((max(Gene_S[i,])>=10) & (Gene_S[i,k]>0) & (Gene_S[i,j]>0) & (Gene_S[i,j]/Gene_S[i,k]>5 || Gene_S[i,j]/Gene_S[i,k]<0.2)){
DEG_S=rbind(DEG_S,Gene_S[i,])
RNames_DEG_S=rbind(RNames_DEG_S,rownames(Gene_S)[i])
}
}
}
}
rownames(DEG_S)=RNames_DEG_S
DEG_S=unique(DEG_S)
# DEG_S=DEG_S[-1,]
timeend<-Sys.time()
runningtime<-timeend-timestart
print(runningtime)
summary(DEG_S)
dim(DEG_S) #163,5
plot(c(0,6,12,24,48),DEG_S["STC1",],type="b",main="DEG_S",sub='',xlim=c(0,50), ylim=c(min(DEG_S["STC1",]), max(DEG_S["STC1",])),xlab="Time (Hours)",ylab='Gene Expression')
#### DEG of resistance
timestart<-Sys.time()
DEG_R=c()
RNames_DEG_R=c()
for (i in 1:dim(Gene_R)[1])
{ for (j in 1:5)
{ for (k in 1:max(j-1,1))
{if ((max(Gene_R[i,])>=10) & (Gene_R[i,k]>0) & (Gene_R[i,j]>0) & (Gene_R[i,j]/Gene_R[i,k]>5 || Gene_R[i,j]/Gene_R[i,k]<0.2)){
DEG_R=rbind(DEG_R,Gene_R[i,])
RNames_DEG_R=rbind(RNames_DEG_R,rownames(Gene_R)[i])
}
}
}
}
rownames(DEG_R)=RNames_DEG_R
DEG_R=unique(DEG_R)
# DEG_S=DEG_S[-1,]
timeend<-Sys.time()
runningtime<-timeend-timestart
print(runningtime)
summary(DEG_R)
dim(DEG_R) #101,5
# plot(c(0,6,12,24,48),DEG_R["STC1",],type="b",main="DEG_R",sub='',xlim=c(0,50), ylim=c(min(DEG_R["STC1",]), max(DEG_R["STC1",])),xlab="Time (Hours)",ylab='Gene Expression')
names_SR=union(rownames(DEG_S),rownames(DEG_R))
setwd("Path/Results")
Gene_Annotation=read.csv("Gene Annotation_ranked.csv",sep = ",")
GeneName=Gene_Annotation[,1]
# TC_gene_1=Gene_S[names_SR,] # Temporally changing genes of sensitivity
# TC_gene_2=Gene_R[names_SR,] # Temporally changing genes of resistance
#
# TC_gene_3=Gene_U[names_SR,] # Temporally changing genes of test cell line
TC_gene_1=Gene_S[GeneName,] # Temporally changing genes of sensitivity
TC_gene_2=Gene_R[GeneName,] # Temporally changing genes of resistance
TC_gene_3=Gene_U[GeneName,] # Temporally changing genes of test cell line
setwd("Path/Results")
write.table(TC_gene_1, file="TC_gene_1.txt",quote=F,row.names=F,col.names = F,sep = "\t")
write.table(TC_gene_2, file="TC_gene_2.txt",quote=F,row.names=F,col.names = F,sep = "\t")
write.table(TC_gene_3, file="TC_gene_3.txt",quote=F,row.names=F,col.names = F,sep = "\t")
### Euclid similarity measurement
D_1=sqrt(rowSums((TC_gene_3-TC_gene_1)^2))
D_2=sqrt(rowSums((TC_gene_3-TC_gene_2)^2))
wilcox.test(D_1,D_2,alternative="less", paired=T) # Wilcoxon signed rank test with continuity correction; p-value =0.01625
vioplot(D_1,D_2,names=c("Distance to Sensitive cells","Distance to Resistant cells"),col=c("green","brown"))
title("Temporal Pattern Similarity_Euclid Distance")
temp <- locator(1) # 在图表上,你喜欢的地方点击一下,文字就出来了
text(temp,"p-value = 0.01625")
### Manhattan Distance
D_1=rowSums(abs(TC_gene_3-TC_gene_1)^1)
D_2=rowSums(abs(TC_gene_3-TC_gene_2)^1)
wilcox.test(D_1,D_2,alternative="less", paired=T) # Wilcoxon signed rank test with continuity correction; p-value = 0.02675
vioplot(D_1,D_2,names=c("Distance to Sensitive cells","Distance to Resistant cells"),col=c("green","brown"))
title("Temporal Pattern Similarity_Manhattan Distance")
temp <- locator(1) # 在图表上,你喜欢的地方点击一下,文字就出来了
text(temp,"p-value = 0.02675")
### Slope Distance
Time=t(matrix(c(6,6,12,24),dim(TC_gene_1)[2]-1,dim(TC_gene_1)[1]))
Slope_1=t(diff(t(TC_gene_1)))/Time
Slope_2=t(diff(t(TC_gene_2)))/Time
Slope_3=t(diff(t(TC_gene_3)))/Time
D_1=sqrt(rowSums((Slope_3-Slope_1)^2))
D_2=sqrt(rowSums((Slope_3-Slope_2)^2))
wilcox.test(D_1,D_2,alternative="less", paired=T) # Wilcoxon signed rank test with continuity correction; p-value =0.01625
vioplot(D_1,D_2,names=c("Distance to Sensitive cells","Distance to Resistant cells"),col=c("green","brown"))
title("Temporal Pattern Similarity")
### DTW, dynamic time wrapping
install.packages("dtw")
library(dtw)
for (i in 1:42)
{
D1[i]=dtw(TCG3[i,],TCG1[i,])$distance
D2[i]=dtw(TCG3[i,],TCG2[i,])$distance
}
# Or ## calculate DTW distance in matlab "PatternSimilarity" and read the results from there saved
# D1=read.table("Path/Results/DTWDistance1.txt")
# D2=read.table("Path/Results/DTWDistance2.txt")
# D1=as.numeric(D1)
# D2=as.numeric(D2)
wilcox.test(D1,D2,alternative="less", paired=T) # Wilcoxon signed rank test with continuity correction; p-value = 0.01523
vioplot(D1,D2,names=c("DTW Distance to Sensitive cells","DTW Distance to Resistant cells"),col=c("green","brown"))
title("Temporal Pattern Similarity_DTW Distance")
temp <- locator(1)
text(temp,"p-value = 0.01523")
########## Heatmap of gene expression in the differential network
DNG=read.csv("Path/Survival analysis/Data/Quantifing each node.csv") # Differential network genes
Score=as.matrix(DNG[,c(1,9)]) # Importance Score
Nodes=Score[,1]
x=cbind(TC_gene_1,TC_gene_2,TC_gene_3)
rownames(x)=Nodes
colnames(x)=c('S0h','S6h','S12h','S24h','S48h','R0h','R6h','R12h','R24h','R48h','U0h','U6h','U12h','U24h','U48h')
x_Normalized=(x-apply(x,1,min))/(apply(x,1,max)-apply(x,1,min)+1e-3)
x=x_Normalized
# x[is.nan(x)]==0
library(pheatmap)
dev.off()
dev.new()
pheatmap(x,cluster_row=T, cluster_cols=F, clustering_distance_rows='euclidean',clustering_method = "ward", cellwidth = 10, cellheight = 8,color = colorRampPalette(c("CornflowerBlue", "white", "firebrick3"))(50), fontsize=9, fontsize_row=6,labRow=NA) #自定义颜色
dev.new()
pheatmap(t(x),cluster_row=F, cluster_cols=T, clustering_distance_rows='euclidean',clustering_method = "ward", cellwidth = 10, cellheight = 8,color = colorRampPalette(c("CornflowerBlue", "white", "firebrick3"))(50), fontsize=9, fontsize_row=6,labRow=NA) #自定义颜色