-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCD48_B_Sep_AS_vioplot.R
1063 lines (850 loc) · 43.9 KB
/
CD48_B_Sep_AS_vioplot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#### Cell Type Specific AS events
#################################################################################################################################################
#### SE #########################################################################################################################################
#################################################################################################################################################
load("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/SE_psi_new.RData")
PSI <- SE_psi[, 13:ncol(SE_psi)]
row.names(PSI) <- SE_psi$loci
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
PSI <- PSI[, row.names(Cell_type)]
dim(PSI)
# [1] 17670 3039
rs <- rowSums(!is.na(PSI))
sum(rs == 0)
#[1] 53
PSI_tu <- PSI[rs>=1, ]
#Bcell_Sep <- t(apply(PSI_tu, 1, function(x){
# x <- x[!is.na(x)]
# k = sum(x[names(x) %in% row.names(Cell_type[Cell_type$CellType=="B Cells",])] > 0.5)#
# #k = sum(x[which(Cell_type$CellType=="B Cells")] >= 0.15)#
# D = sum(x >= 0.15)
# n = length(x) - D
# N = sum(names(x) %in% row.names(Cell_type[Cell_type$CellType=="B Cells",]))
# pval = phyper(k, D, n, N, lower.tail=FALSE)
# if(k==0) {
# adj_pval <- pval
# } else {
# adj_pval <- pval * k
# }
# enrichment <- c(as.integer(k), D, n, N, pval, adj_pval); names(enrichment) <- c("k", "D", "n", "N", "pval", "adj_pval")
# return(enrichment)
#}))#
#Bcell_Sep <- as.data.frame(Bcell_Sep)
#Bcell_Sep$Cell_p <- Bcell_Sep$k/Bcell_Sep$N
#Bcell_Sep$Bg_p <- Bcell_Sep$D/(Bcell_Sep$D + Bcell_Sep$n)
#Bcell_Sep$Other_p <- (Bcell_Sep$D - Bcell_Sep$k)/(Bcell_Sep$D + Bcell_Sep$n - Bcell_Sep$N)
#Bcell_Sep <- na.omit(Bcell_Sep)
#dim(Bcell_Sep[Bcell_Sep$adj_pval < 0.01 & Bcell_Sep$Cell_p > .1, ])
##[1] 5436 9
#dim(Bcell_Sep[Bcell_Sep$adj_pval < 0.01 & Bcell_Sep$Cell_p > .1 & Bcell_Sep$Cell_p > 2*Bcell_Sep$Other_p, ])
##[1] 401 9
#hist(Bcell_Sep$pval)#
#dim(Bcell_Sep[Bcell_Sep$adj_pval < 0.01 & Bcell_Sep$Cell_p > .2 & Bcell_Sep$Cell_p > 4*Bcell_Sep$Other_p, ])
## [1] 266 9#
#dim(Bcell_Sep[Bcell_Sep$adj_pval < 0.01 & Bcell_Sep$Cell_p > .1 & Bcell_Sep$Cell_p > 1.5*Bcell_Sep$Other_p & Bcell_Sep$k > 60, ])#
#
#
#PSI_sub <- PSI[row.names(Bcell_Sep[Bcell_Sep$adj_pval < 0.01 & Bcell_Sep$Cell_p > .1 & Bcell_Sep$Cell_p > Bcell_Sep$Other_p & Bcell_Sep$k > 60, ]), ]#
#
#pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/Bcell_Sep_SE_vioplot2.pdf", , width=9, height=6)
#for(i in 1:nrow(PSI_sub)){
# plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
# print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
# geom_violin()+
# #geom_point(color="blue", alpha=.5, na.rm = TRUE)+
# geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
# ggtitle(row.names(PSI_sub[i,]))+
# scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
# table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
#}
#dev.off()#
#
#
#
#
#
#Tcel <- row.names(Cell_type[Cell_type$CellType=="CD4+ T Cells" | Cell_type$CellType=="CD8+ T Cells",])
#Cell_type <- Cell_type[Cell_type$CellType=="CD4+ T Cells" | Cell_type$CellType=="CD8+ T Cells",]#
#PSI_tu <- PSI[rs>=1, Tcel]#
#CD4_Sep <- t(apply(PSI_tu, 1, function(x){
# x <- x[!is.na(x)]
# k = sum(x[names(x) %in% row.names(Cell_type[Cell_type$CellType=="CD4+ T Cells",])] > 0.5)#
# #k = sum(x[which(Cell_type$CellType=="B Cells")] >= 0.15)#
# D = sum(x >= 0.15)
# n = length(x) - D
# N = sum(names(x) %in% row.names(Cell_type[Cell_type$CellType=="CD4+ T Cells",]))
# pval = phyper(k, D, n, N, lower.tail=FALSE)
# if(k==0) {
# adj_pval <- pval
# } else {
# adj_pval <- pval * k
# }
# enrichment <- c(as.integer(k), D, n, N, pval, adj_pval); names(enrichment) <- c("k", "D", "n", "N", "pval", "adj_pval")
# return(enrichment)
#}))#
#
#CD4_Sep <- as.data.frame(CD4_Sep)
#CD4_Sep$Cell_p <- CD4_Sep$k/CD4_Sep$N
#CD4_Sep$Bg_p <- CD4_Sep$D/(CD4_Sep$D + CD4_Sep$n)
#CD4_Sep$Other_p <- (CD4_Sep$D - CD4_Sep$k)/(CD4_Sep$D + CD4_Sep$n - CD4_Sep$N)
#CD4_Sep <- na.omit(CD4_Sep)
#dim(CD4_Sep[CD4_Sep$adj_pval < 0.01 & CD4_Sep$Cell_p > .1, ])
##[1] 3565 9
#dim(CD4_Sep[CD4_Sep$adj_pval < 0.01 & CD4_Sep$Cell_p > .1 & CD4_Sep$Cell_p > 2*CD4_Sep$Other_p, ])
##[1] 630 9
#hist(CD4_Sep$pval)#
#dim(CD4_Sep[CD4_Sep$adj_pval < 0.01 & CD4_Sep$Cell_p > .2 & CD4_Sep$Cell_p > 4*CD4_Sep$Other_p, ])
## [1] 390 9#
#dim(CD4_Sep[CD4_Sep$adj_pval < 0.01 & CD4_Sep$Cell_p > .1 & CD4_Sep$Cell_p > CD4_Sep$Other_p & CD4_Sep$k > 168, ])
## [1] 69 9#
#
#PSI_sub <- PSI[row.names(CD4_Sep[CD4_Sep$adj_pval < 0.01 & CD4_Sep$Cell_p > .1 & CD4_Sep$Cell_p > CD4_Sep$Other_p & CD4_Sep$k > 168, ]), Tcel]#
#pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/CD4_Sep_CD4vsCD8_vioplot.pdf", , width=9, height=6)
#for(i in 1:nrow(PSI_sub)){
# plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
# print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
# geom_violin()+
# #geom_point(color="blue", alpha=.5, na.rm = TRUE)+
# geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
# ggtitle(row.names(PSI_sub[i,]))+
# scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
# table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
#}
#dev.off()#
#
#
#
#
#
#
#CD8_Sep <- t(apply(PSI_tu, 1, function(x){
# x <- x[!is.na(x)]
# k = sum(x[names(x) %in% row.names(Cell_type[Cell_type$CellType=="CD8+ T Cells",])] > 0.5)#
# #k = sum(x[which(Cell_type$CellType=="B Cells")] >= 0.15)#
# D = sum(x >= 0.15)
# n = length(x) - D
# N = sum(names(x) %in% row.names(Cell_type[Cell_type$CellType=="CD8+ T Cells",]))
# pval = phyper(k, D, n, N, lower.tail=FALSE)
# if(k==0) {
# adj_pval <- pval
# } else {
# adj_pval <- pval * k
# }
# enrichment <- c(as.integer(k), D, n, N, pval, adj_pval); names(enrichment) <- c("k", "D", "n", "N", "pval", "adj_pval")
# return(enrichment)
#}))#
#
#CD8_Sep <- as.data.frame(CD8_Sep)
#CD8_Sep$Cell_p <- CD8_Sep$k/CD8_Sep$N
#CD8_Sep$Bg_p <- CD8_Sep$D/(CD8_Sep$D + CD8_Sep$n)
#CD8_Sep$Other_p <- (CD8_Sep$D - CD8_Sep$k)/(CD8_Sep$D + CD8_Sep$n - CD8_Sep$N)
#CD8_Sep <- na.omit(CD8_Sep)
#dim(CD8_Sep[CD8_Sep$adj_pval < 0.01 & CD8_Sep$Cell_p > .1, ])
##[1] 5318 9
#dim(CD8_Sep[CD8_Sep$adj_pval < 0.01 & CD8_Sep$Cell_p > .1 & CD8_Sep$Cell_p > 2*CD8_Sep$Other_p, ])
##[1] 403 9
#hist(CD8_Sep$pval)#
#dim(CD8_Sep[CD8_Sep$adj_pval < 0.01 & CD8_Sep$Cell_p > .2 & CD8_Sep$Cell_p > 4*CD8_Sep$Other_p, ])
## [1] 256 9#
#dim(CD8_Sep[CD8_Sep$adj_pval < 0.01 & CD8_Sep$Cell_p > .1 & CD8_Sep$Cell_p > CD8_Sep$Other_p & CD8_Sep$k > 168, ])
## [1] 142 9#
#PSI_sub <- PSI[row.names(CD8_Sep[CD8_Sep$adj_pval < 0.01 & CD8_Sep$Cell_p > .1 & CD8_Sep$Cell_p > CD8_Sep$Other_p & CD8_Sep$k > 168, ]), Tcel]#
#pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/CD8_Sep_CD8vsCD4_vioplot.pdf", , width=9, height=6)
#for(i in 1:nrow(PSI_sub)){
# plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
# print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
# geom_violin()+
# #geom_point(color="blue", alpha=.5, na.rm = TRUE)+
# geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
# ggtitle(row.names(PSI_sub[i,]))+
# scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
# table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
#}
#dev.off()
#### CD4 VS. CD8 ===================
PSI_tu_CD4 <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "CD4+ T Cells",])]
PSI_tu_CD8 <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "CD8+ T Cells",])]
psi_mean_4 <- rowMeans(PSI_tu_CD4, na.rm=T)
psi_mean_8 <- rowMeans(PSI_tu_CD8, na.rm=T)
psi_number_4 <- rowSums(!is.na(PSI_tu_CD4))
psi_number_8 <- rowSums(!is.na(PSI_tu_CD8))
test <- data.frame(cbind(psi_mean_4,psi_mean_8,psi_number_4,psi_number_8))
sum(psi_number_4 > ncol(PSI_tu_CD4)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1)
#[1] 3842
sum(psi_number_4 > ncol(PSI_tu_CD4)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1 )
test <- test[psi_number_4 > ncol(PSI_tu_CD4)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1, ]
dim(na.omit(test))
#[1] 3842 4
max(test$psi_mean_4 - test$psi_mean_8)
#[1] 0.1849871
test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]
Cell_type <- Cell_type[Cell_type$CellType == "CD4+ T Cells" | Cell_type$CellType == "CD8+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/CD4vsCD8_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
load("/mnt/data5/BGI/UCB/ExpMat_NewID/Seurat.RData")
tsne <- as.data.frame(Combine@[email protected])
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/CD4vsCD8_featureplot.pdf", , width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/CD4vsCD8_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
#### B Cell VS. CD8 ===================
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
PSI_tu_BCell <- PSI[row.names(PSI_tu), row.names(Cell_type[Cell_type$CellType == "B Cells",])]
psi_mean_B <- rowMeans(PSI_tu_BCell, na.rm=T)
psi_number_B <- rowSums(!is.na(PSI_tu_BCell))
test <- data.frame(cbind(psi_mean_B,psi_mean_8,psi_number_B,psi_number_8))
test <- test[psi_number_B > ncol(PSI_tu_BCell)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1, ]
dim(test)
#[1] 3510 4
dim(na.omit(test))
#[1] 3510 4
abs(test$psi_mean_B - test$psi_mean_8)
test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]
Cell_type <- Cell_type[Cell_type$CellType == "B Cells" | Cell_type$CellType == "CD8+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/BcellvsCD8_vioplot.pdf", width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/BcellvsCD8_featureplot.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/BcellvsCD8_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
#### B Cell VS. CD4 ===================
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
test <- data.frame(cbind(psi_mean_B,psi_mean_4,psi_number_B,psi_number_4))
test <- test[psi_number_B > ncol(PSI_tu_BCell)*.1 & psi_number_4 > ncol(PSI_tu_CD4)*.1, ]
dim(test)
#[1] 3360 4
dim(na.omit(test))
#[1] 3360 4
abs(test$psi_mean_B - test$psi_mean_4)
test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]
Cell_type <- Cell_type[Cell_type$CellType == "B Cells" | Cell_type$CellType == "CD4+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/BcellvsCD4_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/BcellvsCD4_featureplot.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/BcellvsCD4_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_tu_BCell["1:198692374-198694017-198696910-198699563", ]
sashimi(junction = "1:198692374-198699563", cell = c("UCB4.01164", "UCB4.01435", "UCB4.01408", "UCB4.01357"),
outDir = "/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/")
PSI_tu_BCell["1:198692374-198699563-198699705-198703297", ]
sashimi(junction = "1:198692374-198699563", cell = c("UCB4.01164", "UCB4.01190", "UCB4.01226", "UCB4.01248"),
outDir = "/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/")
PSI_tu_BCell["1:198699705-198702386-198702531-198703297", ]
sashimi(junction = "1:198699705-198703297", cell = c("UCB4.01435", "UCB4.01327", "UCB4.01357"),
outDir = "/mnt/data5/BGI/UCB/tangchao/DSU2/SE/Cell_Sep/")
#################################################################################################################################################
#### A3SS & A5SS ################################################################################################################################
#################################################################################################################################################
load("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/A3SS_psi_new.RData")
load("/mnt/data5/BGI/UCB/tangchao/DSU2/A5SS/A5SS_psi_new.RData")
PSI3 <- A3SS_psi[, 15:ncol(A3SS_psi)]
PSI5 <- A5SS_psi[, 15:ncol(A5SS_psi)]
row.names(PSI3) <- A3SS_psi$as2
row.names(PSI5) <- A5SS_psi$as2
PSI <- rbind(PSI3, PSI5)
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
PSI <- PSI[, row.names(Cell_type)]
dim(PSI)
rs <- rowSums(!is.na(PSI))
sum(rs == 0)
# [1] 13
PSI_tu <- PSI[rs>=1, ]
PSI_tu_CD4 <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "CD4+ T Cells",])]
PSI_tu_CD8 <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "CD8+ T Cells",])]
PSI_tu_B <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "B Cells",])]
psi_mean_4 <- rowMeans(PSI_tu_CD4, na.rm=T)
psi_mean_8 <- rowMeans(PSI_tu_CD8, na.rm=T)
psi_mean_B <- rowMeans(PSI_tu_B, na.rm=T)
psi_number_4 <- rowSums(!is.na(PSI_tu_CD4))
psi_number_8 <- rowSums(!is.na(PSI_tu_CD8))
psi_number_B <- rowSums(!is.na(PSI_tu_B))
#### CD4 VS. CD8 ===================
test <- data.frame(cbind(psi_mean_4,psi_mean_8,psi_number_4,psi_number_8))
sum(psi_number_4 > ncol(PSI_tu_CD4)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1)
#[1] 608
test <- test[psi_number_4 > ncol(PSI_tu_CD4)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1, ]
dim(na.omit(test))
#[1] 608 4
max(test$psi_mean_4 - test$psi_mean_8)
#[1] 0.1381762
test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
Cell_type <- Cell_type[Cell_type$CellType == "CD4+ T Cells" | Cell_type$CellType == "CD8+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/CD4vsCD8_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/CD4vsCD8_featureplot.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/CD4vsCD8_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
#### B Cell VS. CD4 ===================
test <- data.frame(cbind(psi_mean_B,psi_mean_4,psi_number_B,psi_number_4))
test <- test[psi_number_B > ncol(PSI_tu_BCell)*.1 & psi_number_4 > ncol(PSI_tu_CD4)*.1, ]
dim(test)
#[1] 507 4
abs(test$psi_mean_B - test$psi_mean_4)
test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
Cell_type <- Cell_type[Cell_type$CellType == "B Cells" | Cell_type$CellType == "CD4+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/BcellvsCD4_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/BcellvsCD4_featureplot.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/BcellvsCD4_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_tu_B["X:19746442-19836124_X:19746442-19836150",]
sashimi(junction = "X:19746442-19836150", cell = c("UCB4.01141", "UCB4.00386"),
outDir = "/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/")
sashimi(junction = "X:19836124-19836150", cell = c("UCB4.01141", "UCB4.00386"),
outDir = "/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/")
#### B Cell VS. CD8 ===================
test <- data.frame(cbind(psi_mean_B,psi_mean_8,psi_number_B,psi_number_8))
test <- test[psi_number_B > ncol(PSI_tu_BCell)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1, ]
dim(test)
#[1] 543 4
abs(test$psi_mean_B - test$psi_mean_8)
test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
Cell_type <- Cell_type[Cell_type$CellType == "B Cells" | Cell_type$CellType == "CD8+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/BcellvsCD8_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/BcellvsCD8_featureplot.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/A3SS/Cell_Sep/BcellvsCD8_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
#################################################################################################################################################
#### AFE & ALE ##################################################################################################################################
#################################################################################################################################################
load("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/AFE_psi_new.RData")
load("/mnt/data5/BGI/UCB/tangchao/DSU2/ALE/ALE_psi_new.RData")
PSIf <- AFE_psi[, 12:ncol(AFE_psi)]
PSIl <- ALE_psi[, 12:ncol(ALE_psi)]
row.names(PSIf) <- AFE_psi$as2
row.names(PSIl) <- ALE_psi$as2
PSI <- rbind(PSIf, PSIl)
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
PSI <- PSI[, row.names(Cell_type)]
dim(PSI)
# [1] 4263 3039
rs <- rowSums(!is.na(PSI))
sum(rs == 0)
# [1] 3
PSI_tu <- PSI[rs>=1, ]
PSI_tu_CD4 <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "CD4+ T Cells",])]
PSI_tu_CD8 <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "CD8+ T Cells",])]
PSI_tu_B <- PSI_tu[, row.names(Cell_type[Cell_type$CellType == "B Cells",])]
psi_mean_4 <- rowMeans(PSI_tu_CD4, na.rm=T)
psi_mean_8 <- rowMeans(PSI_tu_CD8, na.rm=T)
psi_mean_B <- rowMeans(PSI_tu_B, na.rm=T)
psi_number_4 <- rowSums(!is.na(PSI_tu_CD4))
psi_number_8 <- rowSums(!is.na(PSI_tu_CD8))
psi_number_B <- rowSums(!is.na(PSI_tu_B))
#### CD4 VS. CD8 ===================
test <- data.frame(cbind(psi_mean_4,psi_mean_8,psi_number_4,psi_number_8))
sum(psi_number_4 > ncol(PSI_tu_CD4)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1)
#[1] 1202
test <- test[psi_number_4 > ncol(PSI_tu_CD4)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1, ]
dim(na.omit(test))
#[1] 1202 4
max(test$psi_mean_4 - test$psi_mean_8)
#[1] 0.1381762
test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
Cell_type <- Cell_type[Cell_type$CellType == "CD4+ T Cells" | Cell_type$CellType == "CD8+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/Cell_Sep/CD4vsCD8_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/Cell_Sep/CD4vsCD8_featureplot.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_4 - test$psi_mean_8)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/Cell_Sep/CD4vsCD8_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
#### B Cell VS. CD4 ===================
test <- data.frame(cbind(psi_mean_B,psi_mean_4,psi_number_B,psi_number_4))
test <- test[psi_number_B > ncol(PSI_tu_BCell)*.1 & psi_number_4 > ncol(PSI_tu_CD4)*.1, ]
dim(test)
#[1] 1063 4
abs(test$psi_mean_B - test$psi_mean_4)
test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
Cell_type <- Cell_type[Cell_type$CellType == "B Cells" | Cell_type$CellType == "CD4+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/Cell_Sep/BcellvsCD4_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+
geom_violin()+
#geom_point(color="blue", alpha=.5, na.rm = TRUE)+
#geom_jitter(height = 0, color="blue", alpha=.5, na.rm = TRUE)+
ggtitle(row.names(PSI_sub[i,]))+
scale_x_discrete(labels=paste(names(table(na.omit(plot_tab)$Cell)), "\n",
table(na.omit(plot_tab)$Cell), "/", table(plot_tab$Cell), sep="")))
}
dev.off()
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/Cell_Sep/BcellvsCD4_featureplot.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_4)),10),]), ]
tsne_tu <- merge(tsne, t(PSI_sub), by = 0, all.x=T)
tsne_tu <- data.frame(tsne_tu[,-1], row.names = tsne_tu[,1])
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/Cell_Sep/BcellvsCD4_featureplot2.pdf", width=9, height=6)
for(i in 3:ncol(tsne_tu)){
tsne_tu[rev(order(tsne_tu[,i], decreasing=T)),] -> CD45_R_r_tab1
print(ggplot(data = CD45_R_r_tab1, aes(x = tSNE_1, y = tSNE_2))+
geom_point(aes(colour = as.numeric(CD45_R_r_tab1[,i])), size = 1)+
#scale_colour_gradient(high = 'red',low = 'Grey')+
scale_colour_gradient(low = "#00FFFF", high = "red",na.value = "grey50")+
ggtitle(colnames(CD45_R_r_tab1)[i])+
theme(legend.title = element_blank(),
axis.title = element_blank(),
axis.text= element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()))
}
dev.off()
#### B Cell VS. CD8 ===================
test <- data.frame(cbind(psi_mean_B,psi_mean_8,psi_number_B,psi_number_8))
test <- test[psi_number_B > ncol(PSI_tu_BCell)*.1 & psi_number_8 > ncol(PSI_tu_CD8)*.1, ]
dim(test)
#[1] 1116 4
dim(na.omit(test))
#[1] 1116 4
abs(test$psi_mean_B - test$psi_mean_8)
test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]
Cell_type <- read.table("/mnt/data5/BGI/UCB/ExpMat_NewID/Cell_type.txt", header = F, row.names = 1, sep = "\t", stringsAsFactors=F)
colnames(Cell_type) <- "CellType"
Cell_type <- data.frame(CellType = Cell_type[order(Cell_type),], row.names = row.names(Cell_type)[order(Cell_type)])
Cell_type$Individual <- substr(row.names(Cell_type), 1, 4)
Cell_type <- Cell_type[Cell_type$CellType == "B Cells" | Cell_type$CellType == "CD8+ T Cells", ]
Cell_type$CellType <- as.factor(as.character(Cell_type$CellType))
PSI_sub <- PSI[row.names(test[tail(order(abs(test$psi_mean_B - test$psi_mean_8)),10),]), row.names(Cell_type)]
pdf("/mnt/data5/BGI/UCB/tangchao/DSU2/AFE/Cell_Sep/BcellvsCD8_vioplot.pdf", , width=9, height=6)
for(i in 1:nrow(PSI_sub)){
plot_tab <- data.frame(psi = as.numeric(PSI_sub[i,]), Cell = Cell_type$CellType)
print(ggplot(data = plot_tab, aes(x = Cell, y = psi, fill = Cell))+