forked from heyuan7676/COVID-19
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPPI_geneList.R
34 lines (22 loc) · 1.05 KB
/
PPI_geneList.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
library(readr)
## Directory that holds the PPI input table.
datadir <- './PPI_data/'
## Load the PPI table (PPI_TableS2.csv) from that directory.
PPI_dat <- read_csv(file = paste0(datadir, 'PPI_TableS2.csv'))
## map gene names to Ensembl IDs
library(biomaRt)
## Look up Ensembl annotation for a set of gene symbols.
##
## Queries the "hsapiens_gene_ensembl" dataset of the "ensembl" BioMart,
## filtering on 'external_gene_name'.
##
## Arguments:
##   geneName  vector of gene symbols used as the filter values.
## Returns:
##   the getBM() result with the attributes ensembl_gene_id,
##   external_gene_name, gene_biotype, entrezgene_id and chromosome_name,
##   keeping only rows located on chr1-22, chrX or chrY.
geneIDs <- function(geneName){
  ensembl <- useMart("ensembl")
  ensembl <- useDataset("hsapiens_gene_ensembl", mart = ensembl)
  geneNames <- getBM(attributes = c("ensembl_gene_id", "external_gene_name",
                                    "gene_biotype", "entrezgene_id", "chromosome_name"),
                     filters = 'external_gene_name',
                     values = geneName, mart = ensembl, verbose = FALSE)
  ## restrict to chr1-22, chrX, chrY
  ## An explicit whitelist is used: only dropping names that contain "CHR"
  ## would keep every other non-primary name (for example "MT").
  primary_chr <- c(1:22, "X", "Y")
  geneNames <- geneNames[geneNames$chromosome_name %in% primary_chr, ]
  return(geneNames)
}
## Fetch Ensembl annotation for every prey gene listed in the PPI table.
gene_Info <- geneIDs(PPI_dat$PreyGene)

## attach virus protein information
## merge() names the key column after by.x, so the merged table carries the
## gene symbol in 'external_gene_name' and has no 'PreyGene' column.
dat_save <- merge(gene_Info, PPI_dat[, c("Bait", "PreyGene")],
                  by.x = 'external_gene_name', by.y = 'PreyGene')

## substitute Gene Synonyms to match with GTEx gene names
## The rename has to target 'external_gene_name' (see above); %in% keeps the
## mask free of NA values.
is_atp5mg <- dat_save$external_gene_name %in% 'ATP5MG'
dat_save$external_gene_name[is_atp5mg] <- 'ATP5L'

## Write the annotated interaction table as an unquoted CSV without row names.
write.table(dat_save, paste0(datadir, 'PPI_interactions.csv'), sep = ',',
            row.names = FALSE, quote = FALSE)