-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprepareData.R
34 lines (27 loc) · 920 Bytes
/
prepareData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
suppressMessages({
library(readr)
library(dplyr)
library(tidyr)
library(VariantAnnotation)
library(rtracklayer)
})
args <- commandArgs(trailingOnly = TRUE)
# Input files:
fileSNP = "Data/snpid_imp.tsv" # SNPs (snpid,chr,pos)
fileRES = "Data/chr02.cis.tsv" # MatrixEQTL results
fileGTF = "Data/gencode.v12.annotationLiftover_02.gtf.gz"
gtf <- readGFF(fileGTF)
g <- gtf[gtf$type=='gene', c("gene_id","gene_name")]
snps = read_tsv(fileSNP)
### MatrixEQTL dataframe, columns used: snpid gene_name pValue
res = read_tsv(fileRES)
names(res) =c("snpid","gene_name", "beta","tStat","pValue","FDR")
# if gene_id use gene_name
if(sum(grepl('ENSG', res$gene_name)) > length(res$gene_name)/2){
names(res)[2]='gene_id'
res = res %>% left_join(g, by=c('gene_id'))
res$gene_id <- NULL
res = res[!is.na(res$gene_name),]
}
# Only SNPs in results
snps = snps %>% filter(snpid %in% res$snpid)