-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate_bootstrap.R
75 lines (64 loc) · 3.68 KB
/
generate_bootstrap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#------------Input file and output location---------------------
## Tab-separated input table with a header row. Give the full path here if the
## file is not in the directory R is running from.
data <- read.table(
  file = "hg18.farFromGenes.20kb.nosep.schweinfurthii.bothsexes.reheader",
  header = TRUE,
  sep = "\t"
)
## Prefix for the two output directories. The script appends "Afiles"
## (autosomes) and "Xfiles" (X chromosome) to this prefix, creates either
## directory if it does not exist yet, and writes 1000 bootstrap files into
## each of them.
Directory <- "Schweinfurthii"
#------------Split rows by chromosome class, then by cM window-----------------
## Only two columns are used in this section: chrom and cm2genes.

# Keep the rows of `loci` for which `keep` is TRUE. Rows where `keep` is NA
# are dropped, and the result stays a data frame.
rows_where <- function(loci, keep) {
  loci[which(keep), , drop = FALSE]
}

# Rows of `loci` whose cm2genes value lies in the half-open window
# [lower, upper).
rows_in_window <- function(loci, lower, upper) {
  cm <- loci[, "cm2genes"]
  rows_where(loci, cm >= lower & cm < upper)
}

## Rows labelled "chrX" go to selectedX; every other row goes to selectedA.
selectedX <- rows_where(data, data[, "chrom"] == "chrX")
selectedA <- rows_where(data, data[, "chrom"] != "chrX")

## Six windows per chromosome class: 0-0.05, 0.05-0.1, 0.1-0.2, 0.2-0.4,
## 0.4-0.8 and 0.8-2.0. Rows with cm2genes below 0, at or above 2.0, or
## missing fall into no window.
Xdatabin1 <- rows_in_window(selectedX, 0, 0.05)
Xdatabin2 <- rows_in_window(selectedX, 0.05, 0.1)
Xdatabin3 <- rows_in_window(selectedX, 0.1, 0.2)
Xdatabin4 <- rows_in_window(selectedX, 0.2, 0.4)
Xdatabin5 <- rows_in_window(selectedX, 0.4, 0.8)
Xdatabin6 <- rows_in_window(selectedX, 0.8, 2.0)

Adatabin1 <- rows_in_window(selectedA, 0, 0.05)
Adatabin2 <- rows_in_window(selectedA, 0.05, 0.1)
Adatabin3 <- rows_in_window(selectedA, 0.1, 0.2)
Adatabin4 <- rows_in_window(selectedA, 0.2, 0.4)
Adatabin5 <- rows_in_window(selectedA, 0.4, 0.8)
Adatabin6 <- rows_in_window(selectedA, 0.8, 2.0)
#------------Generate the bootstrap replicates---------------------
## Every replicate resamples each cM window with replacement, drawing exactly
## as many rows as the window holds, stacks the six resampled windows and
## writes them as one tab-separated file: <Directory>Xfiles/Xfile<i> for the
## X chromosome and <Directory>Afiles/Afile<i> for the autosomes.
## No random seed is set in this section.

# Number of bootstrap replicates written per chromosome class.
n_replicates <- 1000L

# Windows in the order they are resampled and stacked within a replicate.
x_bins <- list(Xdatabin1, Xdatabin2, Xdatabin3, Xdatabin4, Xdatabin5, Xdatabin6)
a_bins <- list(Adatabin1, Adatabin2, Adatabin3, Adatabin4, Adatabin5, Adatabin6)

# Draw nrow(bin) rows from `bin` with replacement and return them as a
# data frame.
resample_bin <- function(bin) {
  n_rows <- nrow(bin)
  # An empty window stays empty. sample(1:n_rows) would draw from c(1, 0)
  # here and add all-NA rows to the replicate.
  if (n_rows == 0L) {
    return(bin)
  }
  bin[sample.int(n_rows, replace = TRUE), , drop = FALSE]
}

# Resample every window in `bins`, in order, and stack the results into one
# data frame.
bootstrap_replicate <- function(bins) {
  do.call(rbind, lapply(bins, resample_bin))
}

# Create `dir_path` unless it is already a directory; stop if it cannot be
# created (for example because a regular file with that name is in the way).
ensure_directory <- function(dir_path) {
  if (dir.exists(dir_path)) {
    print("directory exists")
  } else {
    if (!dir.create(dir_path)) {
      stop("could not create output directory: ", dir_path, call. = FALSE)
    }
    print("creating directory")
  }
  invisible(dir_path)
}

# The output directories do not depend on the replicate, so they are checked
# once here rather than on every iteration.
DirX <- paste0(Directory, "Xfiles")
DirA <- paste0(Directory, "Afiles")
ensure_directory(DirX)
ensure_directory(DirA)

for (i in seq_len(n_replicates)) {
  # X windows are drawn before autosomal windows, so the random draws are
  # consumed in a fixed order within each replicate.
  XfileContent <- bootstrap_replicate(x_bins)
  AfileContent <- bootstrap_replicate(a_bins)

  Xfilename <- file.path(DirX, paste0("Xfile", i))
  Afilename <- file.path(DirA, paste0("Afile", i))
  write.table(XfileContent, Xfilename,
              quote = FALSE, row.names = FALSE, sep = "\t")
  write.table(AfileContent, Afilename,
              quote = FALSE, row.names = FALSE, sep = "\t")
}