-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathRLS_KF.R
93 lines (82 loc) · 3.49 KB
/
RLS_KF.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# The algorithm (RLS-KF) [1] combines the NII [2] and KF [3] technique to perform drug-target-interaction (DTI) prediction.
# [1] M Hao, et al., Analytica Chimica Acta (submit)
# [2] JP Mei, et. al., Bioinformatics 29 (2013) 238-245.
# [3] B Wang, et. al., Nat. Methods 11 (2014) 333-337.
# Main function
RLS_KF <- function(
currY = currY,
ytrMat = ytrMat, ## Y matrix after put zero to test set
currYsum = currYsum, ## sum of current used Y column
currSim2 = currSim2, ## used to infer current Y values when they are all zeros
idxTr = idxTr,
idxTe = idxTe,
simmat = simmatTarget,
simmat2 = simmatCompd,
gamma0 = 1, ## gamma0, for Kgip (default 1)
numNeig = 3, ## number of neighbours (default 3)
numIter = 2, ## number of iterators, default 2
lambda = 1, ## lambda, for RLS
k4simmat = k4simmat)
{
# Gaussian kernel matrix
Kgip <- fastKgipMat(ytrMat, gamma0)
# Kernel Fusion (KF)
K <- fastKF(Kgip, k4simmat, numNeig, numIter)
# NII
if (currYsum == 0) {
# Infer current Y column
currY_inferred <- ytrMat %*% currSim2
# Put 'test set' to zero
currY_inferred[idxTe] <- 0
# Normalize
currY <- (currY_inferred - min(currY_inferred)) / (max(currY_inferred) - min(currY_inferred))
# Training set
K1 <- K[idxTr, idxTr, drop = FALSE]
yTr <- currY[idxTr]
# Test set
K2 <- K[idxTe, idxTr, drop = FALSE]
numTrainSet <- length(idxTr)
# RLS model
model <- fastSolve(K1, yTr, numTrainSet, lambda)
# Predicted values for test set in each fold
myFoldPrediction <- matrix(0, nrow = length(idxTe), ncol = 1)
# Check if current test set is new.
# e.g., for new coming target, which is no interaction with known drugs.
# OR, for new coming drug, which is no interaction with known targets.
hasZeros <- which(apply(ytrMat[idxTe, ], 1, sum) == 0)
if (length(hasZeros) == 0) {
# None of test set is new
myFoldPrediction[, 1] <- K2 %*% model
} else if (length(hasZeros) == length(idxTe)) {
# All test sets are new: prediction based on the similarity itself rather than K2
myFoldPrediction[, 1] <- simmat[idxTe, idxTr, drop = FALSE] %*% model
} else {
# Some of test set are new, and others are not
# (1) Test set are new
idxZeros <- idxTe[hasZeros]
myFoldPrediction[hasZeros, 1] <- simmat[idxZeros, idxTr, drop = FALSE] %*% model
# (2) Test set are not new
idxNotZeros <- setdiff(1:length(idxTe), hasZeros)
myFoldPrediction[idxNotZeros, 1] <- K2[idxNotZeros, , drop = FALSE] %*% model
}
} else { # no NII
K1 <- K[idxTr, idxTr, drop = FALSE]
yTr <- currY[idxTr]
K2 <- K[idxTe, idxTr, drop = FALSE]
numTrainSet <- length(idxTr)
model <- fastSolve(K1, yTr, numTrainSet, lambda)
myFoldPrediction <- matrix(0, nrow = length(idxTe), ncol = 1)
hasZeros <- which(apply(ytrMat[idxTe, ], 1, sum) == 0)
if (length(hasZeros) == 0) {
myFoldPrediction[, 1] <- K2 %*% model
} else if (length(hasZeros) == length(idxTe)) {
myFoldPrediction[, 1] <- simmat[idxTe, idxTr, drop = FALSE] %*% model
} else {
idxZeros <- idxTe[hasZeros]
myFoldPrediction[hasZeros, 1] <- simmat[idxZeros, idxTr, drop = FALSE] %*% model
idxNotZeros <- setdiff(1:length(idxTe), hasZeros)
myFoldPrediction[idxNotZeros, 1] <- K2[idxNotZeros, , drop = FALSE] %*% model
}
}
return(myFoldPrediction)
}