Commit

Added the whole project, more or less
Filip Cornell committed Jan 11, 2019
0 parents commit 158ec54
Showing 3 changed files with 334 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .gitignore
@@ -0,0 +1,12 @@
*.tex
*.bib
/Pics
*.log
*.bbl
*.synctex.gz
*.toc
*.blg
*.aux
*.log
.DS_Store
/WorkingDoc
322 changes: 322 additions & 0 deletions Algorithm-comparison.R
@@ -0,0 +1,322 @@
####################################
######## Initialize functions ######
####################################

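# Gap between the largest and second-largest arm means; a simple measure of how hard the bandit instance is.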
Get_Convergence = function(mus) {
sortedMaxRewards = sort(mus, decreasing = TRUE)

return(sortedMaxRewards[1] - sortedMaxRewards[2])
}
normalize = function(vec) {

print(sd(vec))
if (sd(vec) != 0) {
return((vec - mean(vec))/sd(vec))
} else {
return(vec)
}

}
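# Running mean of a sequence: element i is mean(samples[1:i]).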
Convergence_Means <- function (samples) {
means <- numeric()
for (i in 1:length(samples)) {
means[i] <- mean(samples[1:i])
}
return(means)
}

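# Exploration schedule for epsilon-greedy: mode 1 returns a fixed epsilon = 0.5,
# mode 2 returns min(1, c/sqrt(t)), mode 3 returns min(1, c/log(t)); mode 4 is a placeholder.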
ret_epsilon = function(mode = 1, t = NULL, c = 1) {
if (mode == 1) {
return(0.5)
} else if (mode == 2) {
return(min(1,c/sqrt(t)))
} else if (mode == 3) {
return(min(1,c/log(t)))
} else if (mode == 4) {
# Another suggestion?
}
}

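# Epsilon-greedy: with probability 1 - epsilon play the arm with the highest estimated mean,
# otherwise pick an arm uniformly at random. Rewards are Gaussian with means mus and variance
# sigma2; relies on the globals T (horizon) and epsilonMode (epsilon schedule).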
epsilonGreedy = function(k, mus, sigma2) {

# Initial epsilon, only used to seed the sampling probabilities before the first round.
epsilon = ret_epsilon(epsilonMode, t = 1)

# Set all mu estimates to 0.5 first.
muEstimates = rep(0.5, k)

# Set probability to pick each arm to epsilon/k,
probabilities = rep(epsilon/k, k)

# Inside the loop the best arm's probability is set to 1 - epsilon + epsilon/k, i.e. 1 - epsilon is added on top of the uniform share.
# At this point all estimates are equal, so no arm is favoured yet.
# Initialize vectors to store information
rewards = as.numeric()
armChosen = as.numeric()
t = 1
while (t <= T) {

# update epsilon
epsilon = ret_epsilon(epsilonMode, t = t)


# Sample an arm
armChosen[t] = sample(seq(1, k, 1), size = 1, prob = probabilities)

rewards[t] = rnorm(n=1,mean = mus[armChosen[t]], sd = sqrt(sigma2))


# Update muEstimates
muEstimates[armChosen[t]] = sum(rewards[armChosen == armChosen[t]])/sum(armChosen == armChosen[t])

# Update probabilities
probabilities = rep(epsilon/k, k)
# Set the best arm's probability to 1 - epsilon + e/k, i.e. add 1 - epsilon
probabilities[which.max(muEstimates)] = probabilities[which.max(muEstimates)] + 1 - epsilon

t = t + 1
}

data = data.frame(armChosen, rewards)
colnames(data) = c("ArmChosen", "Rewards")
return(data)
}

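# Softmax (Boltzmann) exploration: each arm is played with probability proportional to
# exp(eta_t * muEstimate). If c is supplied, eta_t = c/t, otherwise eta_t is fixed at 0.9.
# Relies on the global horizon T.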
SoftMax = function(k, mus, sigma2, c = NULL) {
muEstimates = rep(0.5, k)

# Initialize vectors to store information
rewards = as.numeric()
armChosen = as.numeric()
n_k_t = rep(0,k)

for (t in 1:T) {
if (!is.null(c)) {
eta_t = c/t
} else {
eta_t = 0.9
}


probs = exp(eta_t*muEstimates)


if(any(is.na(probs))) {
print(probs)
print(muEstimates)
muEstimates = normalize(muEstimates)
print(muEstimates)
probs = exp(eta_t*muEstimates)/sum(exp(eta_t*muEstimates))
print(probs)
}
armChosen[t] = sample(seq(1,k,1), size = 1, prob = probs)
rewards[t] = rnorm(n=1,mean = mus[armChosen[t]], sd = sqrt(sigma2))
n_k_t[armChosen[t]] = n_k_t[armChosen[t]] + 1
muEstimates[armChosen[t]] = sum(rewards[armChosen == armChosen[t]])/(n_k_t[armChosen[t]] + max(0, 1 - n_k_t[armChosen[t]]))
}
data = data.frame(armChosen, rewards)
colnames(data) = c("ArmChosen", "Rewards")
return(data)
}


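# UCB1: play the arm maximising muEstimate + sqrt(2*log(t)/n), where n is the number of times
# the arm has been pulled (the max(0, 1 - n) term avoids division by zero before the first pull).
# Relies on the global horizon T.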
UCB = function(k, mus, sigma2) {

muEstimates = rep(0.5, k)

# Initialize vectors to store information
rewards = as.numeric()
armChosen = as.numeric()
n_k_t = rep(0,k)

t = 1
while (t <= T) {

# Generate the ucbs. We need the indices for this, hence the for loop.
ucbs = as.numeric()
for (i in 1:k) {

ucbs[i] = muEstimates[i] + sqrt(2*log(t)/(n_k_t[i] + max(0, 1 - n_k_t[i])))
}

# Choose arm from the one with highest UCB
armChosen[t] = which.max(ucbs)
# update n_k_t
n_k_t[armChosen[t]] = n_k_t[armChosen[t]] + 1

# Sample reward
rewards[t] = rnorm(n=1, mean = mus[armChosen[t]], sd = sqrt(sigma2))


# Update muEstimates

muEstimates[armChosen[t]] = sum(rewards[armChosen == armChosen[t]])/(n_k_t[armChosen[t]] + max(0, 1 - n_k_t[armChosen[t]]))

t = t + 1

}

data = data.frame(armChosen, rewards)
colnames(data) = c("ArmChosen", "Rewards")
return(data)
}

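# Per-round regret: the gap between the best arm's mean and the mean of the arm chosen at each round.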
calc_regret = function(muStar, armChosen, mus) {

return(muStar - mus[armChosen])
}

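# Cumulative regret: element i is the sum of the per-round regrets up to round i.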
cumRegret = function(regrets) {
cumRegrets = as.numeric()
for (i in 1:length(regrets)) {
cumRegrets[i] = sum(regrets[1:i])
}
return(cumRegrets)
}

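# Plot, for all six strategies: the running average regret, the cumulative regret, and the number
# of times the best arm has been played so far. sigma is the reward standard deviation shown in
# the plot titles.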
analyze = function(k, sigma2, regretsEps1, regretsEps2, regretsEps3, regretsUCB, regretsSM1, regretsSM10, avgBestArmEps1, avgBestArmEps2, avgBestArmEps3, avgBestArmUCB, avgBestArmSM1, avgBestArmSM10) {
xSeq = seq(1, length(regretsEps1),1)
plot(x=xSeq, y = Convergence_Means(regretsEps1), type = "l", lwd = 2, xlab = "Iterations", ylab = "Average regret", col = "blue", ylim = c(0,max(Convergence_Means(regretsEps1), Convergence_Means(regretsUCB))), main = bquote(K == .(k) ~ "," ~ sigma == .(sigma)))
lines(Convergence_Means(regretsUCB), lwd = 2, col = "red")
lines(Convergence_Means(regretsEps2), lwd = 2, col = "green")
lines(Convergence_Means(regretsEps3), lwd = 2, col = "black")
lines(Convergence_Means(regretsSM1), lwd = 2, col = "purple")
lines(Convergence_Means(regretsSM10), lwd = 2, col = "yellow")
cumulativeRegretEpsilon1 = cumRegret(regretsEps1)
cumulativeRegretEpsilon2 = cumRegret(regretsEps2)
cumulativeRegretEpsilon3 = cumRegret(regretsEps3)
cumulativeRegretUCB = cumRegret(regretsUCB)
cumulativeRegretSM1 = cumRegret(regretsSM1)
cumulativeRegretSM10 = cumRegret(regretsSM10)

legend("top",
col = c("blue", "red", "green", "black", "purple", "yellow"),
lwd = c(2,2),
legend = c(expression(paste(epsilon,"-greedy, ", epsilon, " = 0.5")),
"UCB",
expression(paste(epsilon," = 1/",sqrt(t))),
expression(paste(epsilon, "= 1/log(t)")),
expression(paste("Softmax, ",eta," = 0.9")),
expression(paste("Softmax, ",eta," = 10/t"))
))
plot(x=xSeq, y = cumulativeRegretEpsilon1, type = "l", lwd = 2, xlab = "Iterations", ylab = "Cumulative regret", col = "blue", ylim = c(min(cumulativeRegretUCB, cumulativeRegretEpsilon1),max(cumulativeRegretUCB, cumulativeRegretEpsilon1)), main = bquote(K == .(k) ~ "," ~ sigma == .(sigma)))
lines(cumulativeRegretUCB, lwd = 2, col = "red")
lines(cumulativeRegretEpsilon2, lwd = 2, col = "green")
lines(cumulativeRegretEpsilon3, lwd = 2, col = "black")
lines(cumulativeRegretSM1, lwd = 2, col = "purple")
lines(cumulativeRegretSM10, lwd = 2, col = "yellow")
legend("top",
col = c("blue", "red", "green", "black", "purple", "yellow"),
lwd = c(2,2),
legend = c(expression(paste(epsilon,"-greedy, ", epsilon, " = 0.5")),
"UCB",
expression(paste(epsilon," = 1/",sqrt(t))),
expression(paste(epsilon, "= 1/log(t)")),
expression(paste("Softmax, ",eta," = 0.9")),
expression(paste("Softmax, ",eta," = 10/t"))
))

## Plot best arm

plot(x=xSeq, y = avgBestArmEps1, type = "l", lwd = 2, xlab = "Iterations", ylab = "Times best arm chosen", col = "blue", ylim = c(0,max(avgBestArmEps1, avgBestArmEps2, avgBestArmEps3, avgBestArmUCB)), main = bquote(K == .(k) ~ "," ~ sigma == .(sigma)))
lines(avgBestArmUCB, lwd = 2, col = "red")
lines(avgBestArmEps2, lwd = 2, col = "green")
lines(avgBestArmEps3, lwd = 2, col = "black")
lines(avgBestArmSM1, lwd = 2, col = "purple")
lines(avgBestArmSM10, lwd = 2, col = "yellow")
legend("top",
col = c("blue", "red", "green", "black", "purple", "yellow"),
lwd = c(2,2),
legend = c(expression(paste(epsilon,"-greedy, ", epsilon, " = 0.5")),
"UCB",
expression(paste(epsilon," = 1/",sqrt(t))),
expression(paste(epsilon, "= 1/log(t)")),
expression(paste("Softmax, ",eta," = 0.9")),
expression(paste("Softmax, ",eta," = 10/t"))
))
}

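# Cumulative count of how many times the arm with the largest true mean has been played up to each round.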
bestArmPlayed = function(armPlayed, mus) {
percArmPlayed = as.numeric()
for (i in 1:length(armPlayed)) {
tmp = armPlayed[1:i]
percArmPlayed[i] = sum(tmp == which.max(mus))
}
return(percArmPlayed)
}

############################################
######## Initialize and run ################
############################################

set.seed(123)
# Set parameters

K = c(5,10,25,50)

sigma2 = c(0.01^2,0.1^2,1^2, 10^2)

## Set algorithm specifications
# Set epsilon mode, i.e. how epsilon is chosen in epsilonGreedy: 1 for fixed epsilon = 0.5, 2 for epsilon = min(1, c/sqrt(t)), 3 for epsilon = min(1, c/log(t)).
epsilonMode = 1
c = 1
# Set the horizon: each run lasts exactly T iterations (note that T masks R's built-in shorthand for TRUE from here on).
T = 2000
nRuns = 1
# Generate results
for (k in K) {
for (sig in sigma2) {
for (i in 1:nRuns) {
# Draw this run's arm means, then get results for each algorithm in terms of ArmChosen and reward
mus = runif(k, min = 0, max = 1)
# Gap between the two best arm means for this instance (diagnostic only, not used as a stopping rule)
convergence = Get_Convergence(mus)
epsilonMode = 1
epsilonResults1 = epsilonGreedy(k, mus, sig)
epsilonMode = 2
epsilonResults2 = epsilonGreedy(k, mus, sig)
epsilonMode = 3
epsilonResults3 = epsilonGreedy(k, mus, sig)
UCBResults = UCB(k, mus, sig)
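# SoftMax is skipped for the largest variance, presumably because the exponentiated estimates
# can overflow there; the SoftMax results from the previous sigma are then reused below.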
if (sig != 10^2) {
softMaxC1 = SoftMax(k, mus, sig)
softMaxC10 = SoftMax(k, mus, sig, c = 10)
}
# Maintain a running average of the regret and best-arm counts over the nRuns runs.
if (i == 1) {
avgRegretEps1 = calc_regret(mus[which.max(mus)], epsilonResults1$ArmChosen, mus)
avgRegretEps2 = calc_regret(mus[which.max(mus)], epsilonResults2$ArmChosen, mus)
avgRegretEps3 = calc_regret(mus[which.max(mus)], epsilonResults3$ArmChosen, mus)
avgRegretUCB = calc_regret(mus[which.max(mus)], UCBResults$ArmChosen, mus)
avgRegretSM1 = calc_regret(mus[which.max(mus)], softMaxC1$ArmChosen, mus)
avgRegretSM10 = calc_regret(mus[which.max(mus)], softMaxC10$ArmChosen, mus)

avgBestArmEps1 = bestArmPlayed(epsilonResults1$ArmChosen, mus)
avgBestArmEps2 = bestArmPlayed(epsilonResults2$ArmChosen, mus)
avgBestArmEps3 = bestArmPlayed(epsilonResults3$ArmChosen, mus)
avgBestArmUCB = bestArmPlayed(UCBResults$ArmChosen, mus)
avgBestArmSM1 = bestArmPlayed(softMaxC1$ArmChosen, mus)
avgBestArmSM10 = bestArmPlayed(softMaxC10$ArmChosen, mus)
} else {
avgRegretEps1 = avgRegretEps1*(i-1)/i + calc_regret(mus[which.max(mus)], epsilonResults1$ArmChosen, mus)/i
avgRegretEps2 = avgRegretEps2*(i-1)/i + calc_regret(mus[which.max(mus)], epsilonResults2$ArmChosen, mus)/i
avgRegretEps3 = avgRegretEps3*(i-1)/i + calc_regret(mus[which.max(mus)], epsilonResults3$ArmChosen, mus)/i
avgRegretUCB = avgRegretUCB*(i-1)/i + calc_regret(mus[which.max(mus)], UCBResults$ArmChosen, mus)/i
avgRegretSM1 = avgRegretSM1*(i-1)/i + calc_regret(mus[which.max(mus)], softMaxC1$ArmChosen, mus)/i
avgRegretSM10 = avgRegretSM10*(i-1)/i + calc_regret(mus[which.max(mus)], softMaxC10$ArmChosen, mus)/i

avgBestArmEps1 = avgBestArmEps1*(i-1)/i + bestArmPlayed(epsilonResults1$ArmChosen,mus)/i
avgBestArmEps2 = avgBestArmEps2*(i-1)/i + bestArmPlayed(epsilonResults2$ArmChosen,mus)/i
avgBestArmEps3 = avgBestArmEps3*(i-1)/i + bestArmPlayed(epsilonResults3$ArmChosen,mus)/i
avgBestArmUCB = avgBestArmUCB*(i-1)/i + bestArmPlayed(UCBResults$ArmChosen,mus)/i
avgBestArmSM1 = avgBestArmSM1*(i-1)/i + bestArmPlayed(softMaxC1$ArmChosen,mus)/i
avgBestArmSM10 = avgBestArmSM10*(i-1)/i + bestArmPlayed(softMaxC10$ArmChosen,mus)/i
}

print(i)
}

analyze(k, sqrt(sig), avgRegretEps1, avgRegretEps2, avgRegretEps3, avgRegretUCB, avgRegretSM1, avgRegretSM10, avgBestArmEps1, avgBestArmEps2, avgBestArmEps3, avgBestArmUCB, avgBestArmSM1, avgBestArmSM10)

}
}
Binary file added TrabajoTHSP18.pdf
Binary file not shown.
