Skip to content

Commit

Permalink
Rprofvis is used for optimisation of code
Browse files Browse the repository at this point in the history
  • Loading branch information
Amalan-ConStat committed Jan 8, 2025
1 parent 5754184 commit 4ba7cf1
Show file tree
Hide file tree
Showing 40 changed files with 1,248 additions and 531 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@
^CRAN-SUBMISSION$
^doc$
^Meta$
^vignettes$
^vignettes
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ License: MIT + file LICENSE
URL:
https://github.com/Amalan-ConStat/NeEDS4BigData,https://amalan-constat.github.io/NeEDS4BigData/index.html
BugReports: https://github.com/Amalan-ConStat/NeEDS4BigData/issues
Depends:
R (>= 4.0.0)
Imports:
dplyr,
foreach,
Expand All @@ -21,6 +23,7 @@ Imports:
ggplot2,
ggridges,
matrixStats,
mvnfast,
psych,
Rdpack,
Rfast,
Expand All @@ -42,6 +45,4 @@ Language: en-GB
LazyData: true
LazyDataCompression: xz
RoxygenNote: 7.3.1
Depends:
R (>= 3.5.0)
Config/testthat/edition: 3
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ importFrom(Rdpack,reprompt)
importFrom(Rfast,rowprods)
importFrom(dplyr,group_by)
importFrom(dplyr,summarise)
importFrom(gam,lo)
importFrom(gam,s)
importFrom(ggh4x,facet_grid2)
importFrom(matrixStats,rowSums2)
importFrom(mvnfast,rmvn)
importFrom(psych,tr)
importFrom(rlang,.data)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,starts_with)
importFrom(utils,combn)
2 changes: 1 addition & 1 deletion NeEDS4BigData.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace,vignette
PackageRoxygenize: rd,collate,namespace

SpellingDictionary: en_GB
51 changes: 24 additions & 27 deletions R/ALoptimalGLMSub.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@
#'
#' r1<-300; r2<-rep(600,50); Original_Data<-Full_Data$Complete_Data;
#'
#' ALoptimalGLMSub(r1 = r1, r2 = r2,
#' Y = as.matrix(Original_Data[,colnames(Original_Data) %in% c("Y")]),
#' ALoptimalGLMSub(r1 = r1, r2 = r2,Y = as.matrix(Original_Data[,1]),
#' X = as.matrix(Original_Data[,-1]),N = nrow(Original_Data),
#' family = "linear")->Results
#'
Expand All @@ -77,8 +76,7 @@
#'
#' r1<-300; r2<-rep(600,50); Original_Data<-Full_Data$Complete_Data;
#'
#' ALoptimalGLMSub(r1 = r1, r2 = r2,
#' Y = as.matrix(Original_Data[,colnames(Original_Data) %in% c("Y")]),
#' ALoptimalGLMSub(r1 = r1, r2 = r2,Y = as.matrix(Original_Data[,1]),
#' X = as.matrix(Original_Data[,-1]),N = nrow(Original_Data),
#' family = "logistic")->Results
#'
Expand All @@ -90,8 +88,7 @@
#'
#' r1<-300; r2<-rep(600,50); Original_Data<-Full_Data$Complete_Data;
#'
#' ALoptimalGLMSub(r1 = r1, r2 = r2,
#' Y = as.matrix(Original_Data[,colnames(Original_Data) %in% c("Y")]),
#' ALoptimalGLMSub(r1 = r1, r2 = r2,Y = as.matrix(Original_Data[,1]),
#' X = as.matrix(Original_Data[,-1]),N = nrow(Original_Data),
#' family = "poisson")->Results
#'
Expand Down Expand Up @@ -129,16 +126,16 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){

if(family %in% c("linear")){
PI.prop <- rep(1/N, N)
idx.prop <- sample(1:N, r1, T)
idx.prop <- sample(1:N, size = r1, replace = TRUE)

x.prop <- X[idx.prop,]
y.prop <- Y[idx.prop,]

pinv.prop <- N
pinv.prop <- 1/PI.prop[idx.prop]

beta.prop<-solve(a=t(x.prop)%*%x.prop,b=t(x.prop)%*%y.prop)
Xbeta_Final<-as.vector(X%*%beta.prop)
beta.prop<-solve(a=crossprod(x.prop),b= crossprod(x.prop,y.prop))
Xbeta_Final<-X%*%beta.prop
Var.prop<-sum((Y-Xbeta_Final)^2)/N
Epsilon.prop<-Y-Xbeta_Final

Expand All @@ -161,15 +158,15 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
PI.mVc<-PI.mVc/sum(PI.mVc)

## mMSE
PI.mMSE<-sqrt(Epsilon.prop^2 * matrixStats::rowSums2((X %*% solve(t(X)%*%X))^2))
PI.mMSE<-sqrt(Epsilon.prop^2 * matrixStats::rowSums2((X %*% solve(crossprod(X)))^2))
PI.mMSE<-PI.mMSE/sum(PI.mMSE)

message("Step 1 of the algorithm completed.\n")

for (i in 1:length(r2))
{
## mVc
idx.mVc <- sample(1:N, r2[i]-r1, T, PI.mVc)
idx.mVc <- sample(1:N, size = r2[i]-r1, replace = TRUE, prob = PI.mVc)

x.mVc <- X[c(idx.mVc, idx.prop),]
y.mVc <- Y[c(idx.mVc, idx.prop)]
Expand All @@ -178,8 +175,8 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
pi4_r<-sqrt(r2[i]*pinv.mVc^(-1))
X_r4<-x.mVc/pi4_r
Y_r4<-y.mVc/pi4_r
beta.prop<-solve(a=t(X_r4)%*%X_r4,b=t(X_r4)%*%Y_r4)
Xbeta_Final<-as.vector(X%*%beta.prop)
beta.prop<-solve(a=crossprod(X_r4),b=crossprod(X_r4,Y_r4))
Xbeta_Final<-X%*%beta.prop
Var.prop<-sum((Y-Xbeta_Final)^2)/N

Sample.mVc[[i+1]]<-idx.mVc;
Expand All @@ -191,7 +188,7 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
}

## mMSE
idx.mMSE <- sample(1:N, r2[i]-r1, T, PI.mMSE)
idx.mMSE <- sample(1:N, size = r2[i]-r1, replace = TRUE, prob = PI.mMSE)

x.mMSE <- X[c(idx.mMSE, idx.prop),]
y.mMSE <- Y[c(idx.mMSE, idx.prop)]
Expand All @@ -200,8 +197,8 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
pi4_r<-sqrt(r2[i]*pinv.mMSE^(-1))
X_r4<-x.mMSE/pi4_r
Y_r4<-y.mMSE/pi4_r
beta.prop<-solve(a=t(X_r4)%*%X_r4,b=t(X_r4)%*%Y_r4)
Xbeta_Final<-as.vector(X%*%beta.prop)
beta.prop<-solve(a=crossprod(X_r4),b=crossprod(X_r4,Y_r4))
Xbeta_Final<-X%*%beta.prop
Var.prop<-sum((Y-Xbeta_Final)^2)/N

Sample.mMSE[[i+1]]<-idx.mMSE;
Expand Down Expand Up @@ -242,7 +239,7 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
n0 <- N - n1
PI.prop <- rep(1/(2*n0), N)
PI.prop[Y==1] <- 1/(2*n1)
idx.prop <- sample(1:N, r1, T, PI.prop)
idx.prop <- sample(1:N, size = r1, replace = TRUE, prob = PI.prop)

x.prop <- X[idx.prop,]
y.prop <- Y[idx.prop,]
Expand Down Expand Up @@ -274,7 +271,7 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
## mMSE
p.prop <- P.prop[idx.prop]
w.prop <- p.prop * (1 - p.prop)
W.prop <- solve(t(x.prop) %*% (x.prop * w.prop * pinv.prop))
W.prop <- solve(crossprod(x.prop,x.prop * w.prop * pinv.prop))

PI.mMSE<-sqrt((Y - P.prop)^2 * matrixStats::rowSums2((X%*%W.prop)^2))
PI.mMSE <- PI.mMSE/sum(PI.mMSE)
Expand All @@ -284,7 +281,7 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
for (i in 1:length(r2))
{
## mVc
idx.mVc <- sample(1:N, r2[i]-r1, T, PI.mVc)
idx.mVc <- sample(1:N, size = r2[i]-r1, replace = TRUE, prob = PI.mVc)

x.mVc <- X[c(idx.mVc, idx.prop), ]
y.mVc <- Y[c(idx.mVc, idx.prop)]
Expand All @@ -307,7 +304,7 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
Utility_mVc[i,-1]<-c(psych::tr(V_Final),det(solve(V_Final)))

## mMSE
idx.mMSE <- sample(1:N, r2[i]-r1, T, PI.mMSE)
idx.mMSE <- sample(1:N, size = r2[i]-r1, replace = TRUE, prob = PI.mMSE)

x.mMSE <- X[c(idx.mMSE, idx.prop),]
y.mMSE <- Y[c(idx.mMSE, idx.prop)]
Expand Down Expand Up @@ -356,14 +353,14 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
}
if(family %in% "poisson"){
PI.prop <- rep(1/N, N)
idx.prop <- sample(1:N, r1, T)
idx.prop <- sample(1:N, size = r1, replace = TRUE)

x.prop<-X[idx.prop,]
y.prop <- Y[idx.prop,]

pinv.prop <- N
pinv.prop <- 1/PI.prop[idx.prop]
fit.prop <- stats::glm(y.prop~x.prop-1,family = "poisson")
fit.prop <- stats::glm(y.prop~x.prop-1,family = "quasipoisson")

beta.prop <- fit.prop$coefficients
if(anyNA(beta.prop)){
Expand All @@ -389,7 +386,7 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){

## mMSE
w.prop <- P.prop[idx.prop]
W.prop <- solve(t(x.prop) %*% (x.prop * w.prop * pinv.prop))
W.prop <- solve(crossprod(x.prop,x.prop * w.prop * pinv.prop))

PI.mMSE<-sqrt((Y - P.prop)^2 * matrixStats::rowSums2((X%*%W.prop)^2))
PI.mMSE <- PI.mMSE/sum(PI.mMSE)
Expand All @@ -399,13 +396,13 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
for (i in 1:length(r2))
{
## mVc
idx.mVc <- sample(1:N, r2[i]-r1, T, PI.mVc)
idx.mVc <- sample(1:N, size = r2[i]-r1, replace = TRUE, prob = PI.mVc)

x.mVc <- X[c(idx.mVc, idx.prop),]
y.mVc <- Y[c(idx.mVc, idx.prop)]
pinv.mVc<-c(1 / PI.mVc[idx.mVc], pinv.prop)

fit.mVc <-stats::glm(y.mVc~x.mVc-1, family = "poisson",weights=pinv.mVc)
fit.mVc <-stats::glm(y.mVc~x.mVc-1, family = "quasipoisson",weights=pinv.mVc)
Sample.mVc[[i+1]]<-idx.mVc;
beta.mVc[i,-1] <- fit.mVc$coefficients

Expand All @@ -422,13 +419,13 @@ ALoptimalGLMSub <- function(r1,r2,Y,X,N,family){
Utility_mVc[i,-1]<-c(psych::tr(V_Final),det(solve(V_Final)))

## mMSE
idx.mMSE <- sample(1:N, r2[i]-r1, T, PI.mMSE)
idx.mMSE <- sample(1:N, size = r2[i]-r1, replace = TRUE, prob = PI.mMSE)

x.mMSE <- X[c(idx.mMSE, idx.prop),]
y.mMSE <- Y[c(idx.mMSE, idx.prop)]
pinv.mMSE<-c(1 / PI.mMSE[idx.mMSE], pinv.prop)

fit.mMSE <- stats::glm(y.mMSE~x.mMSE-1, family = "poisson",weights=pinv.mMSE)
fit.mMSE <- stats::glm(y.mMSE~x.mMSE-1, family = "quasipoisson",weights=pinv.mMSE)
Sample.mMSE[[i+1]]<-idx.mMSE;
beta.mMSE[i,-1] <-fit.mMSE$coefficients

Expand Down
22 changes: 11 additions & 11 deletions R/AoptimalGauLMSub.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@
#'
#' r1<-300; r2<-rep(100*c(6,12),50); Original_Data<-Full_Data$Complete_Data;
#'
#' AoptimalGauLMSub(r1 = r1, r2 = r2,
#' Y = as.matrix(Original_Data[,colnames(Original_Data) %in% c("Y")]),
#' AoptimalGauLMSub(r1 = r1, r2 = r2,Y = as.matrix(Original_Data[,1]),
#' X = as.matrix(Original_Data[,-1]),
#' N = nrow(Original_Data))->Results
#'
#' plot_Beta(Results)
#'
#' @importFrom Rdpack reprompt
#' @importFrom matrixStats rowSums2
#' @export
AoptimalGauLMSub <- function(r1,r2,Y,X,N){
if(any(is.na(c(r1,r2,N))) | any(is.nan(c(r1,r2,N)))){
Expand All @@ -70,7 +70,7 @@ AoptimalGauLMSub <- function(r1,r2,Y,X,N){
stop("r1 or N has a value greater than length one")
}

if(any(is.na(cbind(Y,X))) | any(is.nan(cbind(Y,X)))){
if(anyNA(Y) | anyNA(X) | any(is.nan(Y)) | any(is.nan(X)) ){
stop("NA or Infinite or NAN values in the Y or X")
}

Expand All @@ -83,16 +83,16 @@ AoptimalGauLMSub <- function(r1,r2,Y,X,N){
}

PI.prop <- rep(1/N, N)
idx.prop <- sample(1:N, r1, T)
idx.prop <- sample(1:N, size = r1, replace = TRUE)

x.prop <- X[idx.prop,]
y.prop <- Y[idx.prop,]

pinv.prop <- N
pinv.prop <- 1/PI.prop[idx.prop]

beta.prop<-solve(a=t(x.prop)%*%x.prop,b=t(x.prop)%*%y.prop)
Xbeta_Final<-as.vector(X%*%beta.prop)
beta.prop<-solve(a=crossprod(x.prop),b= crossprod(x.prop,y.prop))
Xbeta_Final<-X%*%beta.prop
Var.prop<-sum((Y-Xbeta_Final)^2)/N
Epsilon.prop<-Y-Xbeta_Final

Expand All @@ -101,7 +101,7 @@ AoptimalGauLMSub <- function(r1,r2,Y,X,N){
}

Second <- (Epsilon.prop^2 - Var.prop)^2/(4 * N^2 * Var.prop)
ML_Inv <- solve(t(X)%*%X)
ML_Inv <- solve(crossprod(X))

beta.mMSE<-matrix(nrow = length(r2),ncol = ncol(X)+1 )
Var_Epsilon<-matrix(nrow = length(r2),ncol = 2)
Expand All @@ -114,15 +114,15 @@ AoptimalGauLMSub <- function(r1,r2,Y,X,N){
colnames(Var_Epsilon)<-c("r2","A-Optimality")

## mMSE
PI.mMSE <- sqrt(Epsilon.prop^2 * rowSums((X %*% ML_Inv)^2) + Second)
PI.mMSE <- sqrt(Epsilon.prop^2 * matrixStats::rowSums2((X %*% ML_Inv)^2) + Second)
PI.mMSE <- PI.mMSE/sum(PI.mMSE)

message("Step 1 of the algorithm completed.\n")

for (i in 1:length(r2))
{
## mMSE
idx.mMSE <- sample(1:N, r2[i]-r1, T, PI.mMSE)
idx.mMSE <- sample(1:N, size = r2[i]-r1, replace = TRUE, prob = PI.mMSE)

x.mMSE <- X[c(idx.mMSE, idx.prop),]
y.mMSE <- Y[c(idx.mMSE, idx.prop)]
Expand All @@ -131,8 +131,8 @@ AoptimalGauLMSub <- function(r1,r2,Y,X,N){
pi4_r<-sqrt(r2[i]*pinv.mMSE^(-1))
X_r4<-x.mMSE/pi4_r
Y_r4<-y.mMSE/pi4_r
beta.prop<-solve(a=t(X_r4)%*%X_r4,b=t(X_r4)%*%Y_r4)
Xbeta_Final<-as.vector(X%*%beta.prop)
beta.prop<-solve(a=crossprod(X_r4),b=crossprod(X_r4,Y_r4))
Xbeta_Final<-X%*%beta.prop
Var.prop<-sum((Y-Xbeta_Final)^2)/N

Sample.mMSE[[i+1]]<-idx.mMSE;
Expand Down
Loading

0 comments on commit 4ba7cf1

Please sign in to comment.