Skip to content

Commit

Permalink
apply cal_feature_sel function to training data in trans_classifier
Browse files Browse the repository at this point in the history
  • Loading branch information
ChiLiubio committed Jan 30, 2025
1 parent 404bbf8 commit 001a15c
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 71 deletions.
83 changes: 45 additions & 38 deletions R/trans_classifier.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,36 @@ trans_classifier <- R6::R6Class(classname = "trans_classifier",
invisible(self)
},
#' @description
#' Partition the response and feature data into a training set and a testing set.
#' The response is placed in the first column (named \code{Response}) of both sets
#' and is also used as the stratification variable of the split.
#' When the object type is "Classification", the response is converted to a factor first.
#'
#' @param prop.train default 3/4; the proportion of the data assigned to the training set.
#' @return \code{data_train} and \code{data_test} stored in the object; the object itself is returned invisibly.
#' @examples
#' \dontrun{
#' t1$cal_split(prop.train = 3/4)
#' }
cal_split = function(prop.train = 3/4){
  message("Creating training set and testing set ...")

  # Response vector; classification tasks need a factor
  response_values <- self$data_response
  if(self$type == "Classification"){
    response_values <- factor(response_values)
  }
  # keep this local name: data.frame() may use the deparsed argument for column naming
  data_feature <- self$data_feature

  # One table with the response in the first column, feature names left unmodified
  full_table <- data.frame(Response = response_values, data_feature, check.names = FALSE)

  # Split stratified on the response, then extract both parts
  partition <- rsample::initial_split(full_table, prop = prop.train, strata = "Response")
  train_part <- rsample::training(partition)
  test_part <- rsample::testing(partition)
  message("Stratified sampling with the proportion of ", prop.train * 100, "% for the training set ...")

  # Store the results in the object
  self$data_train <- train_part
  self$data_test <- test_part
  message("Training and testing data are stored in object$data_train and object$data_test respectively ...")
  invisible(self)
},
#' @description
#' Perform feature selection.
#' See \href{https://topepo.github.io/caret/feature-selection-overview.html}{https://topepo.github.io/caret/feature-selection-overview.html} for more details.
#'
Expand All @@ -155,23 +185,24 @@ trans_classifier <- R6::R6Class(classname = "trans_classifier",
boruta.repetitions = 4,
...
){
# ClassNames
data_response <- self$data_response
data_input <- self$data_train
data_x <- data_input[, -1]
data_y <- data_input[, 1]

if(self$type == "Classification"){
data_response <- factor(data_response)
data_y %<>% factor
}
DataX <- self$data_feature

###################### ----------------
###################### BORUTA
boruta.list <- list()
boura.fs <- function(i){
boruta.res <- Boruta::Boruta(x = DataX, y = data_response,
boruta.res <- Boruta::Boruta(x = data_x, y = data_y,
maxRuns = boruta.maxRuns, pValue = boruta.pValue, ...)
boruta.stats <- data.frame(Boruta::attStats(boruta.res))
boruta.list[[i]] <- rownames(boruta.stats[boruta.stats$decision =='Confirmed', ])
}
message("Running Feature Selection (Boruta) ...")
message("Running Feature Selection (Boruta) based on the training data ...")
boruta.list <- parallel::mclapply(1:boruta.repetitions, boura.fs)

boruta.final <- as.data.frame(table(unlist(boruta.list)))
Expand All @@ -181,38 +212,14 @@ trans_classifier <- R6::R6Class(classname = "trans_classifier",
message("End of Feature Selection - Total of selected features = ", boruta.n.features)
###################### BORUTA end
###################### ----------------
self$data_feature <- DataX[, boruta.list.top]
message("The selected features is reassigned to object$data_feature ...")
invisible(self)
},
#' @description
#' Split data for training and testing.
#'
#' @param prop.train default 3/4; the ratio of the data used for the training.
#' @return \code{data_train} and \code{data_test} in the object.
#' @examples
#' \dontrun{
#' t1$cal_split(prop.train = 3/4)
#' }
cal_split = function(prop.train = 3/4){
###################### DATA SPLIT: TRAIN and TEST
message("Creating training set and testing set ...")
data_response <- self$data_response
if(self$type == "Classification"){
data_response %<>% factor
}
data_feature <- self$data_feature

data_all <- data.frame(Response = data_response, data_feature, check.names = FALSE)
SplitData <- rsample::initial_split(data_all, prop = prop.train, strata = "Response")
train_data <- rsample::training(SplitData)
test_data <- rsample::testing(SplitData)
message("Stratified sampling with the proportion of ", prop.train*100 ,"% for the training set ...")

###################### DATA SPLIT end
self$data_train <- train_data
self$data_test <- test_data
message("Training and testing data are stored in object$data_train and object$data_test respectively ...")
data_output <- data_input[, c(colnames(data_input)[1], boruta.list.top)]
self$data_train <- data_output

data_input <- self$data_test
data_output <- data_input[, c(colnames(data_input)[1], boruta.list.top)]
self$data_test <- data_output

message("Selected features are reassigned to object$data_train and object$data_test ...")
invisible(self)
},
#' @description
Expand Down
66 changes: 33 additions & 33 deletions man/trans_classifier.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 001a15c

Please sign in to comment.