.Rhistory

K = num_latent_factor,
sumabsv = local_environment$hyperparameter_shared_between_folds)$v
# plot(estimate_leading_pc[,1])
# plot(estimate_leading_pc[,2])
estimate_leading_pc <- estimate_leading_pc/apply(X = estimate_leading_pc, MARGIN = 2, FUN = norm, type = '2')
# estimate_leading_pc <- estimate_leading_pc/norm(estimate_leading_pc, type = '2')
}else if(pca_method == "dense_pca"){
estimate_leading_pc <- irlba::irlba(sample_centered, nv = num_latent_factor)$v
# plot(estimate_leading_pc[,1])
# plot(estimate_leading_pc[,2])
}else if(pca_method == "hard"){
estimate_leading_pc <- dense_leading_pc <- irlba::irlba(sample_centered, nv = num_latent_factor)$v
pc_threshold <-nuisance_sample_size^(-1/3)
estimate_leading_pc[abs(estimate_leading_pc) < pc_threshold] <- 0
for(latent_index in 1:num_latent_factor){
estimate_vector <- estimate_leading_pc[, latent_index]
if(norm(estimate_vector, type = '2') == 0){
estimate_leading_pc[, latent_index] <- dense_leading_pc[, latent_index]
}else{
estimate_leading_pc[, latent_index] <- estimate_vector/norm(estimate_vector, type = '2')
}
}
# if(norm(estimate_leading_pc, type = '2') == 0){
#   estimate_leading_pc <- dense_leading_pc
# }else{
#   estimate_leading_pc <- estimate_leading_pc/norm(estimate_leading_pc, type = '2')
# }
}
colnames(estimate_leading_pc) <- paste0('pc', 1:ncol(estimate_leading_pc))
rownames(estimate_leading_pc) <- colnames(nuisance_sample_1)
##estimate eigenvalue
data_times_pc <- sample_centered %*% estimate_leading_pc
estimate_eigenvalue <- (apply(X = data_times_pc, MARGIN = 2,
FUN = norm, type = '2'))^2/nuisance_sample_size
# estimate_eigenvalue <- norm(data_times_pc, type = '2')^2/nuisance_sample_size
##estimate noise variance
estimate_covariance_matrix_diagonal<- apply(sample_centered, 2, function(x) sum(x^2))/nuisance_sample_size
estimate_covariance_matrix_trace <- sum(estimate_covariance_matrix_diagonal)
estimate_noise_variance <- (estimate_covariance_matrix_trace - sum(estimate_eigenvalue))/(feature_number - num_latent_factor)
# ##estimate the transformed eigenvector
# sigma_times_pc <- t(sample_centered) %*% data_times_pc / nuisance_sample_size
#
return(list(estimate_leading_pc = estimate_leading_pc,
estimate_mean_1 = estimate_mean_1,
estimate_mean_2 = estimate_mean_2,
estimate_eigenvalue = estimate_eigenvalue,
estimate_noise_variance = estimate_noise_variance))
# estimate_eigenvalue = 1.1,
# estimate_noise_variance = 0.1,
# sigma_times_pc = sigma_times_pc))
}
#' Calculate the test statistics on the left-out samples. Called in simple_pc_testing().
#'
#' @param cross_fitting_sample_1 Group 1 sample. Each row is a subject and each column corresponds to a feature.
#' @param cross_fitting_sample_2 Group 2 sample. Each row is a subject and each column corresponds to a feature.
#' @param nuisance_collection A collection of nuisance quantities estimated using "nuisance" samples. It is the output of estimate_nuisance_pc().
#'
#' @export
#'
#'
evaluate_pca_plug_in <- function(cross_fitting_sample_1,
cross_fitting_sample_2 = NULL,
nuisance_collection){
cross_fitting_sample_1 <<- cross_fitting_sample_1
cross_fitting_sample_2 <<- cross_fitting_sample_2
nuisance_collection <<- nuisance_collection
cross_fitting_sample_1 <- as.data.frame(cross_fitting_sample_1)
estimate_leading_pc <- nuisance_collection$estimate_leading_pc
#####inner product statistics
inner_product_1 <- as.matrix(cross_fitting_sample_1) %*% estimate_leading_pc
influence_each_subject_1 <- inner_product_1
colnames(influence_each_subject_1) <- paste0('pc', 1:ncol(estimate_leading_pc))
if(!is.null(cross_fitting_sample_2)){
cross_fitting_sample_2 <- as.data.frame(cross_fitting_sample_2)
#####inner product statistics
inner_product_2 <- as.matrix(cross_fitting_sample_2) %*% estimate_leading_pc
influence_each_subject_2 <- inner_product_2
colnames(influence_each_subject_2) <- paste0('pc', 1:ncol(estimate_leading_pc))
}else{
influence_each_subject_2 <- NULL
}
return(list(influence_each_subject_1 = influence_each_subject_1,
influence_each_subject_2 = influence_each_subject_2))
}
#'                                Sigma = simulation_covariance))
#'  sample_2 <- data.frame(MASS::mvrnorm(sample_size_2,
#'                                mu = true_mean_2,
#'                                Sigma = simulation_covariance))
#'  result <- simple_pc_testing(sample_1, sample_2)
#'  result$test_statistics
#'  ##these are test statistics. Each one of them corresponds to one PC.
#'  summarize_pc_name(result, latent_fator_index = 1) #shows which features contribute to PC1
#'  extract_pc(result) # extract the estimated leading PCs.
#'
simple_pc_testing <- function(sample_1,
sample_2 = NULL,
pca_method = "sparse_pca",
mean_method = "naive",
num_latent_factor = 1,
n_folds = 5){
###split the samples into n_folds splits
set.seed(1)
sample_1 <- as.data.frame(sample_1)
split_data <- vector("list", n_folds)
sample_1_split_index <- index_spliter(1:nrow(sample_1),
n_folds = n_folds)
if(!is.null(sample_2)){
sample_2 <- as.data.frame(sample_2)
sample_2_split_index <- index_spliter(1:nrow(sample_2),
n_folds = n_folds)
}
local_environment <- new.env(parent = emptyenv())
local_environment$hyperparameter_shared_between_folds <- -1
# hyperparameter_shared_between_folds <<- -1
###process each split
for(split_index in 1:n_folds){
print(paste0("processiong fold ", split_index))
sample_1_cross <- sample_1[sample_1_split_index[[split_index]], ]
sample_1_nuisance <- sample_1[-sample_1_split_index[[split_index]], ]
split_data[[split_index]]$sample_1_cross_index <- sample_1_split_index[[split_index]]
if(!is.null(sample_2)){
split_data[[split_index]]$sample_2_cross_index <- sample_2_split_index[[split_index]]
sample_2_cross <- sample_2[sample_2_split_index[[split_index]], ]
sample_2_nuisance <- sample_2[-sample_2_split_index[[split_index]], ]
}else{
sample_2_cross <- sample_2_nuisance <- NULL
}
split_data[[split_index]]$nuisance_collection <- estimate_nuisance_pc(sample_1_nuisance,
sample_2_nuisance,
pca_method = pca_method,
mean_method = mean_method,
num_latent_factor = num_latent_factor,
local_environment = local_environment)
split_data[[split_index]]$influence_function_value <- evaluate_pca_plug_in(sample_1_cross,
sample_2_cross,
split_data[[split_index]]$nuisance_collection)
##############
sample_1_individual <- split_data[[split_index]]$influence_function_value$influence_each_subject_1
split_data[[split_index]]$variance_sample_1 <- apply(X = sample_1_individual, MARGIN = 2, FUN = stats::var)
if(!is.null(sample_2)){
sample_2_individual <- split_data[[split_index]]$influence_function_value$influence_each_subject_2
split_data[[split_index]]$variance_sample_2 <- apply(X = sample_2_individual, MARGIN = 2, FUN = stats::var)
split_data[[split_index]]$test_statistic <- apply(X = sample_1_individual, MARGIN = 2, FUN = mean) - apply(X = sample_2_individual, MARGIN = 2, FUN = mean)
}else{
split_data[[split_index]]$test_statistic <- apply(X = sample_1_individual, MARGIN = 2, FUN = mean)
}
}
####now combine the folds
combine_1_factor_over_splits <- function(latent_factor_index){
test_statistics_before_studentization <- 0
variance_sample_1 <- variance_sample_2 <- 0
for(split_index in 1:n_folds){
inner_product_projection_direction <- crossprod(split_data[[1]]$nuisance_collection$estimate_leading_pc[,latent_factor_index],
split_data[[split_index]]$nuisance_collection$estimate_leading_pc[,latent_factor_index])
same_sign <- sign(inner_product_projection_direction)
print(inner_product_projection_direction)
if(same_sign == 0){
print('the projection directions are orthogonal')
same_sign <- 1
}
test_statistics_before_studentization <- test_statistics_before_studentization + same_sign * split_data[[split_index]]$test_statistic[latent_factor_index]
variance_sample_1 <- variance_sample_1 + split_data[[split_index]]$variance_sample_1[latent_factor_index]
if(!is.null(sample_2)){
variance_sample_2 <- variance_sample_2 + split_data[[split_index]]$variance_sample_2[latent_factor_index]
}
}
test_statistics_before_studentization <- test_statistics_before_studentization/n_folds
variance_sample_1 <- variance_sample_1/n_folds
if(!is.null(sample_2)) variance_sample_2 <- variance_sample_2/n_folds
if(!is.null(sample_2)){
standard_error <- sqrt((nrow(sample_1))^(-1) * variance_sample_1 +
(nrow(sample_2))^(-1) *variance_sample_2)
}else{
standard_error <- sqrt((nrow(sample_1))^(-1) * variance_sample_1)
}
test_statistics <- test_statistics_before_studentization/standard_error
tuple_one_latent_factor <- c(test_statistics_before_studentization, standard_error, test_statistics)
return(tuple_one_latent_factor)
}
test_statistics_before_studentization <- standard_error <- test_statistics <- rep(NULL,num_latent_factor)
for(latent_factor_index in 1:num_latent_factor){
tuple_one_latent_factor <- combine_1_factor_over_splits(latent_factor_index)
test_statistics_before_studentization[latent_factor_index] <- tuple_one_latent_factor[1]
standard_error[latent_factor_index] <- tuple_one_latent_factor[2]
test_statistics[latent_factor_index] <- tuple_one_latent_factor[3]
}
return(list(test_statistics = test_statistics,
standard_error = standard_error,
test_statistics_before_studentization = test_statistics_before_studentization,
split_data = split_data))
}
#' Simple plug-in test for two-sample mean comparison.
#'
#' @param sample_1 Group 1 sample. Each row is a subject and each column corresponds to a feature.
#' @param sample_2 Group 2 sample. Each row is a subject and each column corresponds to a feature.
#' @param pca_method Methods used to estimate principle component The default is "sparse_pca", using sparse PCA from package PMA. Other choices are "dense_pca"---the regular PCA; and "hard"--- hard-thresholding PCA, which also induces sparsity.
#' @param mean_method Methods used to estimate the mean vector. Default is sample mean "naive". There is also a hard-thresholding sparse estiamtor "hard".
#' @param num_latent_factor Number of principle to be estimated/tested. Default is 1.
#' @param n_folds Number of splits when performing cross-fitting. The default is 5, if computational time allows, you can try to set it to 10.
#' @export
#' @examples
sample_size_1 <- sample_size_2 <- 300
true_mean_1 <- matrix(c(rep(1, 10), rep(0, 90)), ncol = 1)
true_mean_2 <- matrix(c(rep(1.5, 10), rep(0, 90)), ncol = 1)
pc1 <- c(rep(1, 10), rep(0, 90))
pc1 <- pc1/norm(pc1, type = '2')
simulation_covariance <- 10 * pc1 %*% t(pc1)
simulation_covariance <- simulation_covariance + diag(1, 100)
sample_1 <- data.frame(MASS::mvrnorm(sample_size_1,
mu = true_mean_1,
Sigma = simulation_covariance))
sample_2 <- data.frame(MASS::mvrnorm(sample_size_2,
mu = true_mean_2,
Sigma = simulation_covariance))
result <- simple_pc_testing(sample_1, sample_2)
result$test_statistics
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1) #shows which features contribute to PC1
extract_pc(result) # extract the estimated leading PCs.
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1) #shows which features contribute to PC1
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1, method = 'intersection') #shows which features contribute to PC1
extract_pc(result) # extract the estimated leading PCs.
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1, method = 'intersection') #shows which features contribute to PC1
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1) #shows which features contribute to PC1
extract_pc(result) # extract the estimated leading PCs.
#'
#' @param nuisance_sample_1 Group 1 sample. Each row is a subject and each column corresponds to a feature.
#' @param nuisance_sample_2 Group 2 sample. Each row is a subject and each column corresponds to a feature.
#' @param pca_method Methods used to estimate principle component The default is "sparse_pca", using sparse PCA from package PMA. Other choices are "dense_pca"---the regular PCA; and "hard"--- hard-thresholding PCA, which also induces sparsity.
#' @param mean_method Methods used to estimate the mean vector. Default is sample mean "naive". There is also a hard-thresholding sparse estiamtor "hard".
#' @param num_latent_factor Number of principle to be estimated/tested. Default is 1.
#' @param local_environment A environment for hyperparameters shared between folds.
#'
#' @export
#'
estimate_nuisance_pc <- function(nuisance_sample_1,
nuisance_sample_2 = NULL,
pca_method = "sparse_pca",
mean_method = "naive",
num_latent_factor = 1,
local_environment = NA){
#
# nuisance_sample_1 <<- nuisance_sample_1
# nuisance_sample_2 <<- nuisance_sample_2
feature_number <- ncol(nuisance_sample_1)
nuisance_sample_1 <- as.data.frame(nuisance_sample_1)
nuisance_sample_size_1 <- nrow(nuisance_sample_1)
###estimate mean vector
estimate_mean_1 <- colMeans(nuisance_sample_1)
# estimate_mean_1[abs(estimate_mean_1) < mean_threshold_1] <- 0
if(mean_method == 'hard'){
variance_feature_wise_centered <- apply(nuisance_sample_1, 2, stats::var)
mean_threshold <- 2*sqrt(variance_feature_wise_centered)*sqrt(2*log(2*feature_number*nuisance_sample_size_1)/nuisance_sample_size_1)
estimate_mean_1[abs(estimate_mean_1) < mean_threshold] <- 0
}
centered_sample_1 <- sweep(nuisance_sample_1, 2, estimate_mean_1)
#######
if(!is.null(nuisance_sample_2)){
nuisance_sample_2 <- as.data.frame(nuisance_sample_2)
nuisance_sample_size_2 <- nrow(nuisance_sample_2)
###estimate mean vector
estimate_mean_2 <- colMeans(nuisance_sample_2)
# estimate_mean_2[abs(estimate_mean_2) < mean_threshold_2] <- 0
if(mean_method == 'hard'){
variance_feature_wise_centered <- apply(nuisance_sample_2, 2, stats::var)
mean_threshold <- 2*sqrt(variance_feature_wise_centered)*sqrt(2*log(2*feature_number*nuisance_sample_size_2)/nuisance_sample_size_2)
estimate_mean_2[abs(estimate_mean_2) < mean_threshold] <- 0
}
centered_sample_2 <- sweep(nuisance_sample_2, 2, estimate_mean_2)
sample_centered <- as.matrix(rbind(centered_sample_1, centered_sample_2))
}else{
estimate_mean_2 <- NULL
sample_centered <- as.matrix(centered_sample_1)
}
########
nuisance_sample_size <- nrow(sample_centered)
if(pca_method == "sparse_pca"){
###estimate sparse principle component
if(local_environment$hyperparameter_shared_between_folds < 0){
# cv_result <- SPC.cv(sample_centered,
#                     sumabsvs = seq(1, sqrt(floor(feature_number)), length = 10))
# hyperparameter_shared_between_folds <<- cv_result$bestsumabsv
cv_result <- PMA::SPC.cv(sample_centered)
# hyperparameter_shared_between_folds <<- max(1, 0.7*cv_result$bestsumabsv) ##I USED THIS FOR LUPAS REAL-DATA ANALYSIS
assign("hyperparameter_shared_between_folds", cv_result$bestsumabsv, envir = local_environment)
assign("hyperparameter_shared_between_folds", 7, envir = local_environment)
}
print(local_environment$hyperparameter_shared_between_folds)
estimate_leading_pc <- PMA::SPC(sample_centered,
K = num_latent_factor,
sumabsv = local_environment$hyperparameter_shared_between_folds)$v
# plot(estimate_leading_pc[,1])
# plot(estimate_leading_pc[,2])
estimate_leading_pc <- estimate_leading_pc/apply(X = estimate_leading_pc, MARGIN = 2, FUN = norm, type = '2')
# estimate_leading_pc <- estimate_leading_pc/norm(estimate_leading_pc, type = '2')
}else if(pca_method == "dense_pca"){
estimate_leading_pc <- irlba::irlba(sample_centered, nv = num_latent_factor)$v
# plot(estimate_leading_pc[,1])
# plot(estimate_leading_pc[,2])
}else if(pca_method == "hard"){
estimate_leading_pc <- dense_leading_pc <- irlba::irlba(sample_centered, nv = num_latent_factor)$v
pc_threshold <-nuisance_sample_size^(-1/3)
estimate_leading_pc[abs(estimate_leading_pc) < pc_threshold] <- 0
for(latent_index in 1:num_latent_factor){
estimate_vector <- estimate_leading_pc[, latent_index]
if(norm(estimate_vector, type = '2') == 0){
estimate_leading_pc[, latent_index] <- dense_leading_pc[, latent_index]
}else{
estimate_leading_pc[, latent_index] <- estimate_vector/norm(estimate_vector, type = '2')
}
}
# if(norm(estimate_leading_pc, type = '2') == 0){
#   estimate_leading_pc <- dense_leading_pc
# }else{
#   estimate_leading_pc <- estimate_leading_pc/norm(estimate_leading_pc, type = '2')
# }
}
colnames(estimate_leading_pc) <- paste0('pc', 1:ncol(estimate_leading_pc))
rownames(estimate_leading_pc) <- colnames(nuisance_sample_1)
##estimate eigenvalue
data_times_pc <- sample_centered %*% estimate_leading_pc
estimate_eigenvalue <- (apply(X = data_times_pc, MARGIN = 2,
FUN = norm, type = '2'))^2/nuisance_sample_size
# estimate_eigenvalue <- norm(data_times_pc, type = '2')^2/nuisance_sample_size
##estimate noise variance
estimate_covariance_matrix_diagonal<- apply(sample_centered, 2, function(x) sum(x^2))/nuisance_sample_size
estimate_covariance_matrix_trace <- sum(estimate_covariance_matrix_diagonal)
estimate_noise_variance <- (estimate_covariance_matrix_trace - sum(estimate_eigenvalue))/(feature_number - num_latent_factor)
# ##estimate the transformed eigenvector
# sigma_times_pc <- t(sample_centered) %*% data_times_pc / nuisance_sample_size
#
return(list(estimate_leading_pc = estimate_leading_pc,
estimate_mean_1 = estimate_mean_1,
estimate_mean_2 = estimate_mean_2,
estimate_eigenvalue = estimate_eigenvalue,
estimate_noise_variance = estimate_noise_variance))
# estimate_eigenvalue = 1.1,
# estimate_noise_variance = 0.1,
# sigma_times_pc = sigma_times_pc))
}
#' Simple plug-in test for two-sample mean comparison.
#'
#' @param sample_1 Group 1 sample. Each row is a subject and each column corresponds to a feature.
#' @param sample_2 Group 2 sample. Each row is a subject and each column corresponds to a feature.
#' @param pca_method Methods used to estimate principle component The default is "sparse_pca", using sparse PCA from package PMA. Other choices are "dense_pca"---the regular PCA; and "hard"--- hard-thresholding PCA, which also induces sparsity.
#' @param mean_method Methods used to estimate the mean vector. Default is sample mean "naive". There is also a hard-thresholding sparse estiamtor "hard".
#' @param num_latent_factor Number of principle to be estimated/tested. Default is 1.
#' @param n_folds Number of splits when performing cross-fitting. The default is 5, if computational time allows, you can try to set it to 10.
#' @export
#' @examples
sample_size_1 <- sample_size_2 <- 300
true_mean_1 <- matrix(c(rep(1, 10), rep(0, 90)), ncol = 1)
true_mean_2 <- matrix(c(rep(1.5, 10), rep(0, 90)), ncol = 1)
pc1 <- c(rep(1, 10), rep(0, 90))
pc1 <- pc1/norm(pc1, type = '2')
simulation_covariance <- 10 * pc1 %*% t(pc1)
simulation_covariance <- simulation_covariance + diag(1, 100)
sample_1 <- data.frame(MASS::mvrnorm(sample_size_1,
mu = true_mean_1,
Sigma = simulation_covariance))
sample_2 <- data.frame(MASS::mvrnorm(sample_size_2,
mu = true_mean_2,
Sigma = simulation_covariance))
result <- simple_pc_testing(sample_1, sample_2)
result$test_statistics
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1) #shows which features contribute to PC1
extract_pc(result) # extract the estimated leading PCs.
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
#' Debiased one-step test for two-sample mean comparison. A small p-value tells us not only there is difference in the mean vectors, but can also indicates which principle component the difference aligns with.
#'
#' @param sample_1 Group 1 sample. Each row is a subject and each column corresponds to a feature.
#' @param sample_2 Group 2 sample. Each row is a subject and each column corresponds to a feature.
#' @param pca_method Methods used to estimate principle component The default is "sparse_pca", using sparse PCA from package PMA. Other choices are "dense_pca"---the regular PCA; and "hard"--- hard-thresholding PCA, which also induces sparsity.
#' @param mean_method Methods used to estimate the mean vector. Default is sample mean "naive". There is also a hard-thresholding sparse estiamtor "hard".
#' @param num_latent_factor Number of principle to be estimated/tested. Default is 1.
#' @param n_folds Number of splits when performing cross-fitting. The default is 5, if computational time allows, you can try to set it to 10.
#' @export
#' @examples
sample_size_1 <- sample_size_2 <- 300
true_mean_1 <- matrix(c(rep(1, 10), rep(0, 90)), ncol = 1)
true_mean_2 <- matrix(c(rep(1.5, 10), rep(0, 90)), ncol = 1)
pc1 <- c(rep(1, 10), rep(0, 90))
pc1 <- pc1/norm(pc1, type = '2')
simulation_covariance <- 10 * pc1 %*% t(pc1)
simulation_covariance <- simulation_covariance + diag(1, 100)
sample_1 <- data.frame(MASS::mvrnorm(sample_size_1,
mu = true_mean_1,
Sigma = simulation_covariance))
sample_2 <- data.frame(MASS::mvrnorm(sample_size_2,
mu = true_mean_2,
Sigma = simulation_covariance))
result <- debiased_pc_testing(sample_1, sample_2)
result$test_statistics
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1) #shows which features contribute to PC1
extract_pc(result) # extract the estimated leading PCs.
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
install("HMC")
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
library(devtools)
library(roxygen2)
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
message('hi')
warnings('hi')
warnings('hi')
?warnings
message('the projection directions are orthogonal')
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
sample_size_1 <- sample_size_2 <- 300
true_mean_1 <- matrix(c(rep(1, 10), rep(0, 90)), ncol = 1)
true_mean_2 <- matrix(c(rep(1.5, 10), rep(0, 90)), ncol = 1)
pc1 <- c(rep(1, 10), rep(0, 90))
pc1 <- pc1/norm(pc1, type = '2')
simulation_covariance <- 10 * pc1 %*% t(pc1)
simulation_covariance <- simulation_covariance + diag(1, 100)
sample_1 <- data.frame(MASS::mvrnorm(sample_size_1,
mu = true_mean_1,
Sigma = simulation_covariance))
sample_2 <- data.frame(MASS::mvrnorm(sample_size_2,
mu = true_mean_2,
Sigma = simulation_covariance))
result <- simple_pc_testing(sample_1, sample_2)
result$test_statistics
##these are test statistics. Each one of them corresponds to one PC.
summarize_pc_name(result, latent_fator_index = 1) #shows which features contribute to PC1
extract_pc(result) # extract the estimated leading PCs.
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
library(HMC)
?index_spliter
?anchored_lasso_testing
library(sieve)
library(sieve)
library(Sieve)
?sieve.sgd.predict
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
library(devtools)
library(roxygen2)
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
library(HMC)
?index_spliter
?anchored_lasso_testing
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
library(devtools)
library(roxygen2)
# setwd("/Users/tianyuzhang/Documents/")
# create("HMC")
setwd("/Users/tianyuzhang/Documents/HMC/HMC_package/")
document()
setwd("..")
build("HMC_package")
install("HMC_package")