Skip to content

Commit

Permalink
export median data for the challenge
Browse files Browse the repository at this point in the history
  • Loading branch information
gabora committed Jun 24, 2019
1 parent b182e73 commit facaa16
Showing 1 changed file with 113 additions and 0 deletions.
113 changes: 113 additions & 0 deletions export_dream_median_phospho.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@


# 3 parts:
# 1. export data for public use
# - remove the conditions used for the scoring
# 2. export prediction conditions:
# - export the conditions and reporters masked with NAs for aim2
# 3. export the validation data.


# Load the per-reporter median values in long format (one `reporter` / `value`
# pair per row). Functions are namespace-qualified, like the readxl call
# below, so this step does not depend on which packages the caller attached.
median_interpolated_data <- readr::read_rds(
  "./data/median_data/interpolated_median_all_reporters_mine.rds"
)

# Reshape to wide format: one column per reporter.
median_data <- tidyr::spread(median_interpolated_data, reporter, value)

# cell-line in aims: the sheet is filtered on its `cell_line` column and read
# through its AIM_1_1, AIM_1_2_1, AIM_1_2_2 and AIM2 columns further down.
cell_line_sheet <- readxl::read_excel(
  "./data/cell_line_distribution.xlsx",
  sheet = 1,
  range = "A1:I69"
)


# 1. export data for public use --------------------------------------------------
# export median phosphorylation data after removing the conditions that are to be predicted
# Export data for the public: all data except the test conditions
# we remove imTOR from all cell-lines.
# then depending which cell-line is used in which AIM, we remove a subset of the data.


# Build the public data set: all cell lines without the imTOR condition, plus
# a per-cell-line mask selected by the aim in which `cell_line_sheet` flags
# that cell line as "test".
public_data <- median_data %>%
  filter(treatment != "imTOR") %>% # remove imTOR condition from all
  group_by(cell_line) %>%
  nest() %>% # the list-column is named "data" by default
  ungroup() %>%
  mutate(cleaned_data = map2(data, cell_line, function(data, cell_line) {
    # Copy under another name: inside filter() `cell_line` refers to the
    # sheet's column, not to this function argument.
    current_cell_line <- cell_line

    purpose <- cell_line_sheet %>% filter(cell_line == current_cell_line)

    # TRUE only if the first matching sheet row holds exactly "test" for the
    # given aim. Empty (NA) cells and cell lines absent from the sheet count
    # as "not test", without overwriting any cell of the sheet.
    is_test <- function(aim) {
      stopifnot(aim %in% colnames(purpose))
      isTRUE(purpose[[aim]][1] == "test")
    }

    if (is_test("AIM_1_1")) {
      # remove psites to be predicted
      data[, c("p.ERK", "p.Akt.Ser473.", "p.S6", "p.HER2", "p.PLCg2")] <-
        NA_real_
    } else if (is_test("AIM_1_2_1")) {
      # remove all data in condition to be predicted
      data <- data %>% filter(treatment != "iPKC")
    } else if (is_test("AIM_1_2_2")) {
      # nothing to do here, imTOR condition already removed.
    } else if (is_test("AIM2")) {
      # providing only the full condition for the test
      data <- data %>% filter(treatment == "full")
    }

    data
  })) %>%
  # Drop the raw nested column explicitly: tidyr >= 1.0 no longer drops it in
  # unnest(), and a list-column cannot be written to CSV.
  select(cell_line, cleaned_data) %>%
  unnest(cleaned_data)


# Output location passed positionally: the argument is named `path` in older
# readr releases and `file` in newer ones.
write_csv(
  public_data,
  "./challenge_data/median_phospho/median_phospho_data.csv"
)

### 2. export prediction conditions --------------------------------------------
# export the conditions and use NA for the values that the participants have to predict

# Build the AIM2 prediction template: for every cell line flagged "test" in
# the sheet's AIM2 column, keep all conditions except "full" and blank out the
# reporter values. Other cell lines contribute no rows.
prediction_data <- median_data %>%
  filter(treatment != "imTOR") %>% # remove imTOR condition from all
  group_by(cell_line) %>%
  nest() %>% # the list-column is named "data" by default
  ungroup() %>%
  mutate(cleaned_data = map2(data, cell_line, function(data, cell_line) {
    # Copy under another name: inside filter() `cell_line` refers to the
    # sheet's column, not to this function argument.
    current_cell_line <- cell_line

    purpose <- cell_line_sheet %>% filter(cell_line == current_cell_line)

    # NA-safe check on the first matching sheet row; empty cells and cell
    # lines absent from the sheet are treated as "not test".
    stopifnot("AIM2" %in% colnames(purpose))
    if (!isTRUE(purpose[["AIM2"]][1] == "test")) {
      return(tibble())
    }

    # Every column after the first two is treated as a reporter.
    # NOTE(review): assumes the first two columns of the nested data are
    # condition descriptors rather than measurements - confirm.
    reporters <- colnames(data)[-(1:2)]

    # Keep the conditions to be predicted (everything except "full") and mask
    # the values with NA.
    data %>%
      filter(treatment != "full") %>%
      mutate_at(reporters, ~NA_real_)
  })) %>%
  # Drop the raw nested column explicitly: tidyr >= 1.0 no longer drops it in
  # unnest(), and a list-column cannot be written to CSV.
  select(cell_line, cleaned_data) %>%
  unnest(cleaned_data)


# Output location passed positionally: the argument is named `path` in older
# readr releases and `file` in newer ones.
write_csv(
  prediction_data,
  "./challenge_data/predict_conditions/AIM_2_median_data.csv"
)


### # 3. export the validation data. --------------------------------------------
# export the conditions for validation

# Build the AIM2 validation set: for every cell line flagged "test" in the
# sheet's AIM2 column, keep the measured values of all conditions except
# "full". Other cell lines contribute no rows.
validation_data <- median_data %>%
  filter(treatment != "imTOR") %>% # remove imTOR condition from all
  group_by(cell_line) %>%
  nest() %>% # the list-column is named "data" by default
  ungroup() %>%
  mutate(cleaned_data = map2(data, cell_line, function(data, cell_line) {
    # Copy under another name: inside filter() `cell_line` refers to the
    # sheet's column, not to this function argument.
    current_cell_line <- cell_line

    purpose <- cell_line_sheet %>% filter(cell_line == current_cell_line)

    # NA-safe check on the first matching sheet row; empty cells and cell
    # lines absent from the sheet are treated as "not test".
    stopifnot("AIM2" %in% colnames(purpose))
    if (!isTRUE(purpose[["AIM2"]][1] == "test")) {
      return(tibble())
    }

    # Keep the conditions that were withheld (everything except "full"),
    # values left untouched.
    data %>% filter(treatment != "full")
  })) %>%
  # Drop the raw nested column explicitly: tidyr >= 1.0 no longer drops it in
  # unnest(), and a list-column cannot be written to CSV.
  select(cell_line, cleaned_data) %>%
  unnest(cleaned_data)


# Output location passed positionally: the argument is named `path` in older
# readr releases and `file` in newer ones.
write_csv(
  validation_data,
  "./challenge_data/validation_data/AIM_2_median_data.csv"
)

0 comments on commit facaa16

Please sign in to comment.