forked from sruthivijay/ZS-Challenge-2017
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathR_Starter.R
39 lines (26 loc) · 1.08 KB
/
R_Starter.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# This starter script shows how to produce a first submission in the expected format.
# It predicts each patient's future events from the events that patient had most often in the past.
# Working directory ---------------------------------------------------------
# `path` is a placeholder: set it to the folder that holds the CSV files
# before running. The relative file names below resolve against it.
path <- "directory"
setwd(path)

# Packages ------------------------------------------------------------------
# data.table provides fread(), used to read the CSV files below.
library(data.table)

# Read the input tables -----------------------------------------------------
train <- fread(input = "train.csv")
test <- fread(input = "test.csv")
sample_sub <- fread(input = "sample_submission.csv")

# Quick look at the first rows and the column types of both tables.
head(train)
head(test)
str(train)
str(test)

# Sort both tables by patient id ---------------------------------------------
train <- train[order(PID), ]
test <- test[order(PID), ]
# Predict each patient's future events from their most frequent past events ---
# Reshape to one row per PID and one column per distinct Event; each cell is
# the number of training rows that patient has for that event.
train_dcast <- dcast(
  data = train,
  formula = PID ~ Event,
  fun.aggregate = length,
  value.var = "Event"
)

# The prediction columns take both their names and their number from the
# sample submission (every column after the first), so the width of the
# prediction table can never disagree with the header assigned to it.
pred_cols <- colnames(sample_sub)[-1]
n_top <- length(pred_cols)

# For every patient, find the column positions of the n_top largest counts.
# order(-x) keeps ties in column order. A patient with fewer than n_top
# observed events is padded with zero-count events, and with NA once the
# event columns run out.
event_names <- setdiff(colnames(train_dcast), "PID")
event_counts <- as.matrix(train_dcast[, event_names, with = FALSE])
top_idx <- apply(event_counts, 1, function(x) order(-x)[seq_len(n_top)])

# apply() yields one column per patient, so indexing with it flattens the
# result patient by patient; byrow = TRUE folds it back to one row each.
random_submit <- event_names[top_idx]
random_mat <- as.data.table(matrix(random_submit, ncol = n_top, byrow = TRUE))
colnames(random_mat) <- pred_cols

# create the submission file
# Attach predictions to the test PIDs by key, not by row position: look up
# each test PID's row in the per-patient table. A test PID that never occurs
# in train gets NA predictions instead of another patient's events.
pred_rows <- match(test$PID, train_dcast$PID)
random_mat <- cbind(PID = test$PID, random_mat[pred_rows])
fwrite(random_mat, "random_sub.csv")