-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_preprocess_clean_data.py
59 lines (45 loc) · 1.49 KB
/
main_preprocess_clean_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
# coding: utf-8
# In[5]:
# Python libraries
import numpy as np
import pandas as pd
import datetime, copy, imp
import time
import os
#import re
#import matplotlib.pyplot as plt
import tqdm
from tqdm.auto import tqdm, trange
from tqdm.notebook import tqdm
tqdm.pandas()
from datetime import timedelta
import copy
import sys
import gensim
from gensim.models.word2vec import Word2Vec
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
import sklearn.model_selection as model_selection
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data.sampler import WeightedRandomSampler
# importing our own project modules (local .py files) under short aliases
import preprocessing as pp
import dataloader as dl
import model as mdl
import train as tr
# Input locations: the raw medical-event and medical-state HDF5 files.
data_event_loc = 'data/MLB-MedicalEvents.hd5'
data_state_loc = 'data/MLB-MedicalState.hd5'
# In[6]:
# Announce the preprocessing stage. Per the original note, this stage removes
# inconsistencies, joins the state and event tables, fills nulls with zero and
# adds a windowdiff column (that logic lives in preprocessing.pre_processing,
# which is not visible from this file).
print(
    "---------------------------------------",
    "Preprocessing, deleting inconsistency, join state and event tables, fill the null values with zero, add windowdiff column",
    "---------------------------------------",
    sep="\n",
)
# pre_processing returns three objects, unpacked in this order.
# NOTE(review): presumably pandas DataFrames, since each is written via .to_hdf - confirm.
tbl, state, events = pp.pre_processing(data_event_loc, data_state_loc)
# Persist each result to its own HDF5 file, in the same order as before:
# joined table first, then state, then events.
for _frame, _path, _key in (
    (tbl, 'data/tbl.hd5', 'Table'),
    (state, 'data/state.hd5', 'State'),
    (events, 'data/events.hd5', 'Events'),
):
    _frame.to_hdf(_path, key=_key)