-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathpreprocess.py
126 lines (106 loc) · 3.07 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import numpy as np
class MinMaxScaler:
    '''
    Rescale every feature (column) linearly with the per-column minimum and
    maximum learned by fit_transform.
    '''

    def fit_transform(self, X):
        '''
        Learn the per-column minimum and maximum of X, then scale X with them.

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data

        Returns
        -------
        X : shape (n_samples, n_features)
            The Training data min-max encoded.
        '''
        self.__min = np.min(X, axis=0)
        self.__max = np.max(X, axis=0)
        return self.transform(X)

    def transform(self, X):
        '''
        Scale X with the minimum and maximum stored by fit_transform.

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Predicting data

        Returns
        -------
        X : shape (n_samples, n_features)
            The Predicting data min-max encoded. Columns that were constant
            in the training data are only shifted by the stored minimum
            (they become 0 for the training data itself).
        '''
        value_range = self.__max - self.__min
        # A constant training column has max == min; dividing by that zero
        # range would produce NaN/inf, so such columns are divided by 1.
        safe_range = np.where(value_range == 0, 1, value_range)
        return (X - self.__min) / safe_range
class StandardScaler:
    '''
    Standardize every feature (column) with the per-column mean and standard
    deviation learned by fit_transform.
    '''

    def fit_transform(self, X):
        '''
        Learn the per-column mean and standard deviation of X, then
        standardize X with them.

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Training data

        Returns
        -------
        X : shape (n_samples, n_features)
            The Training data standard scaler encoded.
        '''
        self.__mean = np.mean(X, axis=0)
        self.__std = np.std(X, axis=0)
        standardized = self.transform(X)
        return standardized

    def transform(self, X):
        '''
        Standardize X with the mean and standard deviation stored by
        fit_transform.

        Parameters
        ----------
        X : shape (n_samples, n_features)
            Predicting data

        Returns
        -------
        X : shape (n_samples, n_features)
            The Predicting data standard scaler encoded.
        '''
        centered = X - self.__mean
        # The small constant keeps the denominator non-zero for columns whose
        # standard deviation is 0.
        spread = self.__std + 1e-8
        return centered / spread
class OneHot:
    '''
    One hot encoder: each distinct value seen by fit_transform becomes one
    output column.
    '''

    @property
    def classes(self):
        '''
        The sorted unique values found by fit_transform (as returned by
        np.unique). Only available after fit_transform has been called.
        '''
        return self.__classes

    def fit_transform(self, X):
        '''
        Learn the distinct values of X, then one hot encode X.

        Parameters
        ----------
        X : array-like with one value per sample,
            shape (n_samples,) or (n_samples, 1)
            Training data

        Returns
        -------
        X : shape (n_samples, n_classes)
            The Training data one hot encoded.
        '''
        self.__classes = np.unique(X)
        return self.transform(X)

    def transform(self, X):
        '''
        One hot encode X against the classes stored by fit_transform.

        Parameters
        ----------
        X : array-like with one value per sample,
            shape (n_samples,) or (n_samples, 1)
            Predicting data

        Returns
        -------
        X : shape (n_samples, n_classes)
            The Predicting data one hot encoded as floats. Column i is 1
            where the sample equals classes[i]; a sample whose value is not
            in classes gets an all-zero row.

        Raises
        ------
        ValueError
            If X holds more than one value per sample.
        '''
        # Accept plain lists/tuples as well as arrays, like np.unique does in
        # fit_transform.
        X = np.asarray(X)
        n_samples = X.shape[0]
        # One value per row; reshape raises ValueError on multi-column input.
        column = X.reshape(n_samples, 1)
        # Broadcasting (n_samples, 1) against (n_classes,) compares every
        # sample with every class in one step.
        return (column == self.classes).astype(float)
def bagging(n_samples, n_bags):
    '''
    Draw bootstrap samples of row indexes and report, for every bag, which
    indexes were left out.

    Parameters
    ----------
    n_samples : The number of data
    n_bags : The number of bags

    Returns
    -------
    indexs : The indexes per bag included
        (list of n_bags arrays, each with n_samples indexes drawn with
        replacement from range(n_samples))
    indexs_oob : The oob indexes per bag
        (list of n_bags sorted arrays holding the indexes of
        range(n_samples) that the matching bag did not draw)
    '''
    # The out-of-bag set must be taken relative to every valid index, not to
    # the scalar n_samples itself.
    all_indexes = np.arange(n_samples)

    indexs = []
    indexs_oob = []
    for _ in range(n_bags):
        sampled = np.random.choice(n_samples, n_samples)
        indexs.append(sampled)
        indexs_oob.append(np.setdiff1d(all_indexes, sampled))

    return indexs, indexs_oob