# gradient descent.py
import numpy as np
from sklearn import datasets
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Load the Boston housing dataset (13 features; target is the median home value)
dataset = datasets.load_boston()
features = dataset.data
labels = dataset.target
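# NOTE: load_boston was removed in scikit-learn 1.2. If it is unavailable, a
# hedged fallback (assumes pandas and network access; this is the recipe from
# scikit-learn's own deprecation notice) would be:
#   import pandas as pd
#   raw = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
#                     sep=r"\s+", skiprows=22, header=None)
#   features = np.hstack([raw.values[::2, :], raw.values[1::2, :2]])
#   labels = raw.values[1::2, 2]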
# Hold out the last Nsplit samples for testing
Nsplit = 50
X_train, y_train = features[:-Nsplit], labels[:-Nsplit]
X_test, y_test = features[-Nsplit:], labels[-Nsplit:]
# Helper functions: feature normalization, closed-form least-squares solver, MSE
def preproc_params(x):
    # Per-feature mean and standard deviation, computed on the training set
    mean = np.mean(x, axis=0)
    std = np.std(x, axis=0)
    return mean, std
def preprocess(x, p):
    # Standardize with the training statistics, then prepend a constant bias column
    mean, std = p
    std = np.where(std == 0, 1, std)  # guard against division by zero for constant features
    x = (np.asarray(x) - mean) / std
    return np.hstack((np.ones((x.shape[0], 1)), x))
def solve(x, y):
    # Closed-form least-squares fit via the Moore-Penrose pseudoinverse: w = pinv(X) @ y
    pinv_x = np.linalg.pinv(x)
    w = np.dot(pinv_x, y)
    return w
def mse(x, y, w):
    # Mean squared error of the linear predictions X @ w against the targets y
    er = np.dot(w, x.T) - y
    err = np.inner(er, er) / np.size(y)
    return err
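# Sanity check (not part of the original script): the descent updates below move
# along X^T (Xw - y), i.e. the MSE gradient up to a constant factor. Verify the
# analytic gradient 2 * X^T (Xw - y) / n against a central finite difference of
# mse() on small random data.
_rng = np.random.default_rng(0)
_X, _y, _w = _rng.normal(size=(5, 3)), _rng.normal(size=5), _rng.normal(size=3)
_analytic = 2 * np.dot(np.dot(_w, _X.T) - _y, _X) / _y.size
_eps = 1e-6
_numeric = np.array([(mse(_X, _y, _w + _eps * np.eye(3)[k])
                      - mse(_X, _y, _w - _eps * np.eye(3)[k])) / (2 * _eps)
                     for k in range(3)])
assert np.allclose(_analytic, _numeric, atol=1e-5)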
# Normalize with training-set statistics only, so no test-set information leaks in
params = preproc_params(X_train)
X_train = preprocess(X_train, params)
X_test = preprocess(X_test, params)
# Stochastic Gradient Descent (SGD): one pass over shuffled samples per epoch
rate = 5e-4
epochs = 500
data_num = X_train.shape[0]
feature_num = X_train.shape[1]
w = np.random.uniform(-0.1, 0.1, feature_num)
error = []
for i in range(epochs):
    # Reshuffle the training samples at the start of each epoch
    temp = np.arange(X_train.shape[0])
    np.random.shuffle(temp)
    x = X_train[temp]
    y = y_train[temp]
    for j in range(data_num):
        # Single-sample update: w <- w - rate * (w.x_j - y_j) * x_j
        diff = np.dot(w, x[j]) - y[j]
        w = w - rate * diff * x[j]
    # Track the full-training-set MSE once per epoch
    err = mse(X_train, y_train, w)
    error.append(err)
print("the weight matrix of SGD:\n", w)
print("the final train error of SGD:\n", err)
err = mse(X_test,y_test,w)
print("the final test error of SGD\n", err)
plt.plot(range(500),error)
# Batch Gradient Descent (BGD): one update per epoch from the full gradient
w = np.random.uniform(-0.1,0.1,feature_num)
error = []
for i in range(epochs):
    # Full-batch update: w <- w - rate * X^T (Xw - y)
    diff = np.dot(w, X_train.T) - y_train
    w = w - rate * np.dot(diff, X_train)
    err = mse(X_train, y_train, w)
    error.append(err)
print("the weight matrix of BGD\n", w)
print("the final train_error of BGD\n", err)
plt.plot(range(epochs),error)
err = mse(X_test,y_test,w)
print("the final test_error of BGD:\n", err)
# Closed-form solution via the pseudoinverse
w = solve(X_train, y_train)
err = mse(X_train, y_train, w)
print("the weight matrix of closed form:\n", w)
print("the train error of closed form:\n", err)
err = mse(X_test, y_test, w)
print("the test error of closed form:\n", err)
# Test robustness by repeatedly reshuffling which samples land in the train/test split
features_orig = dataset.data
labels_orig = dataset.target
Ndata = len(features_orig)
train_errs = []
test_errs = []
for k in range(100):
    # Shuffle the data
    rand_perm = np.random.permutation(Ndata)
    features = features_orig[rand_perm]
    labels = labels_orig[rand_perm]
    # Train/test split
    Nsplit = 50
    X_train, y_train = features[:-Nsplit], labels[:-Nsplit]
    X_test, y_test = features[-Nsplit:], labels[-Nsplit:]
    # Preprocess: normalization plus the constant bias feature
    params = preproc_params(X_train)
    X_train = preprocess(X_train, params)
    X_test = preprocess(X_test, params)
    # Solve for the optimal w with the closed-form solver
    w = solve(X_train, y_train)
    # Collect train and test errors
    train_errs.append(mse(X_train, y_train, w))
    test_errs.append(mse(X_test, y_test, w))
print('Mean training error: ', np.mean(train_errs))
print('Mean test error: ', np.mean(test_errs))
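# Optional follow-up (not in the original): the spread across the 100 shuffles
# indicates how sensitive the errors are to the particular split.
print('Std of training error: ', np.std(train_errs))
print('Std of test error: ', np.std(test_errs))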