// gradient_descent.rs
fn main() {
    // from: https://www.youtube.com/watch?v=sDv4f4s2SB8&t
    const OBSERVED_HEIGHT: [f32; 3] = [1.4, 1.9, 3.2];
    // The heights of the three people. The height is the
    // data we want to predict from those three samples;
    // it is what we would expect from the three inputs,
    // the weights.
    const WEIGHT: [f32; 3] = [0.5, 2.3, 2.9];
    // Their weights; these are the inputs of the
    // gradient descent.
    // Since each weight gives one height, I think I
    // could say there are three propagations if I take
    // it as a tiny neural network.
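    // The model fitted below is the straight line
    //     predicted_height = slope * weight + intercept
    // and gradient descent searches for the slope and intercept
    // that minimize the sum of squared residuals over the samples.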
    let try_number: usize = 1000;
    // This is the number of times the program will try
    // to determine the slope and, afterward, the intercept.
    let mut slope_intercept_trouve: [bool; 2] = [false, false];
    // Indicates whether the correct slope and intercept
    // have been found or not.
    let mut true_counter: usize = 0;
    // How many of the two values have been found so far.
    let precision_success: f32 = 0.001;
    // The program will stop once the derivative of the sum
    // of the squared differences between the observed data
    // and the predicted data lies between precision_success
    // and its negative.
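    // In formula form, the quantity being minimized is
    //     SSR = sum over j of (OBSERVED_HEIGHT[j] - predicted_height)^2
    // and a parameter counts as found once |d(SSR)/d(parameter)| <= precision_success.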
    let mut step_size: f32;
    // The step applied to a parameter at each iteration:
    // the summed derivative times the learning rate.
    let power_dif: f32 = 2.0;
    // The exponent of the squared residual; differentiating
    // brings it down as a factor in the derivatives below.
    let slope_intercept_learning_rate: [f32; 2] = [0.01, 0.1];
    // For 0:
    // The multiplier that determines the step size when it is
    // used to multiply sum_derivative_square_residual.
    // The calculated step size creates the next value of
    // the slope of the prediction line.
    // For 1:
    // Same, but the calculated step size creates the next
    // value of the intercept of the prediction line.
    let mut sum_derivative_square_residual: f32;
    let mut derivative_square_residual: f32;
    // The sum of the derivatives of the squared difference
    // between the observed value and the expected one, used
    // to compute the slope of the prediction curve between
    // steps N-1 and N.
    // <draft>
    // let batch_number: usize = 2;
    // For a mini-batch:
    // for j in 0..batch_number { instead of for j in 0..WEIGHT.len()
    // generate a random number x between 0 and OBSERVED_HEIGHT.len()
    // use x in WEIGHT[x] and OBSERVED_HEIGHT[x]
    // </draft>
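    // A minimal sketch of that mini-batch idea, left commented out
    // because it assumes the external `rand` crate (rand::thread_rng
    // and gen_range are not part of this file):
    //
    // use rand::Rng;
    // let batch_number: usize = 2;
    // let mut rng = rand::thread_rng();
    // for _ in 0..batch_number {
    //     let x = rng.gen_range(0..OBSERVED_HEIGHT.len());
    //     // accumulate the derivative using WEIGHT[x] and OBSERVED_HEIGHT[x]
    // }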
    let mut slope_intercept: [f32; 2] = [0.0, 0.0];
    // For 0:
    // starting value of the slope of the prediction line.
    // For 1:
    // starting value of the intercept of the prediction line.
    let mut predicted_height: f32;
    // Where the height predicted from the slope and the
    // intercept will be stored.
    let mut number_end: usize = 0;
    // The index of the try at which both values were found.
    for i in 0..try_number {
        // For each try:
        // stop the algorithm if the values have already been found.
        // I would add another for loop here:
        // for each layer (looking backward).
        for y in 0..slope_intercept.len() {
            if true_counter == slope_intercept_trouve.len() {
                // number_end = i; here would be wrong because it
                // would count an unused try.
                break;
            }
            println!("\nfor the value (0 = slope, 1 = intercept): {}", y);
            // For each type of data I want to predict; here this is the
            // slope and the intercept, but I think in a real neural network
            // it would be: for the weights, and for the bias afterward, since
            // the bias is the constant, like the intercept.
            // That would mean I would have to add another for loop for each
            // weight (weights_bias[0]) and for each bias afterward (weights_bias[1]).
            if !slope_intercept_trouve[y] {
                // If the correct value for this data has not been found yet:
                sum_derivative_square_residual = 0.0;
                // Reset the sum calculated during the previous iteration
                // and give it a value (useful for the first iteration).
                // The loop below calculates the difference between the
                // observed data and (with a minus sign) the data the network
                // would give, for each pair of data (here WEIGHT), sums it,
                // and the value is declared correct if the sum lies between
                // the precision I ask of the neural network
                // (precision_success) and its negative.
                for j in 0..WEIGHT.len() {
                    // For each pair of data (given and observed), the samples:
                    predicted_height = (slope_intercept[0] * WEIGHT[j]) + slope_intercept[1];
                    // Tries to predict the correct data. I wonder if, for a real
                    // neural network, the predicted data should be the calculation
                    // of the output(s) of the network from the multiplication
                    // between the two last layers or from all of them;
                    // in a function with let-else statements (weight or bias?),
                    // in a match statement (which layer?), and by looking at
                    // the layers backward (array.len() - x),
                    // or
                    // two functions, one for the weights and another for the bias,
                    // with a match statement.
                    // The function would probably start before:
                    // for j in 0..WEIGHT.len() {
                    // because I would like to avoid calling a function in a loop.
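                    // The two branches below apply the chain rule to the squared
                    // residual (OBSERVED_HEIGHT[j] - predicted_height)^2:
                    //     d/d(slope)     = -2 * WEIGHT[j] * (observed - predicted)
                    //     d/d(intercept) = -2 * (observed - predicted)
                    // power_dif holds the exponent 2 that differentiation brings down.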
                    if y == 0 {
                        // If the data is the slope (the weights); there would be
                        // another if to see in which layer the weight is.
                        derivative_square_residual = (-power_dif * WEIGHT[j]) * (OBSERVED_HEIGHT[j] - predicted_height);
                        sum_derivative_square_residual += derivative_square_residual;
                    }
                    if y == 1 {
                        // If the data is the intercept (the bias).
                        derivative_square_residual = -power_dif * (OBSERVED_HEIGHT[j] - predicted_height);
                        sum_derivative_square_residual += derivative_square_residual;
                    }
                }
println!("La valeur de la somme des dérivées des carrées est : {}", sum_derivative_square_residual);
// calcule step size, le pas
step_size = sum_derivative_square_residual * slope_intercept_learning_rate[y];
println!("La valeur du step size est : {}", step_size);
println!("L'ancienne valeur : {}", slope_intercept[y]);
// determination de la prochaine valeur de la valeur
slope_intercept[y] = slope_intercept[y] - step_size;
println!("La nouvelle valeur : {}", slope_intercept[y]);
                if sum_derivative_square_residual.abs() <= precision_success {
                    slope_intercept_trouve[y] = true;
                    true_counter += 1;
                    if y == 0 {
                        println!("\n\nDone finding the correct slope of the prediction line! ");
                        println!("The slope: {}", slope_intercept[y]);
                    }
                    if y == 1 {
                        println!("\n\nDone finding the correct intercept of the prediction line! ");
                        println!("The intercept: {}", slope_intercept[y]);
                    }
                }
            }
        }
        // Can't put the break here, because the for loop above would
        // still go on to the next type of value (weight or bias).
        // if true_counter == slope_intercept_trouve.len() {
        //     break;
        // }
        // Still needed here anyway, to avoid using up the remaining tries.
        if true_counter == slope_intercept_trouve.len() {
            number_end = i;
            break;
        }
    }
    if true_counter == slope_intercept_trouve.len() {
        println!("\nThe equation of the prediction line is: y = {}x + {}", slope_intercept[0], slope_intercept[1]);
        println!("The algorithm made {} tries to find the correct values.", number_end + 1);
        // + 1 because I want to count the first try, which has i = 0.
    }
}
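
// A possible way to try this file directly (assuming a standard Rust toolchain):
//     rustc gradient_descent.rs && ./gradient_descent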