-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLinearRegression.py
66 lines (55 loc) · 2.21 KB
/
LinearRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import csv
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
import pandas as pd
###
# LINEAR REGRESSION MODEL
# Data Partition
# Load the semicolon-delimited student data and keep only the columns listed
# below (original note: ignore 1, 2, 4, 5, 11, 12, 20, 23, G1, G2).
# The delimiter is passed by keyword because pandas >= 2.0 no longer accepts
# it as a second positional argument.
studentMath = pd.read_csv('student-mat.csv', sep=';')
keptColumns = ['school', 'age', 'Pstatus', 'Medu', 'Fedu',
               'Mjob', 'Fjob', 'traveltime', 'studytime',
               'failures', 'schoolsup', 'famsup', 'paid', 'activities',
               'higher', 'internet', 'famrel', 'freetime', 'goout', 'Dalc',
               'Walc', 'health', 'absences', 'G3']
studentMath = studentMath.loc[:, keptColumns]
# Predictor columns. NOTE: 'school' and 'Pstatus' stay in studentMath but are
# not used as features.
featureColumns = ['age', 'Medu', 'Fedu',
                  'Mjob', 'Fjob', 'traveltime', 'studytime',
                  'failures', 'schoolsup', 'famsup', 'paid', 'activities',
                  'higher', 'internet', 'famrel', 'freetime', 'goout', 'Dalc',
                  'Walc', 'health', 'absences']
x = studentMath.loc[:, featureColumns]
# Target column.
y = studentMath['G3']
#ONE HOT ENCODER
# One-hot encode the columns listed below; every other column of x is passed
# through unchanged (remainder='passthrough').
# handle_unknown='ignore' keeps transform from raising when a category value
# was not present in the data the encoder was fitted on, which can happen on
# a cross-validation fold.
columnTrans = make_column_transformer(
    (OneHotEncoder(handle_unknown='ignore'),
     ['Mjob', 'Fjob', 'schoolsup', 'famsup', 'paid', 'activities',
      'higher', 'internet']),
    remainder='passthrough')
#Creating Linear Regression Model
# The pipeline applies the column transformer first, then fits the linear
# regression on the transformed features.
linReg = LinearRegression()
pipeLine = make_pipeline(columnTrans, linReg)
# 3-fold cross-validation with the estimator's default scorer; report the
# average of the per-fold scores.
foldScores = cross_val_score(pipeLine, x, y, cv=3)
print("CVS:", foldScores.mean())
### Prediction
# Fit on the complete data set, then predict for its first 50 rows.
# NOTE(review): the sample rows and the score below come from the same data
# the model was fitted on, so they are in-sample figures.
pipeLine.fit(x, y)
xSample = x.iloc[:50]  # first 50 rows of the predictors
ySample = y.iloc[:50].to_numpy()  # matching first 50 target values
yPred = pipeLine.predict(xSample)
print(yPred)
print(pipeLine.score(x, y))
print("this is the new shape X", xSample)
print("this is the new shape", yPred)
### Graph
# Plot predicted (black) against actual (red) values for the sample rows.
# Both series are reordered with the SAME permutation (ascending actual
# value) so that the two points at a given x position belong to the same row;
# sorting only the predictions would break that pairing.
order = ySample.argsort(kind='stable')
plt.plot(yPred[order], 'o', color='black', label='predicted')
plt.plot(ySample[order], 'o', color='red', label='actual')
plt.xlabel('sample rows (ordered by actual value)')
plt.ylabel('G3')
plt.legend()
plt.show()