forked from sanithps98/Automobile-Dataset-Analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal_work.py
69 lines (51 loc) · 1.94 KB
/
final_work.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
"""final2.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Z01aBybX4M0oInJkPut3yzwSX0GI63EL
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Location of the automobile CSV used by the first model; pandas reads it
# directly over HTTP.
path = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/automobileEDA.csv'
df = pd.read_csv(filepath_or_buffer=path)
# Notebook leftover: the five-row preview is evaluated, but nothing is
# displayed when this file runs as a plain script.
df.head(n=5)
from sklearn.linear_model import LinearRegression
# Multiple linear regression of price on four predictor columns.
lm = LinearRegression()
# Predictor matrix: one column per feature fed to the model.
Z = df[['horsepower', 'curb-weight', 'engine-size', 'highway-mpg']]
# Fit once. The model used to be fitted twice in a row on identical data;
# the second call only repeated the same computation.
lm.fit(Z, df['price'])
# R^2 on the same rows the model was fitted on (an in-sample score).
# Keep the value in a name and print it: as a bare expression the result
# was thrown away whenever this file ran outside a notebook.
r2_multifit = lm.score(Z, df['price'])
print('The R-square is:', r2_multifit)
from sklearn.metrics import mean_squared_error
# Predict a price for every row of the predictor matrix Z.
Y_predict_multifit = lm.predict(Z)
# Compare the predictions with the actual prices via the mean squared error.
# The value is bound to a name and printed; as a bare expression it was
# computed and then discarded when the file ran as a script.
mse_multifit = mean_squared_error(df['price'], Y_predict_multifit)
print('The mean square error of price and predicted value using multifit is:', mse_multifit)
import pandas as pd
import numpy as np
# Import clean data
# From here on `path` and `df` are rebound: they now refer to the second CSV,
# not the one loaded at the top of the script.
path = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/module_5_auto.csv'
df = pd.read_csv(path)
# Keep a local copy of the download in the current working directory. This
# must run before the numeric filter below so the copy holds every column.
df.to_csv('module_5_auto.csv')
# NOTE(review): `_get_numeric_data` is a private pandas method (leading
# underscore) and may change without notice; presumably
# `df.select_dtypes(include='number')` is the public equivalent -- confirm it
# keeps the same columns before switching.
df = df._get_numeric_data()
# Notebook leftover: the preview is evaluated but not displayed in a script.
df.head()
# Separate the predictors from the target column.
x_data = df.drop(columns='price')
y_data = df['price']
# Hold out 15% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.15,
    random_state=1,
)
# Report how many rows landed in each split.
print("number of test samples :", len(x_test))
print("number of training samples:", len(x_train))
# Predictor columns used by the hold-out model, named once and reused for
# fitting and for both prediction calls.
feature_columns = ['horsepower', 'curb-weight', 'engine-size', 'highway-mpg']
train_features = x_train[feature_columns]
test_features = x_test[feature_columns]

# Fit on the training split only.
lr = LinearRegression()
lr.fit(train_features, y_train)

# Predict for both splits. The five-element slices are notebook-style
# previews: they are evaluated, but their values are discarded in a script.
yhat_train = lr.predict(train_features)
yhat_train[:5]
yhat_test = lr.predict(test_features)
yhat_test[:5]
import pickle
# Persist the fitted hold-out model so it can be reloaded without retraining.
filename = 'model_1.sav'
# Open the file in a `with` block so the handle is flushed and closed as soon
# as the dump finishes (even if pickling raises) instead of being left to the
# garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(lr, model_file)