-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlogistic_regression.html
80 lines (59 loc) · 2.13 KB
/
logistic_regression.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
"""Logistic_Regression.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1evsgNOvajsenWB5spLxNCrIiuTqemzB2
"""
import numpy as np
import pandas as pd
import os
import math
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
import matplotlib.pyplot as plt
# Load the HR attrition data; cells containing "?" are read as missing values.
df = pd.read_csv('HR Employee Attrition.csv', na_values="?")

# Notebook-style inspection: these bare expressions only display something in
# an interactive session and have no visible effect when run as a script.
df.head(25)
df.shape

# Replace each listed column with the integer codes produced by LabelEncoder.
# One encoder instance is re-fitted per column, so after the loop `lb` only
# remembers the classes of the last column processed ('Over18').
lb = LabelEncoder()
categorical_columns = (
    'EducationField',
    'BusinessTravel',
    'Attrition',
    'Department',
    'Gender',
    'JobRole',
    'MaritalStatus',
    'OverTime',
    'Over18',
)
for column_name in categorical_columns:
    df[column_name] = lb.fit_transform(df[column_name])
# Features are every column except the target; `d` and `X` name the same frame.
d = df.drop(columns=['Attrition'])
X = d
Y = df['Attrition']
df.shape

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Notebook-style inspection of the four partitions (no effect in a script).
(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Fit the scaler on the training partition only, then apply the same
# transformation to the test partition.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)
# Baseline model: L2-penalised logistic regression with a stopping tolerance
# of 0.01, fitted on the training partition.
lr = LogisticRegression(C=1.0, penalty='l2', tol=0.01)
lr.fit(X_train, y_train)
lr_y_predict = lr.predict(X_test)

# score() and classification_report() only *return* their results. Bind and
# print them so the evaluation is visible when this file runs as a script
# rather than as notebook cells.
lr_test_accuracy = lr.score(X_test, y_test)
print("Test accuracy (lr):", lr_test_accuracy)

# target_names are matched to the label values in sorted order, so 'good'
# labels class 0 and 'bad' labels class 1.
# NOTE(review): presumably 0/1 correspond to Attrition "No"/"Yes" - confirm
# against the CSV values.
lr_report = classification_report(
    y_test,
    lr_y_predict,
    target_names=['good', 'bad'],
)
print(lr_report)
# Second model: logistic regression with default hyper-parameters;
# verbose=3 asks the estimator to emit progress output while fitting.
clf = LogisticRegression(verbose=3)
clf_trained = clf.fit(X_train, y_train)  # fit() returns the estimator itself

# Accuracy on the data the model was trained on, for comparison with the
# held-out accuracy reported below.
train_accuracy = clf_trained.score(X_train, y_train)
predictions = clf_trained.predict(X_test)

# The metric functions return their results instead of printing them, so each
# value is bound and printed; otherwise nothing is reported when this file
# runs as a script.
print("Train accuracy (clf):", train_accuracy)
print(classification_report(y_test, predictions))
print("Test accuracy (clf):", accuracy_score(y_test, predictions))
print("Confusion matrix (rows = true label, columns = predicted label):")
print(confusion_matrix(y_test, predictions))

# f1_score uses its default positive class (label 1) while precision_score is
# explicitly asked for label 0, so the two numbers describe different classes.
# Arguments are kept as originally written.
# NOTE(review): confirm the differing pos_label is intended.
print("F1 score (pos_label=1):", f1_score(y_test, predictions))
print(
    "Precision (pos_label=0):",
    precision_score(y_test, predictions, pos_label=0),
)