-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecision_tree_analyze.py
84 lines (82 loc) · 3.13 KB
/
decision_tree_analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
# @Time : 2017/10/30 0030 20:46
# @Author : LQY
# @File : decision_tree_analyze.py
# @Software: PyCharm Community Edition
# from sklearn import tree
import pandas
from pandas import Series, DataFrame
from sklearn.datasets import load_iris
from sklearn import tree
import sys
import os
from IPython.display import Image
import pydotplus
import numpy
# Append the Graphviz binary directory of this Anaconda install to PATH
# (presumably so pydotplus/graphviz can locate the `dot` executable -- this
# is why the statement sits before `import graphviz`).
# A raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences, which newer interpreters warn about.
os.environ["PATH"] += os.pathsep + r'D:\Anaconda\Library\bin\graphviz'
import graphviz
from IPython.display import Image
# iris = load_iris()
# clf = tree.DecisionTreeClassifier()
# clf = clf.fit(iris.data, iris.target)
# with open("iris.dot", 'w') as f:
# f = tree.export_graphviz(clf, out_file=f)
# dot_data = tree.export_graphviz(clf, out_file=None,
# feature_names=iris.feature_names,
# class_names=iris.target_names,
# filled=True, rounded=True,
# special_characters=True)
# dot_data = tree.export_graphviz(clf, out_file=None)
# graph = pydotplus.graph_from_dot_data(dot_data)
# graph.write_pdf("iris.pdf")
def visualization_tree():
    """Fit a depth-limited decision tree on the sample CSV and render it to PDF.

    The hard-coded CSV is loaded, missing values are replaced with 0, the
    ``label`` column is used as the target and every other column as a
    feature. A ``DecisionTreeClassifier(max_depth=6)`` is fitted and its
    Graphviz rendering is written to ``7model_tree.pdf`` in the same
    directory as the input file.

    Returns:
        None. The feature column index is printed and the PDF file is
        written as side effects.
    """
    # Raw strings keep the Windows backslashes literal without depending on
    # unrecognised escape sequences in the path.
    samples = pandas.read_csv(
        r'E:\wash_data\all_samples\all_samples\all_samples\for_7model_all.csv'
    )
    samples = samples.fillna(0)

    Y = samples['label']
    X = samples.drop(['label'], axis=1)
    # Single-argument call form prints the same text under Python 2 and 3.
    print(X.columns)

    clf = tree.DecisionTreeClassifier(max_depth=6)
    clf = clf.fit(X, Y)

    dot_data = tree.export_graphviz(
        clf,
        out_file=None,
        filled=True,
        feature_names=list(X.columns),
        # export_graphviz looks class names up by position in ascending class
        # order, so a plain list is the documented form; the previous
        # {0: ..., 1: ...} dict only worked through integer-key lookup.
        # NOTE(review): names look like pinyin for "churned" / "not churned",
        # assuming the label column holds 0 and 1 -- confirm.
        class_names=['liushi', 'feiliushi'],
        rounded=True,
        special_characters=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf(
        r"E:\wash_data\all_samples\all_samples\all_samples\7model_tree.pdf"
    )
def _load_labeled_csv(path):
    """Read *path*, fill missing values with 0 and return (features, labels)."""
    frame = pandas.read_csv(path).fillna(0)
    return frame.drop(['label'], axis=1), frame['label']


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or NaN when denominator is 0."""
    if denominator == 0:
        return float('nan')
    return numerator / float(denominator)


def cart_model():
    """Train an unpruned decision tree and print two scores for the test CSV.

    The classifier is fitted on the hard-coded training CSV (``label`` column
    as target, all other columns as features, missing values filled with 0)
    and evaluated on the hard-coded test CSV.

    Two numbers are printed on one line, separated by a space:

    1. the fraction of all test rows predicted correctly (overall accuracy;
       the original code named this value "precision");
    2. the fraction of test rows whose true label is 0 that were predicted
       as 0 (recall of class 0).

    A score whose denominator is zero (empty test set, or no rows labelled 0)
    is printed as ``nan`` instead of raising ``ZeroDivisionError``.

    Returns:
        None.
    """
    # Raw strings keep the Windows backslashes literal without depending on
    # unrecognised escape sequences in the paths.
    X_train, Y_train = _load_labeled_csv(
        r'E:\wash_data\generate_feature_28\for_28model_train1.csv'
    )
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(X_train, Y_train)

    X_test, Y_test = _load_labeled_csv(
        r'E:\wash_data\generate_feature_28\for_28model_0117_test.csv'
    )

    # Predict the whole test set in one call: predict() expects a 2-D sample
    # matrix, and passing one 1-D row at a time is both rejected by current
    # scikit-learn releases and much slower.
    predicted = clf.predict(X_test)
    actual = Y_test.values

    is_correct = predicted == actual
    is_negative = actual == 0

    total = len(actual)
    neg_count = int(is_negative.sum())
    correct_count = int(is_correct.sum())
    # A correct prediction on a label-0 row is exactly "correct and predicted 0".
    correct_negative_count = int((is_correct & is_negative).sum())

    accuracy = _safe_ratio(correct_count, total)
    recall = _safe_ratio(correct_negative_count, neg_count)
    # %-formatting reproduces the "a b" output of the Python 2 print statement
    # while remaining valid under Python 3.
    print("%s %s" % (accuracy, recall))
# Script entry point: train and evaluate the CART model when this file runs.
# NOTE(review): the call is unguarded, so it also executes on import --
# consider wrapping it in `if __name__ == "__main__":`.
cart_model()