-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathML_1.py
97 lines (83 loc) · 2.96 KB
/
ML_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import numpy as np
import torch
from torch.autograd import Variable
import pandas as pd
# Oversampling helper
def oversampling(data):
    """Balance a binary-labelled array by duplicating minority-class rows.

    Column 0 of ``data`` holds the label (0 or 1) and columns 1..30 hold the
    features. Randomly chosen rows of the smaller class are appended to it
    until both classes contain the same number of rows.

    Args:
        data: 2-D NumPy array whose first column is the 0/1 label.

    Returns:
        A tuple ``(x, y)``. ``x`` contains columns 1..30 of the balanced data,
        all class-0 rows first and the class-1 rows after them. ``y`` is the
        matching label column with shape ``(n, 1)``.

    Raises:
        ValueError: If one class has rows and the other has none, because
            there is nothing to duplicate.
    """
    data0 = data[data[:, 0] == 0]
    data1 = data[data[:, 0] == 1]  # split the rows by label
    size0 = data0.shape[0]
    size1 = data1.shape[0]
    # Grow whichever class is smaller by the difference in row counts.
    if size0 >= size1:
        data1 = _append_random_rows(data1, size0 - size1)
    else:
        data0 = _append_random_rows(data0, size1 - size0)
    x = np.concatenate((data0[:, 1:31], data1[:, 1:31]), axis=0)
    y = np.concatenate((data0[:, 0:1], data1[:, 0:1]), axis=0)
    return x, y


def _append_random_rows(rows, extra):
    """Return ``rows`` with ``extra`` randomly re-drawn rows appended."""
    if extra == 0:
        return rows
    population = rows.shape[0]
    if population == 0:
        raise ValueError("cannot oversample a class that has no rows")
    # Drawing without replacement only works while the shortfall fits inside
    # the population; past that point repeated rows are unavoidable.
    picks = np.random.choice(population, extra, replace=extra > population)
    return np.concatenate((rows, rows[picks, :]), axis=0)
# Data preprocessing
def pre_process(dataset):
    """Turn the raw table into standardised train/test arrays.

    Steps: take columns 1..31 (by position) of ``dataset``, encode the label
    values ``'M'`` as 1 and ``'B'`` as 0, balance the classes with
    ``oversampling``, shuffle the rows, standardise every feature column to
    zero mean and unit variance, and split the result.

    Args:
        dataset: pandas DataFrame; positional column 1 is the ``'M'``/``'B'``
            label and the following 30 columns are the features.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` as NumPy arrays. The first 30 %
        of the shuffled rows form the training part, the remainder the test
        part.
    """
    data = dataset.iloc[:, 1:32].values
    data[data == 'M'] = 1
    data[data == 'B'] = 0  # encode the M/B classes as 1/0
    data = data.astype(np.float64)
    x, y = oversampling(data)
    order = np.random.permutation(x.shape[0])  # random row order
    x = x[order]
    y = y[order]
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    # A constant feature column has sigma == 0; dividing by it would fill the
    # column with NaN/inf, so leave such columns centred but unscaled.
    sigma = np.where(sigma == 0, 1.0, sigma)
    x = (x - mu) / sigma
    # NOTE(review): only 30 % of the rows go to training and 70 % to testing;
    # confirm this ratio is intended rather than inverted.
    split = int(len(y) * 0.3)
    return x[:split], y[:split], x[split:], y[split:]
# Accuracy computation
def cal_acc(x, y, y_pred):
    """Return the fraction of samples whose thresholded prediction is correct.

    A score below 0.5 is counted as class 0, anything else as class 1, and the
    result is compared with the true label.

    Args:
        x: Feature batch; only ``x.shape[0]`` (the sample count) is used.
        y: True labels, one value per sample (array, list or tensor).
        y_pred: Predicted scores, one value per sample (array, list or tensor).

    Returns:
        The mean of the per-sample matches over the first ``x.shape[0]``
        samples, as a NumPy float.

    Raises:
        ValueError: If ``y`` or ``y_pred`` does not provide exactly one value
            for each of the ``x.shape[0]`` samples.
    """
    size = x.shape[0]
    labels = _one_value_per_sample(y, size)
    scores = _one_value_per_sample(y_pred, size)
    predicted = np.where(scores < 0.5, 0, 1)
    return np.mean(labels == predicted)


def _one_value_per_sample(values, size):
    """Return the first ``size`` entries of ``values`` as a 1-D NumPy array."""
    if hasattr(values, "detach"):
        # Tensor input: drop autograd tracking and move to host memory so the
        # data can be viewed as a NumPy array.
        values = values.detach().cpu()
    return np.asarray(values)[:size].reshape(size)
# Model
class Model(torch.nn.Module):
    """Logistic-regression classifier: one linear layer followed by a sigmoid.

    Args:
        in_features: Number of input features per sample. Defaults to 30,
            the width that was previously hard-coded.
    """

    def __init__(self, in_features=30):
        super().__init__()
        # Single output unit: the sigmoid of it is the class-1 probability.
        self.linear = torch.nn.Linear(in_features, 1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for a batch ``x`` of feature rows."""
        return torch.sigmoid(self.linear(x))
# Main script: load the data, train the classifier, report the results.
dataset = pd.read_csv('./data.csv')
x_train, y_train, x_test, y_test = pre_process(dataset)  # preprocessing
# Plain float32 tensors are enough here; the deprecated Variable wrapper is
# no longer needed for autograd.
x_train = torch.as_tensor(x_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32)
x_test = torch.as_tensor(x_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32)
model = Model()
criterion = torch.nn.BCELoss(reduction="mean")  # binary cross-entropy loss
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # optimizer
# Training loop (full-batch gradient descent)
i = 50  # report every 50 epochs
j = 1  # running number of the report group
print('每', i, '个epoch一组')
for epoch in range(1000):  # 1000 epochs
    optimizer.zero_grad()
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    loss.backward()  # back-propagation
    optimizer.step()  # apply the parameter update
    if epoch % i == 0:
        print('第', j, '组', '损失loss:', loss.item(), '精确度accuracy:', cal_acc(x_train, y_train, y_pred))
        j = j + 1
# Evaluation needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    y_test_pred = model(x_test)
test_acc = cal_acc(x_test, y_test, y_test_pred)
print('Test总精确度:', test_acc)
# Dump every learned parameter together with its last computed gradient.
for f in model.parameters():
    print('具体数据:')
    print(f.data)
    print(f.grad)