-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.py
65 lines (54 loc) · 2.2 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
import pandas as pd
from sklearn import metrics
import spearmonR as sprm
def predict(coNum, mult=0.66):
    """Run ``coNum`` rounds of hold-out classification and print a report.

    In every round, each frame in ``data.predictDataFrames`` (one per type)
    has a random ``1 / coNum`` fraction of its columns held out as test
    samples; the remaining columns are restricted to ``data.remainIndex``,
    transformed with ``log(x + 1)`` and passed to ``sprm.getSimilarByList``
    to obtain that type's reference similarity structure.  Every held-out
    column is then classified with ``predictSmpling`` and the true/predicted
    type indices are accumulated across all rounds.

    Args:
        coNum: Number of rounds; also sets the held-out fraction
            (``1 / coNum``) and supplies the per-round random seeds
            (``0 .. coNum - 1``).
        mult: Exponent forwarded unchanged to ``predictSmpling``.

    Returns:
        None.  The ``sklearn`` classification report over all rounds is
        printed to stdout.
    """
    typeTrue = []
    typePred = []
    for preTime in range(coNum):
        trainAll = []
        predictAll = []
        edgesByType = []
        for typeIdx, typeFrame in enumerate(data.predictDataFrames):
            # Seeding with the round number makes each round's split
            # reproducible while differing between rounds.
            heldOut = typeFrame.sample(random_state=preTime, frac=1 / coNum, axis=1)
            train = typeFrame.loc[:, ~typeFrame.columns.isin(heldOut.columns)]
            predictAll.append(heldOut)
            # dimensionality reduction: keep only the retained rows.
            # `.ix` was removed in pandas 1.0; `.loc` is the label-based
            # replacement.
            # NOTE(review): assumes data.remainIndex holds row labels - confirm.
            trainPre = np.log(train.loc[data.remainIndex] + 1)
            trainAll.append(trainPre)
            # get spearman
            edgesByType.append(sprm.getSimilarByList(trainPre, typeIdx, data))
        # test by type
        for pre in range(data.typeNum):
            # `iteritems` was removed in pandas 2.0; `items` yields the same
            # (column label, column Series) pairs.
            for _, sample in predictAll[pre].items():  # predict each sample
                typePred.append(predictSmpling(edgesByType, trainAll, sample, mult))
                typeTrue.append(pre)
    print(metrics.classification_report(typeTrue, typePred, digits=5))
    return
def predictSmpling(edgesByType, trainAll, sampleData, mult):
    """Return the index of the type whose similarity structure the sample disturbs least.

    For every type, the sample is appended as an extra column to that type's
    training frame, ``sprm.getSimilarByList`` is recomputed, and the absolute
    difference from the type's reference result is summed over all entries
    after scaling by ``(number of columns) ** mult``.  The type with the
    smallest total is the prediction.

    Args:
        edgesByType: Per-type reference results of ``sprm.getSimilarByList``
            computed on the training data alone.
        trainAll: Per-type training frames, already restricted to
            ``data.remainIndex`` and log-transformed.
        sampleData: One raw sample (a column of the original data); it is
            reduced and log-transformed here the same way as the training data.
        mult: Exponent applied to the column count of the augmented frame
            when weighting the difference.

    Returns:
        int: Position in ``range(data.typeNum)`` of the lowest total; the
        first such position wins on ties.
    """
    # pretreat the sample
    # `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.
    # NOTE(review): assumes data.remainIndex holds row labels - confirm.
    sampleData = sampleData.loc[data.remainIndex]
    sampleData = np.log(sampleData + 1)

    preSimiList = []
    for preType in range(data.typeNum):
        # add the sample to each type
        newData = pd.concat([trainAll[preType], sampleData], axis=1)
        # presumably a Spearman-based similarity table - verify in spearmonR
        preSimiliar = sprm.getSimilarByList(newData, preType, data)
        # get delta
        sizeWeight = newData.shape[1] ** mult
        deltaChange = abs((edgesByType[preType] - preSimiliar) * sizeWeight)
        preSimiList.append(deltaChange.sum().sum())

    return preSimiList.index(min(preSimiList))
def predictRun(dataCache, coNum):
    """Install ``dataCache`` as the module-level ``data`` and run ``predict``.

    Args:
        dataCache: Object stored in the global ``data``, which ``predict``
            and ``predictSmpling`` read.
        coNum: Forwarded to ``predict`` as its ``coNum`` argument.
    """
    # The prediction routines look up `data` as a global rather than taking
    # it as a parameter, so it has to be published before they are called.
    global data
    data = dataCache

    print("\n5 test")
    predict(coNum)