-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhypergraph.cpp
executable file
·75 lines (65 loc) · 2.48 KB
/
hypergraph.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include "hypergraph.h"
// Argument that constructHMat() passes to shuffleMat(); shuffleMat() draws its
// number of shuffle passes as rand() % (this value).
#define RANDOMNESS 30
/**
 * Load a dataset file and build the hypergraph matrices from it.
 *
 * The loaded matrix is split into one label row and the remaining feature
 * rows; each part is one-hot encoded separately (lMat / hMat), head and tail
 * are initialised as copies of hMat, weight is set to all ones (one entry per
 * row of hMat), and finally the matrices are shuffled via shuffleMat().
 *
 * Side effect: the loaded matrix is written to "log/dataset.txt".
 *
 * @param filename   Path handed to data::Load.
 * @param labelFront true if the label is the first row of the loaded matrix,
 *                   false if it is the last row.
 * @return true on success; false if the loaded matrix has fewer than two rows
 *         (there must be at least one label row and one feature row). In that
 *         case no member matrix is modified.
 */
bool Hypergraph::constructHMat(string filename, bool labelFront){
    Mat<int> dataset;
    // data preprocessing, preprocess the categorical data to int category
    data::DatasetInfo info;
    // NOTE(review): the trailing `true` looks like mlpack's `fatal` flag, in which
    // case load failures are reported by mlpack itself rather than through this
    // function's return value -- confirm against the mlpack version in use.
    data::Load(filename, dataset, info, true);
    data::Save("log/dataset.txt", dataset);

    // n_rows is unsigned: with fewer than two rows, `n_rows - 1` / `n_rows - 2`
    // below would wrap around and produce out-of-range span bounds.
    if (dataset.n_rows < 2){
        return false;
    }

    // split the labels and features. They will be separately one-hot encoded.
    Mat<int> oriLabel, oriFeature;
    if (labelFront){
        oriLabel = dataset(0, span::all);
        oriFeature = dataset(span(1, dataset.n_rows - 1), span::all);
    }else{
        oriLabel = dataset(dataset.n_rows - 1, span::all);
        oriFeature = dataset(span(0, dataset.n_rows - 2), span::all);
    }

    // one-hot encode the label matrix and the feature matrix separately
    hMat = oneHotEncoding(oriFeature);
    lMat = oneHotEncoding(oriLabel);
    tail = hMat;
    head = hMat;
    weight = ones<Row<double>>(hMat.n_rows);

    // shuffle all matrices together
    shuffleMat(RANDOMNESS);
    return true;
}
/**
 * One-hot encode an integer-coded matrix, row by row.
 *
 * Each row of oriData is treated as one categorical attribute. A row whose
 * maximum value is m contributes (m + 1) consecutive rows to the result; for
 * every column, exactly one of those rows is set to 1 -- the one whose offset
 * within the group equals the value stored in oriData for that row/column.
 *
 * Assumes every entry of oriData is a non-negative category code (a negative
 * entry would index before the attribute's group) -- TODO confirm with callers.
 *
 * @param oriData Integer-coded input matrix.
 * @return Matrix with sum over rows of (row max + 1) rows and the same number
 *         of columns as oriData, containing only 0s and 1s.
 */
Mat<unsigned int> Hypergraph::oneHotEncoding(Mat<int> oriData){
    const int numAttrs = oriData.n_rows;
    const int numCols = oriData.n_cols;

    // First pass: count how many output rows are needed in total.
    int totalSlots = 0;
    for (int attr = 0; attr < numAttrs; ++attr){
        const Row<int> values = oriData.row(attr);
        totalSlots += values.max() + 1;
    }

    // Start from an all-zero matrix and switch on one entry per (attribute, column).
    Mat<unsigned int> encoded = zeros<Mat<unsigned int>>(totalSlots, numCols);

    // Second pass: `offset` is the first output row belonging to the current attribute.
    int offset = 0;
    for (int attr = 0; attr < numAttrs; ++attr){
        const Row<int> values = oriData.row(attr);
        for (int col = 0; col < numCols; ++col){
            encoded(offset + values(col), col) = 1;
        }
        offset += values.max() + 1; // advance past this attribute's group of rows
    }
    return encoded;
}
/**
 * Shuffle lMat, hMat, head and tail together.
 *
 * The four matrices are stacked vertically into one matrix, that matrix is
 * shuffled along dimension 1 a random number of times, and the result is cut
 * back into the four members using their original row counts. Because they
 * are stacked before shuffling, all four receive the same rearrangement.
 *
 * Side effect: the stacked (pre-shuffle) matrix is written to
 * "log/candidate.txt", and the C random generator is re-seeded from the clock.
 *
 * @param time_to_shuffle Exclusive upper bound on the number of shuffle
 *        passes; the actual count is rand() % time_to_shuffle, which may be 0
 *        (no shuffle at all). Values <= 0 result in zero passes.
 */
void Hypergraph::shuffleMat(int time_to_shuffle){
    // NOTE(review): whether this seed influences shuffle() below depends on the
    // random-number backend of the linear-algebra library -- confirm.
    srand(time(nullptr));

    // Guard the modulo: time_to_shuffle == 0 would be a division by zero.
    const int rounds = (time_to_shuffle > 0) ? rand() % time_to_shuffle : 0;

    // Remember each part's height so the stacked matrix can be split again.
    // head and tail are measured on their own instead of being assumed to have
    // the same number of rows as hMat.
    const int labelRows = lMat.n_rows;
    const int featRows = hMat.n_rows;
    const int headRows = head.n_rows;
    const int tailRows = tail.n_rows;

    Mat<unsigned int> candidate = join_vert(join_vert(lMat, hMat), join_vert(head, tail));
    mlpack::data::Save("log/candidate.txt", candidate);

    for (int pass = 0; pass < rounds; ++pass){
        candidate = shuffle(candidate, 1);
    }

    // split the candidate matrix back into its four parts, in stacking order
    const int featBegin = labelRows;
    const int headBegin = featBegin + featRows;
    const int tailBegin = headBegin + headRows;
    lMat = candidate(span(0, labelRows - 1), span::all);
    hMat = candidate(span(featBegin, headBegin - 1), span::all);
    head = candidate(span(headBegin, tailBegin - 1), span::all);
    tail = candidate(span(tailBegin, tailBegin + tailRows - 1), span::all);
}