-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHEMSR.cpp
301 lines (271 loc) · 8.93 KB
/
HEMSR.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
#include "HEMSR.h"
HEMSR::HEMSR() {
numSub = 0;
numDimension = atts;
numGroup = (numDimension - 1) / gs + 1;
numState = pow(2, gs); // 每个维度上有两种状态
buckStep = (valDom - 1) / buks + 1;
numBucket = (valDom - 1) / buckStep + 1;
bitStep = numBucket >> 1;
cout << "ExpID = " << expID << ". HEMSRPS: bucketStep = " << buckStep << ", numBucket = " << numBucket << endl;
if (be != 0) be = 0;
//assert(be == 0);
//bucketSub.resize(numBucket);
data[0].resize(numDimension, vector<vector<Combo>>(numBucket));
data[1].resize(numDimension, vector<vector<Combo>>(numBucket));
endBucket[0] = new int[numBucket];
endBucket[1] = new int[numBucket];
bitsID[0] = new int[numBucket];
bitsID[1] = new int[numBucket];
}
HEMSR::~HEMSR() {
delete[] endBucket[0], endBucket[1], bitsID[0], bitsID[1];
}
void HEMSR::insert(IntervalSub sub)
{
for (int i = 0; i < sub.size; i++)
{
IntervalCnt cnt = sub.constraints[i];
Combo c;
// int bucketID = cnt.lowValue / buckStep; // Bug: 这里被坑了
c.val = cnt.lowValue;
c.subID = sub.id;
data[0][cnt.att][cnt.lowValue / buckStep].push_back(c);
c.val = cnt.highValue;
data[1][cnt.att][cnt.highValue / buckStep].push_back(c);
}
numSub++;
}
void HEMSR::initBits() {
// 如果有多次初始化
delete[] endBucket[0], endBucket[1], bitsID[0], bitsID[1];
endBucket[0] = new int[numBucket];
endBucket[1] = new int[numBucket];
bitsID[0] = new int[numBucket];
bitsID[1] = new int[numBucket];
bits[0].clear(), bits[1].clear();
bits[0].resize(numDimension);
bits[1].resize(numDimension);
fix[0].clear(), fix[1].clear();
fix[0].resize(numDimension);
fix[1].resize(numDimension);
bitsSR.clear();
bitsSR.resize(numGroup, vector<bitset<subs>>(numState)); // 最后一组可能没这么多状态
_for(i, 0, numBucket >> 1) {
bitsID[0][i] = 0; // 此时的0号代表0.5~1, 不是0~1
bitsID[1][i] = -1; // 此时用不到bits数组, -1表示非法
endBucket[0][i] = bitStep; // 标记时遍历到小于这个值
endBucket[1][i] = 0; // 标记时遍历到大于等于这个值
}
_for(i, numBucket >> 1, numBucket) {
bitsID[0][i] = -1;
bitsID[1][i] = 0;
endBucket[0][i] = numBucket;
endBucket[1][i] = bitStep;
}
_for(i, 0, numDimension) { // 每个维度
_for(j, 0, bitStep) { // 每个左半部分的桶
_for(k, 0, data[1][i][j].size()) // 桶里每个订阅
bits[1][i][data[1][i][j][k].subID] = 1; // high, i维, subID
fix[1][i] = fix[1][i] + data[1][i][j].size();
}
_for(j, bitStep, numBucket) { // 每个右半部分的桶
_for(k, 0, data[0][i][j].size()) // 桶里每个订阅
bits[0][i][data[0][i][j][k].subID] = 1; // high, i维, subID
fix[0][i] = fix[0][i] + data[0][i][j].size();
}
}
_for(g, 0, numGroup) {
int begin = g * gs, end = min(numDimension, (g + 1) * gs); // 为了适应最后一个组不满gs个维度的情况
int group_size = end - begin;
int state_num = pow(2, group_size); // 一般都等于numState
_for(s, 0, state_num) {
int si = 1; // s的最低位表示begin维度, 最高位表示end-1维度
_for(i, 0, group_size) {
if (si & s) // s的第i位为1表示用high上的bitset
bitsSR[g][s] = bitsSR[g][s] | bits[1][begin + i];
else bitsSR[g][s] = bitsSR[g][s] | bits[0][begin + i];
si = si << 1;
}
}
}
//cout << "Stop.\n";
return;
}
// 不计算时间组成
void HEMSR::match(const Pub& pub, int& matchSubs)
{
bitset<subs> globalBitset;
vector<int> attExist(numDimension, -1);
int value, att, buck;
_for(i, 0, pub.size)
{
value = pub.pairs[i].value, att = pub.pairs[i].att, buck = value / buckStep;
attExist[att] = bitsID[1][buck] + 1; // 用low上的就是0, 用high上的就是1
_for(k, 0, data[0][att][buck].size())
if (data[0][att][buck][k].val > value)
globalBitset[data[0][att][buck][k].subID] = 1;
_for(k, 0, data[1][att][buck].size())
if (data[1][att][buck][k].val < value)
globalBitset[data[1][att][buck][k].subID] = 1;
_for(j, buck + 1, endBucket[0][buck])
_for(k, 0, data[0][att][j].size())
globalBitset[data[0][att][j][k].subID] = 1;
mmfor(j, buck - 1, endBucket[1][buck])
_for(k, 0, data[1][att][j].size())
globalBitset[data[1][att][j][k].subID] = 1;
//Timer orStart;
//if (bitsID[0][buck] != -1)
// globalBitset = globalBitset | bits[0][att];
//else //if (bitsID[1][buck] != -1) // 每个维度上只有两种状态
// globalBitset = globalBitset | bits[1][att]; // Bug: 是att不是i
//orTime += (double)orStart.elapsed_nano();
}
_for(i, 0, numDimension)
if (attExist[i] == -1) { // 空维度
if (fix[0][i] >= fix[1][i]) { // low右边的谓词多, 用0号bitset
attExist[i] = 0;
_for(j, 0, bitStep)
_for(k, 0, data[0][i][j].size())
globalBitset[data[0][i][j][k].subID] = 1;
}
else {
attExist[i] = 1;
_for(j, bitStep, numBucket)
_for(k, 0, data[1][i][j].size())
globalBitset[data[1][i][j][k].subID] = 1;
}
}
int i = 0, j, state, end; // i 遍历每个维度, j 遍历组内部的每一位, 每个组最后一个元素的下一个位置是end,
_for(g, 0, numGroup) {
state = 0, j = 1;
end = min(numDimension, i + gs);
while (i < end) {
if (attExist[i])
state |= j;
j = j << 1;
i++;
}
globalBitset = globalBitset | bitsSR[g][state];
}
// _for(i, 0, subs)
// if (!globalBitset[i])
// {
// ++matchSubs;
// //cout << "HEMSR matches sub: : " << i << endl;
// }
matchSubs = subs - globalBitset.count();
}
// 计算时间组成
void HEMSR::match_debug(const Pub& pub, int& matchSubs)
{
bitset<subs> globalBitset;
vector<int> attExist(numDimension, -1);
int value, att, buck;
_for(i, 0, pub.size)
{
Timer compareStart;
value = pub.pairs[i].value, att = pub.pairs[i].att, buck = value / buckStep;
attExist[att] = bitsID[1][buck] + 1; // 用low上的就是0, 用high上的就是1
_for(k, 0, data[0][att][buck].size())
if (data[0][att][buck][k].val > value)
globalBitset[data[0][att][buck][k].subID] = 1;
_for(k, 0, data[1][att][buck].size())
if (data[1][att][buck][k].val < value)
globalBitset[data[1][att][buck][k].subID] = 1;
compareTime += (double)compareStart.elapsed_nano();
Timer markStart;
_for(j, buck + 1, endBucket[0][buck])
_for(k, 0, data[0][att][j].size())
globalBitset[data[0][att][j][k].subID] = 1;
mmfor(j, buck - 1, endBucket[1][buck])
_for(k, 0, data[1][att][j].size())
globalBitset[data[1][att][j][k].subID] = 1;
markTime += (double)markStart.elapsed_nano();
//Timer orStart;
//if (bitsID[0][buck] != -1)
// globalBitset = globalBitset | bits[0][att];
//else //if (bitsID[1][buck] != -1) // 每个维度上只有两种状态
// globalBitset = globalBitset | bits[1][att]; // Bug: 是att不是i
//orTime += (double)orStart.elapsed_nano();
}
Timer markStart2;
_for(i, 0, numDimension)
if (attExist[i] == -1) { // 空维度
if (fix[0][i] >= fix[1][i]) { // low右边的谓词多, 用0号bitset
attExist[i] = 0;
_for(j, 0, bitStep)
_for(k, 0, data[0][i][j].size())
globalBitset[data[0][i][j][k].subID] = 1;
}
else {
attExist[i] = 1;
_for(j, bitStep, numBucket)
_for(k, 0, data[1][i][j].size())
globalBitset[data[1][i][j][k].subID] = 1;
}
}
markTime += (double)markStart2.elapsed_nano();
Timer orStart;
int i = 0, j, state, end; // i 遍历每个维度, j 遍历组内部的每一位, 每个组最后一个元素的下一个位置是end,
_for(g, 0, numGroup) {
state = 0, j = 1;
end = min(numDimension, i + gs);
while (i < end) {
if (attExist[i])
state |= j;
j = j << 1;
i++;
}
globalBitset = globalBitset | bitsSR[g][state];
}
orTime += (double)orStart.elapsed_nano();
Timer bitStart;
//_for(i, 0, subs)
// if (!globalBitset[i])
// {
// ++matchSubs;
// //cout << "HEMSR matches sub: : " << i << endl;
// }
matchSubs = subs - globalBitset.count();
bitTime += (double)bitStart.elapsed_nano();
}
//void HEMSR::calBucketSize() {
// bucketSub.clear();
// bucketSub.resize(numBucket);
// _for(i, 0, numDimension)
// _for(j, 0, numBucket)
// {
// _for(k, 0, data[0][i][j].size())
// bucketSub[j].insert(data[0][i][j][k].subID);
// _for(k, 0, data[1][i][j].size())
// bucketSub[j].insert(data[1][i][j][k].subID);
// }
//}
int HEMSR::calMemory() {
long long size = 0; // Byte
size += 2 * numDimension * sizeof(bitset<subs>); // bits
size += numGroup*numState* sizeof(bitset<subs>); // bitsSR
size -= (numState - pow(2, numDimension % numGroup)) * sizeof(bitset<subs>); // 减去bitsSR最后一组多算的内存大小
_for(i, 0, numDimension) {
_for(j, 0, numBucket)
size += sizeof(Combo) * (data[0][i][j].size() + data[1][i][j].size());
}
// 两个endBucket和两个bitsID和两个fix
size += (4 * numBucket + 2 * numDimension) * sizeof(int);
size = size / 1024 / 1024; // MB
return (int)size;
}
void HEMSR::printRelation() {
cout << "\n\nHEM-SR-PS Map LowBucket\n";
_for(i, 0, numBucket) {
cout << "LBkt" << i << ": bID=" << bitsID[0][i] << ", eBkt=" << endBucket[0][i] << "; ";
if (i % 5 == 0 && i > 0)cout << "\n";
}
cout << "\n\nHEM-SR-PS Map HighBucket\n";
_for(i, 0, numBucket) {
cout << "HBkt" << i << ": bID=" << bitsID[1][i] << ", eBkt=" << endBucket[1][i] << "; ";
if (i % 5 == 0 && i > 0)cout << "\n";
}
cout << "\n\n";
}