forked from genialis/gotea
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinkbuilder.cpp
99 lines (86 loc) · 2.32 KB
/
linkbuilder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include<stdlib.h>
#include<string.h>
#include<stddef.h>
#include<math.h>
#include<algorithm>
#include<map>
#include<vector>
#include<utility>
using std::multimap;
using std::map;
using std::vector;
using std::make_pair;
#include"preprocessor.h"
#include"processor.h"
/* Every (gene name, term name) pair handed to add_link(), keyed by gene name
   and ordered by the ltstr comparator. Only the pointers are stored. */
static multimap<const char*, const char*, ltstr> links;
/* One entry per gene name / term name that is distinct under ltstr. The mapped
   value is set to 0 by add_link() and later overwritten by coagulate_links()
   with the offset assigned to that name inside the output buffer. */
static map<const char*, ptrdiff_t, ltstr> geneset,termset;
/* Gene names ordered by their assigned buffer offset; filled by coagulate_links(). */
static vector<const char*> sorted_links;
/*
 * Record one gene -> term association.
 *
 * The pair is appended to the link multimap and both names are registered in
 * their respective name tables with the stored offset (re)set to 0; the real
 * offsets are assigned later by coagulate_links().
 *
 * Only the pointers are stored, the strings themselves are not copied.
 */
void add_link(const char *gene_name, const char *term_name)
{
    links.emplace(gene_name, term_name);

    /* operator[] creates the entry on first sight and resets it otherwise. */
    geneset[gene_name] = 0;
    termset[term_name] = 0;
}
int coagulate_links(char **buf, size_t *buf_len)
{
int tsize=3*sizeof(int),linksize,j;
struct int_link_t *linktable;
float_type *logs;
for (auto &i : geneset)
{
if (tsize%sizeof(size_t)) { tsize+=sizeof(size_t)-tsize%sizeof(size_t); }
i.second=tsize+str_serialize_padsize(); /*string file offset*/
tsize+=str_serialize_len(i.first);
}
for (auto &i : termset)
{
if (tsize%sizeof(size_t)) { tsize+=sizeof(size_t)-tsize%sizeof(size_t); }
i.second=tsize+str_serialize_padsize(); /*string file offset*/
tsize+=str_serialize_len(i.first);
}
if (tsize%4096) { tsize+=4096-tsize%4096; }
for_each(geneset.begin(),geneset.end(),[] (decltype(geneset)::value_type &v) {
sorted_links.push_back(v.first);
});
sort(sorted_links.begin(),sorted_links.end(),[] (const char *g1, const char *g2) -> bool {
return geneset[g1]<geneset[g2];
});
linksize=(links.size()+geneset.size())*sizeof(struct int_link_t);
*buf_len=tsize + linksize + (geneset.size()+1)*sizeof(float_type);
*buf=(char*)malloc(*buf_len);
logs=(float_type*)((*buf)+tsize+linksize);
((int*)(*buf))[0]=tsize;
((int*)(*buf))[1]=geneset.size();
((int*)(*buf))[2]=links.size();
for (auto &i : geneset)
{
str_serialize((*buf)+i.second,i.first);
}
for (auto &i : termset)
{
str_serialize((*buf)+i.second,i.first);
}
linktable=(struct int_link_t*)((*buf)+tsize);
j=0;
for (auto &i : geneset)
{
linktable[j].gene.idx=geneset[i.first];
linktable[j].term.idx=0;
j++;
}
for (auto &link : sorted_links)
{
auto range=links.equal_range(link);
for (auto i=range.first;i!=range.second;++i)
{
linktable[j].gene.idx=geneset[i->first];
linktable[j].term.idx=termset[i->second];
j++;
}
}
logs[0]=0.0;
for (size_t i=1;i<=geneset.size();i++)
{
logs[i]=logs[i-1]+log((float_type)i);
}
return 1;
}