-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdedupdef.h
219 lines (170 loc) · 5.47 KB
/
dedupdef.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#ifndef _DEDUPDEF_H_
#define _DEDUPDEF_H_
#include <sys/types.h>
#include <stdint.h>
#include <assert.h>
#include "config.h"
#include "mbuffer.h"
#include "sha.h"
/* NOTE(review): the meaning of this magic value is not visible in this header - confirm against its users. */
#define CHECKBIT 123456
/* NOTE(review): presumably an upper bound on the number of threads - confirm against its users. */
#define MAX_THREADS 1024
/*-----------------------------------------------------------------------*/
/* type definition */
/*-----------------------------------------------------------------------*/
/* Unsigned 8-bit aliases */
typedef uint8_t byte;
typedef uint8_t u_char;
typedef uint8_t uchar;
typedef uint8_t u_int8;
/* Unsigned 16-bit aliases */
typedef uint16_t u_int16;
typedef uint16_t u16int;
/* Unsigned 32-bit aliases */
typedef uint32_t u_int;
typedef uint32_t u_int32;
typedef uint32_t u32int;
/* Unsigned 64-bit aliases */
typedef uint64_t u_long;
typedef uint64_t ulong;
typedef uint64_t u_int64;
typedef uint64_t u64int;
/* Signed aliases, one per width */
typedef int8_t int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
/*-----------------------------------------------------------------------*/
/* useful macros */
/*-----------------------------------------------------------------------*/
/* Number of elements in an array (the argument must be a true array, not a
 * pointer). The argument is parenthesized before indexing so that expression
 * arguments such as a cast or a dereference are indexed as a whole. */
#ifndef NELEM
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))
#endif
/* Larger of two values. Each argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
/* Smaller of two values. Each argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
/* Boolean constants, defined only if nothing else has provided them. */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/* Fallback used only when O_LARGEFILE has not been defined before this point.
 * NOTE(review): the octal value is platform-specific - confirm it is correct
 * for every target this header is built on. */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0100000
#endif
#define EXT ".ddp" /* extension */
#define EXT_LEN (sizeof(EXT)-1) /* extension length, not counting the terminating NUL */
/*----------------------------------------------------------------------*/
/* The possible states of a data chunk. */
typedef enum {
  /* only uncompressed data available */
  CHUNK_STATE_UNCOMPRESSED = 0,
  /* compressed data available, but nothing else */
  CHUNK_STATE_COMPRESSED,
  /* no data available because chunk has already been flushed */
  CHUNK_STATE_FLUSHED
} chunk_state_t;
#ifdef ENABLE_PTHREADS
//A two-level sequence number, plus the scalar type used for each of its levels
typedef u_int32 sequence_number_t;
struct _sequence_t {
  //first level id
  sequence_number_t l1num;
  //second level id
  sequence_number_t l2num;
};
typedef struct _sequence_t sequence_t;
//Equality test for sequence numbers: non-zero if and only if both levels of s1 and s2 match
static inline int sequence_eq(sequence_t s1, sequence_t s2) {
  if(s1.l1num != s2.l1num) {
    return 0;
  }
  return s1.l2num == s2.l2num;
}
//Ordering test: non-zero if and only if s1 < s2.
//The first level is compared first; the second level only breaks ties.
static inline int sequence_lt(sequence_t s1, sequence_t s2) {
  if(s1.l1num < s2.l1num) return TRUE;
  if(s1.l1num != s2.l1num) return 0; //first level of s1 is larger
  return s1.l2num < s2.l2num;
}
//Ordering test: non-zero if and only if s1 > s2.
//The first level is compared first; the second level only breaks ties.
static inline int sequence_gt(sequence_t s1, sequence_t s2) {
  if(s1.l1num > s2.l1num) return TRUE;
  if(s1.l1num != s2.l1num) return 0; //first level of s1 is smaller
  return s1.l2num > s2.l2num;
}
//Advances a sequence number by one step.
//ubound is the upper bound for the 2nd level number: once the incremented
//2nd level reaches it, the 1st level is incremented and the 2nd level restarts at 0.
static inline void sequence_inc(sequence_t *s, sequence_number_t ubound) {
  assert(s != NULL);
  const sequence_number_t next_l2 = s->l2num + 1;
  if(next_l2 < ubound) {
    s->l2num = next_l2;
    return;
  }
  //2nd level is exhausted: carry into the 1st level
  s->l1num += 1;
  s->l2num = 0;
}
//Moves to the next L1 number; the L2 number starts over at 0
static inline void sequence_inc_l1(sequence_t *s) {
  assert(s != NULL);
  s->l2num = 0;
  s->l1num += 1;
}
//Moves to the next L2 number; the L1 number is left untouched
static inline void sequence_inc_l2(sequence_t *s) {
  assert(s != NULL);
  s->l2num += 1;
}
//Sets both levels of a sequence number back to 0
static inline void sequence_reset(sequence_t *s) {
  assert(s != NULL);
  s->l2num = 0;
  s->l1num = 0;
}
#endif //ENABLE_PTHREADS
//The data type of a chunk, the basic work unit of dedup.
//A chunk flows through all the pipeline stages, where it gets increasingly refined.
typedef struct _chunk_t {
//Bookkeeping about the chunk itself (as opposed to its payload buffers below)
struct {
int isDuplicate; //whether this is an original chunk or a duplicate
chunk_state_t state; //which type of data this chunk contains
#ifdef ENABLE_PTHREADS
//Once a chunk has been added to the global database, accesses to the
//state require synchronization because the chunk is globally viewable
pthread_mutex_t lock;
pthread_cond_t update;
#endif //ENABLE_PTHREADS
} header;
//The SHA1 sum of the chunk, computed by SHA1/Routing stage from the uncompressed chunk data.
//NOTE: declared as unsigned int to force integer alignment for the hashtable;
//SHA1_LEN must be a multiple of sizeof(unsigned int).
unsigned int sha1[SHA1_LEN/sizeof(unsigned int)];
//FIXME: This can be put into a union to save space.
//The uncompressed version of the chunk, created by chunking stage(s)
mbuffer_t uncompressed_data;
//The compressed version of the chunk, created by compression stage
//based on uncompressed version (only if !isDuplicate)
mbuffer_t compressed_data;
//Reference to original chunk with compressed data (only if isDuplicate)
struct _chunk_t *compressed_data_ref;
#ifdef ENABLE_PTHREADS
//Original location of the chunk in input stream (for reordering)
sequence_t sequence;
//Whether this is the last L2 chunk for the given L1 number
int isLastL2Chunk;
#endif //ENABLE_PTHREADS
} chunk_t;
//Size in bytes of the infile/outfile name buffers in config_t
#define LEN_FILENAME 256
//NOTE(review): TYPE_* look like tags for kinds of records; their users are not visible in this header - confirm.
#define TYPE_FINGERPRINT 0
#define TYPE_COMPRESS 1
#define TYPE_ORIGINAL 2
//Parenthesized so the macro expands as one value inside larger expressions
//(e.g. x / QUEUE_SIZE or x % QUEUE_SIZE); the type stays unsigned long.
#define QUEUE_SIZE (1024UL*1024)
#define MAXBUF (128*1024*1024) /* 128 MB for buffers */
#define ANCHOR_JUMP (2*1024*1024) //best for all 2*1024*1024
//NOTE(review): the constants below look like batch sizes for fetch/insert
//operations; their users are not visible in this header - confirm.
#define MAX_PER_FETCH 10000
#define ITEM_PER_FETCH 20
#define ITEM_PER_INSERT 20
#define CHUNK_ANCHOR_PER_FETCH 20
#define CHUNK_ANCHOR_PER_INSERT 20
#define ANCHOR_DATA_PER_INSERT 1
//Program configuration record.
//NOTE(review): the per-field notes are inferred from the field names only - confirm against the code that fills this struct.
typedef struct {
char infile[LEN_FILENAME]; //input file name buffer (LEN_FILENAME bytes)
char outfile[LEN_FILENAME]; //output file name buffer (LEN_FILENAME bytes)
int compress_type; //presumably one of the COMPRESS_* constants - TODO confirm
int preloading; //presumably a flag - TODO confirm
int nthreads; //presumably the number of threads to use - TODO confirm
int verbose; //presumably a verbosity flag or level - TODO confirm
} config_t;
//Compression method selectors.
//NOTE(review): presumably the values stored in config_t.compress_type - confirm against users.
#define COMPRESS_GZIP 0
#define COMPRESS_BZIP2 1
#define COMPRESS_NONE 2
//NOTE(review): presumably a size limit used when uncompressing; unit and users are not visible in this header - confirm.
#define UNCOMPRESS_BOUND 10000000
#endif //_DEDUPDEF_H_