-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathdecompress.ml
344 lines (316 loc) · 11.3 KB
/
decompress.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
(* Preallocated state shared by the codec entry points of this file. These
   values are created once, at module initialisation, and reused by every
   [run_*] function that needs them. *)

(* Window created with [~bits:15]; given as [~w] to the decoder built in
   [run_inflate]. *)
let w = De.make_window ~bits:15
(* LZ77 window created with [~bits:15]; given as [~w:l] to the LZ77 state of
   [run_deflate] and to the zlib and gzip encoders. *)
let l = De.Lz77.make_window ~bits:15
(* Output buffer of [De.io_buffer_size] elements: the codecs write into it and
   [bigstring_output] copies it to the output channel. *)
let o = De.bigstring_create De.io_buffer_size
(* Input buffer of [De.io_buffer_size] elements: [bigstring_input] fills it
   from the input channel before it is handed to a codec. *)
let i = De.bigstring_create De.io_buffer_size
(* Queue created with the size argument 4096; given as [~q] to the LZ77 state
   and to the encoders. NOTE(review): the unit of that size (elements or
   bytes) is not visible from this file - confirm against the De
   documentation. *)
let q = De.Queue.create 4096
(* [str fmt ...] formats its arguments according to [fmt] and returns the
   resulting string. *)
let str fmt = Format.kasprintf (fun text -> text) fmt

(* [msgf fmt ...] formats its arguments and wraps the text into [`Msg]. *)
let msgf fmt = Format.kasprintf (fun text -> `Msg text) fmt

(* [error_msgf fmt ...] formats its arguments and returns the text as
   [Error (`Msg text)]. *)
let error_msgf fmt =
  let fail text = Error (`Msg text) in
  Format.kasprintf fail fmt
(* [bigstring_input ic buf off len] reads at most [len] bytes from the channel
   [ic] and stores them into the bigarray [buf], starting at offset [off].
   The result is the number of bytes stored; it is 0 when nothing could be
   read or when [End_of_file] is raised by the read. *)
let bigstring_input ic buf off len =
  (* Channels read into [bytes], so go through a scratch buffer first. *)
  let scratch = Bytes.create len in
  match input ic scratch 0 len with
  | exception End_of_file -> 0
  | got ->
    let rec copy idx =
      if idx < got then begin
        buf.{off + idx} <- Bytes.get scratch idx
        ; copy (idx + 1)
      end in
    copy 0 ; got
(* [bigstring_output oc buf off len] writes the [len] bytes of the bigarray
   [buf] found at offset [off] to the channel [oc]. *)
let bigstring_output oc buf off len =
  (* Channels write from [bytes], so copy the slice out of the bigarray. *)
  let chunk = Bytes.init len (fun idx -> buf.{off + idx}) in
  output_bytes oc chunk
(* [run_inflate ic oc] decodes the contents of [ic] with [De.Inf] and writes
   the decoded bytes to [oc]. It uses the shared buffers [i] and [o] and the
   shared window [w].
   Returns [`Ok 0] once the decoder reports [`End], or
   [`Error (false, msg)] when it reports [`Malformed]. *)
let run_inflate ic oc =
  let open De in
  let decoder = Inf.decoder `Manual ~o ~w in
  (* Number of bytes the decoder has written into [o] so far. *)
  let produced () = io_buffer_size - Inf.dst_rem decoder in
  let rec loop () =
    match Inf.decode decoder with
    | `Await ->
      (* The decoder wants more input: refill [i] from the channel. *)
      let filled = bigstring_input ic i 0 io_buffer_size in
      Inf.src decoder i 0 filled
      ; loop ()
    | `Flush ->
      bigstring_output oc o 0 (produced ())
      ; Inf.flush decoder
      ; loop ()
    | `Malformed err -> `Error (false, str "%s." err)
    | `End ->
      (* Write whatever is still sitting in [o] before finishing. *)
      let rest = produced () in
      if rest > 0 then bigstring_output oc o 0 rest
      ; `Ok 0 in
  loop ()
(* [run_deflate ~level ic oc] compresses the contents of [ic] and writes the
   result to [oc]. The LZ77 state reads directly from [ic] and the encoder
   writes directly to [oc] (both are created with [`Channel]); they share the
   queue [q]. Every [`Flush] of the LZ77 state is encoded as a non-final
   dynamic block built from the current frequencies; on [`End] the
   end-of-block marker is pushed and a final fixed block is encoded.
   Always returns [`Ok 0]. *)
let run_deflate ~level ic oc =
  let open De in
  let state = Lz77.state ~level ~q ~w:l (`Channel ic) in
  let encoder = Def.encoder (`Channel oc) ~q in
  let rec step () =
    match De.Lz77.compress state with
    | `Await -> assert false
    | `Flush ->
      let literals = Lz77.literals state in
      let distances = Lz77.distances state in
      let dynamic = Def.dynamic_of_frequencies ~literals ~distances in
      let block = `Block {Def.kind= Dynamic dynamic; last= false} in
      resume (Def.encode encoder block)
    | `End ->
      Queue.push_exn q Queue.eob
      ; finish (Def.encode encoder (`Block {Def.kind= Fixed; last= true}))
  (* After a non-final block: go back to the LZ77 state for more data. *)
  and resume = function
    | `Partial -> assert false
    | `Ok | `Block -> step ()
  (* After the final block: only [`Ok] is accepted. *)
  and finish = function `Partial | `Block -> assert false | `Ok -> () in
  Def.dst encoder o 0 io_buffer_size
  ; step ()
  ; `Ok 0
(* [run_zlib_inflate ic oc] decodes the contents of [ic] with [Zl.Inf] and
   writes the decoded bytes to [oc], going through the shared buffers [i]
   and [o]. The decoder is a value threaded through the loop: every step
   returns the decoder to use for the next one.
   Returns [`Ok 0] on [`End], or [`Error (false, msg)] on [`Malformed]. *)
let run_zlib_inflate ic oc =
  let open Zl in
  let allocate bits = De.make_window ~bits in
  (* Number of bytes the given decoder has written into [o]. *)
  let produced decoder = De.io_buffer_size - Inf.dst_rem decoder in
  let rec loop current =
    match Inf.decode current with
    | `Await next ->
      let filled = bigstring_input ic i 0 De.io_buffer_size in
      loop (Inf.src next i 0 filled)
    | `Flush next ->
      bigstring_output oc o 0 (produced next)
      ; loop (Inf.flush next)
    | `Malformed err -> `Error (false, str "%s." err)
    | `End next ->
      let rest = produced next in
      if rest > 0 then bigstring_output oc o 0 rest
      ; `Ok 0 in
  loop (Inf.decoder `Manual ~o ~allocate)
(* [run_zlib_deflate ~level ic oc] encodes the contents of [ic] with [Zl.Def]
   and writes the result to [oc], going through the shared buffers [i] and
   [o]. The encoder is a value threaded through the loop: every step returns
   the encoder to use for the next one. Always returns [`Ok 0]. *)
let run_zlib_deflate ~level ic oc =
  let open Zl in
  (* Number of bytes the given encoder has written into [o]. *)
  let produced encoder = De.io_buffer_size - Def.dst_rem encoder in
  let rec loop current =
    match Def.encode current with
    | `Await next ->
      let filled = bigstring_input ic i 0 De.io_buffer_size in
      loop (Def.src next i 0 filled)
    | `Flush next ->
      bigstring_output oc o 0 (produced next)
      (* Hand the whole of [o] back to the encoder as destination. *)
      ; loop (Def.dst next o 0 De.io_buffer_size)
    | `End next ->
      let rest = produced next in
      if rest > 0 then bigstring_output oc o 0 rest
      ; `Ok 0 in
  let encoder = Def.encoder `Manual `Manual ~q ~w:l ~level in
  loop (Def.dst encoder o 0 De.io_buffer_size)
(* [run_gzip_inflate ic oc] decodes the contents of [ic] with [Gz.Inf] and
   writes the decoded bytes to [oc], going through the shared buffers [i]
   and [o]. The decoder is a value threaded through the loop.
   Returns [`Ok 0] on [`End], or [`Error (false, msg)] on [`Malformed]. *)
let run_gzip_inflate ic oc =
  let open Gz in
  (* Number of bytes the given decoder has written into [o]. *)
  let produced decoder = io_buffer_size - Inf.dst_rem decoder in
  let rec loop current =
    match Inf.decode current with
    | `Await next ->
      let filled = bigstring_input ic i 0 io_buffer_size in
      loop (Inf.src next i 0 filled)
    | `Flush next ->
      bigstring_output oc o 0 (produced next)
      ; loop (Inf.flush next)
    | `Malformed err -> `Error (false, str "%s." err)
    | `End next ->
      let rest = produced next in
      if rest > 0 then bigstring_output oc o 0 rest
      ; `Ok 0 in
  loop (Inf.decoder `Manual ~o)
(* [now ()] is the value of [Unix.gettimeofday ()] converted to an [int32];
   it is used as the [~mtime] of the gzip encoder. *)
let now () =
  let seconds = Unix.gettimeofday () in
  Int32.of_float seconds
(* [run_gzip_deflate ~level ic oc] encodes the contents of [ic] with [Gz.Def]
   and writes the result to [oc], going through the shared buffers [i] and
   [o]. The encoder is created with [now ()] as [~mtime] and [Gz.Unix] as
   operating system, and is threaded through the loop as a value.
   Always returns [`Ok 0]. *)
let run_gzip_deflate ~level ic oc =
  let open Gz in
  (* Number of bytes the given encoder has written into [o]. *)
  let produced encoder = io_buffer_size - Def.dst_rem encoder in
  let rec loop current =
    match Def.encode current with
    | `Await next ->
      let filled = bigstring_input ic i 0 io_buffer_size in
      loop (Def.src next i 0 filled)
    | `Flush next ->
      bigstring_output oc o 0 (produced next)
      (* Hand the whole of [o] back to the encoder as destination. *)
      ; loop (Def.dst next o 0 io_buffer_size)
    | `End next ->
      let rest = produced next in
      if rest > 0 then bigstring_output oc o 0 rest
      ; `Ok 0 in
  let encoder =
    Def.encoder `Manual `Manual ~q ~w:l ~level ~mtime:(now ()) Gz.Unix in
  loop (Def.dst encoder o 0 io_buffer_size)
(* Low-level accessors used to copy a string into a [Lzo.bigstring] four
   bytes at a time, with a byte-wise copy for the remaining tail. *)

(* [string_get_uint32 s off] reads a 32-bit value from [s] at byte offset
   [off] through the compiler primitive %caml_string_get32.
   NOTE(review): presumably native byte order, matching the setter below -
   confirm. *)
external string_get_uint32 : string -> int -> int32 = "%caml_string_get32"
(* [bigstring_set_uint32 b off v] stores the 32-bit value [v] into [b] at
   byte offset [off] through the compiler primitive %caml_bigstring_set32. *)
external bigstring_set_uint32 : Lzo.bigstring -> int -> int32 -> unit
= "%caml_bigstring_set32"
(* [string_get_uint8 str idx] is the code of the character of [str] at index
   [idx], as an [int]. *)
let string_get_uint8 str idx = Char.code (String.get str idx)
(* [bigstring_set_uint8 b idx v] stores [v] at index [idx] of [b] through the
   compiler primitive %caml_ba_set_1. The value is declared as an [int]
   rather than a [char].
   NOTE(review): this relies on the primitive accepting an integer for a
   char bigarray - confirm it is intended. *)
external bigstring_set_uint8 : Lzo.bigstring -> int -> int -> unit
= "%caml_ba_set_1"
(* [bigstring_of_channel ic] reads [ic] until no more data is available and
   returns everything that was read as a freshly allocated char bigarray.
   Private helper shared by [run_lzo_deflate] and [run_lzo_inflate], which
   both need the whole input in memory. *)
let bigstring_of_channel ic =
  let contents =
    let buf = Buffer.create 0x1000 in
    let tmp = Bytes.create 0x100 in
    let rec go () =
      match input ic tmp 0 (Bytes.length tmp) with
      | 0 -> Buffer.contents buf
      | len ->
        Buffer.add_subbytes buf tmp 0 len
        ; go ()
      | exception End_of_file -> Buffer.contents buf in
    go () in
  let len = String.length contents in
  let res = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len in
  (* Copy four bytes at a time, then the 0 to 3 remaining bytes one by one. *)
  let len0 = len land 3 in
  let len1 = len asr 2 in
  for i = 0 to len1 - 1 do
    let i = i * 4 in
    bigstring_set_uint32 res i (string_get_uint32 contents i)
  done
  ; for i = 0 to len0 - 1 do
      let i = (len1 * 4) + i in
      bigstring_set_uint8 res i (string_get_uint8 contents i)
    done
  ; res

(* [run_lzo_deflate ic oc] reads the whole of [ic], compresses it with
   [Lzo.compress] and writes the compressed bytes to [oc].
   Returns [`Ok 0] on success, or [`Error (false, msg)] when [Lzo.compress]
   raises [Invalid_argument]. *)
let run_lzo_deflate ic oc =
  let wrkmem = Lzo.make_wrkmem () in
  let in_contents = bigstring_of_channel ic in
  let in_len = Bigarray.Array1.dim in_contents in
  (* Twice the input size is too small for empty or tiny inputs: the
     documented LZO1X worst case is [len + len / 16 + 64 + 3] bytes. Take the
     larger of the two so that incompressible or very short inputs fit. *)
  let out_len = max (in_len * 2) (in_len + (in_len / 16) + 64 + 3) in
  let out_contents = Bigarray.(Array1.create char c_layout out_len) in
  match Lzo.compress in_contents out_contents wrkmem with
  | len ->
    bigstring_output oc out_contents 0 len
    ; `Ok 0
  | exception Invalid_argument msg ->
    (* Report the failure instead of aborting on an assertion. *)
    `Error (false, str "LZO compression failed: %s." msg)

(* [run_lzo_inflate ic oc] reads the whole of [ic], decompresses it with
   [Lzo.uncompress_with_buffer] and writes the result to [oc].
   Returns [`Ok 0] on success, or [`Error (false, msg)] with the error
   printed by [Lzo.pp_error]. *)
let run_lzo_inflate ic oc =
  let in_contents = bigstring_of_channel ic in
  match Lzo.uncompress_with_buffer in_contents with
  | Ok contents -> output_string oc contents ; `Ok 0
  | Error err -> `Error (false, str "%a." Lzo.pp_error err)
(* [run deflate format level filename_ic filename_oc] is the body of the
   command line tool.

   - [deflate]: [true] to compress, [false] to decompress.
   - [format]: one of [`Deflate], [`Zlib], [`Gzip] or [`Lzo].
   - [level]: compression level, given to the deflate, zlib and gzip
     compressors (the LZO one takes no level).
   - [filename_ic]: input file, or [None] to read from [stdin].
   - [filename_oc]: output file, or [None] to write to [stdout].

   Returns the result of the selected [run_*] function. Files opened here are
   closed on every path, including when opening the output file or running
   the codec raises; [stdin] and [stdout] are never closed. *)
let run deflate format level filename_ic filename_oc =
  let ic, close_ic =
    match filename_ic with
    | Some filename ->
      let ic = open_in_bin filename in
      ic, (fun () -> close_in_noerr ic)
    | None -> stdin, ignore in
  (* The input must be released even if anything below raises. *)
  Fun.protect ~finally:close_ic @@ fun () ->
  let oc, close_oc, discard_oc =
    match filename_oc with
    | Some filename ->
      let oc = open_out_bin filename in
      oc, (fun () -> close_out oc), (fun () -> close_out_noerr oc)
    | None -> stdout, ignore, ignore in
  let dispatch () =
    match deflate, format with
    | true, `Deflate -> run_deflate ~level ic oc
    | false, `Deflate -> run_inflate ic oc
    | true, `Zlib -> run_zlib_deflate ~level ic oc
    | false, `Zlib -> run_zlib_inflate ic oc
    | true, `Gzip -> run_gzip_deflate ~level ic oc
    | false, `Gzip -> run_gzip_inflate ic oc
    | true, `Lzo -> run_lzo_deflate ic oc
    | false, `Lzo -> run_lzo_inflate ic oc in
  match dispatch () with
  | res ->
    (* Normal path: a failure to flush or close the output is still raised. *)
    close_oc () ; res
  | exception exn ->
    (* Failure path: release the output without masking the original
       exception, and re-raise it with its backtrace. *)
    let backtrace = Printexc.get_raw_backtrace () in
    discard_oc ()
    ; Printexc.raise_with_backtrace exn backtrace
open Cmdliner
(* Command line flag [-d]: when present, the tool compresses its input
   instead of decompressing it. *)
let deflate =
  let doc = "Ask to deflate inputs (instead of inflate)." in
  let flag_info = Arg.info ["d"] ~doc in
  Arg.value (Arg.flag flag_info)
(* Command line option [-f]/[--format]: the format to read or write. The
   value is matched case-insensitively against zlib, gzip, deflate and lzo;
   the default is [`Deflate]. *)
let format =
  let known =
    ["zlib", `Zlib; "gzip", `Gzip; "deflate", `Deflate; "lzo", `Lzo] in
  let parser s =
    let key = String.lowercase_ascii s in
    match List.assoc_opt key known with
    | Some fmt -> Ok fmt
    | None -> error_msgf "Invalid format: %S" key in
  let pp ppf fmt =
    let name =
      match fmt with
      | `Zlib -> "zlib"
      | `Gzip -> "gzip"
      | `Deflate -> "deflate"
      | `Lzo -> "lzo" in
    Format.pp_print_string ppf name in
  let format = Arg.conv (parser, pp) in
  let opt_info = Arg.info ["f"; "format"] ~docv:"<format>" in
  Arg.value (Arg.opt format `Deflate opt_info)
(* First positional argument: optional input file, converted with
   [Arg.file]. *)
let input =
  let pos_info = Arg.info [] ~docv:"<filename>" in
  Arg.value (Arg.pos 0 (Arg.some Arg.file) None pos_info)

(* Second positional argument: optional output file name, taken as a plain
   string. *)
let output =
  let pos_info = Arg.info [] ~docv:"<filename>" in
  Arg.value (Arg.pos 1 (Arg.some Arg.string) None pos_info)
(* Command line option [-l]/[--level]: the compression level. The value must
   parse as an integer greater than or equal to 0; the default is 4. *)
let level =
  let parser str =
    match int_of_string_opt str with
    | Some n when n >= 0 -> Ok n
    | Some _ -> Error (`Msg "The compression level must be positive")
    | None -> Error (`Msg "Invalid level") in
  let positive_int = Arg.conv (parser, Format.pp_print_int) in
  let opt_info = Arg.info ["l"; "level"] ~docv:"<level>" in
  Arg.value (Arg.opt positive_int 4 opt_info)
(* The [decompress] command: its one-line description, its manual page and
   the term that applies [run] to the parsed command line arguments
   ([deflate], [format], [level], [input] and [output], in that order). *)
let command =
  let doc =
    "A tool to deflate/inflate a stream/file through a specified format." in
  let man =
    [
      `S Manpage.s_description
    ; `P
        "$(tname) reads from the standard input and writes the \
         deflated/inflated data to the standard output. Several formats \
         exist:"
    ; `I
        ( "DEFLATE"
        , "DEFLATE is a lossless data compression file format that uses a \
           combination of LZ77 and Huffman coding. It is specified in RFC 1951 \
           <https://datatracker.ietf.org/doc/html/rfc1951>." ); `Noblank
    ; `I
        ( "GZip"
        , "GZip is a file format based on the DEFLATE algorithm, which is a \
           combination of LZ77 and Huffman coding. It encodes some information \
           such as: the timestamp, the filename, or the operating system \
           (which operates the deflation). It generates a CRC-32 checksum at \
           the end of the stream. It is described by the RFC 1952 \
           <https://datatracker.ietf.org/doc/html/rfc1952>." ); `Noblank
    ; `I
        ( "Zlib"
        , "Zlib is an $(i,abstraction) of the DEFLATE compression \
           algorithm which terminates the stream with an ADLER-32 checksum." )
    ; `Noblank
    ; `I
        ( "Lempel-Ziv-Oberhumer (LZO)"
        , "Lempel-Ziv-Oberhumer is a lossless data compression algorithm that \
           is focused on decompression speed." ); `S Manpage.s_examples
    ; `P
        "This is a small example of how to use $(tname) in your favorite shell:"
    ; `Pre
        "\\$ $(tname) -f gzip -d <<EOF > file.gz\n\
         Hello World!\n\
         EOF\n\
         \\$ $(tname) -f gzip < file.gz\n\
         Hello World!\n\
         \\$"; `S Manpage.s_bugs
    ; `P "Check bug reports at <https://github.com/mirage/decompress>"
    ] in
  let term = Term.(ret (const run $ deflate $ format $ level $ input $ output))
  and info = Cmd.info "decompress" ~doc ~man in
  Cmd.v info term
(* Program entry point: evaluate [command] against the command line and exit
   with the code it returns. *)
let () =
  let code = Cmd.eval' command in
  exit code