-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improved python module architecture and added algos.
* Split core and algos * Introduced a new API * Added zstd, brotli, snappy, lzham, implode and bzip * Fixed brotli files extension * Added parameters for all the algos * Some improvements in packaging. * Improved testing.
- Loading branch information
Showing
33 changed files
with
758 additions
and
78 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,4 @@ meta: | |
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.lzma_raw | ||
process: kaitai.compress.lzma(2, 9, "raw") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,4 @@ meta: | |
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.lzma_xz | ||
process: kaitai.compress.lzma(2, 9, "xz") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
meta: | ||
id: test_snappy | ||
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.snappy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,49 @@ | ||
#!/usr/bin/env python3 | ||
|
||
from glob import glob | ||
from os.path import basename | ||
from pathlib import Path | ||
import re | ||
import unittest | ||
|
||
from test_lz4 import TestLz4 | ||
from test_lzma_lzma import TestLzmaLzma | ||
from test_lzma_raw import TestLzmaRaw | ||
from test_lzma_xz import TestLzmaXz | ||
from test_zlib import TestZlib | ||
from test_snappy import TestSnappy | ||
from test_brotli import TestBrotli | ||
from test_zstd import TestZstd | ||
|
||
# Script-style check: for every reference file in ``uncompressed/`` parse the
# matching compressed sample with each generated parser and print whether the
# decoded body equals the reference bytes.
for uncompressed_fn in glob('uncompressed/*.dat'):
    # The dot is escaped so that only a literal ".dat" suffix is stripped.
    name = re.sub(r'\.dat$', '', basename(uncompressed_fn))
    print(name)

    # A context manager closes the file even if reading fails.
    with open(uncompressed_fn, 'rb') as f:
        uncompressed_data = f.read()

    # (parser class, extension of the compressed sample file)
    algs = [
        (TestLz4, 'lz4'),
        (TestLzmaLzma, 'lzma'),
        # (TestLzmaRaw, 'lzma_raw'), # requires filters= to be set
        (TestLzmaXz, 'xz'),
        (TestZlib, 'zlib'),
        (TestBrotli, 'brotli'),
    ]

    for test_class, ext in algs:
        obj = test_class.from_file('compressed/%s.%s' % (name, ext))
        print(obj.body == uncompressed_data)
cwd = Path(".").absolute()
_tests_dir = Path(__file__).absolute().parent
try:
    # Prefer a path relative to the working directory.
    this_dir = _tests_dir.relative_to(cwd)
except ValueError:
    # relative_to() raises ValueError when this file does not live under the
    # current working directory (e.g. the tests are launched from elsewhere);
    # fall back to the absolute location instead of failing at import time.
    this_dir = _tests_dir
compressed_dir = this_dir / "compressed"
uncompressed_dir = this_dir / "uncompressed"


class SimpleTests(unittest.TestCase):
    """Check each generated parser against the reference sample files."""

    def testCompressors(self):
        """Parse every compressed sample and compare its body to the reference.

        For each ``*.dat`` file in ``uncompressed_dir`` the sample with the
        same stem and an algorithm-specific extension is read from
        ``compressed_dir``; every (parser, file) pair runs as its own subtest.
        """
        for uncompressed_fn in uncompressed_dir.glob("*.dat"):
            name = uncompressed_fn.stem
            print(name)

            uncompressed_data = uncompressed_fn.read_bytes()

            # (parser class, extension of the compressed sample file)
            algs = [
                (TestLz4, "lz4"),
                (TestLzmaLzma, "lzma"),
                # (TestLzmaRaw, 'lzma_raw'), # requires filters= to be set
                (TestLzmaXz, "xz"),
                (TestZlib, "zlib"),
                (TestSnappy, "snappy"),
                (TestBrotli, "br"),
                (TestZstd, "zst"),
            ]

            for test_class, ext in algs:
                compressed_fn = compressed_dir / (name + "." + ext)
                with self.subTest(test_class=test_class, file=compressed_fn):
                    obj = test_class.from_file(str(compressed_fn))
                    self.assertEqual(obj.body, uncompressed_data)


if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
__pycache__ | ||
*.pyc | ||
*.pyo | ||
/build | ||
/dist | ||
/*.egg-info |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
from .lz4 import Lz4 | ||
from .zlib import Zlib | ||
from .lzma_raw import LzmaRaw | ||
from .lzma_lzma import LzmaLzma | ||
from .lzma_xz import LzmaXz | ||
from .brotli import Brotli | ||
from .core import * | ||
from .algorithms.zlib import Zlib | ||
from .algorithms.lzma import Lzma | ||
from .algorithms.lz4 import Lz4 | ||
from .algorithms.brotli import Brotli | ||
from .algorithms.zstd import Zstd | ||
from .algorithms.bz2 import Bz2 | ||
from .algorithms.snappy import Snappy |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Brotli(KaitaiCompressor):
    """Brotli processor backed by the lazily imported ``brotli`` module.

    The constructor only collects keyword arguments; the actual work happens
    in ``process`` (decompression) and ``unprocess`` (compression).
    """

    __slots__ = ("compressorParams", "decompressorParams")
    brotli = None  # module object, cached on the class after the first import

    def __init__(self, level: typing.Optional[int] = None, mode: typing.Optional[str] = "generic", log_window_size: typing.Optional[int] = None, log_block_size: typing.Optional[int] = None, dictionary: typing.Optional[bytes] = None) -> None:  # pylint:disable=redefined-builtin,too-many-arguments,too-many-locals,unused-argument
        """Collect the parameters later forwarded to the brotli module.

        :param level: forwarded to the compressor as ``quality`` when given.
        :param mode: a mode name (resolved to the module's ``MODE_<NAME>``
            attribute) or an already resolved mode value; ``None`` omits it.
        :param log_window_size: forwarded as ``lgwin`` when given.
        :param log_block_size: forwarded as ``lgblock`` when given.
        :param dictionary: forwarded to both directions when given.
        """
        super().__init__()
        klass = self.__class__
        if klass.brotli is None:
            import brotli  # pylint:disable=import-outside-toplevel

            klass.brotli = brotli

        comp_params = {}
        decomp_params = {}

        if mode is not None:
            # Strings name a MODE_* constant; anything else is used verbatim.
            resolved = getattr(klass.brotli, "MODE_" + mode.upper()) if isinstance(mode, str) else mode
            comp_params["mode"] = resolved

        optional_settings = (
            ("quality", level),
            ("lgwin", log_window_size),
            ("lgblock", log_block_size),
        )
        for key, value in optional_settings:
            if value is not None:
                comp_params[key] = value

        if dictionary is not None:
            comp_params["dictionary"] = dictionary
            decomp_params["dictionary"] = dictionary

        self.compressorParams = comp_params
        self.decompressorParams = decomp_params

    # new API
    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` and wrap the result in a ``ProcessorContextStub``."""
        raw = self.__class__.brotli.decompress(bytes(data), **self.decompressorParams)
        return ProcessorContextStub(raw)

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` with the stored parameters and wrap the result."""
        packed = self.__class__.brotli.compress(data, **self.compressorParams)
        return ProcessorContextStub(packed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import bz2 | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Bz2(KaitaiCompressor):
    """bzip2 processor built on the standard-library ``bz2`` module."""

    __slots__ = ("level",)

    def __init__(self, level: int = 9, *args, **kwargs) -> None:  # pylint:disable=unused-argument
        """Remember the compression level; extra arguments are accepted and ignored."""
        super().__init__()
        self.level = level

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` with a fresh ``BZ2Decompressor`` and wrap the result."""
        plain = bz2.BZ2Decompressor().decompress(data)
        return ProcessorContextStub(plain)

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` at the configured level and wrap the result."""
        # bz2.compress() is the one-shot form of BZ2Compressor.compress() + flush().
        packed = bz2.compress(data, self.level)
        return ProcessorContextStub(packed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Implode(KaitaiCompressor):
    """PKWare implode format"""

    __slots__ = ()

    def __init__(self, *args, **kwargs) -> None:  # pylint:disable=unused-argument
        """Accept and ignore any arguments; this processor keeps no state."""
        super().__init__()

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` via ``pkblast`` (imported on first use) and wrap the result."""
        import pkblast

        outcome = pkblast.decompressBytesWholeToBytes(data)
        # Only the second element of the returned sequence is passed on.
        decompressed = outcome[1]
        return ProcessorContextStub(decompressed)

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compression is unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError("pkimplode is needed, but not yet implemented")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import typing | ||
from enum import IntEnum | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class LRZip(KaitaiCompressor):
    """lrzip processor backed by the lazily imported ``lrzip`` module."""

    __slots__ = ("algo",)

    lrzip = None  # module object, set by initLib()
    Algos = None  # IntEnum of compression modes, set by initLib()

    @classmethod
    def initLib(cls):
        """Import ``lrzip`` and build the ``Algos`` enum from its constants.

        Every module attribute named ``LRZIP_MODE_COMPRESS_<X>`` becomes an
        enum member ``<x>`` (lower-cased), ordered by its numeric value.
        """
        import lrzip  # pylint:disable=import-outside-toplevel

        # A classmethod receives the class as ``cls``; there is no ``self`` here.
        cls.lrzip = lrzip
        prefix = "LRZIP_MODE_COMPRESS_"
        modes = ((k[len(prefix):].lower(), getattr(lrzip, k)) for k in dir(lrzip) if k.startswith(prefix))
        cls.Algos = IntEnum("A", sorted(modes, key=lambda x: x[1]))

    def __init__(self, algo: typing.Union[int, str] = "none", *args, **kwargs) -> None:  # pylint:disable=unused-argument
        """Resolve ``algo`` to an ``Algos`` member.

        :param algo: a mode name (case-insensitive) or a numeric mode value.
        :raises KeyError: if ``algo`` is a string that names no known mode.
        :raises ValueError: if ``algo`` is a number that matches no known mode.
        """
        if self.__class__.lrzip is None:
            self.__class__.initLib()
        if isinstance(algo, str):
            algo = self.__class__.Algos[algo.lower()]
        else:
            algo = self.__class__.Algos(algo)
        self.algo = algo
        super().__init__()

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` and wrap the result in a ``ProcessorContextStub``."""
        return ProcessorContextStub(self.__class__.lrzip.decompress(data))

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` using the selected mode and wrap the result."""
        return ProcessorContextStub(self.__class__.lrzip.compress(data, compressMode=self.algo))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Lz4(KaitaiCompressor):
    """LZ4 frame processor backed by the lazily imported ``lz4.frame`` module."""

    __slots__ = ("compressorParams",)
    lz4Frame = None  # ``lz4.frame`` module, cached on the class after the first import

    def __init__(self, block_size: typing.Optional[int] = None, should_link_blocks: bool = True, compression_level: typing.Optional[int] = None, frame_checksum: bool = False, block_checksum: bool = False, *args, **kwargs) -> None:  # pylint:disable=unused-argument,too-many-arguments
        """Collect the keyword arguments used when compressing.

        :param block_size: block size identifier; ``None`` selects
            ``BLOCKSIZE_MAX4MB``.
        :param should_link_blocks: forwarded as ``block_linked``.
        :param compression_level: ``None`` selects ``COMPRESSIONLEVEL_MAX``.
        :param frame_checksum: forwarded as ``content_checksum``.
        :param block_checksum: forwarded as ``block_checksum``.
        """
        super().__init__()
        if self.__class__.lz4Frame is None:
            import lz4.frame  # pylint:disable=import-outside-toplevel

            self.__class__.lz4Frame = lz4.frame

        if compression_level is None:
            compression_level = self.__class__.lz4Frame.COMPRESSIONLEVEL_MAX
        if block_size is None:
            block_size = self.__class__.lz4Frame.BLOCKSIZE_MAX4MB
        self.compressorParams = {
            "block_size": block_size,
            "block_linked": should_link_blocks,
            "compression_level": compression_level,
            "content_checksum": frame_checksum,
            "block_checksum": block_checksum,
            "return_bytearray": False
        }

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` with a fresh frame decompressor and wrap the result."""
        obj = self.__class__.lz4Frame.LZ4FrameDecompressor(return_bytearray=False)
        return ProcessorContextStub(obj.decompress(data))

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` into a single frame (header + body + end mark) and wrap it."""
        obj = self.__class__.lz4Frame.LZ4FrameCompressor(**self.compressorParams)
        return ProcessorContextStub(obj.begin(len(data)) + obj.compress(data) + obj.flush())

    def extract_args(self, data: typing.Union[bytes, bytearray]):
        """Read the frame header of ``data`` and return matching constructor arguments.

        :returns: ``(block_size, should_link_blocks, compression_level,
            frame_checksum, block_checksum)`` in the order ``__init__``
            accepts them.
        """
        info = self.__class__.lz4Frame.get_frame_info(data)
        return (
            # The constructor takes the size identifier, not the size in bytes.
            info["block_size_id"],
            info["block_linked"],
            # The frame info is not documented to report a compression level;
            # None makes __init__ fall back to its default.
            info.get("compression_level"),
            info["content_checksum"],
            info["block_checksum"],
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# Deliberate import-time guard: raising here aborts loading of this module, so
# nothing below this line is ever executed.
raise NotImplementedError("The python bindings for lzham and lzham itself has bad security and design issues. It must be fixed first.")
|
||
"""LZHAM | ||
Must Read: https://github.com/richgel999/lzham_codec | ||
uncompressed: 1 | ||
table_update_rate: # at default settings | ||
3: 0.0103 | ||
8: 0.0105 # default | ||
20: 0.0106 | ||
level: # "table_update_rate":20, "dict_size_log2": 26 | ||
1: 0.0108 | ||
4: 0.0104 | ||
highest: | ||
lzma: 0.008 # at highest settings | ||
""" | ||
# pylint:disable=arguments-differ | ||
|
||
|
||
class LZHAM(KaitaiCompressor):
    """LZHAM processor backed by the lazily imported ``lzham`` module.

    A compressor and a decompressor object are created once per instance in
    the constructor and reused by ``unprocess`` and ``process``.
    """

    __slots__ = ("decompressor", "compressor", "dictTrainerParams",)
    lzham = None  # module object, cached on the class after the first import

    def __init__(self, level: int = 1, dict_size_log2: int = 26, table_update_rate: int = 20, max_helper_threads: int = 0, check_adler32: bool = False, table_max_update_interval: int = 0, table_update_interval_slow_rate: int = 0, *args, **kwargs) -> None:  # pylint:disable=redefined-builtin,too-many-arguments,too-many-locals,unused-argument,too-many-branches,too-many-statements
        """Build the compressor and decompressor from the given settings.

        ``table_update_rate``, ``dict_size_log2``, ``table_max_update_interval``
        and ``table_update_interval_slow_rate`` are passed to both objects.
        ``level`` and ``max_helper_threads`` go to the compressor only, and
        ``check_adler32`` (as ``compute_adler32_during_decomp``) to the
        decompressor only.
        """
        super().__init__()
        if self.__class__.lzham is None:
            import lzham  # pylint:disable=import-outside-toplevel

            self.__class__.lzham = lzham

        # Always go through the class attribute: the local name ``lzham`` is
        # only bound when the import above actually ran, i.e. on first use.
        lib = self.__class__.lzham

        commonFilters = {"table_update_rate": table_update_rate, "dict_size_log2": dict_size_log2, "table_max_update_interval": table_max_update_interval, "table_update_interval_slow_rate": table_update_interval_slow_rate}

        compFilters = {"level": level, "max_helper_threads": max_helper_threads}
        compFilters.update(commonFilters)

        decompFilters = {"compute_adler32_during_decomp": check_adler32, "unbuffered_decompression": True}
        decompFilters.update(commonFilters)

        self.compressor = lib.LZHAMCompressor(compFilters)
        self.decompressor = lib.LZHAMDecompressor(decompFilters)

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` and wrap the result in a ``ProcessorContextStub``."""
        return ProcessorContextStub(self.decompressor.decompress(data))

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` and wrap the result in a ``ProcessorContextStub``."""
        return ProcessorContextStub(self.compressor.compress(data))
Oops, something went wrong.