From 733d498dc16768a0d23587bcb165871168919495 Mon Sep 17 00:00:00 2001 From: Martin Lindhe Date: Sat, 14 Dec 2024 03:46:06 +0100 Subject: [PATCH] lzss: replace impl, fix decompression for known users --- TEMPLATE.md | 7 +- compression/compression.go | 19 +--- compression/lzss/lzss.go | 89 +++++++++++++++++++ go.mod | 1 - go.sum | 2 - mapper/extract.go | 5 +- templates/games/blinx_2/ipk.yml | 18 ++-- templates/games/mario_party_4/bin.yml | 19 ++-- templates/games/mario_party_4/gsnd.yml | 83 ++++++++++++++++- templates/games/namco_museum_megamix/lzss.yml | 6 +- templates/systems/gamecube/rvz.yml | 22 +++++ 11 files changed, 222 insertions(+), 49 deletions(-) create mode 100644 compression/lzss/lzss.go create mode 100644 templates/systems/gamecube/rvz.yml diff --git a/TEMPLATE.md b/TEMPLATE.md index c1056a4..0b05305 100644 --- a/TEMPLATE.md +++ b/TEMPLATE.md @@ -135,6 +135,7 @@ ext("hello.ext") = ".ext", return the extension of input filename basename("path/to/file.ext") = "file.ext", returns basename without file path struct(self.index, "Filename", "Name") = reads a value from a field in a struct array +``` # Data types @@ -195,9 +196,9 @@ data tagging (for extraction feature) compressed:zlib[self.Size] mark area as zlib compressed data compressed:gzip[self.Size] mark area as gzip compressed data compressed:deflate[self.Size] mark area as DEFLATE compressed data - compressed:lzo1x[self.Size] mark area as Lzo1x-compatible data - compressed:lzss[self.Size] mark area as Lzss-compatible data - compressed:lz4[self.Size] mark area as Lz4-compressed data + compressed:lzo1x[self.Size] mark area as Lzo1x compressed data + compressed:lzss[self.Size] mark area as Lzss compressed data + compressed:lz4[self.Size] mark area as Lz4 compressed data compressed:lzf[self.Size] mark area as LZF compressed data compressed:lzma[self.Size] mark area as Lzma compressed data compressed:lzma2[self.Size] mark area as Lzma2 compressed data diff --git a/compression/compression.go b/compression/compression.go index e48e94f..68e844e 100644 --- a/compression/compression.go +++ b/compression/compression.go @@ -9,13 +9,14 @@ import ( "io" "github.com/JoshVarga/blast" - lzss "github.com/fbonhomm/LZSS/source" + "github.com/pierrec/lz4/v4" "github.com/rasky/go-lzo" "github.com/spf13/afero" "github.com/ulikunitz/xz/lzma" "github.com/martinlindhe/feng/compression/lzf" + "github.com/martinlindhe/feng/compression/lzss" ) // The Extractor handles compression and decompression for a specific compression format @@ -242,23 +243,11 @@ type Lzss struct { } func (o Lzss) Extract(f afero.File) ([]byte, error) { - - // TODO need github.com/fbonhomm/LZSS to support reader interface - // https://github.com/fbonhomm/LZSS/pull/1 - - data := make([]byte, o.CompressedSize) - if _, err := io.ReadFull(f, data); err != nil { - return nil, err - } - - lzssMode := lzss.LZSS{Mode: 1, PositionMode: 1} - return lzssMode.Decompress(data) + return lzss.Decompress(f, o.CompressedSize) } func (o Lzss) Compress(in []byte, w io.Writer) error { - lzssMode := lzss.LZSS{Mode: 1, PositionMode: 0} - _, err := w.Write(lzssMode.Compress(in)) - return err + return lzss.Compress(in, w) } // PKWARE DCL compressed data (aka blast/explode/implode) diff --git a/compression/lzss/lzss.go b/compression/lzss/lzss.go new file mode 100644 index 0000000..5ce5dcb --- /dev/null +++ b/compression/lzss/lzss.go @@ -0,0 +1,89 @@ +package lzss + +import ( + "bytes" + "fmt" + "io" + + "github.com/spf13/afero" +) + +func readByte(f io.Reader) (byte, error) { + buf := make([]byte, 1) + _, err := f.Read(buf) + return buf[0], err +} + +// Decompress decompresses LZSS a data stream. +// Implementation based on https://github.com/blacktop/lzss/blob/5db4a74c19d62a8e41860aa404cd76a3ac5a49ac/lzss.go +func Decompress(in afero.File, compressedSize uint) ([]byte, error) { + // n is the size of ring buffer - must be power of 2 + n := 4096 + + // f is the upper limit for match_length + f := 18 + + threshold := 2 + + var i, j, r, c int + var flags uint + + dst := bytes.Buffer{} + + // ring buffer of size n, with extra f-1 bytes to aid string comparison + textBuf := make([]byte, n+f-1) + + r = n - f + flags = 0 + + for { + flags = flags >> 1 + if ((flags) & 0x100) == 0 { + bite, err := readByte(in) + if err != nil { + break + } + c = int(bite) + flags = uint(c | 0xFF00) /* uses higher byte cleverly to count eight*/ + } + if flags&1 == 1 { + bite, err := readByte(in) + if err != nil { + break + } + c = int(bite) + dst.WriteByte(byte(c)) + textBuf[r] = byte(c) + r++ + r &= (n - 1) + } else { + bite, err := readByte(in) + if err != nil { + break + } + i = int(bite) + + bite, err = readByte(in) + if err != nil { + break + } + j = int(bite) + + i |= ((j & 0xF0) << 4) + j = (j & 0x0F) + threshold + for k := 0; k <= j; k++ { + c = int(textBuf[(i+k)&(n-1)]) + dst.WriteByte(byte(c)) + textBuf[r] = byte(c) + r++ + r &= (n - 1) + } + } + } + + return dst.Bytes(), nil +} + +func Compress(in []byte, w io.Writer) error { + return fmt.Errorf("TODO lzss compression is not implemented") +} diff --git a/go.mod b/go.mod index 6d172b6..16a7b39 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ require ( github.com/JoshVarga/blast v0.0.0-20210808061142-eadad17358e8 github.com/alecthomas/kong v1.6.0 github.com/davecgh/go-spew v1.1.1 - github.com/fbonhomm/LZSS v0.0.0-20200907090355-ba1a01a92989 github.com/maja42/goval v1.4.0 github.com/pierrec/lz4/v4 v4.1.22 github.com/pkg/errors v0.9.1 diff --git a/go.sum b/go.sum index e06f301..c7c65c2 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/maja42/goval v1.4.0 h1:tlX0X+GvjKzWW2Q6qzWwL4Av2KV1bLtzxwzgxiiwEPc= github.com/maja42/goval v1.4.0/go.mod h1:LDMwF8ocOwIsMZdwoyHC/3UpV8ABDwEzalxkVV2z/rI= -github.com/martinlindhe/LZSS v0.0.0-20221025204446-acc47c959dfe h1:3CfBT4bBSMucKFhPmljy8mte8x/sekluzF2Al8EzNvQ= -github.com/martinlindhe/LZSS v0.0.0-20221025204446-acc47c959dfe/go.mod h1:w+lBCdnOBTrplK7ed0f/s5XZQb/dP0GMaVpIf7b5kG4= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= diff --git a/mapper/extract.go b/mapper/extract.go index 525e893..c7ab01c 100644 --- a/mapper/extract.go +++ b/mapper/extract.go @@ -78,7 +78,8 @@ func (fl *FileLayout) extractField(field *Field, layout *Struct, outDir string) return nil } - feng.Printf("<%s.%s> Extracting %s from %08x to %s:", layout.Name, field.Format.Label, fl.PresentType(&field.Format), field.Offset, fullName) + feng.Printf("<%s.%s> %s\n", layout.Name, field.Format.Label, fullName) + feng.Printf(" ├╼ Extracting %s from %08x\n", fl.PresentType(&field.Format), field.Offset) f, err := fs1.Create(fullName) if err != nil { @@ -120,7 +121,7 @@ func (fl *FileLayout) extractField(field *Field, layout *Struct, outDir string) return err } - feng.Printf(" Extracted %d bytes -> %d\n", field.Length, FileSize(f)) + feng.Printf(" ╰╼ Extracted %d -> %d bytes\n", field.Length, FileSize(f)) case "raw": if parts[1] != "u8" { diff --git a/templates/games/blinx_2/ipk.yml b/templates/games/blinx_2/ipk.yml index 47e6c8e..7a62ab7 100644 --- a/templates/games/blinx_2/ipk.yml +++ b/templates/games/blinx_2/ipk.yml @@ -1,20 +1,13 @@ -# STATUS: 50% +# STATUS: 90%, extract: ok # USED IN: # Blinx 2: Masters of Time and Space (2004, XBox) by Artoon -# TODO MAX: -# our lzss decoding crashes on sample files, -# it is not using exactly the same decompression. -# a working decompress for these blinx2 ipk files exists at -# https://codeberg.org/KeybadeBlox/blipks/ - - references: - https://codeberg.org/KeybadeBlox/blipks/src/commit/8a0d684871b0a275a5ed91b37465536133e51c2b/blipks.hpp#L17 kind: archive -name: xxx +name: Blinx 2 game asset extensions: [.ipk] endian: little @@ -35,7 +28,7 @@ structs: entry: ascii[64] FullName: ?? - u32 IsCompressed: ?? # Interpret as boolean + u32 IsCompressed: ?? u32 CompressedSize: ?? u32 Offset: ?? u32 ExtractedSize: ?? @@ -44,7 +37,6 @@ structs: filename: self.FullName if self.IsCompressed == 0: raw:u8[self.CompressedSize] Data: ?? - #else: - # # FIXME our "lzss" decompressor is not compatible with this stream/dec 2024 - # compressed:lzss[self.CompressedSize] Data: ?? + else: + compressed:lzss[self.CompressedSize] Data: ?? offset: restore diff --git a/templates/games/mario_party_4/bin.yml b/templates/games/mario_party_4/bin.yml index 1f30351..3780b79 100644 --- a/templates/games/mario_party_4/bin.yml +++ b/templates/games/mario_party_4/bin.yml @@ -1,7 +1,10 @@ # STATUS: 50%, extract: without names (compressed block) +# TODO FIXME for lzss compression (mp4): seems like we need to "uncompress unknown amount of data" and +# then confirm it was DecompressedSize bytes of data + # Used in: -# - Mario Party 4 (2002) - lzss compression +# - Mario Party 4 (2002, GameCube) - LZSS compression # - Mario Party 5 (2003) - zlib compression # - Mario Party 6 (2004) - zlib compression # - Mario Party 7 (2005) - zlib compression @@ -9,9 +12,8 @@ # TODO: some idea on how to extract filenames from the compressed entry with "HSFV" header is in mario_party_5.bms -# TODO: mario party 4 lzss compression extraction fails. no idea why - references: + - https://github.com/mariopartyrd/marioparty4 - https://aluigi.altervista.org/bms/mario_party_5.bms - https://aluigi.altervista.org/bms/mario_party_6.bms - https://github.com/Ploaj/Metanoia/blob/225b4eec7c004109d7a7dc7bd56cc7584e44faf8/Metanoia/Formats/GameCube/HSF.cs # parser for the .hsf models @@ -35,19 +37,20 @@ structs: file: u32 Offset: ?? offset: self.Offset - u32 UncompressedSize: ?? + u32 DecompressedSize: ?? u32 CompressionType: eq 00000001: LZSS eq 00000004: SLIDE # NEED SAMPLE eq 00000005: RLE # NEED SAMPLE eq 00000007: ZLIB - # HACK because one entry pointed past end of file ... - if self.CompressionType == LZSS && OFFSET + self.UncompressedSize < FILE_SIZE: - compressed:lzss[self.UncompressedSize] Data: ?? + if self.CompressionType == LZSS: + # TODO FIXME: seems like we need to "uncompress unknown amount of data" and + # then confirm it was DecompressedSize bytes of data + compressed:lzss[self.DecompressedSize] Data: ?? if self.CompressionType == ZLIB: - u32 UncompressedSize2: ?? + u32 DecompressedSize2: ?? u32 CompressedSize: ?? compressed:zlib[self.CompressedSize] Data: ?? diff --git a/templates/games/mario_party_4/gsnd.yml b/templates/games/mario_party_4/gsnd.yml index 3103294..36b3d3b 100644 --- a/templates/games/mario_party_4/gsnd.yml +++ b/templates/games/mario_party_4/gsnd.yml @@ -3,6 +3,9 @@ # Used in: # - Mario Party 4 (2002) +references: + - https://github.com/Yoshimaster96/mpgc-sound-tools/blob/1e5ec11eda6cbbf863a533ef0f3dd9d9ffb8c9cc/dump_msm.c + kind: archive name: Mario Party 4 gsnd file endian: big @@ -17,8 +20,86 @@ structs: header: ascii[4] Signature: c'GSND' u32 SomeCount: ?? # XXX - u32 v2: ?? # 02 31 e1 60 XXX + u32 FileSize: ?? + u32 unk0: ?? + + u32 chk5Offs: ?? + u32 chk5Size: ?? + u32 chk6Offs: ?? + u32 chk6Size: ?? + u32 chk2Offs: ?? + u32 chk2Size: ?? layout: - header Header + +# //Extract banks +# for(int i=1; i<(chk2Size>>5); i++) { +# //Get offset/size data +# fseek(fp,chk2Offs+(i<<5),SEEK_SET); +# uint16_t groupId = read_u16_be(fp); +# fseek(fp,2,SEEK_CUR); +# uint32_t groupDataOffs = read_u32_be(fp); +# uint32_t groupDataSize = read_u32_be(fp); +# uint32_t sampOffs = read_u32_be(fp); +# uint32_t sampSize = read_u32_be(fp); +# groupDataOffs += chk5Offs; +# sampOffs += chk6Offs; +# +# fseek(fp,groupDataOffs,SEEK_SET); +# uint32_t poolOffs = read_u32_be(fp); +# uint32_t projOffs = read_u32_be(fp); +# uint32_t sdirOffs = read_u32_be(fp); +# uint32_t SNGOffs = read_u32_be(fp); +# uint32_t poolSize = projOffs-poolOffs; +# uint32_t projSize = sdirOffs-projOffs; +# uint32_t sdirSize = SNGOffs-sdirOffs; +# poolOffs += groupDataOffs; +# projOffs += groupDataOffs; +# sdirOffs += groupDataOffs; +# +# uint8_t * buf; +# char fname[0x100]; +# FILE * out; +# +# //Dump .pool +# buf = (uint8_t*)malloc(poolSize); +# fseek(fp,poolOffs,SEEK_SET); +# fread(buf,1,poolSize,fp); +# snprintf(fname,0x100,"%04X.pool",groupId); +# out = fopen(fname,"wb"); +# fwrite(buf,1,poolSize,out); +# fclose(out); +# free(buf); +# +# //Dump .proj +# buf = (uint8_t*)malloc(projSize); +# fseek(fp,projOffs,SEEK_SET); +# fread(buf,1,projSize,fp); +# snprintf(fname,0x100,"%04X.proj",groupId); +# out = fopen(fname,"wb"); +# fwrite(buf,1,projSize,out); +# fclose(out); +# free(buf); +# +# //Dump .sdir +# buf = (uint8_t*)malloc(sdirOffs); +# fseek(fp,sdirOffs,SEEK_SET); +# fread(buf,1,sdirOffs,fp); +# snprintf(fname,0x100,"%04X.sdir",groupId); +# out = fopen(fname,"wb"); +# fwrite(buf,1,sdirOffs,out); +# fclose(out); +# free(buf); +# +# //Dump .samp +# buf = (uint8_t*)malloc(sampSize); +# fseek(fp,sampOffs,SEEK_SET); +# fread(buf,1,sampSize,fp); +# snprintf(fname,0x100,"%04X.samp",groupId); +# out = fopen(fname,"wb"); +# fwrite(buf,1,sampSize,out); +# fclose(out); +# free(buf); +# } diff --git a/templates/games/namco_museum_megamix/lzss.yml b/templates/games/namco_museum_megamix/lzss.yml index fa85588..dd94f63 100644 --- a/templates/games/namco_museum_megamix/lzss.yml +++ b/templates/games/namco_museum_megamix/lzss.yml @@ -15,7 +15,7 @@ endian: little magic: - offset: 0000 - match: c'SSZL' # XXX namco_museum.bms also recognized "LZSS", from where? + match: c'SSZL' structs: header: @@ -23,9 +23,7 @@ structs: u32 Unknown: ?? # always 0 u32 CompressedSize: ?? u32 ExpandedSize: ?? - # XXX does not decompress correctly with the lzss decompressor we are using in feng - #compressed:lzss[self.CompressedSize] Data: ?? - raw:u8[self.CompressedSize] Data: ?? # quickbms comtype lzss0 + compressed:lzss[self.CompressedSize] Data: ?? layout: - header Header diff --git a/templates/systems/gamecube/rvz.yml b/templates/systems/gamecube/rvz.yml new file mode 100644 index 0000000..5463cc6 --- /dev/null +++ b/templates/systems/gamecube/rvz.yml @@ -0,0 +1,22 @@ +# STATUS: 1% + +# EMULATOR FILE FORMAT, NOT FOR CONSOLE + +references: + - xxx + +kind: archive +name: Nintendo Gamecube/Wii rvz archive +extensions: [.rvz] +endian: big # XXX + +magic: + - offset: 0000 + match: c'RVZ' 01 + +structs: + header: + ascii[4] Magic: ?? + +layout: + - header Header