diff --git a/CMakeLists.txt b/CMakeLists.txt index e54c5ea..9a40d95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,16 +18,21 @@ endif() set (HEADERS kaitai/kaitaistream.h kaitai/kaitaistruct.h + kaitai/kio.h + kaitai/kistream.h + kaitai/kostream.h ) set (SOURCES - kaitai/kaitaistream.cpp + kaitai/kio.cpp + kaitai/kistream.cpp + kaitai/kostream.cpp ) set(STRING_ENCODING_TYPE "ICONV" CACHE STRING "Set the way strings have to be encoded (ICONV|NONE|...)") add_library (${PROJECT_NAME} SHARED ${HEADERS} ${SOURCES}) -set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "kaitai/kaitaistream.h;kaitai/kaitaistruct.h") +set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "kaitai/kaitaistream.h;kaitai/kaitaistruct.h;kaitai/kio.h;kaitai/kistream.h;kaitai/kostream.h") if (ZLIB_FOUND) target_include_directories(${PROJECT_NAME} PRIVATE ${ZLIB_INCLUDE_DIRS}) diff --git a/kaitai/endian.h b/kaitai/endian.h new file mode 100644 index 0000000..ed6bfe1 --- /dev/null +++ b/kaitai/endian.h @@ -0,0 +1,26 @@ +#ifndef KAITAI_ENDIAN_H +#define KAITAI_ENDIAN_H + +#if defined(__APPLE__) +#include <machine/endian.h> +#include <libkern/OSByteOrder.h> +#define bswap_16(x) OSSwapInt16(x) +#define bswap_32(x) OSSwapInt32(x) +#define bswap_64(x) OSSwapInt64(x) +#define __BIG_ENDIAN BIG_ENDIAN +#define __LITTLE_ENDIAN LITTLE_ENDIAN +#define __BYTE_ORDER BYTE_ORDER +#elif defined(_MSC_VER) // !__APPLE__ +#include <stdlib.h> +#define bswap_16(x) _byteswap_ushort(x) +#define bswap_32(x) _byteswap_ulong(x) +#define bswap_64(x) _byteswap_uint64(x) +#define __BIG_ENDIAN 4321 +#define __LITTLE_ENDIAN 1234 +#define __BYTE_ORDER __LITTLE_ENDIAN +#else // !__APPLE__ or !_MSC_VER +#include <endian.h> +#include <byteswap.h> +#endif + +#endif diff --git a/kaitai/exceptions.h b/kaitai/exceptions.h index 1d22176..fad40fb 100644 --- a/kaitai/exceptions.h +++ b/kaitai/exceptions.h @@ -43,7 +43,7 @@ class undecided_endianness_error: public kstruct_error { */ class validation_failed_error: public kstruct_error { public: - validation_failed_error(const std::string what, const kstream* 
io, const std::string src_path): + validation_failed_error(const std::string what, const kistream* io, const std::string src_path): kstruct_error(std::string("at pos ") + /*std::to_string(io->pos())*/ + ": validation failed:" + what, src_path), m_io(io) { @@ -52,7 +52,7 @@ class validation_failed_error: public kstruct_error { // "at pos #{io.pos}: validation failed: #{msg}" protected: - const kstream* m_io; + const kistream* m_io; }; /** @@ -62,7 +62,7 @@ class validation_failed_error: public kstruct_error { template class validation_not_equal_error: public validation_failed_error { public: - validation_not_equal_error(const T& expected, const T& actual, const kstream* io, const std::string src_path): + validation_not_equal_error(const T& expected, const T& actual, const kistream* io, const std::string src_path): validation_failed_error("not equal", io, src_path), m_expected(expected), m_actual(actual) diff --git a/kaitai/kaitaistream.h b/kaitai/kaitaistream.h index a16f50e..20cf001 100644 --- a/kaitai/kaitaistream.h +++ b/kaitai/kaitaistream.h @@ -1,249 +1,16 @@ #ifndef KAITAI_STREAM_H #define KAITAI_STREAM_H +#include "kistream.h" + // Kaitai Struct runtime API version: x.y.z = 'xxxyyyzzz' decimal #define KAITAI_STRUCT_VERSION 9000L -#include -#include -#include -#include - namespace kaitai { -/** - * Kaitai Stream class (kaitai::kstream) is an implementation of - * Kaitai Struct stream API - * for C++/STL. It's implemented as a wrapper over generic STL std::istream. - * - * It provides a wide variety of simple methods to read (parse) binary - * representations of primitive types, such as integer and floating - * point numbers, byte arrays and strings, and also provides stream - * positioning / navigation methods with unified cross-language and - * cross-toolkit semantics. 
- * - * Typically, end users won't access Kaitai Stream class manually, but would - * describe a binary structure format using .ksy language and then would use - * Kaitai Struct compiler to generate source code in desired target language. - * That code, in turn, would use this class and API to do the actual parsing - * job. - */ -class kstream { -public: - /** - * Constructs new Kaitai Stream object, wrapping a given std::istream. - * \param io istream object to use for this Kaitai Stream - */ - kstream(std::istream* io); - - /** - * Constructs new Kaitai Stream object, wrapping a given in-memory data - * buffer. - * \param data data buffer to use for this Kaitai Stream - */ - kstream(std::string& data); - - void close(); - - /** @name Stream positioning */ - //@{ - /** - * Check if stream pointer is at the end of stream. Note that the semantics - * are different from traditional STL semantics: one does *not* need to do a - * read (which will fail) after the actual end of the stream to trigger EOF - * flag, which can be accessed after that read. It is sufficient to just be - * at the end of the stream for this method to return true. - * \return "true" if we are located at the end of the stream. - */ - bool is_eof() const; - - /** - * Set stream pointer to designated position. - * \param pos new position (offset in bytes from the beginning of the stream) - */ - void seek(uint64_t pos); - - /** - * Get current position of a stream pointer. - * \return pointer position, number of bytes from the beginning of the stream - */ - uint64_t pos(); - - /** - * Get total size of the stream in bytes. - * \return size of the stream in bytes - */ - uint64_t size(); - //@} - - /** @name Integer numbers */ - //@{ - - // ------------------------------------------------------------------------ - // Signed - // ------------------------------------------------------------------------ - - int8_t read_s1(); - - // ........................................................................ 
- // Big-endian - // ........................................................................ - - int16_t read_s2be(); - int32_t read_s4be(); - int64_t read_s8be(); - - // ........................................................................ - // Little-endian - // ........................................................................ - - int16_t read_s2le(); - int32_t read_s4le(); - int64_t read_s8le(); - - // ------------------------------------------------------------------------ - // Unsigned - // ------------------------------------------------------------------------ - - uint8_t read_u1(); - - // ........................................................................ - // Big-endian - // ........................................................................ - - uint16_t read_u2be(); - uint32_t read_u4be(); - uint64_t read_u8be(); - - // ........................................................................ - // Little-endian - // ........................................................................ - - uint16_t read_u2le(); - uint32_t read_u4le(); - uint64_t read_u8le(); - - //@} - - /** @name Floating point numbers */ - //@{ - - // ........................................................................ - // Big-endian - // ........................................................................ - - float read_f4be(); - double read_f8be(); - - // ........................................................................ - // Little-endian - // ........................................................................ 
- - float read_f4le(); - double read_f8le(); - - //@} - - /** @name Unaligned bit values */ - //@{ - - void align_to_byte(); - uint64_t read_bits_int(int n); - - //@} - - /** @name Byte arrays */ - //@{ - - std::string read_bytes(std::streamsize len); - std::string read_bytes_full(); - std::string read_bytes_term(char term, bool include, bool consume, bool eos_error); - std::string ensure_fixed_contents(std::string expected); - - static std::string bytes_strip_right(std::string src, char pad_byte); - static std::string bytes_terminate(std::string src, char term, bool include); - static std::string bytes_to_str(std::string src, std::string src_enc); - - //@} - - /** @name Byte array processing */ - //@{ - - /** - * Performs a XOR processing with given data, XORing every byte of input with a single - * given value. - * @param data data to process - * @param key value to XOR with - * @return processed data - */ - static std::string process_xor_one(std::string data, uint8_t key); - - /** - * Performs a XOR processing with given data, XORing every byte of input with a key - * array, repeating key array many times, if necessary (i.e. if data array is longer - * than key array). - * @param data data to process - * @param key array of bytes to XOR with - * @return processed data - */ - static std::string process_xor_many(std::string data, std::string key); - - /** - * Performs a circular left rotation shift for a given buffer by a given amount of bits, - * using groups of 1 bytes each time. Right circular rotation should be performed - * using this procedure with corrected amount. - * @param data source data to process - * @param amount number of bits to shift by - * @return copy of source array with requested shift applied - */ - static std::string process_rotate_left(std::string data, int amount); - -#ifdef KS_ZLIB - /** - * Performs an unpacking ("inflation") of zlib-compressed data with usual zlib headers. 
- * @param data data to unpack - * @return unpacked data - * @throws IOException - */ - static std::string process_zlib(std::string data); -#endif - - //@} - - /** - * Performs modulo operation between two integers: dividend `a` - * and divisor `b`. Divisor `b` is expected to be positive. The - * result is always 0 <= x <= b - 1. - */ - static int mod(int a, int b); - - /** - * Converts given integer `val` to a decimal string representation. - * Should be used in place of std::to_string() (which is available only - * since C++11) in older C++ implementations. - */ - static std::string to_string(int val); - - /** - * Reverses given string `val`, so that the first character becomes the - * last and the last one becomes the first. This should be used to avoid - * the need of local variables at the caller. - */ - static std::string reverse(std::string val); - -private: - std::istream* m_io; - std::istringstream m_io_str; - int m_bits_left; - uint64_t m_bits; - - void init(); - void exceptions_enable() const; - - static uint64_t get_mask_ones(int n); - - static const int ZLIB_BUF_SIZE = 128 * 1024; -}; +// This typedef exists for API compatibility with previous versions of the +// KaitaiStruct C++ runtime. 
+typedef kistream kstream; } diff --git a/kaitai/kaitaistruct.h b/kaitai/kaitaistruct.h index 8172ede..a72f5bd 100644 --- a/kaitai/kaitaistruct.h +++ b/kaitai/kaitaistruct.h @@ -7,12 +7,12 @@ namespace kaitai { class kstruct { public: - kstruct(kstream *_io) { m__io = _io; } + kstruct(kistream *_io) { m__io = _io; } virtual ~kstruct() {} protected: - kstream *m__io; + kistream *m__io; public: - kstream *_io() { return m__io; } + kistream *_io() { return m__io; } }; } diff --git a/kaitai/kio.cpp b/kaitai/kio.cpp new file mode 100644 index 0000000..135a9e5 --- /dev/null +++ b/kaitai/kio.cpp @@ -0,0 +1,218 @@ +#include + +#include +#include + +using namespace ::kaitai; + +kio::kio() { +} + +kio::kio(std::ios* io) { + init(io); +} + +void kio::init(std::ios* io) { + m_io = io; + exceptions_enable(); +} + +void kio::close() { + // m_io->close(); +} + +void kio::exceptions_enable() const { + m_io->exceptions( + std::ios::eofbit | + std::ios::failbit | + std::ios::badbit + ); +} + +// ======================================================================== +// Byte arrays +// ======================================================================== + +std::string kio::bytes_strip_right(std::string src, char pad_byte) { + std::size_t new_len = src.length(); + + while (new_len > 0 && src[new_len - 1] == pad_byte) + new_len--; + + return src.substr(0, new_len); +} + +std::string kio::bytes_terminate(std::string src, char term, bool include) { + std::size_t new_len = 0; + std::size_t max_len = src.length(); + + while (new_len < max_len && src[new_len] != term) + new_len++; + + if (include && new_len < max_len) + new_len++; + + return src.substr(0, new_len); +} + +// ======================================================================== +// Byte array processing +// ======================================================================== + +std::string kio::process_xor_one(std::string data, uint8_t key) { + size_t len = data.length(); + std::string result(len, ' '); + + for 
(size_t i = 0; i < len; i++) + result[i] = data[i] ^ key; + + return result; +} + +std::string kio::process_xor_many(std::string data, std::string key) { + size_t len = data.length(); + size_t kl = key.length(); + std::string result(len, ' '); + + size_t ki = 0; + for (size_t i = 0; i < len; i++) { + result[i] = data[i] ^ key[ki]; + ki++; + if (ki >= kl) + ki = 0; + } + + return result; +} + +std::string kio::process_rotate_left(std::string data, int amount) { + size_t len = data.length(); + std::string result(len, ' '); + + for (size_t i = 0; i < len; i++) { + uint8_t bits = data[i]; + result[i] = (bits << (amount & 7)) | (bits >> ((8 - (amount & 7)) & 7)); /* shift counts are reduced modulo 8 so negative or >8 amounts stay well-defined; results for 0..8 are unchanged */ + } + + return result; +} + +// ======================================================================== +// Misc utility methods +// ======================================================================== + +int kio::mod(int a, int b) { + if (b <= 0) + throw std::invalid_argument("mod: divisor b <= 0"); + int r = a % b; + if (r < 0) + r += b; + return r; +} + +#include <stdio.h> +std::string kio::to_string(int val) { + // if int is 32 bits, "-2147483648" is the longest string representation + // => 11 chars + zero => 12 chars + // if int is 64 bits, "-9223372036854775808" is the longest + // => 20 chars + zero => 21 chars + char buf[25]; + int got_len = snprintf(buf, sizeof(buf), "%d", val); + + // should never happen, but check nonetheless: snprintf reports an error as a negative value and truncation as a length >= the buffer size + if (got_len < 0 || static_cast<size_t>(got_len) >= sizeof(buf)) + throw std::invalid_argument("to_string: integer is longer than string buffer"); + + return std::string(buf); +} + +#include <algorithm> +std::string kio::reverse(std::string val) { + std::reverse(val.begin(), val.end()); + + return val; +} + +// ======================================================================== +// Other internal methods +// ======================================================================== + +uint64_t kio::get_mask_ones(int n) { + if (n == 64) { + return 0xFFFFFFFFFFFFFFFF; + } else { + return ((uint64_t) 1 << n) - 1; + } +} + +#ifndef 
KS_STR_DEFAULT_ENCODING +#define KS_STR_DEFAULT_ENCODING "UTF-8" +#endif + +#ifdef KS_STR_ENCODING_ICONV + +#include <iconv.h> +#include <cerrno> +#include <stdexcept> + +std::string kio::bytes_to_str(std::string src, std::string src_enc) { + iconv_t cd = iconv_open(KS_STR_DEFAULT_ENCODING, src_enc.c_str()); + + if (cd == (iconv_t) -1) { + if (errno == EINVAL) { + throw std::runtime_error("bytes_to_str: invalid encoding pair conversion requested"); + } else { + throw std::runtime_error("bytes_to_str: error opening iconv"); + } + } + + size_t src_len = src.length(); + size_t src_left = src_len; + + // Start with a buffer length of double the source length. + size_t dst_len = src_len * 2; + std::string dst(dst_len, ' '); + size_t dst_left = dst_len; + + char *src_ptr = &src[0]; + char *dst_ptr = &dst[0]; + + while (true) { + size_t res = iconv(cd, &src_ptr, &src_left, &dst_ptr, &dst_left); + + if (res == (size_t) -1) { + if (errno == E2BIG) { + // dst buffer is not enough to accommodate whole string + // enlarge the buffer and try again + size_t dst_used = dst_len - dst_left; + dst_left += dst_len; + dst_len += dst_len; + dst.resize(dst_len); + + // dst.resize might have allocated destination buffer in another area + // of memory, thus our previous pointer "dst" will be invalid; re-point + // it using "dst_used". 
+ dst_ptr = &dst[dst_used]; + } else { + throw std::runtime_error("bytes_to_str: iconv error"); + } + } else { + // conversion successful + dst.resize(dst_len - dst_left); + break; + } + } + + if (iconv_close(cd) != 0) { + throw std::runtime_error("bytes_to_str: iconv close error"); + } + + return dst; +} +#elif defined(KS_STR_ENCODING_NONE) +std::string kio::bytes_to_str(std::string src, std::string src_enc) { + return src; +} +#else +#error Need to decide how to handle strings: please define one of: KS_STR_ENCODING_ICONV, KS_STR_ENCODING_NONE +#endif diff --git a/kaitai/kio.h b/kaitai/kio.h new file mode 100644 index 0000000..9400916 --- /dev/null +++ b/kaitai/kio.h @@ -0,0 +1,99 @@ +#ifndef KAITAI_KIO_H +#define KAITAI_KIO_H + +#include +#include + +namespace kaitai { + +class kio { +public: + kio(std::ios* io); + + void close(); + + /** @name Byte arrays */ + //@{ + + static std::string bytes_strip_right(std::string src, char pad_byte); + static std::string bytes_terminate(std::string src, char term, bool include); + static std::string bytes_to_str(std::string src, std::string src_enc); + + //@} + + /** @name Byte array processing */ + //@{ + + /** + * Performs a XOR processing with given data, XORing every byte of input with a single + * given value. + * @param data data to process + * @param key value to XOR with + * @return processed data + */ + static std::string process_xor_one(std::string data, uint8_t key); + + /** + * Performs a XOR processing with given data, XORing every byte of input with a key + * array, repeating key array many times, if necessary (i.e. if data array is longer + * than key array). + * @param data data to process + * @param key array of bytes to XOR with + * @return processed data + */ + static std::string process_xor_many(std::string data, std::string key); + + /** + * Performs a circular left rotation shift for a given buffer by a given amount of bits, + * using groups of 1 bytes each time. 
Right circular rotation should be performed + * using this procedure with corrected amount. + * @param data source data to process + * @param amount number of bits to shift by + * @return copy of source array with requested shift applied + */ + static std::string process_rotate_left(std::string data, int amount); + + //@} + + /** + * Performs modulo operation between two integers: dividend `a` + * and divisor `b`. Divisor `b` is expected to be positive. The + * result is always 0 <= x <= b - 1. + */ + static int mod(int a, int b); + + /** + * Converts given integer `val` to a decimal string representation. + * Should be used in place of std::to_string() (which is available only + * since C++11) in older C++ implementations. + */ + static std::string to_string(int val); + + /** + * Reverses given string `val`, so that the first character becomes the + * last and the last one becomes the first. This should be used to avoid + * the need of local variables at the caller. + */ + static std::string reverse(std::string val); + +protected: + /** + * This constructor allows initialization to be deferred. This is needed + * for when the stream is constructed in the derived class, since it will + * not be able to construct it before constructing kaitai::kio. 
+ */ + kio(); + + void init(std::ios* io); + + static uint64_t get_mask_ones(int n); + + void exceptions_enable() const; + +private: + std::ios* m_io; +}; + +} + +#endif diff --git a/kaitai/kaitaistream.cpp b/kaitai/kistream.cpp similarity index 56% rename from kaitai/kaitaistream.cpp rename to kaitai/kistream.cpp index b17ca54..bf26c48 100644 --- a/kaitai/kaitaistream.cpp +++ b/kaitai/kistream.cpp @@ -1,63 +1,32 @@ -#include - -#if defined(__APPLE__) -#include -#include -#define bswap_16(x) OSSwapInt16(x) -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) -#define __BYTE_ORDER BYTE_ORDER -#define __BIG_ENDIAN BIG_ENDIAN -#define __LITTLE_ENDIAN LITTLE_ENDIAN -#elif defined(_MSC_VER) // !__APPLE__ -#include -#define __LITTLE_ENDIAN 1234 -#define __BIG_ENDIAN 4321 -#define __BYTE_ORDER __LITTLE_ENDIAN -#define bswap_16(x) _byteswap_ushort(x) -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) -#else // !__APPLE__ or !_MSC_VER -#include -#include -#endif +#include +#include #include #include #include -kaitai::kstream::kstream(std::istream* io) { +using namespace ::kaitai; + +kistream::kistream(std::istream* io): kio(io) { m_io = io; init(); } -kaitai::kstream::kstream(std::string& data): m_io_str(data) { +kistream::kistream(std::string& data): m_io_str(data) { + kio::init(&m_io_str); m_io = &m_io_str; init(); } -void kaitai::kstream::init() { - exceptions_enable(); +void kistream::init() { align_to_byte(); } -void kaitai::kstream::close() { - // m_io->close(); -} - -void kaitai::kstream::exceptions_enable() const { - m_io->exceptions( - std::istream::eofbit | - std::istream::failbit | - std::istream::badbit - ); -} - // ======================================================================== // Stream positioning // ======================================================================== -bool kaitai::kstream::is_eof() const { +bool kistream::is_eof() const { if (m_bits_left > 0) { return false; } @@ -77,15 +46,15 @@ 
bool kaitai::kstream::is_eof() const { } } -void kaitai::kstream::seek(uint64_t pos) { +void kistream::seek(uint64_t pos) { m_io->seekg(pos); } -uint64_t kaitai::kstream::pos() { +uint64_t kistream::pos() { return m_io->tellg(); } -uint64_t kaitai::kstream::size() { +uint64_t kistream::size() { std::iostream::pos_type cur_pos = m_io->tellg(); m_io->seekg(0, std::ios::end); std::iostream::pos_type len = m_io->tellg(); @@ -101,7 +70,7 @@ uint64_t kaitai::kstream::size() { // Signed // ------------------------------------------------------------------------ -int8_t kaitai::kstream::read_s1() { +int8_t kistream::read_s1() { char t; m_io->get(t); return t; @@ -111,7 +80,7 @@ int8_t kaitai::kstream::read_s1() { // Big-endian // ........................................................................ -int16_t kaitai::kstream::read_s2be() { +int16_t kistream::read_s2be() { int16_t t; m_io->read(reinterpret_cast(&t), 2); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -120,7 +89,7 @@ int16_t kaitai::kstream::read_s2be() { return t; } -int32_t kaitai::kstream::read_s4be() { +int32_t kistream::read_s4be() { int32_t t; m_io->read(reinterpret_cast(&t), 4); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -129,7 +98,7 @@ int32_t kaitai::kstream::read_s4be() { return t; } -int64_t kaitai::kstream::read_s8be() { +int64_t kistream::read_s8be() { int64_t t; m_io->read(reinterpret_cast(&t), 8); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -142,7 +111,7 @@ int64_t kaitai::kstream::read_s8be() { // Little-endian // ........................................................................ 
-int16_t kaitai::kstream::read_s2le() { +int16_t kistream::read_s2le() { int16_t t; m_io->read(reinterpret_cast(&t), 2); #if __BYTE_ORDER == __BIG_ENDIAN @@ -151,7 +120,7 @@ int16_t kaitai::kstream::read_s2le() { return t; } -int32_t kaitai::kstream::read_s4le() { +int32_t kistream::read_s4le() { int32_t t; m_io->read(reinterpret_cast(&t), 4); #if __BYTE_ORDER == __BIG_ENDIAN @@ -160,7 +129,7 @@ int32_t kaitai::kstream::read_s4le() { return t; } -int64_t kaitai::kstream::read_s8le() { +int64_t kistream::read_s8le() { int64_t t; m_io->read(reinterpret_cast(&t), 8); #if __BYTE_ORDER == __BIG_ENDIAN @@ -173,7 +142,7 @@ int64_t kaitai::kstream::read_s8le() { // Unsigned // ------------------------------------------------------------------------ -uint8_t kaitai::kstream::read_u1() { +uint8_t kistream::read_u1() { char t; m_io->get(t); return t; @@ -183,7 +152,7 @@ uint8_t kaitai::kstream::read_u1() { // Big-endian // ........................................................................ -uint16_t kaitai::kstream::read_u2be() { +uint16_t kistream::read_u2be() { uint16_t t; m_io->read(reinterpret_cast(&t), 2); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -192,7 +161,7 @@ uint16_t kaitai::kstream::read_u2be() { return t; } -uint32_t kaitai::kstream::read_u4be() { +uint32_t kistream::read_u4be() { uint32_t t; m_io->read(reinterpret_cast(&t), 4); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -201,7 +170,7 @@ uint32_t kaitai::kstream::read_u4be() { return t; } -uint64_t kaitai::kstream::read_u8be() { +uint64_t kistream::read_u8be() { uint64_t t; m_io->read(reinterpret_cast(&t), 8); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -214,7 +183,7 @@ uint64_t kaitai::kstream::read_u8be() { // Little-endian // ........................................................................ 
-uint16_t kaitai::kstream::read_u2le() { +uint16_t kistream::read_u2le() { uint16_t t; m_io->read(reinterpret_cast(&t), 2); #if __BYTE_ORDER == __BIG_ENDIAN @@ -223,7 +192,7 @@ uint16_t kaitai::kstream::read_u2le() { return t; } -uint32_t kaitai::kstream::read_u4le() { +uint32_t kistream::read_u4le() { uint32_t t; m_io->read(reinterpret_cast(&t), 4); #if __BYTE_ORDER == __BIG_ENDIAN @@ -232,7 +201,7 @@ uint32_t kaitai::kstream::read_u4le() { return t; } -uint64_t kaitai::kstream::read_u8le() { +uint64_t kistream::read_u8le() { uint64_t t; m_io->read(reinterpret_cast(&t), 8); #if __BYTE_ORDER == __BIG_ENDIAN @@ -249,7 +218,7 @@ uint64_t kaitai::kstream::read_u8le() { // Big-endian // ........................................................................ -float kaitai::kstream::read_f4be() { +float kistream::read_f4be() { uint32_t t; m_io->read(reinterpret_cast(&t), 4); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -258,7 +227,7 @@ float kaitai::kstream::read_f4be() { return reinterpret_cast(t); } -double kaitai::kstream::read_f8be() { +double kistream::read_f8be() { uint64_t t; m_io->read(reinterpret_cast(&t), 8); #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -271,7 +240,7 @@ double kaitai::kstream::read_f8be() { // Little-endian // ........................................................................ 
-float kaitai::kstream::read_f4le() { +float kistream::read_f4le() { uint32_t t; m_io->read(reinterpret_cast(&t), 4); #if __BYTE_ORDER == __BIG_ENDIAN @@ -280,7 +249,7 @@ float kaitai::kstream::read_f4le() { return reinterpret_cast(t); } -double kaitai::kstream::read_f8le() { +double kistream::read_f8le() { uint64_t t; m_io->read(reinterpret_cast(&t), 8); #if __BYTE_ORDER == __BIG_ENDIAN @@ -293,12 +262,12 @@ double kaitai::kstream::read_f8le() { // Unaligned bit values // ======================================================================== -void kaitai::kstream::align_to_byte() { +void kistream::align_to_byte() { m_bits_left = 0; m_bits = 0; } -uint64_t kaitai::kstream::read_bits_int(int n) { +uint64_t kistream::read_bits_int(int n) { int bits_needed = n - m_bits_left; if (bits_needed > 0) { // 1 bit => 1 byte @@ -332,19 +301,11 @@ uint64_t kaitai::kstream::read_bits_int(int n) { return res; } -uint64_t kaitai::kstream::get_mask_ones(int n) { - if (n == 64) { - return 0xFFFFFFFFFFFFFFFF; - } else { - return ((uint64_t) 1 << n) - 1; - } -} - // ======================================================================== // Byte arrays // ======================================================================== -std::string kaitai::kstream::read_bytes(std::streamsize len) { +std::string kistream::read_bytes(std::streamsize len) { std::vector result(len); // NOTE: streamsize type is signed, negative values are only *supposed* to not be used. 
@@ -360,7 +321,7 @@ std::string kaitai::kstream::read_bytes(std::streamsize len) { return std::string(result.begin(), result.end()); } -std::string kaitai::kstream::read_bytes_full() { +std::string kistream::read_bytes_full() { std::iostream::pos_type p1 = m_io->tellg(); m_io->seekg(0, std::ios::end); std::iostream::pos_type p2 = m_io->tellg(); @@ -377,7 +338,7 @@ std::string kaitai::kstream::read_bytes_full() { return result; } -std::string kaitai::kstream::read_bytes_term(char term, bool include, bool consume, bool eos_error) { +std::string kistream::read_bytes_term(char term, bool include, bool consume, bool eos_error) { std::string result; std::getline(*m_io, result, term); if (m_io->eof()) { @@ -395,7 +356,7 @@ std::string kaitai::kstream::read_bytes_term(char term, bool include, bool consu return result; } -std::string kaitai::kstream::ensure_fixed_contents(std::string expected) { +std::string kistream::ensure_fixed_contents(std::string expected) { std::string actual = read_bytes(expected.length()); if (actual != expected) { @@ -406,74 +367,14 @@ std::string kaitai::kstream::ensure_fixed_contents(std::string expected) { return actual; } -std::string kaitai::kstream::bytes_strip_right(std::string src, char pad_byte) { - std::size_t new_len = src.length(); - - while (new_len > 0 && src[new_len - 1] == pad_byte) - new_len--; - - return src.substr(0, new_len); -} - -std::string kaitai::kstream::bytes_terminate(std::string src, char term, bool include) { - std::size_t new_len = 0; - std::size_t max_len = src.length(); - - while (new_len < max_len && src[new_len] != term) - new_len++; - - if (include && new_len < max_len) - new_len++; - - return src.substr(0, new_len); -} - // ======================================================================== // Byte array processing // ======================================================================== -std::string kaitai::kstream::process_xor_one(std::string data, uint8_t key) { - size_t len = data.length(); - 
std::string result(len, ' '); - - for (size_t i = 0; i < len; i++) - result[i] = data[i] ^ key; - - return result; -} - -std::string kaitai::kstream::process_xor_many(std::string data, std::string key) { - size_t len = data.length(); - size_t kl = key.length(); - std::string result(len, ' '); - - size_t ki = 0; - for (size_t i = 0; i < len; i++) { - result[i] = data[i] ^ key[ki]; - ki++; - if (ki >= kl) - ki = 0; - } - - return result; -} - -std::string kaitai::kstream::process_rotate_left(std::string data, int amount) { - size_t len = data.length(); - std::string result(len, ' '); - - for (size_t i = 0; i < len; i++) { - uint8_t bits = data[i]; - result[i] = (bits << amount) | (bits >> (8 - amount)); - } - - return result; -} - #ifdef KS_ZLIB #include -std::string kaitai::kstream::process_zlib(std::string data) { +std::string kistream::process_zlib(std::string data) { int ret; unsigned char *src_ptr = reinterpret_cast(&data[0]); @@ -517,115 +418,3 @@ std::string kaitai::kstream::process_zlib(std::string data) { return outstring; } #endif - -// ======================================================================== -// Misc utility methods -// ======================================================================== - -int kaitai::kstream::mod(int a, int b) { - if (b <= 0) - throw std::invalid_argument("mod: divisor b <= 0"); - int r = a % b; - if (r < 0) - r += b; - return r; -} - -#include -std::string kaitai::kstream::to_string(int val) { - // if int is 32 bits, "-2147483648" is the longest string representation - // => 11 chars + zero => 12 chars - // if int is 64 bits, "-9223372036854775808" is the longest - // => 20 chars + zero => 21 chars - char buf[25]; - int got_len = snprintf(buf, sizeof(buf), "%d", val); - - // should never happen, but check nonetheless - if (got_len > sizeof(buf)) - throw std::invalid_argument("to_string: integer is longer than string buffer"); - - return std::string(buf); -} - -#include -std::string 
kaitai::kstream::reverse(std::string val) { - std::reverse(val.begin(), val.end()); - - return val; -} - -// ======================================================================== -// Other internal methods -// ======================================================================== - -#ifndef KS_STR_DEFAULT_ENCODING -#define KS_STR_DEFAULT_ENCODING "UTF-8" -#endif - -#ifdef KS_STR_ENCODING_ICONV - -#include -#include -#include - -std::string kaitai::kstream::bytes_to_str(std::string src, std::string src_enc) { - iconv_t cd = iconv_open(KS_STR_DEFAULT_ENCODING, src_enc.c_str()); - - if (cd == (iconv_t) -1) { - if (errno == EINVAL) { - throw std::runtime_error("bytes_to_str: invalid encoding pair conversion requested"); - } else { - throw std::runtime_error("bytes_to_str: error opening iconv"); - } - } - - size_t src_len = src.length(); - size_t src_left = src_len; - - // Start with a buffer length of double the source length. - size_t dst_len = src_len * 2; - std::string dst(dst_len, ' '); - size_t dst_left = dst_len; - - char *src_ptr = &src[0]; - char *dst_ptr = &dst[0]; - - while (true) { - size_t res = iconv(cd, &src_ptr, &src_left, &dst_ptr, &dst_left); - - if (res == (size_t) -1) { - if (errno == E2BIG) { - // dst buffer is not enough to accomodate whole string - // enlarge the buffer and try again - size_t dst_used = dst_len - dst_left; - dst_left += dst_len; - dst_len += dst_len; - dst.resize(dst_len); - - // dst.resize might have allocated destination buffer in another area - // of memory, thus our previous pointer "dst" will be invalid; re-point - // it using "dst_used". 
- dst_ptr = &dst[dst_used]; - } else { - throw std::runtime_error("bytes_to_str: iconv error"); - } - } else { - // conversion successful - dst.resize(dst_len - dst_left); - break; - } - } - - if (iconv_close(cd) != 0) { - throw std::runtime_error("bytes_to_str: iconv close error"); - } - - return dst; -} -#elif defined(KS_STR_ENCODING_NONE) -std::string kaitai::kstream::bytes_to_str(std::string src, std::string src_enc) { - return src; -} -#else -#error Need to decide how to handle strings: please define one of: KS_STR_ENCODING_ICONV, KS_STR_ENCODING_NONE -#endif diff --git a/kaitai/kistream.h b/kaitai/kistream.h new file mode 100644 index 0000000..fae8416 --- /dev/null +++ b/kaitai/kistream.h @@ -0,0 +1,190 @@ +#ifndef KAITAI_KISTREAM_H +#define KAITAI_KISTREAM_H + +#include + +#include +#include +#include +#include + +namespace kaitai { + +/** + * Kaitai Stream class (kaitai::kistream) is an implementation of + * Kaitai Struct stream API + * for C++/STL. It's implemented as a wrapper over generic STL std::istream. + * + * It provides a wide variety of simple methods to read (parse) binary + * representations of primitive types, such as integer and floating + * point numbers, byte arrays and strings, and also provides stream + * positioning / navigation methods with unified cross-language and + * cross-toolkit semantics. + * + * Typically, end users won't access Kaitai Stream class manually, but would + * describe a binary structure format using .ksy language and then would use + * Kaitai Struct compiler to generate source code in desired target language. + * That code, in turn, would use this class and API to do the actual parsing + * job. + */ +class kistream : public virtual kio { +public: + /** + * Constructs new Kaitai Stream object, wrapping a given std::istream. + * \param io istream object to use for this Kaitai Stream + */ + kistream(std::istream* io); + + /** + * Constructs new Kaitai Stream object, wrapping a given in-memory data + * buffer. 
+ * \param data data buffer to use for this Kaitai Stream + */ + kistream(std::string& data); + + /** @name Stream positioning */ + //@{ + /** + * Check if stream pointer is at the end of stream. Note that the semantics + * are different from traditional STL semantics: one does *not* need to do a + * read (which will fail) after the actual end of the stream to trigger EOF + * flag, which can be accessed after that read. It is sufficient to just be + * at the end of the stream for this method to return true. + * \return "true" if we are located at the end of the stream. + */ + bool is_eof() const; + + /** + * Set stream pointer to designated position. + * \param pos new position (offset in bytes from the beginning of the stream) + */ + void seek(uint64_t pos); + + /** + * Get current position of a stream pointer. + * \return pointer position, number of bytes from the beginning of the stream + */ + uint64_t pos(); + + /** + * Get total size of the stream in bytes. + * \return size of the stream in bytes + */ + uint64_t size(); + //@} + + /** @name Integer numbers */ + //@{ + + // ------------------------------------------------------------------------ + // Signed + // ------------------------------------------------------------------------ + + int8_t read_s1(); + + // ........................................................................ + // Big-endian + // ........................................................................ + + int16_t read_s2be(); + int32_t read_s4be(); + int64_t read_s8be(); + + // ........................................................................ + // Little-endian + // ........................................................................ 
+ + int16_t read_s2le(); + int32_t read_s4le(); + int64_t read_s8le(); + + // ------------------------------------------------------------------------ + // Unsigned + // ------------------------------------------------------------------------ + + uint8_t read_u1(); + + // ........................................................................ + // Big-endian + // ........................................................................ + + uint16_t read_u2be(); + uint32_t read_u4be(); + uint64_t read_u8be(); + + // ........................................................................ + // Little-endian + // ........................................................................ + + uint16_t read_u2le(); + uint32_t read_u4le(); + uint64_t read_u8le(); + + //@} + + /** @name Floating point numbers */ + //@{ + + // ........................................................................ + // Big-endian + // ........................................................................ + + float read_f4be(); + double read_f8be(); + + // ........................................................................ + // Little-endian + // ........................................................................ + + float read_f4le(); + double read_f8le(); + + //@} + + /** @name Unaligned bit values */ + //@{ + + void align_to_byte(); + uint64_t read_bits_int(int n); + + //@} + + /** @name Byte arrays */ + //@{ + + std::string read_bytes(std::streamsize len); + std::string read_bytes_full(); + std::string read_bytes_term(char term, bool include, bool consume, bool eos_error); + std::string ensure_fixed_contents(std::string expected); + + //@} + + /** @name Byte array processing */ + //@{ + +#ifdef KS_ZLIB + /** + * Performs an unpacking ("inflation") of zlib-compressed data with usual zlib headers. 
+ * @param data data to unpack + * @return unpacked data + * @throws IOException + */ + static std::string process_zlib(std::string data); +#endif + + //@} + +private: + std::istream* m_io; + std::istringstream m_io_str; + int m_bits_left; + uint64_t m_bits; + + void init(); + + static const int ZLIB_BUF_SIZE = 128 * 1024; +}; + +} + +#endif diff --git a/kaitai/kostream.cpp b/kaitai/kostream.cpp new file mode 100644 index 0000000..2cb70fc --- /dev/null +++ b/kaitai/kostream.cpp @@ -0,0 +1,180 @@ +#include +#include + +#include +#include +#include + +using namespace ::kaitai; + +kostream::kostream(std::ostream* io): kio(io) { + m_io = io; +} + +// ======================================================================== +// Integer numbers +// ======================================================================== + +// ------------------------------------------------------------------------ +// Signed +// ------------------------------------------------------------------------ + +void kostream::write_s1(int8_t t) { + m_io->put(t); +} + +// ........................................................................ +// Big-endian +// ........................................................................ + +void kostream::write_s2be(int16_t t) { +#if __BYTE_ORDER == __LITTLE_ENDIAN + t = bswap_16(t); +#endif + m_io->write(reinterpret_cast(&t), 2); +} + +void kostream::write_s4be(int32_t t) { +#if __BYTE_ORDER == __LITTLE_ENDIAN + t = bswap_32(t); +#endif + m_io->write(reinterpret_cast(&t), 4); +} + +void kostream::write_s8be(int64_t t) { +#if __BYTE_ORDER == __LITTLE_ENDIAN + t = bswap_64(t); +#endif + m_io->write(reinterpret_cast(&t), 8); +} + +// ........................................................................ +// Little-endian +// ........................................................................ 
+
+// Writes a signed 16-bit integer as 2 bytes in little-endian order.
+// The value is byte-swapped first only when the host is big-endian.
+void kostream::write_s2le(int16_t t) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    t = bswap_16(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 2);
+}
+
+// Writes a signed 32-bit integer as 4 bytes in little-endian order.
+void kostream::write_s4le(int32_t t) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    t = bswap_32(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 4);
+}
+
+// Writes a signed 64-bit integer as 8 bytes in little-endian order.
+void kostream::write_s8le(int64_t t) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    t = bswap_64(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 8);
+}
+
+// ------------------------------------------------------------------------
+// Unsigned
+// ------------------------------------------------------------------------
+
+// Writes a single unsigned byte; no byte order is involved.
+void kostream::write_u1(uint8_t t) {
+    m_io->put(t);
+}
+
+// ........................................................................
+// Big-endian
+// ........................................................................
+
+// Writes an unsigned 16-bit integer as 2 bytes in big-endian order.
+// The byte count passed to write() must equal the width of t: a larger
+// count would emit extra bytes and read past the end of the local.
+void kostream::write_u2be(uint16_t t) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    t = bswap_16(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 2);
+}
+
+// Writes an unsigned 32-bit integer as 4 bytes in big-endian order.
+// As above, the byte count must match the 4-byte width of t.
+void kostream::write_u4be(uint32_t t) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    t = bswap_32(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 4);
+}
+
+// Writes an unsigned 64-bit integer as 8 bytes in big-endian order.
+void kostream::write_u8be(uint64_t t) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    t = bswap_64(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 8);
+}
+
+// ........................................................................
+// Little-endian
+// ........................................................................
+
+// Writes an unsigned 16-bit integer as 2 bytes in little-endian order.
+// The value is byte-swapped first only when the host is big-endian.
+void kostream::write_u2le(uint16_t t) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    t = bswap_16(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 2);
+}
+
+// Writes an unsigned 32-bit integer as 4 bytes in little-endian order.
+void kostream::write_u4le(uint32_t t) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    t = bswap_32(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 4);
+}
+
+// Writes an unsigned 64-bit integer as 8 bytes in little-endian order.
+void kostream::write_u8le(uint64_t t) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    t = bswap_64(t);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&t), 8);
+}
+
+// ========================================================================
+// Floating point numbers
+// ========================================================================
+
+// ........................................................................
+// Big-endian
+// ........................................................................
+
+// Writes a float as 4 bytes in big-endian order (assumes float is 4 bytes
+// wide, as kistream::read_f4be does when it reads into a uint32_t).
+// bswap_32() works on integers: handing it the float itself would convert
+// the numeric value to an integer, swap that, and convert the result back,
+// which destroys the value. The raw bit pattern is swapped instead.
+void kostream::write_f4be(float t) {
+    uint32_t bits = reinterpret_cast<uint32_t&>(t);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    bits = bswap_32(bits);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&bits), 4);
+}
+
+// Writes a double as 8 bytes in big-endian order (assumes double is 8 bytes
+// wide). As with write_f4be, the raw bit pattern is swapped, not the
+// numeric value.
+void kostream::write_f8be(double t) {
+    uint64_t bits = reinterpret_cast<uint64_t&>(t);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    bits = bswap_64(bits);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&bits), 8);
+}
+
+// ........................................................................
+// Little-endian
+// ........................................................................
+
+// Writes a float as 4 bytes in little-endian order (assumes float is 4 bytes
+// wide). bswap_32() works on integers: handing it the float itself would
+// convert the numeric value instead of swapping bytes, so the raw bit
+// pattern is swapped instead.
+void kostream::write_f4le(float t) {
+    uint32_t bits = reinterpret_cast<uint32_t&>(t);
+#if __BYTE_ORDER == __BIG_ENDIAN
+    bits = bswap_32(bits);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&bits), 4);
+}
+
+// Writes a double as 8 bytes in little-endian order (assumes double is
+// 8 bytes wide). The raw bit pattern is swapped, not the numeric value.
+void kostream::write_f8le(double t) {
+    uint64_t bits = reinterpret_cast<uint64_t&>(t);
+#if __BYTE_ORDER == __BIG_ENDIAN
+    bits = bswap_64(bits);
+#endif
+    m_io->write(reinterpret_cast<const char*>(&bits), 8);
+}
+
+// ========================================================================
+// Byte arrays
+// ========================================================================
+
+// Writes every byte of the given string to the stream unchanged.
+void kostream::write_bytes(std::string bytes) {
+    m_io->write(bytes.data(), bytes.length());
+}
diff --git a/kaitai/kostream.h b/kaitai/kostream.h
new file mode 100644
index 0000000..a638be1
--- /dev/null
+++ b/kaitai/kostream.h
@@ -0,0 +1,124 @@
+#ifndef KAITAI_KOSTREAM_H
+#define KAITAI_KOSTREAM_H
+
+#include <kaitai/kio.h>
+
+#include <ostream>
+#include <string>
+#include <stdint.h>
+#include <sys/types.h>
+
+namespace kaitai {
+
+/**
+ * Kaitai Output Stream class (kaitai::kostream) is an implementation of
+ * Kaitai Struct stream API
+ * for C++/STL, for serialization. It's implemented as a wrapper over generic
+ * STL std::ostream.
+ *
+ * It provides a wide variety of simple methods to write (serialize) binary
+ * representations of primitive types, such as integer and floating
+ * point numbers, byte arrays and strings, with unified cross-language and
+ * cross-toolkit semantics.
+ *
+ * Typically, end users won't access Kaitai Stream classes manually, but would
+ * describe a binary structure format using .ksy language and then would use
+ * Kaitai Struct compiler to generate source code in desired target language.
+ * That code, in turn, would use this class and API to do the actual
+ * serialization job.
+ */
+class kostream : public virtual kio {
+public:
+    /**
+     * Constructs new Kaitai Output Stream object, wrapping a given std::ostream.
+     * \param io ostream object that the write_* methods of this Kaitai Stream
+     *     write to
+     */
+    kostream(std::ostream* io);
+
+    /** @name Integer numbers */
+    //@{
+
+    // ------------------------------------------------------------------------
+    // Signed
+    // ------------------------------------------------------------------------
+
+    void write_s1(int8_t t);
+
+    // ........................................................................
+    // Big-endian
+    // ........................................................................
+
+    void write_s2be(int16_t t);
+    void write_s4be(int32_t t);
+    void write_s8be(int64_t t);
+
+    // ........................................................................
+    // Little-endian
+    // ........................................................................
+
+    void write_s2le(int16_t t);
+    void write_s4le(int32_t t);
+    void write_s8le(int64_t t);
+
+    // ------------------------------------------------------------------------
+    // Unsigned
+    // ------------------------------------------------------------------------
+
+    void write_u1(uint8_t t);
+
+    // ........................................................................
+    // Big-endian
+    // ........................................................................
+
+    void write_u2be(uint16_t t);
+    void write_u4be(uint32_t t);
+    void write_u8be(uint64_t t);
+
+    // ........................................................................
+    // Little-endian
+    // ........................................................................
+
+    void write_u2le(uint16_t t);
+    void write_u4le(uint32_t t);
+    void write_u8le(uint64_t t);
+
+    //@}
+
+    /** @name Floating point numbers */
+    //@{
+
+    // ........................................................................
+    // Big-endian
+    // ........................................................................
+
+    void write_f4be(float t);
+    void write_f8be(double t);
+
+    // ........................................................................
+    // Little-endian
+    // ........................................................................
+
+    void write_f4le(float t);
+    void write_f8le(double t);
+
+    //@}
+
+    /** @name Byte arrays */
+    //@{
+
+    /**
+     * Writes a byte array to the stream as-is.
+     * \param bytes bytes to write; exactly bytes.length() bytes are emitted
+     */
+    void write_bytes(std::string bytes);
+
+    //@}
+
+private:
+    std::ostream* m_io;
+};
+
+}
+
+#endif