module Kaitai
  module Struct
    class Stream
      ##
      # Sets up the underlying IO object this stream reads from.
      # @param arg [String, IO, SubIO] a String is wrapped into a
      #   StringIO; an IO or a SubIO is used as is
      # @raise [TypeError] if arg is none of the supported types
      def initialize(arg)
        case arg
        when String
          @_io = StringIO.new(arg)
        when IO, SubIO
          @_io = arg
        else
          raise TypeError, 'can be initialized with IO, SubIO or String only'
        end
        align_to_byte
      end
    end

    ##
    # Substream IO implementation: an IO-like object which wraps an
    # existing IO object and provides similar byte/bytes reading
    # functionality, but only for a limited window of bytes starting at
    # a specified offset of the parent and spanning up to a specified
    # length.
    #
    # The substream keeps its own position; every read temporarily
    # repositions the parent and puts it back afterwards.
    class SubIO
      # @return [Object] wrapped parent IO-like object
      attr_reader :parent_io

      # @return [Integer] length of the window, in bytes
      attr_reader :parent_len

      # @return [Integer] current position, relative to the window start
      attr_reader :pos

      ##
      # @param parent_io [Object] IO-like object; must respond to
      #   +pos+, +seek+ and +read+
      # @param parent_start [Integer] offset in the parent at which the
      #   window begins
      # @param parent_len [Integer] length of the window
      def initialize(parent_io, parent_start, parent_len)
        @parent_io = parent_io
        @parent_start = parent_start
        @parent_len = parent_len
        @pos = 0
        @closed = false
      end

      ##
      # @return [Integer] offset in the parent at which the window
      #   begins (the +parent_start+ given to the constructor)
      def parent_offset
        @parent_start
      end

      ##
      # @return [Boolean] true if the position is at or beyond the end
      #   of the window
      # @raise [IOError] if the substream was closed
      def eof?
        ensure_open

        @pos >= @parent_len
      end

      ##
      # Moves the substream position. Seeking beyond the end of the
      # window is allowed; subsequent reads then yield no data.
      # @param amount [#to_int] new absolute position in the window
      # @param whence [Integer] only IO::SEEK_SET is supported
      # @return [Integer] always 0
      # @raise [IOError] if the substream was closed
      # @raise [ArgumentError] if whence is not IO::SEEK_SET
      # @raise [TypeError] if amount cannot be converted with +to_int+
      # @raise [Errno::EINVAL] if the converted position is negative
      def seek(amount, whence = IO::SEEK_SET)
        ensure_open
        raise ArgumentError, "Anything but IO::SEEK_SET is not supported in SubIO::seek" if whence != IO::SEEK_SET
        raise TypeError, "Need an integer argument for amount in SubIO::seek" unless amount.respond_to?(:to_int)

        # validate the converted value: that is the one which gets stored
        target = amount.to_int
        raise Errno::EINVAL, "Negative position requested" if target < 0

        @pos = target
        0
      end

      ##
      # Reads bytes from the window, never crossing its end.
      # @param len [Integer, nil] number of bytes to read; nil means
      #   "everything up to the end of the window"
      # @return [String, nil] bytes read. With len == nil, an empty
      #   string is returned when nothing is left. With len == 0, an
      #   empty string is returned. With a positive len, nil is returned
      #   when nothing is left; fewer bytes than requested are returned
      #   when the window (or the parent) ends early.
      # @raise [IOError] if the substream was closed
      def read(len = nil)
        ensure_open

        left = @parent_len - @pos
        sized = !len.nil?

        if sized
          # special case: requesting exactly 0 bytes
          return "" if len == 0

          # actually requested reading, but we're at/beyond the boundary
          return nil if left <= 0

          # cap intent to read if going beyond the window boundary
          len = left if len > left
        else
          return "" if left <= 0

          len = left
        end

        res = with_parent_at(@parent_start + @pos) { @parent_io.read(len) }

        # parent had no data at that offset (it is shorter than the
        # window claims): report "nothing read" instead of failing
        return (sized ? nil : "") if res.nil?

        @pos += res.bytesize
        res
      end

      ##
      # Marks the substream as closed; the parent is left untouched.
      # Subsequent eof?/seek/read calls raise IOError.
      def close
        @closed = true
      end

      private

      # Raises IOError if this substream has been closed.
      def ensure_open
        raise IOError, "closed stream" if @closed
      end

      # Runs the block with the parent positioned at +offset+ and puts
      # the parent back to where it was, even if the block raises.
      def with_parent_at(offset)
        saved = @parent_io.pos
        begin
          @parent_io.seek(offset)
          yield
        ensure
          @parent_io.seek(saved)
        end
      end
    end
  end
end
require 'kaitai/struct/struct'
require 'stringio'

# Every example performs the same operation on a SubIO window over
# "12345" (offset 1, length 3) and on a plain StringIO holding "234",
# and expects both to yield the same result.
RSpec.describe Kaitai::Struct::SubIO do
  context "in 12345 asking for 234" do
    let(:io) { Kaitai::Struct::SubIO.new(StringIO.new("12345"), 1, 3) }
    let(:normal_io) { StringIO.new("234") }

    # reference implementation first, implementation under test second
    let(:streams) { [normal_io, io] }

    describe "#seek" do
      it "can seek to 0" do
        streams.each { |s| expect(s.seek(0)).to eq(0) }
        streams.each { |s| expect(s.pos).to eq(0) }
      end

      it "can seek to 2" do
        streams.each { |s| expect(s.seek(2)).to eq(0) }
        streams.each { |s| expect(s.pos).to eq(2) }
      end

      it "can seek to 10 (beyond EOF)" do
        streams.each { |s| expect(s.seek(10)).to eq(0) }
        streams.each { |s| expect(s.pos).to eq(10) }
      end

      it "cannot seek to -1" do
        streams.each { |s| expect { s.seek(-1) }.to raise_error(Errno::EINVAL) }
      end

      it "cannot seek to \"foo\"" do
        streams.each { |s| expect { s.seek("foo") }.to raise_error(TypeError) }
      end

      it "can seek to 2.3" do
        streams.each { |s| expect(s.seek(2.3)).to eq(0) }
        streams.each { |s| expect(s.pos).to eq(2) }
      end
    end

    describe "#pos" do
      it "returns 0 by default" do
        streams.each { |s| expect(s.pos).to eq(0) }
      end

      it "returns 2 after reading 2 bytes" do
        streams.each { |s| s.read(2) }
        streams.each { |s| expect(s.pos).to eq(2) }
      end

      it "returns 3 after reading 4 bytes" do
        streams.each { |s| s.read(4) }
        streams.each { |s| expect(s.pos).to eq(3) }
      end
    end

    describe "#eof?" do
      it "returns false by default" do
        streams.each { |s| expect(s.eof?).to eq(false) }
      end

      it "returns false after reading 2 bytes" do
        streams.each { |s| s.read(2) }
        streams.each { |s| expect(s.eof?).to eq(false) }
      end

      it "returns true after reading 3 bytes" do
        streams.each { |s| s.read(3) }
        streams.each { |s| expect(s.eof?).to eq(true) }
      end

      it "returns true after reading 4 bytes" do
        streams.each { |s| s.read(4) }
        streams.each { |s| expect(s.eof?).to eq(true) }
      end

      it "returns true after seeking at 3 bytes" do
        streams.each { |s| s.seek(3) }
        streams.each { |s| expect(s.eof?).to eq(true) }
      end

      it "returns true after seeking at 10 bytes" do
        streams.each { |s| s.seek(10) }
        streams.each { |s| expect(s.eof?).to eq(true) }
      end
    end

    describe "#read" do
      it "reads 234 with no arguments" do
        streams.each { |s| expect(s.read).to eq("234") }
      end

      it "reads 23 when asked to read 2" do
        streams.each { |s| expect(s.read(2)).to eq("23") }
      end

      it "reads 234 when asked to read 3" do
        streams.each { |s| expect(s.read(3)).to eq("234") }
      end

      it "reads 234 when asked to read 4" do
        streams.each { |s| expect(s.read(4)).to eq("234") }
      end

      it "reads 234 when asked to read 10" do
        streams.each { |s| expect(s.read(10)).to eq("234") }
      end

      it "reads 234 + empty when asked to read + read" do
        streams.each { |s| expect(s.read).to eq("234") }
        streams.each { |s| expect(s.read).to eq("") }
      end

      it "reads 2 + 34 when asked to read(1) + read" do
        streams.each { |s| expect(s.read(1)).to eq("2") }
        streams.each { |s| expect(s.read).to eq("34") }
      end

      it "reads 2 + 34 when asked to read(1) + read(2)" do
        streams.each { |s| expect(s.read(1)).to eq("2") }
        streams.each { |s| expect(s.read(2)).to eq("34") }
      end

      it "reads 2 + 34 when asked to read(1) + read(10)" do
        streams.each { |s| expect(s.read(1)).to eq("2") }
        streams.each { |s| expect(s.read(10)).to eq("34") }
      end

      context("after seek to EOF") do
        before(:each) do
          streams.each { |s| s.seek(3) }
        end

        it "reads nil when asked to read(1)" do
          streams.each { |s| expect(s.read(1)).to eq(nil) }
        end

        it "reads empty when asked to read()" do
          streams.each { |s| expect(s.read).to eq("") }
        end

        it "reads empty when asked to read(0)" do
          streams.each { |s| expect(s.read(0)).to eq("") }
        end
      end

      context("after seek beyond EOF") do
        before(:each) do
          streams.each { |s| s.seek(10) }
        end

        it "reads nil when asked to read(1)" do
          streams.each { |s| expect(s.read(1)).to eq(nil) }
        end

        it "reads empty when asked to read()" do
          streams.each { |s| expect(s.read).to eq("") }
        end

        it "reads empty when asked to read(0)" do
          streams.each { |s| expect(s.read(0)).to eq("") }
        end
      end
    end
  end
end
module Kaitai
  module Struct
    class Stream
      ##
      # Reserves next n bytes from current stream as a
      # Kaitai::Struct::Stream substream. Substream has its own pointer
      # and addressing in the range of [0, n) bytes. This stream's
      # pointer is advanced to the position right after this substream.
      # @param n [Integer] number of bytes to reserve for a substream
      # @return [Stream] substream covering n bytes from the current
      #   position
      def substream(n)
        start = @_io.pos
        sub = Stream.new(SubIO.new(@_io, start, n))
        @_io.seek(start + n)
        sub
      end
    end

    class SubIO
      ##
      # Reads one character from the current position of the substream.
      # @return [String, nil] the character read, or nil if the position
      #   is at or beyond the end of the window, or if the parent has no
      #   data at the corresponding offset
      # @raise [IOError] if the substream was closed
      def getc
        raise IOError, "closed stream" if @closed

        return nil if @pos >= @parent_len

        # remember position in parent IO and restore it afterwards,
        # even if the parent raises while seeking/reading
        old_pos = @parent_io.pos
        begin
          @parent_io.seek(@parent_start + @pos)
          res = @parent_io.getc
        ensure
          @parent_io.seek(old_pos)
        end

        # advance only by what was actually consumed: nothing when the
        # parent returned nil, the byte width of the character otherwise
        @pos += res.bytesize unless res.nil?

        res
      end
    end
  end
end