From 55c2ab67b9253cb3b5c7826c11a89e0b3b04e1d5 Mon Sep 17 00:00:00 2001 From: nurhafiz Date: Fri, 10 May 2024 21:22:54 +0800 Subject: [PATCH] Add support for offsetting a stream using the built-in method and optimize the naive portion. --- src/Toimik.WarcProtocol/LineReader.cs | 22 +++++++++++----- .../Toimik.WarcProtocol.csproj | 4 +-- .../LineReaderTest.cs | 26 +++++++++++++++++++ .../Toimik.WarcProtocol.Tests.csproj | 1 + .../WarcParserTest.cs | 7 ++++- 5 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 tests/Toimik.WarcProtocol.Tests/LineReaderTest.cs diff --git a/src/Toimik.WarcProtocol/LineReader.cs b/src/Toimik.WarcProtocol/LineReader.cs index 7923386..202fb6e 100644 --- a/src/Toimik.WarcProtocol/LineReader.cs +++ b/src/Toimik.WarcProtocol/LineReader.cs @@ -37,14 +37,24 @@ public class LineReader(Stream stream, CancellationToken cancellationToken) public async Task Offset(long byteOffset) { - // NOTE: This is naively done because seek is unsupported by the underlying class - for (long i = 0; i < byteOffset; i++) + if (Stream.CanSeek) { - var readCount = await Stream.ReadAsync(buffer: (new byte[1]).AsMemory(start: 0, length: 1)).ConfigureAwait(false); - var isEofEncountered = readCount == 0; - if (isEofEncountered) + Stream.Seek(byteOffset, SeekOrigin.Begin); + } + else + { + long bytePosition = 0; + var buffer = new byte[1024]; + while (bytePosition < byteOffset) { - throw new ArgumentException("Offset exceeds file size.", nameof(byteOffset)); + var remainingCount = (int)Math.Min(buffer.Length, byteOffset - bytePosition); + var byteCount = await Stream.ReadAsync(buffer.AsMemory(0, remainingCount)).ConfigureAwait(false); + if (byteCount == 0) + { + break; + } + + bytePosition += byteCount; } } } diff --git a/src/Toimik.WarcProtocol/Toimik.WarcProtocol.csproj b/src/Toimik.WarcProtocol/Toimik.WarcProtocol.csproj index b9da7ab..315cb78 100644 --- a/src/Toimik.WarcProtocol/Toimik.WarcProtocol.csproj +++ b/src/Toimik.WarcProtocol/Toimik.WarcProtocol.csproj @@ -4,9 +4,9 @@ net8.0 enable Toimik.WarcProtocol - 0.10.5 + 0.10.6 Nurhafiz - 0.10.5 + 0.10.6 true Toimik diff --git a/tests/Toimik.WarcProtocol.Tests/LineReaderTest.cs b/tests/Toimik.WarcProtocol.Tests/LineReaderTest.cs new file mode 100644 index 0000000..805b1cc --- /dev/null +++ b/tests/Toimik.WarcProtocol.Tests/LineReaderTest.cs @@ -0,0 +1,26 @@ +namespace Toimik.WarcProtocol.Tests; + +using Moq; +using System; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +public class LineReaderTest +{ + [Fact] + public async Task OffsetNaively() + { + var streamMock = new Mock(); + streamMock.Setup(s => s.CanSeek) + .Returns(false); + streamMock.SetupSequence(s => s.ReadAsync(It.IsAny>(), It.IsAny())) + .ReturnsAsync(1) + .ReturnsAsync(0); + + var lineReader = new LineReader(streamMock.Object, CancellationToken.None); + await lineReader.Offset(2); + return; + } +} \ No newline at end of file diff --git a/tests/Toimik.WarcProtocol.Tests/Toimik.WarcProtocol.Tests.csproj b/tests/Toimik.WarcProtocol.Tests/Toimik.WarcProtocol.Tests.csproj index 6e32196..d7e3b6e 100644 --- a/tests/Toimik.WarcProtocol.Tests/Toimik.WarcProtocol.Tests.csproj +++ b/tests/Toimik.WarcProtocol.Tests/Toimik.WarcProtocol.Tests.csproj @@ -12,6 +12,7 @@ runtime; build; native; contentfiles; analyzers; buildtransitive + all diff --git a/tests/Toimik.WarcProtocol.Tests/WarcParserTest.cs b/tests/Toimik.WarcProtocol.Tests/WarcParserTest.cs index ffd993c..3ca6dbe 100644 --- a/tests/Toimik.WarcProtocol.Tests/WarcParserTest.cs +++ b/tests/Toimik.WarcProtocol.Tests/WarcParserTest.cs @@ -338,16 +338,21 @@ public async Task MultilineHeaderValues() Assert.Equal("A", Encoding.UTF8.GetString(record.RecordBlock!)); } + // NOTE: This is no longer needed due to the inclusion of Stream.Seek(...) in LineReader that + // allows for an offset beyond the length of the stream. + /* [Fact] public async Task OffsetOverLimit() { var parser = new WarcParser(); var path = $"{DirectoryForInvalidRecords}incorrect_content_length.warc"; - var exception = await Assert.ThrowsAsync(async () => await parser.Parse(path, byteOffset: 1000).ToListAsync()); + var exception = await Assert.ThrowsAsync(async () => await + parser.Parse(path, byteOffset: 1000).ToListAsync()); Assert.Contains("Offset exceeds file size", exception.Message); } + */ [Fact] public async Task OffsetUnderLimit()