Skip to content

Commit

Permalink
Add support for offsetting a stream using the built-in method and opt…
Browse files Browse the repository at this point in the history
…imize the naive portion.
  • Loading branch information
nurhafiz committed May 10, 2024
1 parent 4f609d1 commit 55c2ab6
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 9 deletions.
22 changes: 16 additions & 6 deletions src/Toimik.WarcProtocol/LineReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,24 @@ public class LineReader(Stream stream, CancellationToken cancellationToken)

public async Task Offset(long byteOffset)
{
// NOTE: This is naively done because seek is unsupported by the underlying class
for (long i = 0; i < byteOffset; i++)
if (Stream.CanSeek)
{
var readCount = await Stream.ReadAsync(buffer: (new byte[1]).AsMemory(start: 0, length: 1)).ConfigureAwait(false);
var isEofEncountered = readCount == 0;
if (isEofEncountered)
Stream.Seek(byteOffset, SeekOrigin.Begin);
}
else
{
long bytePosition = 0;
var buffer = new byte[1024];
while (bytePosition < byteOffset)
{
throw new ArgumentException("Offset exceeds file size.", nameof(byteOffset));
var remainingCount = (int)Math.Min(buffer.Length, byteOffset - bytePosition);
var byteCount = await Stream.ReadAsync(buffer.AsMemory(0, remainingCount)).ConfigureAwait(false);
if (byteCount == 0)
{
break;
}

bytePosition += byteCount;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/Toimik.WarcProtocol/Toimik.WarcProtocol.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<AssemblyName>Toimik.WarcProtocol</AssemblyName>
<PackageVersion>0.10.5</PackageVersion>
<PackageVersion>0.10.6</PackageVersion>
<Authors>Nurhafiz</Authors>
<Version>0.10.5</Version>
<Version>0.10.6</Version>
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
<Company>Toimik</Company>
<Description>
Expand Down
26 changes: 26 additions & 0 deletions tests/Toimik.WarcProtocol.Tests/LineReaderTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
namespace Toimik.WarcProtocol.Tests;

using Moq;
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Xunit;

public class LineReaderTest
{
[Fact]
public async Task OffsetNaively()
{
var streamMock = new Mock<Stream>();
streamMock.Setup(s => s.CanSeek)
.Returns(false);
streamMock.SetupSequence(s => s.ReadAsync(It.IsAny<Memory<byte>>(), It.IsAny<CancellationToken>()))
.ReturnsAsync(1)
.ReturnsAsync(0);

var lineReader = new LineReader(streamMock.Object, CancellationToken.None);
await lineReader.Offset(2);
return;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.9.0" />
<PackageReference Include="Moq" Version="4.20.70" />
<PackageReference Include="SharpZipLib" Version="1.4.2" />
<PackageReference Include="StyleCop.Analyzers" Version="1.1.118">
<PrivateAssets>all</PrivateAssets>
Expand Down
7 changes: 6 additions & 1 deletion tests/Toimik.WarcProtocol.Tests/WarcParserTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -338,16 +338,21 @@ public async Task MultilineHeaderValues()
Assert.Equal("A", Encoding.UTF8.GetString(record.RecordBlock!));
}

// NOTE: This is no longer needed due to the inclusion of Stream.Seek(...) in LineReader that
// allows for an offset beyond the length of the stream.
/*
[Fact]
public async Task OffsetOverLimit()
{
var parser = new WarcParser();
var path = $"{DirectoryForInvalidRecords}incorrect_content_length.warc";
var exception = await Assert.ThrowsAsync<ArgumentException>(async () => await parser.Parse(path, byteOffset: 1000).ToListAsync());
var exception = await Assert.ThrowsAsync<ArgumentException>(async () => await
parser.Parse(path, byteOffset: 1000).ToListAsync());
Assert.Contains("Offset exceeds file size", exception.Message);
}
*/

[Fact]
public async Task OffsetUnderLimit()
Expand Down

0 comments on commit 55c2ab6

Please sign in to comment.