Skip to content

Commit

Permalink
Merge pull request #496 from anforowicz/public-incomplete-input-error
Browse files Browse the repository at this point in the history
Support for resuming decoding after `UnexpectedEof`.
  • Loading branch information
fintelia authored Sep 17, 2024
2 parents 7dae687 + ee9cf88 commit d1027ab
Show file tree
Hide file tree
Showing 5 changed files with 622 additions and 182 deletions.
319 changes: 266 additions & 53 deletions fuzz/fuzz_targets/buf_independent.rs
Original file line number Diff line number Diff line change
@@ -1,81 +1,294 @@
//! This fuzzer tests that decoding results are the same regardless of the
//! details of how the `Read` trait exposes the underlying input via
//! the `fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize>`
//! method:
//!
//! * Whole slice - `impl Read for &[u8]`:
//! - The whole slice is available for reading (if it fits into `buf`)
//! - No IO errors are expected
//! - Motivation: This is the baseline
//! * Byte-by-byte - `SmalBuf<R>`:
//! - At most 1 byte can be read in a single call to `read`
//! - Motivation: Testing that decoding works regardless of how the input is split
//! into multiple `read` calls. (The test checks every possible `read` boundary in the input
//! buffer, even though in practice file or network buffers would split the input into only a
//! handful of chunks.)
//! * TODO: Intermittent EOFs:
//! - Intermittently `read` report 0 available bytes.
//! - Still no IO errors at the `Read` trait level
//! - Motivation: Testing support for decoding a streaming or partial input
//! (i.e. scenarios where initially only the first few interlaced passes
//! can be decoded, and where decoding is resumed after getting more complete
//! input).
#![no_main]

use libfuzzer_sys::fuzz_target;

use std::mem::discriminant;
use std::io::{BufRead, Read, Result};
use std::fmt::Debug;
use std::io::Read;

/// A reader that reads at most `n` bytes.
struct SmalBuf<R: BufRead> {
inner: R,
cap: usize,
}
mod smal_buf {

impl<R: BufRead> SmalBuf<R> {
fn new(inner: R, cap: usize) -> Self {
SmalBuf { inner, cap }
use std::io::Read;

/// A reader that reads at most `n` bytes.
pub struct SmalBuf<R: Read> {
inner: R,
cap: usize,
}

impl<R: Read> SmalBuf<R> {
pub fn new(inner: R, cap: usize) -> Self {
SmalBuf { inner, cap }
}
}
}

impl<R: BufRead> Read for SmalBuf<R> {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
let len = buf.len().min(self.cap);
self.inner.read(&mut buf[..len])
impl<R: Read> Read for SmalBuf<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let len = buf.len().min(self.cap);
self.inner.read(&mut buf[..len])
}
}
}

impl<R: BufRead> BufRead for SmalBuf<R> {
fn fill_buf(&mut self) -> Result<&[u8]> {
let buf = self.inner.fill_buf()?;
let len = buf.len().min(self.cap);
Ok(&buf[..len])
mod intermittent_eofs {

use std::cell::RefCell;
use std::io::Read;
use std::rc::Rc;

/// A reader that returns `std::io::ErrorKind::UnexpectedEof` errors in every other read.
/// EOFs can be temporarily disabled and re-enabled later using the associated `Enabler`.
pub struct IntermittentEofs<R: Read> {
inner: R,

/// Controls whether intermittent EOFs happen at all.
enabler: Rc<Enabler>,

/// Controls whether an intermittent EOF will happen during the next `read`
/// (when enabled, intermittent EOFs happen every other `read`).
eof_soon: bool,
}

fn consume(&mut self, amt: usize) {
assert!(amt <= self.cap);
self.inner.consume(amt)
impl<R: Read> IntermittentEofs<R> {
pub fn new(inner: R) -> Self {
Self {
inner,
enabler: Rc::new(Enabler::new()),
eof_soon: true,
}
}

pub fn enabler(&self) -> Rc<Enabler> {
self.enabler.clone()
}
}
}

fuzz_target!(|data: &[u8]| {
// Small limits, we don't need them hopefully.
let limits = png::Limits { bytes: 1 << 16 };
impl<R: Read> Read for IntermittentEofs<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
if self.enabler.is_enabled() && self.eof_soon {
self.eof_soon = false;
return Ok(0);
}

let reference = png::Decoder::new_with_limits(data, limits);
let smal = png::Decoder::new_with_limits(SmalBuf::new(data, 1), limits);
self.eof_soon = true;
self.inner.read(buf)
}
}

pub struct Enabler {
is_enabled: RefCell<bool>,
}

impl Enabler {
fn new() -> Self {
Self {
is_enabled: RefCell::new(true),
}
}

pub fn enable(&self) {
*self.is_enabled.borrow_mut() = true;
}

pub fn disable(&self) {
*self.is_enabled.borrow_mut() = false;
}

let _ = png_compare(reference, smal);
fn is_enabled(&self) -> bool {
*self.is_enabled.borrow()
}
}
}

fuzz_target!(|data: &[u8]| {
let _ = test_data(data);
});

#[inline(always)]
fn png_compare<R: BufRead, S: BufRead>(reference: png::Decoder<R>, smal: png::Decoder<S>)
-> std::result::Result<(), ()>
{
let mut smal = Some(smal);
let mut reference = reference.read_info().map_err(|_| {
assert!(smal.take().unwrap().read_info().is_err());
})?;

let mut smal = smal.take().unwrap().read_info().expect("Deviation");

assert_eq!(reference.info().raw_bytes(), smal.info().raw_bytes());
if reference.info().raw_bytes() > 5_000_000 {
fn test_data<'a>(data: &'a [u8]) -> Result<(), ()> {
let baseline_reader = Box::new(data);
let byte_by_byte_reader = Box::new(smal_buf::SmalBuf::new(data, 1));
let intermittent_eofs_reader = Box::new(intermittent_eofs::IntermittentEofs::new(
smal_buf::SmalBuf::new(data, 1),
));
let intermittent_eofs_enabler = intermittent_eofs_reader.enabler();
let data_readers: Vec<Box<dyn Read + 'a>> = vec![
baseline_reader,
byte_by_byte_reader,
intermittent_eofs_reader,
];

let decoders = data_readers
.into_iter()
.map(|data_reader| {
// Small limits, we don't need them hopefully.
let limits = png::Limits { bytes: 1 << 16 };
png::Decoder::new_with_limits(data_reader, limits)
})
.collect::<Vec<_>>();

// `Decoder.read_info` consumes `self` and is therefore not resumable. To work around that
// let's temporarily disable intermittent EOFs:
intermittent_eofs_enabler.disable();
let mut png_readers = decoders
.into_iter()
.map(|decoder| decoder.read_info())
.assert_all_results_are_consistent()
.collect::<Result<Vec<_>, _>>()
.map_err(|_| ())?;
intermittent_eofs_enabler.enable();

let info = png_readers
.iter()
.map(|r| r.info().clone())
.assert_all_items_are_same(|lhs: &png::Info, rhs: &png::Info| {
assert_same_info(lhs, rhs);

// The assert below is somewhat redundant, but we use `raw_bytes`
// later on, so let's double-check that it's the same everywhere.
assert_eq!(lhs.raw_bytes(), rhs.raw_bytes());
});
if info.raw_bytes() > 5_000_000 {
return Err(());
}

let mut ref_data = vec![0; reference.info().raw_bytes()];
let mut smal_data = vec![0; reference.info().raw_bytes()];
let mut buffers = vec![vec![0; info.raw_bytes()]; png_readers.len()];
loop {
let output_infos = png_readers
.iter_mut()
.zip(buffers.iter_mut())
.map(|(png_reader, buffer)| {
retry_after_eofs(|| png_reader.next_frame(buffer.as_mut_slice()))
})
.assert_all_results_are_consistent()
.collect::<Result<Vec<_>, _>>()
.map_err(|_| ())?;
output_infos.into_iter().assert_all_items_are_equal();
buffers.iter().assert_all_items_are_equal();
}
}

fn retry_after_eofs<T>(
mut f: impl FnMut() -> Result<T, png::DecodingError>,
) -> Result<T, png::DecodingError> {
loop {
let rref = reference.next_frame(&mut ref_data);
let rsmal = smal.next_frame(&mut smal_data);
match (rref, rsmal) {
(Ok(info), Ok(sinfo)) if ref_data == smal_data => assert_eq!(info, sinfo),
(Ok(_), Ok(_)) => panic!("Deviating data decoded"),
(Err(er), Err(es)) if discriminant(&er) == discriminant(&es) => break Ok(()),
(Err(ferr), Err(serr)) => panic!("Deviating errors {:?} vs {:?}", ferr, serr),
(Ok(_), Err(err)) => panic!("Small buffer failed {:?}", err),
(Err(err), Ok(_)) => panic!("Unexpected success: {:?}", err),
match f() {
Err(png::DecodingError::IoError(e))
if e.kind() == std::io::ErrorKind::UnexpectedEof =>
{
()
}
other_result => break other_result,
}
}
}

fn assert_same_info(lhs: &png::Info, rhs: &png::Info) {
// Check that all decoders report the same `IHDR` fields.
assert_eq!(lhs.width, rhs.width);
assert_eq!(lhs.height, rhs.height);
assert_eq!(lhs.bit_depth, rhs.bit_depth);
assert_eq!(lhs.color_type, rhs.color_type);
assert_eq!(lhs.interlaced, rhs.interlaced);

// Check all other `Info` fields that implement `Eq`.
assert_eq!(lhs.chrm_chunk, rhs.chrm_chunk);
assert_eq!(lhs.gama_chunk, rhs.gama_chunk);
assert_eq!(lhs.icc_profile, rhs.icc_profile);
assert_eq!(lhs.palette, rhs.palette);
assert_eq!(lhs.source_chromaticities, rhs.source_chromaticities);
assert_eq!(lhs.source_gamma, rhs.source_gamma);
assert_eq!(lhs.srgb, rhs.srgb);
assert_eq!(lhs.trns, rhs.trns);
}

trait IteratorExtensionsForFuzzing: Iterator + Sized {
/// Verifies that either 1) all items in the iterator are `Ok(_)` or 2) all items in the
/// iterator are `Err(_)`. Passes through unmodified iterator items.
fn assert_all_results_are_consistent<T>(self) -> impl Iterator<Item = Self::Item>
where
Self: Iterator<Item = Result<T, png::DecodingError>>,
{
// Eagerly collect all the items - this makes sure we check consistency of *all* results,
// even if a downstream iterator combinator consumes items lazily and never "pumps" some
// items via `next`. (`iter.take(2)` is one example of such lazy consumer;
// `iter_of_results.collect::<Result<Vec<_>, _>>()` is another.)
let all_results = self.collect::<Vec<_>>();

let any_err = all_results.iter().any(|res| res.is_err());
let any_ok = all_results.iter().any(|res| res.is_ok());
if any_err && any_ok {
// Replacing `Self::Item` with an "ok" string, because we want to support items
// that do not implement `Debug`.
let printable_results = all_results.iter().map(|res| res.as_ref().map(|_| "ok"));
for (i, res) in printable_results.enumerate() {
eprintln!("Result #{i}: {res:?}");
}
panic!("Inconsistent results - some are Ok(_) and some are Err(_)");
}

all_results.into_iter()
}

/// Verifies that all items in the iterator are the same (according to their `Eq`
/// implementation). Returns one of the items.
fn assert_all_items_are_equal(self) -> Self::Item
where
Self::Item: Debug + Eq,
{
self.assert_all_items_are_same(|lhs, rhs| assert_eq!(lhs, rhs))
}

/// Verifies that all items in the iterator are the same (according to the `assert_same`
/// function. Returns one of the items.
fn assert_all_items_are_same<F>(self, mut assert_same: F) -> <Self as Iterator>::Item
where
F: for<'a, 'b> FnMut(&'a Self::Item, &'b Self::Item),
{
self.enumerate()
.reduce(|(i, lhs), (j, rhs)| {
let panic = {
let mut assert_same = std::panic::AssertUnwindSafe(&mut assert_same);
let lhs = std::panic::AssertUnwindSafe(&lhs);
let rhs = std::panic::AssertUnwindSafe(&rhs);
std::panic::catch_unwind(move || assert_same(*lhs, *rhs))
};
match panic {
Ok(_) => (),
Err(panic) => {
eprintln!("Difference found when comparing item #{i} and #{j}.");
std::panic::resume_unwind(panic);
}
}
(
j, lhs, /* Arbitrary - could just as well return `rhs` */
)
})
.map(|(_index, item)| item)
.expect("Expecting a non-empty iterator")
}
}

impl<T> IteratorExtensionsForFuzzing for T where T: Iterator + Sized {}
11 changes: 3 additions & 8 deletions src/adam7.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/// [the Adam7 algorithm](https://en.wikipedia.org/wiki/Adam7_algorithm)
/// applies to a decoded row.
///
/// See also [crate::decoder::Reader::next_interlaced_row].
/// See also [Reader.next_interlaced_row](crate::decoder::Reader::next_interlaced_row).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Adam7Info {
pub(crate) pass: u8,
Expand All @@ -27,10 +27,10 @@ impl Adam7Info {
/// in the 1st `pass`, the `width` is be 1/8th of the image width (rounded up as
/// necessary).
///
/// Note that in typical usage, `Adam7Info`s are returned by [Reader::next_interlaced_row]
/// Note that in typical usage, `Adam7Info`s are returned by [Reader.next_interlaced_row]
/// and there is no need to create them by calling `Adam7Info::new`. `Adam7Info::new` is
/// nevertheless exposed as a public API, because it helps to provide self-contained example
/// usage of [expand_interlaced_row].
/// usage of [expand_interlaced_row](crate::expand_interlaced_row).
pub fn new(pass: u8, line: u32, width: u32) -> Self {
assert!(1 <= pass && pass <= 7);
assert!(width > 0);
Expand Down Expand Up @@ -92,11 +92,6 @@ impl Adam7Iterator {
self.lines = lines.ceil() as u32;
self.line = 0;
}

/// The current pass#.
pub fn current_pass(&self) -> u8 {
self.current_pass
}
}

/// Iterates over `Adam7Info`s.
Expand Down
Loading

0 comments on commit d1027ab

Please sign in to comment.