From 26cb3edc10b4b910cdaf235cdf355396e788921a Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 18:54:17 +0200 Subject: [PATCH 01/25] Introduce Buf type --- src/buf.rs | 71 ++++++++++++++++++++++++++++++++++++------------ src/chunk.rs | 7 +++-- src/content.rs | 11 ++++---- src/font.rs | 20 +++++++------- src/functions.rs | 9 +++--- src/lib.rs | 5 ++-- src/object.rs | 36 ++++++++++++------------ src/renumber.rs | 7 +++-- 8 files changed, 102 insertions(+), 64 deletions(-) diff --git a/src/buf.rs b/src/buf.rs index b6bf397..97dc4ea 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -1,29 +1,54 @@ +use std::ops::{Deref, DerefMut}; use super::Primitive; -/// Additional methods for byte buffers. -pub trait BufExt { - fn push_val(&mut self, value: T); - fn push_int(&mut self, value: i32); - fn push_float(&mut self, value: f32); - fn push_decimal(&mut self, value: f32); - fn push_hex(&mut self, value: u8); - fn push_hex_u16(&mut self, value: u16); - fn push_octal(&mut self, value: u8); +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct Buf { + buf: Vec } -impl BufExt for Vec { +impl Deref for Buf { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.buf + } +} + +impl DerefMut for Buf { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.buf + } +} + +impl Buf { + pub(crate) fn new() -> Self { + Self { + buf: Vec::new() + } + } + + pub(crate) fn with_capacity(capacity: usize) -> Self { + Self { + buf: Vec::with_capacity(capacity) + } + } + + pub(crate) fn finish(self) -> Vec { + self.buf + } + #[inline] - fn push_val(&mut self, value: T) { + pub(crate) fn push_val(&mut self, value: T) { value.write(self); } #[inline] - fn push_int(&mut self, value: i32) { + pub(crate) fn push_int(&mut self, value: i32) { self.extend(itoa::Buffer::new().format(value).as_bytes()); } #[inline] - fn push_float(&mut self, value: f32) { + pub(crate) fn push_float(&mut self, value: f32) { // Don't write the 
decimal point if we don't need it. // Also, integer formatting is way faster. if value as i32 as f32 == value { @@ -35,12 +60,12 @@ impl BufExt for Vec { /// Like `push_float`, but forces the decimal point. #[inline] - fn push_decimal(&mut self, value: f32) { + pub(crate) fn push_decimal(&mut self, value: f32) { if value == 0.0 || (value.abs() > 1e-6 && value.abs() < 1e12) { self.extend(ryu::Buffer::new().format(value).as_bytes()); } else { #[inline(never)] - fn write_extreme(buf: &mut Vec, value: f32) { + fn write_extreme(buf: &mut Buf, value: f32) { use std::io::Write; write!(buf, "{}", value).unwrap(); } @@ -50,7 +75,17 @@ impl BufExt for Vec { } #[inline] - fn push_hex(&mut self, value: u8) { + pub(crate) fn extend(&mut self, other: &[u8]) { + self.buf.extend(other); + } + + #[inline] + pub(crate) fn push(&mut self, b: u8) { + self.buf.push(b); + } + + #[inline] + pub(crate) fn push_hex(&mut self, value: u8) { fn hex(b: u8) -> u8 { if b < 10 { b'0' + b @@ -64,13 +99,13 @@ impl BufExt for Vec { } #[inline] - fn push_hex_u16(&mut self, value: u16) { + pub(crate) fn push_hex_u16(&mut self, value: u16) { self.push_hex((value >> 8) as u8); self.push_hex(value as u8); } #[inline] - fn push_octal(&mut self, value: u8) { + pub(crate) fn push_octal(&mut self, value: u8) { fn octal(b: u8) -> u8 { b'0' + b } diff --git a/src/chunk.rs b/src/chunk.rs index e1f22b6..afc2676 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,3 +1,4 @@ +use crate::buf::Buf; use super::*; /// A builder for a collection of indirect PDF objects. @@ -12,7 +13,7 @@ use super::*; /// it at a time). #[derive(Clone)] pub struct Chunk { - pub(crate) buf: Vec, + pub(crate) buf: Buf, pub(crate) offsets: Vec<(Ref, usize)>, } @@ -25,7 +26,7 @@ impl Chunk { /// Create a new chunk with the specified initial capacity. 
pub fn with_capacity(capacity: usize) -> Self { - Self { buf: Vec::with_capacity(capacity), offsets: vec![] } + Self { buf: Buf::with_capacity(capacity), offsets: vec![] } } /// The number of bytes that were written so far. @@ -43,7 +44,7 @@ impl Chunk { /// Add all objects from another chunk to this one. pub fn extend(&mut self, other: &Chunk) { let base = self.len(); - self.buf.extend_from_slice(&other.buf); + self.buf.extend(&other.buf.as_slice()); self.offsets .extend(other.offsets.iter().map(|&(id, offset)| (id, base + offset))); } diff --git a/src/content.rs b/src/content.rs index cbc2638..19777ad 100644 --- a/src/content.rs +++ b/src/content.rs @@ -1,8 +1,9 @@ +use crate::buf::Buf; use super::*; /// A builder for a content stream. pub struct Content { - buf: Vec, + buf: Buf, q_depth: usize, } @@ -17,7 +18,7 @@ impl Content { /// Create a new content stream with the specified initial buffer capacity. pub fn with_capacity(capacity: usize) -> Self { - Self { buf: Vec::with_capacity(capacity), q_depth: 0 } + Self { buf: Buf::with_capacity(capacity), q_depth: 0 } } /// Start writing an arbitrary operation. @@ -31,7 +32,7 @@ impl Content { if self.buf.last() == Some(&b'\n') { self.buf.pop(); } - self.buf + self.buf.finish() } } @@ -39,14 +40,14 @@ impl Content { /// /// This struct is created by [`Content::op`]. pub struct Operation<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, op: &'a str, first: bool, } impl<'a> Operation<'a> { #[inline] - pub(crate) fn start(buf: &'a mut Vec, op: &'a str) -> Self { + pub(crate) fn start(buf: &'a mut Buf, op: &'a str) -> Self { Self { buf, op, first: true } } diff --git a/src/font.rs b/src/font.rs index 53fc4ad..a6e586c 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,5 +1,5 @@ use std::marker::PhantomData; - +use crate::buf::Buf; use super::*; /// Writer for a _Type-1 font dictionary_. @@ -849,8 +849,8 @@ impl WMode { /// A builder for a `/ToUnicode` character map stream. 
pub struct UnicodeCmap { - buf: Vec, - mappings: Vec, + buf: Buf, + mappings: Buf, count: i32, glyph_id: PhantomData, } @@ -870,7 +870,7 @@ where pub fn with_writing_mode(name: Name, info: SystemInfo, mode: WMode) -> Self { // https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5014.CIDFont_Spec.pdf - let mut buf = Vec::new(); + let mut buf = Buf::new(); // Static header. buf.extend(b"%!PS-Adobe-3.0 Resource-CMap\n"); @@ -928,7 +928,7 @@ where Self { buf, - mappings: vec![], + mappings: Buf::new(), count: 0, glyph_id: PhantomData, } @@ -977,7 +977,7 @@ where self.buf.extend(b"%%EndResource\n"); self.buf.extend(b"%%EOF"); - self.buf + self.buf.finish() } fn flush_range(&mut self) { @@ -1005,19 +1005,19 @@ impl GlyphId for u16 {} /// Module to seal the `GlyphId` trait. mod private { - use crate::buf::BufExt; + use crate::buf::Buf; pub trait Sealed { const MIN: Self; const MAX: Self; - fn push(self, buf: &mut Vec); + fn push(self, buf: &mut Buf); } impl Sealed for u8 { const MIN: Self = u8::MIN; const MAX: Self = u8::MAX; - fn push(self, buf: &mut Vec) { + fn push(self, buf: &mut Buf) { buf.push_hex(self); } } @@ -1026,7 +1026,7 @@ mod private { const MIN: Self = u16::MIN; const MAX: Self = u16::MAX; - fn push(self, buf: &mut Vec) { + fn push(self, buf: &mut Buf) { buf.push_hex_u16(self); } } diff --git a/src/functions.rs b/src/functions.rs index a70f0a4..8f47f12 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -1,3 +1,4 @@ +use crate::buf::Buf; use super::*; /// Way the function is defined in. @@ -336,12 +337,12 @@ pub enum PostScriptOp<'a> { impl<'a> PostScriptOp<'a> { /// Encode a slice of operations into a byte stream. 
pub fn encode(ops: &[Self]) -> Vec { - let mut buf = Vec::new(); + let mut buf = Buf::new(); Self::write_slice(ops, &mut buf); - buf + buf.finish() } - fn write_slice(ops: &[Self], buf: &mut Vec) { + fn write_slice(ops: &[Self], buf: &mut Buf) { buf.push(b'{'); if ops.len() > 1 { buf.push(b'\n'); @@ -356,7 +357,7 @@ impl<'a> PostScriptOp<'a> { buf.push(b'}'); } - fn write(&self, buf: &mut Vec) { + fn write(&self, buf: &mut Buf) { match *self { Self::Real(r) => buf.push_decimal(r), Self::Integer(i) => buf.push_val(i), diff --git a/src/lib.rs b/src/lib.rs index 0c771f4..dd3ba6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -81,7 +81,7 @@ valid PDFs. */ #![forbid(unsafe_code)] -#![deny(missing_docs)] +// #![deny(missing_docs)] #![allow(clippy::wrong_self_convention)] #[macro_use] @@ -197,7 +197,6 @@ use std::fmt::{self, Debug, Formatter}; use std::io::Write; use std::ops::{Deref, DerefMut}; -use self::buf::BufExt; use self::writers::*; /// A builder for a PDF file. @@ -366,7 +365,7 @@ impl Pdf { // Write the end of file marker. buf.extend(b"\n%%EOF"); - buf + buf.finish() } } diff --git a/src/object.rs b/src/object.rs index ea34a1f..955fb8e 100644 --- a/src/object.rs +++ b/src/object.rs @@ -2,13 +2,13 @@ use std::convert::TryFrom; use std::marker::PhantomData; use std::mem::ManuallyDrop; use std::num::NonZeroI32; - +use crate::buf::Buf; use super::*; /// A primitive PDF object. pub trait Primitive { /// Write the object into a buffer. 
- fn write(self, buf: &mut Vec); + fn write(self, buf: &mut Buf); } impl Primitive for &T @@ -16,14 +16,14 @@ where T: Copy, { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { (*self).write(buf); } } impl Primitive for bool { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { if self { buf.extend(b"true"); } else { @@ -34,14 +34,14 @@ impl Primitive for bool { impl Primitive for i32 { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push_int(self); } } impl Primitive for f32 { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push_float(self); } } @@ -69,7 +69,7 @@ impl Str<'_> { } impl Primitive for Str<'_> { - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { // We use: // - Literal strings for ASCII with nice escape sequences to make it // also be represented fully in visible ASCII. We also escape @@ -126,7 +126,7 @@ impl Primitive for Str<'_> { pub struct TextStr<'a>(pub &'a str); impl Primitive for TextStr<'_> { - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { // ASCII and PDFDocEncoding match for 32 up to 126. 
if self.0.bytes().all(|b| matches!(b, 32..=126)) { Str(self.0.as_bytes()).write(buf); @@ -150,7 +150,7 @@ impl Primitive for TextStr<'_> { pub struct Name<'a>(pub &'a [u8]); impl Primitive for Name<'_> { - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.reserve(1 + self.0.len()); buf.push(b'/'); for &byte in self.0 { @@ -196,7 +196,7 @@ pub struct Null; impl Primitive for Null { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.extend(b"null"); } } @@ -245,7 +245,7 @@ impl Ref { impl Primitive for Ref { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push_int(self.0.get()); buf.extend(b" 0 R"); } @@ -281,7 +281,7 @@ impl Rect { impl Primitive for Rect { #[inline] - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.push(b'['); buf.push_val(self.x1); buf.push(b' '); @@ -393,7 +393,7 @@ impl Date { } impl Primitive for Date { - fn write(self, buf: &mut Vec) { + fn write(self, buf: &mut Buf) { buf.extend(b"(D:"); (|| { @@ -420,7 +420,7 @@ impl Primitive for Date { /// Writer for an arbitrary object. #[must_use = "not consuming this leaves the writer in an inconsistent state"] pub struct Obj<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, indirect: bool, indent: u8, } @@ -428,13 +428,13 @@ pub struct Obj<'a> { impl<'a> Obj<'a> { /// Start a new direct object. #[inline] - pub(crate) fn direct(buf: &'a mut Vec, indent: u8) -> Self { + pub(crate) fn direct(buf: &'a mut Buf, indent: u8) -> Self { Self { buf, indirect: false, indent } } /// Start a new indirect object. #[inline] - pub(crate) fn indirect(buf: &'a mut Vec, id: Ref) -> Self { + pub(crate) fn indirect(buf: &'a mut Buf, id: Ref) -> Self { buf.push_int(id.get()); buf.extend(b" 0 obj\n"); Self { buf, indirect: true, indent: 0 } @@ -500,7 +500,7 @@ pub trait Rewrite<'a> { /// Writer for an array. 
pub struct Array<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, indirect: bool, indent: u8, len: i32, @@ -646,7 +646,7 @@ impl<'a, T> TypedArray<'a, T> { /// Writer for a dictionary. pub struct Dict<'a> { - buf: &'a mut Vec, + buf: &'a mut Buf, indirect: bool, indent: u8, len: i32, diff --git a/src/renumber.rs b/src/renumber.rs index cca7791..959d2f8 100644 --- a/src/renumber.rs +++ b/src/renumber.rs @@ -1,4 +1,5 @@ -use crate::{BufExt, Chunk, Ref}; +use crate::{Chunk, Ref}; +use crate::buf::Buf; /// Renumbers a chunk of objects. /// @@ -43,7 +44,7 @@ fn extract_object(slice: &[u8]) -> Option<(i32, &[u8])> { /// Processes the interior of an indirect object and patches all indirect /// references. -fn patch_object(slice: &[u8], buf: &mut Vec, mapping: &mut dyn FnMut(Ref) -> Ref) { +fn patch_object(slice: &[u8], buf: &mut Buf, mapping: &mut dyn FnMut(Ref) -> Ref) { // Find the next point of interest: // - 'R' is interesting because it could be an indirect reference // - Anything that could contain indirect-reference-like things that are not @@ -202,7 +203,7 @@ mod tests { }); test!( - r.buf, + r.buf.finish(), b"1 0 obj", b"<<", b" /Nested <<", From ba1c728eaf18feb8e68dad3cbeb7be2fce77814c Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 18:57:22 +0200 Subject: [PATCH 02/25] Return content by default --- src/content.rs | 8 ++++---- src/font.rs | 4 ++-- src/functions.rs | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/content.rs b/src/content.rs index 19777ad..1b5d087 100644 --- a/src/content.rs +++ b/src/content.rs @@ -28,11 +28,11 @@ impl Content { } /// Return the raw constructed byte stream. 
- pub fn finish(mut self) -> Vec { + pub fn finish(mut self) -> Buf { if self.buf.last() == Some(&b'\n') { self.buf.pop(); } - self.buf.finish() + self.buf } } @@ -1656,7 +1656,7 @@ mod tests { .restore_state(); assert_eq!( - content.finish(), + content.finish().finish(), b"q\n1 2 3 4 re\nf\n[7 2] 4 d\n/MyImage Do\n2 3.5 /MyPattern scn\nQ" ); } @@ -1676,6 +1676,6 @@ mod tests { .show(Str(b"CD")); content.end_text(); - assert_eq!(content.finish(), b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET"); + assert_eq!(content.finish().finish(), b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET"); } } diff --git a/src/font.rs b/src/font.rs index a6e586c..1b7bbd3 100644 --- a/src/font.rs +++ b/src/font.rs @@ -965,7 +965,7 @@ where } /// Finish building the character map. - pub fn finish(mut self) -> Vec { + pub fn finish(mut self) -> Buf { // Flush the in-progress range. self.flush_range(); @@ -977,7 +977,7 @@ where self.buf.extend(b"%%EndResource\n"); self.buf.extend(b"%%EOF"); - self.buf.finish() + self.buf } fn flush_range(&mut self) { diff --git a/src/functions.rs b/src/functions.rs index 8f47f12..5c848c1 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -336,10 +336,10 @@ pub enum PostScriptOp<'a> { impl<'a> PostScriptOp<'a> { /// Encode a slice of operations into a byte stream. 
- pub fn encode(ops: &[Self]) -> Vec { + pub fn encode(ops: &[Self]) -> Buf { let mut buf = Buf::new(); Self::write_slice(ops, &mut buf); - buf.finish() + buf } fn write_slice(ops: &[Self], buf: &mut Buf) { @@ -447,7 +447,7 @@ mod tests { ]; assert_eq!( - PostScriptOp::encode(&ops), + PostScriptOp::encode(&ops).finish(), b"{\n3.0\n2.0\nmul\nexch\ndup\n0.0\nge\n{\n1.0\nadd\n}\n{neg}\nifelse\nadd\n}" ); } From 2d3cf458f6c07fdadd4ecafbd633549e8b84b0ac Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 20:01:31 +0200 Subject: [PATCH 03/25] First version --- src/buf.rs | 77 +++++++++++++++++++++++++++++++++------ src/chunk.rs | 10 +++--- src/content.rs | 9 +++-- src/font.rs | 93 ++++++++++++++++++++++++------------------------ src/functions.rs | 8 ++--- src/lib.rs | 10 +++--- src/object.rs | 58 +++++++++++++++++------------- src/renumber.rs | 18 +++++----- 8 files changed, 177 insertions(+), 106 deletions(-) diff --git a/src/buf.rs b/src/buf.rs index 97dc4ea..04550c2 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -1,9 +1,58 @@ -use std::ops::{Deref, DerefMut}; use super::Primitive; +use std::ops::{Deref, DerefMut}; + +#[derive(Clone, PartialEq, Debug, Default)] +pub struct Limits { + int: i32, + real: f32, + name_len: usize, + str_len: usize, + array_len: usize, + dict_entries: usize, +} + +impl Limits { + pub fn new() -> Self { + Self::default() + } + + pub(crate) fn register_int(&mut self, val: i32) { + self.int = self.int.max(val.abs()); + } + + pub(crate) fn register_real(&mut self, val: f32) { + self.real = self.real.max(val.abs()); + } + + pub(crate) fn register_name_len(&mut self, len: usize) { + self.name_len = self.name_len.max(len); + } + + pub(crate) fn register_str_len(&mut self, len: usize) { + self.str_len = self.str_len.max(len); + } -#[derive(Clone, PartialEq, Eq, Debug)] + pub(crate) fn register_array_len(&mut self, len: usize) { + self.array_len = self.array_len.max(len); + } + + pub(crate) 
fn register_dict_entries(&mut self, len: usize) { + self.dict_entries = self.dict_entries.max(len); + } + + pub fn merge(&mut self, other: &Limits) { + self.register_int(other.int); + self.register_real(other.real); + self.register_name_len(other.name_len); + self.register_array_len(other.array_len); + self.register_dict_entries(other.dict_entries); + } +} + +#[derive(Clone, PartialEq, Debug)] pub struct Buf { - buf: Vec + buf: Vec, + pub(crate) limits: Limits, } impl Deref for Buf { @@ -22,14 +71,13 @@ impl DerefMut for Buf { impl Buf { pub(crate) fn new() -> Self { - Self { - buf: Vec::new() - } + Self { buf: Vec::new(), limits: Limits::new() } } pub(crate) fn with_capacity(capacity: usize) -> Self { Self { - buf: Vec::with_capacity(capacity) + buf: Vec::with_capacity(capacity), + limits: Limits::new(), } } @@ -44,7 +92,8 @@ impl Buf { #[inline] pub(crate) fn push_int(&mut self, value: i32) { - self.extend(itoa::Buffer::new().format(value).as_bytes()); + self.limits.register_int(value); + self.extend_slice(itoa::Buffer::new().format(value).as_bytes()); } #[inline] @@ -61,8 +110,10 @@ impl Buf { /// Like `push_float`, but forces the decimal point. 
#[inline] pub(crate) fn push_decimal(&mut self, value: f32) { + self.limits.register_real(value); + if value == 0.0 || (value.abs() > 1e-6 && value.abs() < 1e12) { - self.extend(ryu::Buffer::new().format(value).as_bytes()); + self.extend_slice(ryu::Buffer::new().format(value).as_bytes()); } else { #[inline(never)] fn write_extreme(buf: &mut Buf, value: f32) { @@ -75,10 +126,16 @@ impl Buf { } #[inline] - pub(crate) fn extend(&mut self, other: &[u8]) { + pub(crate) fn extend_slice(&mut self, other: &[u8]) { self.buf.extend(other); } + #[inline] + pub(crate) fn extend(&mut self, other: &Buf) { + self.limits.merge(&other.limits); + self.buf.extend(&other.buf); + } + #[inline] pub(crate) fn push(&mut self, b: u8) { self.buf.push(b); diff --git a/src/chunk.rs b/src/chunk.rs index afc2676..97ae41a 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,5 +1,5 @@ -use crate::buf::Buf; use super::*; +use crate::buf::Buf; /// A builder for a collection of indirect PDF objects. /// @@ -44,7 +44,7 @@ impl Chunk { /// Add all objects from another chunk to this one. pub fn extend(&mut self, other: &Chunk) { let base = self.len(); - self.buf.extend(&other.buf.as_slice()); + self.buf.extend(&other.buf); self.offsets .extend(other.offsets.iter().map(|&(id, offset)| (id, base + offset))); } @@ -253,7 +253,7 @@ impl Chunk { /// file. /// /// You can create the content bytes using a [`Content`] builder. 
- pub fn form_xobject<'a>(&'a mut self, id: Ref, content: &'a [u8]) -> FormXObject<'a> { + pub fn form_xobject<'a>(&'a mut self, id: Ref, content: &'a Buf) -> FormXObject { FormXObject::start(self.stream(id, content)) } @@ -315,7 +315,7 @@ impl Chunk { pub fn stream_shading<'a>( &'a mut self, id: Ref, - content: &'a [u8], + content: &'a Buf, ) -> StreamShading<'a> { StreamShading::start(self.stream(id, content)) } @@ -326,7 +326,7 @@ impl Chunk { pub fn tiling_pattern<'a>( &'a mut self, id: Ref, - content: &'a [u8], + content: &'a Buf, ) -> TilingPattern<'a> { TilingPattern::start_with_stream(self.stream(id, content)) } diff --git a/src/content.rs b/src/content.rs index 1b5d087..0a10a0c 100644 --- a/src/content.rs +++ b/src/content.rs @@ -1,5 +1,5 @@ -use crate::buf::Buf; use super::*; +use crate::buf::Buf; /// A builder for a content stream. pub struct Content { @@ -88,7 +88,7 @@ impl Drop for Operation<'_> { if !self.first { self.buf.push(b' '); } - self.buf.extend(self.op.as_bytes()); + self.buf.extend_slice(self.op.as_bytes()); self.buf.push(b'\n'); } } @@ -1676,6 +1676,9 @@ mod tests { .show(Str(b"CD")); content.end_text(); - assert_eq!(content.finish().finish(), b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET"); + assert_eq!( + content.finish().finish(), + b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET" + ); } } diff --git a/src/font.rs b/src/font.rs index 1b7bbd3..3585d1d 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,6 +1,6 @@ -use std::marker::PhantomData; -use crate::buf::Buf; use super::*; +use crate::buf::Buf; +use std::marker::PhantomData; /// Writer for a _Type-1 font dictionary_. /// @@ -873,58 +873,58 @@ where let mut buf = Buf::new(); // Static header. 
- buf.extend(b"%!PS-Adobe-3.0 Resource-CMap\n"); - buf.extend(b"%%DocumentNeededResources: procset CIDInit\n"); - buf.extend(b"%%IncludeResource: procset CIDInit\n"); + buf.extend_slice(b"%!PS-Adobe-3.0 Resource-CMap\n"); + buf.extend_slice(b"%%DocumentNeededResources: procset CIDInit\n"); + buf.extend_slice(b"%%IncludeResource: procset CIDInit\n"); // Dynamic header. - buf.extend(b"%%BeginResource: CMap "); - buf.extend(name.0); + buf.extend_slice(b"%%BeginResource: CMap "); + buf.extend_slice(name.0); buf.push(b'\n'); - buf.extend(b"%%Title: ("); - buf.extend(name.0); + buf.extend_slice(b"%%Title: ("); + buf.extend_slice(name.0); buf.push(b' '); - buf.extend(info.registry.0); + buf.extend_slice(info.registry.0); buf.push(b' '); - buf.extend(info.ordering.0); + buf.extend_slice(info.ordering.0); buf.push(b' '); buf.push_int(info.supplement); - buf.extend(b")\n"); - buf.extend(b"%%Version: 1\n"); - buf.extend(b"%%EndComments\n"); + buf.extend_slice(b")\n"); + buf.extend_slice(b"%%Version: 1\n"); + buf.extend_slice(b"%%EndComments\n"); // General body. 
- buf.extend(b"/CIDInit /ProcSet findresource begin\n"); - buf.extend(b"12 dict begin\n"); - buf.extend(b"begincmap\n"); - buf.extend(b"/CIDSystemInfo 3 dict dup begin\n"); - buf.extend(b" /Registry "); + buf.extend_slice(b"/CIDInit /ProcSet findresource begin\n"); + buf.extend_slice(b"12 dict begin\n"); + buf.extend_slice(b"begincmap\n"); + buf.extend_slice(b"/CIDSystemInfo 3 dict dup begin\n"); + buf.extend_slice(b" /Registry "); buf.push_val(info.registry); - buf.extend(b" def\n"); - buf.extend(b" /Ordering "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b" /Ordering "); buf.push_val(info.ordering); - buf.extend(b" def\n"); - buf.extend(b" /Supplement "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b" /Supplement "); buf.push_val(info.supplement); - buf.extend(b" def\n"); - buf.extend(b"end def\n"); - buf.extend(b"/CMapName "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b"end def\n"); + buf.extend_slice(b"/CMapName "); buf.push_val(name); - buf.extend(b" def\n"); - buf.extend(b"/CMapVersion 1 def\n"); - buf.extend(b"/CMapType 0 def\n"); - buf.extend(b"/WMode "); + buf.extend_slice(b" def\n"); + buf.extend_slice(b"/CMapVersion 1 def\n"); + buf.extend_slice(b"/CMapType 0 def\n"); + buf.extend_slice(b"/WMode "); buf.push_int(mode.to_int()); - buf.extend(b" def\n"); + buf.extend_slice(b" def\n"); // We just cover the whole unicode codespace. 
- buf.extend(b"1 begincodespacerange\n"); + buf.extend_slice(b"1 begincodespacerange\n"); buf.push(b'<'); G::MIN.push(&mut buf); - buf.extend(b"> <"); + buf.extend_slice(b"> <"); G::MAX.push(&mut buf); - buf.extend(b">\n"); - buf.extend(b"endcodespacerange\n"); + buf.extend_slice(b">\n"); + buf.extend_slice(b"endcodespacerange\n"); Self { buf, @@ -947,7 +947,7 @@ where ) { self.mappings.push(b'<'); glyph.push(&mut self.mappings); - self.mappings.extend(b"> <"); + self.mappings.extend_slice(b"> <"); for c in codepoints { for &mut part in c.encode_utf16(&mut [0; 2]) { @@ -955,7 +955,7 @@ where } } - self.mappings.extend(b">\n"); + self.mappings.extend_slice(b">\n"); self.count += 1; // At most 100 lines per range. @@ -970,12 +970,13 @@ where self.flush_range(); // End of body. - self.buf.extend(b"endcmap\n"); - self.buf.extend(b"CMapName currentdict /CMap defineresource pop\n"); - self.buf.extend(b"end\n"); - self.buf.extend(b"end\n"); - self.buf.extend(b"%%EndResource\n"); - self.buf.extend(b"%%EOF"); + self.buf.extend_slice(b"endcmap\n"); + self.buf + .extend_slice(b"CMapName currentdict /CMap defineresource pop\n"); + self.buf.extend_slice(b"end\n"); + self.buf.extend_slice(b"end\n"); + self.buf.extend_slice(b"%%EndResource\n"); + self.buf.extend_slice(b"%%EOF"); self.buf } @@ -983,9 +984,9 @@ where fn flush_range(&mut self) { if self.count > 0 { self.buf.push_int(self.count); - self.buf.extend(b" beginbfchar\n"); - self.buf.extend(&self.mappings); - self.buf.extend(b"endbfchar\n"); + self.buf.extend_slice(b" beginbfchar\n"); + self.buf.extend_slice(&self.mappings); + self.buf.extend_slice(b"endbfchar\n"); } self.count = 0; diff --git a/src/functions.rs b/src/functions.rs index 5c848c1..2d14735 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -1,5 +1,5 @@ -use crate::buf::Buf; use super::*; +use crate::buf::Buf; /// Way the function is defined in. 
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] @@ -364,16 +364,16 @@ impl<'a> PostScriptOp<'a> { Self::If(ops) => { Self::write_slice(ops, buf); buf.push(b'\n'); - buf.extend(self.operator()); + buf.extend_slice(self.operator()); } Self::IfElse(ops1, ops2) => { Self::write_slice(ops1, buf); buf.push(b'\n'); Self::write_slice(ops2, buf); buf.push(b'\n'); - buf.extend(self.operator()); + buf.extend_slice(self.operator()); } - _ => buf.extend(self.operator()), + _ => buf.extend_slice(self.operator()), } } diff --git a/src/lib.rs b/src/lib.rs index dd3ba6e..1d4345d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -224,7 +224,7 @@ impl Pdf { /// Create a new PDF with the specified initial buffer capacity. pub fn with_capacity(capacity: usize) -> Self { let mut chunk = Chunk::with_capacity(capacity); - chunk.buf.extend(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n"); + chunk.buf.extend_slice(b"%PDF-1.7\n%\x80\x80\x80\x80\n\n"); Self { chunk, catalog_id: None, @@ -299,7 +299,7 @@ impl Pdf { let xref_len = 1 + offsets.last().map_or(0, |p| p.0.get()); let xref_offset = buf.len(); - buf.extend(b"xref\n0 "); + buf.extend_slice(b"xref\n0 "); buf.push_int(xref_len); buf.push(b'\n'); @@ -338,7 +338,7 @@ impl Pdf { } // Write the trailer dictionary. - buf.extend(b"trailer\n"); + buf.extend_slice(b"trailer\n"); let mut trailer = Obj::direct(&mut buf, 0).dict(); trailer.pair(Name(b"Size"), xref_len); @@ -360,11 +360,11 @@ impl Pdf { trailer.finish(); // Write where the cross-reference table starts. - buf.extend(b"\nstartxref\n"); + buf.extend_slice(b"\nstartxref\n"); write!(buf, "{}", xref_offset).unwrap(); // Write the end of file marker. 
- buf.extend(b"\n%%EOF"); + buf.extend_slice(b"\n%%EOF"); buf.finish() } } diff --git a/src/object.rs b/src/object.rs index 955fb8e..08a5e02 100644 --- a/src/object.rs +++ b/src/object.rs @@ -1,9 +1,9 @@ +use super::*; +use crate::buf::Buf; use std::convert::TryFrom; use std::marker::PhantomData; use std::mem::ManuallyDrop; use std::num::NonZeroI32; -use crate::buf::Buf; -use super::*; /// A primitive PDF object. pub trait Primitive { @@ -25,9 +25,9 @@ impl Primitive for bool { #[inline] fn write(self, buf: &mut Buf) { if self { - buf.extend(b"true"); + buf.extend_slice(b"true"); } else { - buf.extend(b"false"); + buf.extend_slice(b"false"); } } } @@ -70,7 +70,8 @@ impl Str<'_> { impl Primitive for Str<'_> { fn write(self, buf: &mut Buf) { - // We use: + buf.limits.register_str_len(self.0.len()); + // - Literal strings for ASCII with nice escape sequences to make it // also be represented fully in visible ASCII. We also escape // parentheses because they are delimiters. @@ -90,13 +91,13 @@ impl Primitive for Str<'_> { } buf.push(byte); } - b'\\' => buf.extend(br"\\"), + b'\\' => buf.extend_slice(br"\\"), b' '..=b'~' => buf.push(byte), - b'\n' => buf.extend(br"\n"), - b'\r' => buf.extend(br"\r"), - b'\t' => buf.extend(br"\t"), - b'\x08' => buf.extend(br"\b"), - b'\x0c' => buf.extend(br"\f"), + b'\n' => buf.extend_slice(br"\n"), + b'\r' => buf.extend_slice(br"\r"), + b'\t' => buf.extend_slice(br"\t"), + b'\x08' => buf.extend_slice(br"\b"), + b'\x0c' => buf.extend_slice(br"\f"), _ => { buf.push(b'\\'); buf.push_octal(byte); @@ -127,6 +128,8 @@ pub struct TextStr<'a>(pub &'a str); impl Primitive for TextStr<'_> { fn write(self, buf: &mut Buf) { + buf.limits.register_str_len(self.0.as_bytes().len()); + // ASCII and PDFDocEncoding match for 32 up to 126. 
if self.0.bytes().all(|b| matches!(b, 32..=126)) { Str(self.0.as_bytes()).write(buf); @@ -151,6 +154,8 @@ pub struct Name<'a>(pub &'a [u8]); impl Primitive for Name<'_> { fn write(self, buf: &mut Buf) { + buf.limits.register_name_len(self.0.len()); + buf.reserve(1 + self.0.len()); buf.push(b'/'); for &byte in self.0 { @@ -197,7 +202,7 @@ pub struct Null; impl Primitive for Null { #[inline] fn write(self, buf: &mut Buf) { - buf.extend(b"null"); + buf.extend_slice(b"null"); } } @@ -247,7 +252,7 @@ impl Primitive for Ref { #[inline] fn write(self, buf: &mut Buf) { buf.push_int(self.0.get()); - buf.extend(b" 0 R"); + buf.extend_slice(b" 0 R"); } } @@ -394,7 +399,7 @@ impl Date { impl Primitive for Date { fn write(self, buf: &mut Buf) { - buf.extend(b"(D:"); + buf.extend_slice(b"(D:"); (|| { write!(buf, "{:04}", self.year).unwrap(); @@ -436,7 +441,7 @@ impl<'a> Obj<'a> { #[inline] pub(crate) fn indirect(buf: &'a mut Buf, id: Ref) -> Self { buf.push_int(id.get()); - buf.extend(b" 0 obj\n"); + buf.extend_slice(b" 0 obj\n"); Self { buf, indirect: true, indent: 0 } } @@ -445,7 +450,7 @@ impl<'a> Obj<'a> { pub fn primitive(self, value: T) { value.write(self.buf); if self.indirect { - self.buf.extend(b"\nendobj\n\n"); + self.buf.extend_slice(b"\nendobj\n\n"); } } @@ -570,9 +575,10 @@ impl<'a> Array<'a> { impl Drop for Array<'_> { #[inline] fn drop(&mut self) { + self.buf.limits.register_array_len(self.len() as usize); self.buf.push(b']'); if self.indirect { - self.buf.extend(b"\nendobj\n\n"); + self.buf.extend_slice(b"\nendobj\n\n"); } } } @@ -653,7 +659,7 @@ pub struct Dict<'a> { } writer!(Dict: |obj| { - obj.buf.extend(b"<<"); + obj.buf.extend_slice(b"<<"); Self { buf: obj.buf, indirect: obj.indirect, @@ -721,15 +727,17 @@ impl<'a> Dict<'a> { impl Drop for Dict<'_> { #[inline] fn drop(&mut self) { + self.buf.limits.register_dict_entries(self.len as usize); + if self.len != 0 { self.buf.push(b'\n'); for _ in 0..self.indent - 2 { self.buf.push(b' '); } } - 
self.buf.extend(b">>"); + self.buf.extend_slice(b">>"); if self.indirect { - self.buf.extend(b"\nendobj\n\n"); + self.buf.extend_slice(b"\nendobj\n\n"); } } } @@ -847,11 +855,11 @@ impl<'a> Stream<'a> { impl Drop for Stream<'_> { fn drop(&mut self) { - self.dict.buf.extend(b"\n>>"); - self.dict.buf.extend(b"\nstream\n"); - self.dict.buf.extend(self.data.as_ref()); - self.dict.buf.extend(b"\nendstream"); - self.dict.buf.extend(b"\nendobj\n\n"); + self.dict.buf.extend_slice(b"\n>>"); + self.dict.buf.extend_slice(b"\nstream\n"); + self.dict.buf.extend_slice(self.data.as_ref()); + self.dict.buf.extend_slice(b"\nendstream"); + self.dict.buf.extend_slice(b"\nendobj\n\n"); } } diff --git a/src/renumber.rs b/src/renumber.rs index 959d2f8..a386d99 100644 --- a/src/renumber.rs +++ b/src/renumber.rs @@ -1,5 +1,5 @@ -use crate::{Chunk, Ref}; use crate::buf::Buf; +use crate::{Chunk, Ref}; /// Renumbers a chunk of objects. /// @@ -15,9 +15,9 @@ pub fn renumber(source: &Chunk, target: &mut Chunk, mapping: &mut dyn FnMut(Ref) target.buf.push_int(new.get()); target.buf.push(b' '); target.buf.push_int(gen); - target.buf.extend(b" obj\n"); + target.buf.extend_slice(b" obj\n"); patch_object(slice, &mut target.buf, mapping); - target.buf.extend(b"\nendobj\n\n"); + target.buf.extend_slice(b"\nendobj\n\n"); } } @@ -63,7 +63,7 @@ fn patch_object(slice: &[u8], buf: &mut Buf, mapping: &mut dyn FnMut(Ref) -> Ref b'R' => { if let Some((head, id, gen)) = validate_ref(&slice[..seen]) { let new = mapping(id); - buf.extend(&slice[written..head]); + buf.extend_slice(&slice[written..head]); buf.push_int(new.get()); buf.push(b' '); buf.push_int(gen); @@ -113,7 +113,7 @@ fn patch_object(slice: &[u8], buf: &mut Buf, mapping: &mut dyn FnMut(Ref) -> Ref seen += 1; } - buf.extend(&slice[written..]); + buf.extend_slice(&slice[written..]); } /// Validate a match for an indirect reference. @@ -184,9 +184,11 @@ mod tests { // Manually write an untidy object. 
c.offsets.push((Ref::new(8), c.buf.len())); - c.buf.extend(b"8 3 obj\n<>%\n\nendobj"); + // TODO: This won't update `limits` of `buf`. + c.buf.extend_slice(b"8 3 obj\n<>%\n\nendobj"); c.stream(Ref::new(17), b"1 0 R 2 0 R 3 0 R 4 0 R") .pair(Name(b"Ok"), TextStr(")4 0 R")) From 18b779ab75d5f5d4e9514fbb589615d02939b9f4 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:10:26 +0200 Subject: [PATCH 04/25] re=exprt --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 1d4345d..80759f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,7 +186,8 @@ pub mod types { pub use xobject::SMaskInData; } -pub use self::chunk::Chunk; +pub use self::buf::{Buf, Limits}; +pub use self::chunk::{Chunk}; pub use self::content::Content; pub use self::object::{ Array, Date, Dict, Filter, Finish, Name, Null, Obj, Primitive, Rect, Ref, Rewrite, From 897460913a6f7e64f423de4a62e751a719213f9d Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:13:14 +0200 Subject: [PATCH 05/25] more fixes --- src/chunk.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/chunk.rs b/src/chunk.rs index 97ae41a..a700439 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -253,7 +253,7 @@ impl Chunk { /// file. /// /// You can create the content bytes using a [`Content`] builder. 
- pub fn form_xobject<'a>(&'a mut self, id: Ref, content: &'a Buf) -> FormXObject { + pub fn form_xobject<'a>(&'a mut self, id: Ref, content: &'a [u8]) -> FormXObject { FormXObject::start(self.stream(id, content)) } @@ -326,7 +326,7 @@ impl Chunk { pub fn tiling_pattern<'a>( &'a mut self, id: Ref, - content: &'a Buf, + content: &'a [u8], ) -> TilingPattern<'a> { TilingPattern::start_with_stream(self.stream(id, content)) } From 0345d52075bb8cc33c36cd35e70273dc392108fa Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:22:20 +0200 Subject: [PATCH 06/25] make to_bytes public --- src/buf.rs | 2 +- src/content.rs | 4 ++-- src/functions.rs | 2 +- src/lib.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/buf.rs b/src/buf.rs index 04550c2..2e365fc 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -81,7 +81,7 @@ impl Buf { } } - pub(crate) fn finish(self) -> Vec { + pub fn to_bytes(self) -> Vec { self.buf } diff --git a/src/content.rs b/src/content.rs index 0a10a0c..e19ebff 100644 --- a/src/content.rs +++ b/src/content.rs @@ -1656,7 +1656,7 @@ mod tests { .restore_state(); assert_eq!( - content.finish().finish(), + content.finish().to_bytes(), b"q\n1 2 3 4 re\nf\n[7 2] 4 d\n/MyImage Do\n2 3.5 /MyPattern scn\nQ" ); } @@ -1677,7 +1677,7 @@ mod tests { content.end_text(); assert_eq!( - content.finish().finish(), + content.finish().to_bytes(), b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET" ); } diff --git a/src/functions.rs b/src/functions.rs index 2d14735..a51f241 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -447,7 +447,7 @@ mod tests { ]; assert_eq!( - PostScriptOp::encode(&ops).finish(), + PostScriptOp::encode(&ops).to_bytes(), b"{\n3.0\n2.0\nmul\nexch\ndup\n0.0\nge\n{\n1.0\nadd\n}\n{neg}\nifelse\nadd\n}" ); } diff --git a/src/lib.rs b/src/lib.rs index 80759f5..b7e0546 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -366,7 +366,7 @@ impl Pdf { // Write the end of file marker. 
buf.extend_slice(b"\n%%EOF"); - buf.finish() + buf.to_bytes() } } From 9e99c77c604277d39dc921658b2d95ec2fff8a24 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:27:08 +0200 Subject: [PATCH 07/25] Add limits getter --- src/buf.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/buf.rs b/src/buf.rs index 2e365fc..bcb203c 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -85,6 +85,10 @@ impl Buf { self.buf } + pub fn limits(&self) -> &Limits { + &self.limits + } + #[inline] pub(crate) fn push_val(&mut self, value: T) { value.write(self); From 22022321aa0f165e67c9387c889c80357122c2a1 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:40:41 +0200 Subject: [PATCH 08/25] Add limits to chunk --- src/chunk.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/chunk.rs b/src/chunk.rs index a700439..c8a0c50 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -41,6 +41,10 @@ impl Chunk { self.buf.as_slice() } + pub fn limits(&self) -> &Limits { + self.buf.limits() + } + /// Add all objects from another chunk to this one. 
pub fn extend(&mut self, other: &Chunk) { let base = self.len(); From 1f70211a69cb3cb0d27e5cb99491b762e3a322bd Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:42:30 +0200 Subject: [PATCH 09/25] add getters --- src/buf.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/buf.rs b/src/buf.rs index bcb203c..e681809 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -40,6 +40,26 @@ impl Limits { self.dict_entries = self.dict_entries.max(len); } + pub fn int(&self) -> i32 { + self.int + } + + pub fn real(&self) -> f32 { + self.real + } + + pub fn name_len(&self) -> usize { + self.name_len + } + + pub fn array_len(&self) -> usize { + self.array_len + } + + pub fn dict_entries(&self) -> usize { + self.dict_entries + } + pub fn merge(&mut self, other: &Limits) { self.register_int(other.int); self.register_real(other.real); From 664c270cc8e9290853867428dd75ab7199ecd3aa Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sat, 26 Oct 2024 21:43:43 +0200 Subject: [PATCH 10/25] add getters --- src/buf.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/buf.rs b/src/buf.rs index e681809..d503e82 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -60,10 +60,15 @@ impl Limits { self.dict_entries } + pub fn str_len(&self) -> usize { + self.str_len + } + pub fn merge(&mut self, other: &Limits) { self.register_int(other.int); self.register_real(other.real); self.register_name_len(other.name_len); + self.register_str_len(other.str_len); self.register_array_len(other.array_len); self.register_dict_entries(other.dict_entries); } From 8294635855a44aabddc3c6b9c468f0090fac5074 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sun, 27 Oct 2024 09:44:13 +0100 Subject: [PATCH 11/25] integrate postscript changes --- src/functions.rs | 25 ++++++++++++------------- src/renumber.rs | 2 +- 2 files changed, 13 
insertions(+), 14 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index a51f241..6a3b568 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -1,5 +1,4 @@ use super::*; -use crate::buf::Buf; /// Way the function is defined in. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] @@ -238,8 +237,8 @@ impl<'a> PostScriptFunction<'a> { deref!('a, PostScriptFunction<'a> => Stream<'a>, stream); /// PostScript operators for use in Type 4 functions. -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum PostScriptOp<'a> { +#[derive(Debug, Clone, PartialEq)] +pub enum PostScriptOp { /// Push a real number. Real(f32), /// Push an integer number. @@ -316,9 +315,9 @@ pub enum PostScriptOp<'a> { Xor, /// Conditional. Runs if boolean argument is true. - If(&'a [Self]), + If(Vec), /// Conditional. Decides which branch to run depending on boolean argument. - IfElse(&'a [Self], &'a [Self]), + IfElse(Vec, Vec), /// Copy the top elements. One integer argument. Copy, @@ -334,7 +333,7 @@ pub enum PostScriptOp<'a> { Roll, } -impl<'a> PostScriptOp<'a> { +impl PostScriptOp { /// Encode a slice of operations into a byte stream. 
pub fn encode(ops: &[Self]) -> Buf { let mut buf = Buf::new(); @@ -358,18 +357,18 @@ impl<'a> PostScriptOp<'a> { } fn write(&self, buf: &mut Buf) { - match *self { - Self::Real(r) => buf.push_decimal(r), - Self::Integer(i) => buf.push_val(i), + match self { + Self::Real(r) => buf.push_decimal(*r), + Self::Integer(i) => buf.push_val(*i), Self::If(ops) => { - Self::write_slice(ops, buf); + Self::write_slice(&ops, buf); buf.push(b'\n'); buf.extend_slice(self.operator()); } Self::IfElse(ops1, ops2) => { - Self::write_slice(ops1, buf); + Self::write_slice(&ops1, buf); buf.push(b'\n'); - Self::write_slice(ops2, buf); + Self::write_slice(&ops2, buf); buf.push(b'\n'); buf.extend_slice(self.operator()); } @@ -442,7 +441,7 @@ mod tests { Dup, Real(0.0), Ge, - IfElse(&[Real(1.0), Add], &[Neg]), + IfElse(vec![Real(1.0), Add], vec![Neg]), Add, ]; diff --git a/src/renumber.rs b/src/renumber.rs index a386d99..b17604e 100644 --- a/src/renumber.rs +++ b/src/renumber.rs @@ -205,7 +205,7 @@ mod tests { }); test!( - r.buf.finish(), + r.buf.to_bytes(), b"1 0 obj", b"<<", b" /Nested <<", From 31565d1db4fd1c807222506741b584fbe57155d1 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sun, 27 Oct 2024 09:54:35 +0100 Subject: [PATCH 12/25] Change separators --- src/functions.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index 6a3b568..8d279f1 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -344,11 +344,11 @@ impl PostScriptOp { fn write_slice(ops: &[Self], buf: &mut Buf) { buf.push(b'{'); if ops.len() > 1 { - buf.push(b'\n'); + buf.push(b' '); } for op in ops { op.write(buf); - buf.push(b'\n'); + buf.push(b' '); } if ops.len() == 1 { buf.pop(); @@ -358,18 +358,18 @@ impl PostScriptOp { fn write(&self, buf: &mut Buf) { match self { - Self::Real(r) => buf.push_decimal(*r), + Self::Real(r) => buf.push_float(*r), Self::Integer(i) => buf.push_val(*i), Self::If(ops) => { 
Self::write_slice(&ops, buf); - buf.push(b'\n'); + buf.push(b' '); buf.extend_slice(self.operator()); } Self::IfElse(ops1, ops2) => { Self::write_slice(&ops1, buf); - buf.push(b'\n'); + buf.push(b' '); Self::write_slice(&ops2, buf); - buf.push(b'\n'); + buf.push(b' '); buf.extend_slice(self.operator()); } _ => buf.extend_slice(self.operator()), From ea782ea29f7032b8825585010988b7500639acc7 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sun, 27 Oct 2024 10:01:21 +0100 Subject: [PATCH 13/25] revert push float change --- src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions.rs b/src/functions.rs index 8d279f1..c70e40d 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -358,7 +358,7 @@ impl PostScriptOp { fn write(&self, buf: &mut Buf) { match self { - Self::Real(r) => buf.push_float(*r), + Self::Real(r) => buf.push_decimal(*r), Self::Integer(i) => buf.push_val(*i), Self::If(ops) => { Self::write_slice(&ops, buf); From cf042ebc61b6ecc1b4d6ff47f1de97a25490b141 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sun, 27 Oct 2024 11:26:42 +0100 Subject: [PATCH 14/25] revert changes --- src/functions.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/functions.rs b/src/functions.rs index c70e40d..2a15bb4 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -237,8 +237,8 @@ impl<'a> PostScriptFunction<'a> { deref!('a, PostScriptFunction<'a> => Stream<'a>, stream); /// PostScript operators for use in Type 4 functions. -#[derive(Debug, Clone, PartialEq)] -pub enum PostScriptOp { +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum PostScriptOp<'a> { /// Push a real number. Real(f32), /// Push an integer number. @@ -315,9 +315,9 @@ pub enum PostScriptOp { Xor, /// Conditional. Runs if boolean argument is true. - If(Vec), + If(&'a [Self]), /// Conditional. 
Decides which branch to run depending on boolean argument. - IfElse(Vec, Vec), + IfElse(&'a [Self], &'a [Self]), /// Copy the top elements. One integer argument. Copy, @@ -333,7 +333,7 @@ pub enum PostScriptOp { Roll, } -impl PostScriptOp { +impl PostScriptOp<'_> { /// Encode a slice of operations into a byte stream. pub fn encode(ops: &[Self]) -> Buf { let mut buf = Buf::new(); @@ -441,7 +441,7 @@ mod tests { Dup, Real(0.0), Ge, - IfElse(vec![Real(1.0), Add], vec![Neg]), + IfElse(&[Real(1.0), Add], &[Neg]), Add, ]; From 71b75935c8e42c4b861da6d54243dbeb433bada4 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Wed, 30 Oct 2024 21:08:25 +0100 Subject: [PATCH 15/25] tidy up a bit --- src/buf.rs | 13 +++++++++++++ src/chunk.rs | 1 + src/font.rs | 5 +++-- src/functions.rs | 10 +++++----- src/lib.rs | 2 +- src/object.rs | 3 ++- 6 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/buf.rs b/src/buf.rs index d503e82..9662278 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -1,6 +1,7 @@ use super::Primitive; use std::ops::{Deref, DerefMut}; +/// Track the limits of data types used in a buffer. #[derive(Clone, PartialEq, Debug, Default)] pub struct Limits { int: i32, @@ -12,6 +13,7 @@ pub struct Limits { } impl Limits { + /// Create a new `Limits` struct with all values initialized to zero. pub fn new() -> Self { Self::default() } @@ -40,30 +42,38 @@ impl Limits { self.dict_entries = self.dict_entries.max(len); } + /// Get the absolute value of the largest positive/negative integer number. pub fn int(&self) -> i32 { self.int } + /// Get the absolute value of the largest positive/negative real number. pub fn real(&self) -> f32 { self.real } + /// Get the maximum length of any used name. pub fn name_len(&self) -> usize { self.name_len } + /// Get the maximum length of any used array. pub fn array_len(&self) -> usize { self.array_len } + /// Get the maximum number of entries in any dictionary. 
pub fn dict_entries(&self) -> usize { self.dict_entries } + /// Get the maximum length of any used string. pub fn str_len(&self) -> usize { self.str_len } + /// Merge two `Limits` with each other, taking the maximum + /// of each field from both. pub fn merge(&mut self, other: &Limits) { self.register_int(other.int); self.register_real(other.real); @@ -74,6 +84,7 @@ impl Limits { } } +/// A buffer of arbitrary PDF content. #[derive(Clone, PartialEq, Debug)] pub struct Buf { buf: Vec, @@ -106,10 +117,12 @@ impl Buf { } } + /// Get the underlying bytes of the buffer. pub fn to_bytes(self) -> Vec { self.buf } + /// Return the limits of the buffer. pub fn limits(&self) -> &Limits { &self.limits } diff --git a/src/chunk.rs b/src/chunk.rs index c8a0c50..1be15d8 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -41,6 +41,7 @@ impl Chunk { self.buf.as_slice() } + /// Return the limits of the chunk. pub fn limits(&self) -> &Limits { self.buf.limits() } diff --git a/src/font.rs b/src/font.rs index 3585d1d..d65e48d 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,6 +1,7 @@ -use super::*; -use crate::buf::Buf; use std::marker::PhantomData; +use crate::buf::Buf; + +use super::*; /// Writer for a _Type-1 font dictionary_. 
/// diff --git a/src/functions.rs b/src/functions.rs index 9cbe3fd..8ce08f1 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -357,18 +357,18 @@ impl<'a> PostScriptOp<'a> { } fn write(&self, buf: &mut Buf) { - match self { - Self::Real(r) => buf.push_decimal(*r), - Self::Integer(i) => buf.push_val(*i), + match *self { + Self::Real(r) => buf.push_decimal(r), + Self::Integer(i) => buf.push_val(i), Self::If(ops) => { Self::write_slice(&ops, buf); buf.push(b' '); buf.extend_slice(self.operator()); } Self::IfElse(ops1, ops2) => { - Self::write_slice(&ops1, buf); + Self::write_slice(ops1, buf); buf.push(b' '); - Self::write_slice(&ops2, buf); + Self::write_slice(ops2, buf); buf.push(b' '); buf.extend_slice(self.operator()); } diff --git a/src/lib.rs b/src/lib.rs index b7e0546..0cc5a1b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -81,7 +81,7 @@ valid PDFs. */ #![forbid(unsafe_code)] -// #![deny(missing_docs)] +#![deny(missing_docs)] #![allow(clippy::wrong_self_convention)] #[macro_use] diff --git a/src/object.rs b/src/object.rs index 08a5e02..b305217 100644 --- a/src/object.rs +++ b/src/object.rs @@ -1,10 +1,11 @@ -use super::*; use crate::buf::Buf; use std::convert::TryFrom; use std::marker::PhantomData; use std::mem::ManuallyDrop; use std::num::NonZeroI32; +use super::*; + /// A primitive PDF object. pub trait Primitive { /// Write the object into a buffer. 
From 803ef27b74c257172bd932fc812e58a3c6155299 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Wed, 30 Oct 2024 21:30:59 +0100 Subject: [PATCH 16/25] do not implement DerefMut --- src/buf.rs | 31 +++++++++++++++---------------- src/content.rs | 2 +- src/font.rs | 2 +- src/functions.rs | 2 +- src/lib.rs | 14 +++++++------- src/object.rs | 16 ++++++++-------- 6 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/buf.rs b/src/buf.rs index 9662278..67a91cb 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -1,5 +1,5 @@ use super::Primitive; -use std::ops::{Deref, DerefMut}; +use std::ops::Deref; /// Track the limits of data types used in a buffer. #[derive(Clone, PartialEq, Debug, Default)] @@ -87,7 +87,7 @@ impl Limits { /// A buffer of arbitrary PDF content. #[derive(Clone, PartialEq, Debug)] pub struct Buf { - buf: Vec, + pub(crate) inner: Vec, pub(crate) limits: Limits, } @@ -95,31 +95,25 @@ impl Deref for Buf { type Target = Vec; fn deref(&self) -> &Self::Target { - &self.buf - } -} - -impl DerefMut for Buf { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.buf + &self.inner } } impl Buf { pub(crate) fn new() -> Self { - Self { buf: Vec::new(), limits: Limits::new() } + Self { inner: Vec::new(), limits: Limits::new() } } pub(crate) fn with_capacity(capacity: usize) -> Self { Self { - buf: Vec::with_capacity(capacity), + inner: Vec::with_capacity(capacity), limits: Limits::new(), } } /// Get the underlying bytes of the buffer. pub fn to_bytes(self) -> Vec { - self.buf + self.inner } /// Return the limits of the buffer. 
@@ -160,7 +154,7 @@ impl Buf { #[inline(never)] fn write_extreme(buf: &mut Buf, value: f32) { use std::io::Write; - write!(buf, "{}", value).unwrap(); + write!(buf.inner, "{}", value).unwrap(); } write_extreme(self, value); @@ -169,18 +163,18 @@ impl Buf { #[inline] pub(crate) fn extend_slice(&mut self, other: &[u8]) { - self.buf.extend(other); + self.inner.extend(other); } #[inline] pub(crate) fn extend(&mut self, other: &Buf) { self.limits.merge(&other.limits); - self.buf.extend(&other.buf); + self.inner.extend(&other.inner); } #[inline] pub(crate) fn push(&mut self, b: u8) { - self.buf.push(b); + self.inner.push(b); } #[inline] @@ -213,4 +207,9 @@ impl Buf { self.push(octal((value >> 3) & 7)); self.push(octal(value & 7)); } + + #[inline] + pub(crate) fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } } diff --git a/src/content.rs b/src/content.rs index e19ebff..2d34e4a 100644 --- a/src/content.rs +++ b/src/content.rs @@ -30,7 +30,7 @@ impl Content { /// Return the raw constructed byte stream. pub fn finish(mut self) -> Buf { if self.buf.last() == Some(&b'\n') { - self.buf.pop(); + self.buf.inner.pop(); } self.buf } diff --git a/src/font.rs b/src/font.rs index d65e48d..286a806 100644 --- a/src/font.rs +++ b/src/font.rs @@ -991,7 +991,7 @@ where } self.count = 0; - self.mappings.clear(); + self.mappings.inner.clear(); } } diff --git a/src/functions.rs b/src/functions.rs index 8ce08f1..72296e2 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -351,7 +351,7 @@ impl<'a> PostScriptOp<'a> { buf.push(b' '); } if ops.len() == 1 { - buf.pop(); + buf.inner.pop(); } buf.push(b'}'); } diff --git a/src/lib.rs b/src/lib.rs index 0cc5a1b..4c4a85f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -241,7 +241,7 @@ impl Pdf { /// /// _Default value_: \x80\x80\x80\x80 pub fn set_binary_marker(&mut self, marker: &[u8; 4]) { - self.chunk.buf[10..14].copy_from_slice(marker); + self.chunk.buf.inner[10..14].copy_from_slice(marker); } /// Set the PDF version. 
@@ -252,10 +252,10 @@ impl Pdf { /// _Default value_: 1.7. pub fn set_version(&mut self, major: u8, minor: u8) { if major < 10 { - self.chunk.buf[5] = b'0' + major; + self.chunk.buf.inner[5] = b'0' + major; } if minor < 10 { - self.chunk.buf[7] = b'0' + minor; + self.chunk.buf.inner[7] = b'0' + minor; } } @@ -305,7 +305,7 @@ impl Pdf { buf.push(b'\n'); if offsets.is_empty() { - write!(buf, "0000000000 65535 f\r\n").unwrap(); + write!(buf.inner, "0000000000 65535 f\r\n").unwrap(); } let mut written = 0; @@ -330,11 +330,11 @@ impl Pdf { } let gen = if free_id == 0 { "65535" } else { "00000" }; - write!(buf, "{:010} {} f\r\n", next % xref_len, gen).unwrap(); + write!(buf.inner, "{:010} {} f\r\n", next % xref_len, gen).unwrap(); written += 1; } - write!(buf, "{:010} 00000 n\r\n", offset).unwrap(); + write!(buf.inner, "{:010} 00000 n\r\n", offset).unwrap(); written += 1; } @@ -362,7 +362,7 @@ impl Pdf { // Write where the cross-reference table starts. buf.extend_slice(b"\nstartxref\n"); - write!(buf, "{}", xref_offset).unwrap(); + write!(buf.inner, "{}", xref_offset).unwrap(); // Write the end of file marker. buf.extend_slice(b"\n%%EOF"); diff --git a/src/object.rs b/src/object.rs index b305217..6bcf98c 100644 --- a/src/object.rs +++ b/src/object.rs @@ -79,7 +79,7 @@ impl Primitive for Str<'_> { // - Hex strings for anything non-ASCII. 
if self.0.iter().all(|b| b.is_ascii()) { buf.reserve(self.0.len()); - buf.push(b'('); + buf.inner.push(b'('); let mut balanced = None; for &byte in self.0 { @@ -403,17 +403,17 @@ impl Primitive for Date { buf.extend_slice(b"(D:"); (|| { - write!(buf, "{:04}", self.year).unwrap(); - write!(buf, "{:02}", self.month?).unwrap(); - write!(buf, "{:02}", self.day?).unwrap(); - write!(buf, "{:02}", self.hour?).unwrap(); - write!(buf, "{:02}", self.minute?).unwrap(); - write!(buf, "{:02}", self.second?).unwrap(); + write!(buf.inner, "{:04}", self.year).unwrap(); + write!(buf.inner, "{:02}", self.month?).unwrap(); + write!(buf.inner, "{:02}", self.day?).unwrap(); + write!(buf.inner, "{:02}", self.hour?).unwrap(); + write!(buf.inner, "{:02}", self.minute?).unwrap(); + write!(buf.inner, "{:02}", self.second?).unwrap(); let utc_offset_hour = self.utc_offset_hour?; if utc_offset_hour == 0 && self.utc_offset_minute == 0 { buf.push(b'Z'); } else { - write!(buf, "{:+03}'{:02}", utc_offset_hour, self.utc_offset_minute) + write!(buf.inner, "{:+03}'{:02}", utc_offset_hour, self.utc_offset_minute) .unwrap(); } Some(()) From 8af675d0d447b3d733dc5bc98b2f943fc7a35124 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Wed, 30 Oct 2024 21:31:20 +0100 Subject: [PATCH 17/25] format --- src/font.rs | 2 +- src/lib.rs | 2 +- src/object.rs | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/font.rs b/src/font.rs index 286a806..73e46df 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,5 +1,5 @@ -use std::marker::PhantomData; use crate::buf::Buf; +use std::marker::PhantomData; use super::*; diff --git a/src/lib.rs b/src/lib.rs index 4c4a85f..dccb708 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -187,7 +187,7 @@ pub mod types { } pub use self::buf::{Buf, Limits}; -pub use self::chunk::{Chunk}; +pub use self::chunk::Chunk; pub use self::content::Content; pub use self::object::{ Array, Date, Dict, Filter, Finish, Name, Null, Obj, Primitive, Rect, Ref, Rewrite, diff --git 
a/src/object.rs b/src/object.rs index 6bcf98c..a17b499 100644 --- a/src/object.rs +++ b/src/object.rs @@ -413,8 +413,12 @@ impl Primitive for Date { if utc_offset_hour == 0 && self.utc_offset_minute == 0 { buf.push(b'Z'); } else { - write!(buf.inner, "{:+03}'{:02}", utc_offset_hour, self.utc_offset_minute) - .unwrap(); + write!( + buf.inner, + "{:+03}'{:02}", + utc_offset_hour, self.utc_offset_minute + ) + .unwrap(); } Some(()) })(); From ddae285c993a65e92aeb021cbc7fde4fbca7fd3c Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Wed, 30 Oct 2024 22:16:56 +0100 Subject: [PATCH 18/25] merge limits in renumber as well --- src/renumber.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/renumber.rs b/src/renumber.rs index b17604e..8f19d70 100644 --- a/src/renumber.rs +++ b/src/renumber.rs @@ -5,6 +5,8 @@ use crate::{Chunk, Ref}; /// /// See [`Chunk::renumber`] for more details. pub fn renumber(source: &Chunk, target: &mut Chunk, mapping: &mut dyn FnMut(Ref) -> Ref) { + target.buf.limits.merge(source.limits()); + let mut iter = source.offsets.iter().copied().peekable(); while let Some((id, offset)) = iter.next() { let new = mapping(id); From 2c80827535d6422b69dd0f4efb337f72f71f3601 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Wed, 30 Oct 2024 22:18:50 +0100 Subject: [PATCH 19/25] Fix test --- src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/functions.rs b/src/functions.rs index 72296e2..a8883be 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -446,7 +446,7 @@ mod tests { ]; assert_eq!( - PostScriptOp::encode(&ops), + &PostScriptOp::encode(&ops).to_bytes(), b"{ 3.0 2.0 mul exch dup 0.0 ge { 1.0 add } {neg} ifelse add }" ); } From f95a19c07a1b3e3ee021c1199e91f19badb57d46 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Wed, 30 Oct 2024 22:22:00 +0100 Subject: [PATCH 20/25] Fix clippy --- benches/oneshot.rs | 2 +- src/functions.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/benches/oneshot.rs b/benches/oneshot.rs index 844fedb..12ae997 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -47,7 +47,7 @@ fn bench_content() -> Vec { c.save_state(); c.set_flatness(10); c.restore_state(); - c.finish() + c.finish().to_bytes() } fn bench_new() -> Pdf { diff --git a/src/functions.rs b/src/functions.rs index a8883be..42de42c 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -361,7 +361,7 @@ impl<'a> PostScriptOp<'a> { Self::Real(r) => buf.push_decimal(r), Self::Integer(i) => buf.push_val(i), Self::If(ops) => { - Self::write_slice(&ops, buf); + Self::write_slice(ops, buf); buf.push(b' '); buf.extend_slice(self.operator()); } From e32816a6374d02980224d9e9b86c87d4b4cab588 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Tue, 12 Nov 2024 13:39:32 +0100 Subject: [PATCH 21/25] Apply some code review --- benches/oneshot.rs | 2 +- src/buf.rs | 69 +++++++++++++++++++++++----------------------- src/chunk.rs | 6 ++-- src/content.rs | 4 +-- src/functions.rs | 2 +- src/lib.rs | 2 +- src/renumber.rs | 3 +- 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/benches/oneshot.rs b/benches/oneshot.rs index 12ae997..056b617 100644 --- a/benches/oneshot.rs +++ b/benches/oneshot.rs @@ -47,7 +47,7 @@ fn bench_content() -> Vec { c.save_state(); c.set_flatness(10); c.restore_state(); - c.finish().to_bytes() + c.finish().into_bytes() } fn bench_new() -> Pdf { diff --git a/src/buf.rs b/src/buf.rs index 67a91cb..7236cac 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -1,7 +1,8 @@ use super::Primitive; + use std::ops::Deref; -/// Track the limits of data types used in a buffer. +/// Tracks the limits of data types used in a buffer. 
#[derive(Clone, PartialEq, Debug, Default)] pub struct Limits { int: i32, @@ -18,30 +19,6 @@ impl Limits { Self::default() } - pub(crate) fn register_int(&mut self, val: i32) { - self.int = self.int.max(val.abs()); - } - - pub(crate) fn register_real(&mut self, val: f32) { - self.real = self.real.max(val.abs()); - } - - pub(crate) fn register_name_len(&mut self, len: usize) { - self.name_len = self.name_len.max(len); - } - - pub(crate) fn register_str_len(&mut self, len: usize) { - self.str_len = self.str_len.max(len); - } - - pub(crate) fn register_array_len(&mut self, len: usize) { - self.array_len = self.array_len.max(len); - } - - pub(crate) fn register_dict_entries(&mut self, len: usize) { - self.dict_entries = self.dict_entries.max(len); - } - /// Get the absolute value of the largest positive/negative integer number. pub fn int(&self) -> i32 { self.int @@ -72,6 +49,30 @@ impl Limits { self.str_len } + pub(crate) fn register_int(&mut self, val: i32) { + self.int = self.int.max(val.abs()); + } + + pub(crate) fn register_real(&mut self, val: f32) { + self.real = self.real.max(val.abs()); + } + + pub(crate) fn register_name_len(&mut self, len: usize) { + self.name_len = self.name_len.max(len); + } + + pub(crate) fn register_str_len(&mut self, len: usize) { + self.str_len = self.str_len.max(len); + } + + pub(crate) fn register_array_len(&mut self, len: usize) { + self.array_len = self.array_len.max(len); + } + + pub(crate) fn register_dict_entries(&mut self, len: usize) { + self.dict_entries = self.dict_entries.max(len); + } + /// Merge two `Limits` with each other, taking the maximum /// of each field from both. 
pub fn merge(&mut self, other: &Limits) { @@ -91,14 +92,6 @@ pub struct Buf { pub(crate) limits: Limits, } -impl Deref for Buf { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - impl Buf { pub(crate) fn new() -> Self { Self { inner: Vec::new(), limits: Limits::new() } @@ -112,7 +105,7 @@ impl Buf { } /// Get the underlying bytes of the buffer. - pub fn to_bytes(self) -> Vec { + pub fn into_bytes(self) -> Vec { self.inner } @@ -213,3 +206,11 @@ impl Buf { self.inner.reserve(additional) } } + +impl Deref for Buf { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} diff --git a/src/chunk.rs b/src/chunk.rs index 1be15d8..7dab512 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -38,7 +38,7 @@ impl Chunk { /// The bytes already written so far. pub fn as_bytes(&self) -> &[u8] { - self.buf.as_slice() + self.buf.deref() } /// Return the limits of the chunk. @@ -258,7 +258,7 @@ impl Chunk { /// file. /// /// You can create the content bytes using a [`Content`] builder. 
- pub fn form_xobject<'a>(&'a mut self, id: Ref, content: &'a [u8]) -> FormXObject { + pub fn form_xobject<'a>(&'a mut self, id: Ref, content: &'a [u8]) -> FormXObject<'a> { FormXObject::start(self.stream(id, content)) } @@ -320,7 +320,7 @@ impl Chunk { pub fn stream_shading<'a>( &'a mut self, id: Ref, - content: &'a Buf, + content: &'a [u8], ) -> StreamShading<'a> { StreamShading::start(self.stream(id, content)) } diff --git a/src/content.rs b/src/content.rs index 2d34e4a..1012cb4 100644 --- a/src/content.rs +++ b/src/content.rs @@ -1656,7 +1656,7 @@ mod tests { .restore_state(); assert_eq!( - content.finish().to_bytes(), + content.finish().into_bytes(), b"q\n1 2 3 4 re\nf\n[7 2] 4 d\n/MyImage Do\n2 3.5 /MyPattern scn\nQ" ); } @@ -1677,7 +1677,7 @@ mod tests { content.end_text(); assert_eq!( - content.finish().to_bytes(), + content.finish().into_bytes(), b"/F1 12 Tf\nBT\n[] TJ\n[(AB) 2 (CD)] TJ\nET" ); } diff --git a/src/functions.rs b/src/functions.rs index 42de42c..230c2cf 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -446,7 +446,7 @@ mod tests { ]; assert_eq!( - &PostScriptOp::encode(&ops).to_bytes(), + &PostScriptOp::encode(&ops).into_bytes(), b"{ 3.0 2.0 mul exch dup 0.0 ge { 1.0 add } {neg} ifelse add }" ); } diff --git a/src/lib.rs b/src/lib.rs index dccb708..682ca59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -366,7 +366,7 @@ impl Pdf { // Write the end of file marker. buf.extend_slice(b"\n%%EOF"); - buf.to_bytes() + buf.into_bytes() } } diff --git a/src/renumber.rs b/src/renumber.rs index 8f19d70..e564cd1 100644 --- a/src/renumber.rs +++ b/src/renumber.rs @@ -186,7 +186,6 @@ mod tests { // Manually write an untidy object. c.offsets.push((Ref::new(8), c.buf.len())); - // TODO: This won't update `limits` of `buf`. 
c.buf.extend_slice(b"8 3 obj\n< Date: Thu, 14 Nov 2024 21:49:48 +0100 Subject: [PATCH 22/25] Add some documentation --- src/content.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/content.rs b/src/content.rs index 1012cb4..faa1bea 100644 --- a/src/content.rs +++ b/src/content.rs @@ -27,7 +27,15 @@ impl Content { Operation::start(&mut self.buf, operator) } - /// Return the raw constructed byte stream. + /// Return the buffer of the content stream. + /// + /// The buffer is essentially a thin wrapper around two objects: + /// - A [`Limits`] object, which can optionally be used to keep + /// track of data such as the largest used integer or + /// the longest string used in the content streams, which is useful information + /// for some export modes. + /// - The actual underlying data of the content stream, which can be written + /// to a chunk (and optionally apply a filter before doing so). pub fn finish(mut self) -> Buf { if self.buf.last() == Some(&b'\n') { self.buf.inner.pop(); From 34f6b8c573c917c0086e0bbae59e60952f9b9e64 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Thu, 14 Nov 2024 22:33:13 +0100 Subject: [PATCH 23/25] Add two test cases and fix two bugs --- src/buf.rs | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/object.rs | 5 ++++ 2 files changed, 75 insertions(+) diff --git a/src/buf.rs b/src/buf.rs index 7236cac..ce0cf1e 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -214,3 +214,73 @@ impl Deref for Buf { &self.inner } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Chunk, Content, Finish, Name, Rect, Ref, Str, TextStr}; + + #[test] + fn test_content_limits() { + let mut limits = Limits::default(); + + let mut content = Content::new(); + content.cubic_to(14.3, 16.2, 22.6, 30.9, 50.1, 40.0); + content.show(Str(b"Some text")); + content.set_font(Name(b"NotoSans"), 10.0); + let buf = content.finish(); + limits.merge(buf.limits()); + + let mut content 
= Content::new(); + content.line_to(55.0, -75.3); + content.set_font(Name(b"Noto"), 10.0); + content + .show_positioned() + .items() + .show(Str(b"A")) + .show(Str(b"B")) + .adjust(32.0); + content + .marked_content_point_with_properties(Name(b"Hi")) + .properties() + .actual_text(TextStr("text")); + let buf = content.finish(); + limits.merge(buf.limits()); + + assert_eq!( + limits, + Limits { + int: 55, + real: 75.3, + name_len: 10, + str_len: 9, + array_len: 3, + dict_entries: 1, + } + ) + } + + #[test] + fn test_chunk_limits() { + let mut limits = Limits::default(); + + let mut chunk = Chunk::new(); + let mut x_object = chunk.form_xobject(Ref::new(1), &[]); + x_object.bbox(Rect::new(4.0, 6.0, 22.1, 31.0)); + x_object.finish(); + + limits.merge(chunk.limits()); + + assert_eq!( + limits, + Limits { + int: 31, + real: 22.1, + name_len: 7, + str_len: 0, + array_len: 4, + dict_entries: 4, + } + ) + } +} \ No newline at end of file diff --git a/src/object.rs b/src/object.rs index a17b499..cf3c5de 100644 --- a/src/object.rs +++ b/src/object.rs @@ -297,6 +297,8 @@ impl Primitive for Rect { buf.push(b' '); buf.push_val(self.y2); buf.push(b']'); + + buf.limits.register_array_len(4); } } @@ -860,6 +862,9 @@ impl<'a> Stream<'a> { impl Drop for Stream<'_> { fn drop(&mut self) { + let dict_len = self.dict.len as usize; + self.dict.buf.limits.register_dict_entries(dict_len); + self.dict.buf.extend_slice(b"\n>>"); self.dict.buf.extend_slice(b"\nstream\n"); self.dict.buf.extend_slice(self.data.as_ref()); From 9dcf56b60c46d6194929a63c32ec7c50552eb81a Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Wed, 27 Nov 2024 15:16:23 +0100 Subject: [PATCH 24/25] Reformat --- src/buf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/buf.rs b/src/buf.rs index ce0cf1e..379cd66 100644 --- a/src/buf.rs +++ b/src/buf.rs @@ -283,4 +283,4 @@ mod tests { } ) } -} \ No newline at end of file +} From 33052e2f52a409f9285cbec8115d7a6c1264edf9 Mon Sep 17 00:00:00 2001 From: 
Laurenz Stampfl Date: Wed, 27 Nov 2024 15:31:46 +0100 Subject: [PATCH 25/25] Add an example for tracking limits --- examples/limits.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 examples/limits.rs diff --git a/examples/limits.rs b/examples/limits.rs new file mode 100644 index 0000000..3a3ed6f --- /dev/null +++ b/examples/limits.rs @@ -0,0 +1,53 @@ +//! This example shows how you can track PDF limits of your chunks. + +use pdf_writer::{Chunk, Content, Limits, Name, Ref}; + +fn main() { + let mut limits = Limits::new(); + + let mut content = Content::new(); + content.transform([-3.4, 0.0, 0.0, 3.1, 100.0, 100.0]); + content.line_to(15.0, -26.1); + let buf = content.finish(); + // This will have the limits: + // - Max real number: 26.1 (for negative values we use their absolute value) + // - Max int number 100 (even though above 100.0 is a float number, it will be coerced into an + // integer, and thus counts towards the int limit) + limits.merge(buf.limits()); + + let mut chunk = Chunk::new(); + chunk.stream(Ref::new(1), &buf.into_bytes()); + chunk.type3_font(Ref::new(2)).name(Name(b"A_long_font_name")); + // This will update the limit for the maximum name and dictionary length. + limits.merge(chunk.limits()); + + // This is what the final PDF will look like. + assert_eq!( + chunk.as_bytes(), + b"1 0 obj +<< + /Length 34 +>> +stream +-3.4 0 0 3.1 100 100 cm +15 -26.1 l +endstream +endobj + +2 0 obj +<< + /Type /Font + /Subtype /Type3 + /Name /A_long_font_name +>> +endobj + +" + ); + + // And the limits should match, as well! + assert_eq!(limits.int(), 100); + assert_eq!(limits.real(), 26.1); + assert_eq!(limits.name_len(), 16); + assert_eq!(limits.dict_entries(), 3); +}