Skip to content

Commit

Permalink
Add methods to check for PDF/A compliance
Browse files Browse the repository at this point in the history
  • Loading branch information
reknih committed Jul 12, 2024
1 parent 130a849 commit 6e96e11
Show file tree
Hide file tree
Showing 6 changed files with 225 additions and 5 deletions.
15 changes: 15 additions & 0 deletions src/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,21 @@ impl<'a> DeviceN<'a> {

DeviceNAttrs::start(self.array.push())
}

/// Validate the `DeviceN` color space array against some provisions of
/// PDF/A-2 clauses 6.2.4.4 and 6.1.13, then finish writing it.
///
/// # Errors
///
/// - [`PdfaError::TooManyColorants`] if the underlying array reports a
///   length greater than 8; the error carries that length.
/// - [`PdfaError::MalformedDeviceNArray`] if either the `has_alternate` or
///   the `has_tint` flag is unset.
///
/// In both error cases the writer is consumed without `finish` being
/// called.
pub fn finish_pdfa(self) -> PdfaResult<()> {
    // Measure once so the check and the error payload agree.
    let entries = self.array.len();
    if entries > 8 {
        return Err(PdfaError::TooManyColorants(entries as usize));
    }

    // Both flags must be set for the array to count as well-formed.
    if !(self.has_alternate && self.has_tint) {
        return Err(PdfaError::MalformedDeviceNArray);
    }

    self.finish();
    Ok(())
}
}

/// Writer for a _DeviceN attributes dictionary_. PDF 1.6+.
Expand Down
20 changes: 19 additions & 1 deletion src/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use super::*;
/// A builder for a content stream.
///
/// Besides the output bytes, the builder tracks how deeply `q` (save state)
/// operators are currently nested so that `save_state_checked` can refuse
/// to exceed the PDF/A nesting limit.
pub struct Content {
/// The bytes of the content stream written so far.
buf: Vec<u8>,
/// Number of `save_state` calls not yet matched by `restore_state`.
/// Updated with saturating arithmetic, so it never wraps or goes below
/// zero.
q_nesting: usize,
}

/// Core methods.
Expand All @@ -16,7 +17,7 @@ impl Content {

/// Create a new content stream with the specified initial buffer capacity.
///
/// `capacity` is forwarded to `Vec::with_capacity` for the internal byte
/// buffer. The `q` nesting counter starts at zero.
pub fn with_capacity(capacity: usize) -> Self {
Self { buf: Vec::with_capacity(capacity) }
Self { buf: Vec::with_capacity(capacity), q_nesting: 0 }
}

/// Start writing an arbitrary operation.
Expand Down Expand Up @@ -243,13 +244,30 @@ impl Content {
#[inline]
pub fn save_state(&mut self) -> &mut Self {
    self.op("q");

    // Clamping at `usize::MAX` instead of overflowing is harmless: callers
    // that care about the depth go through `save_state_checked`, which
    // reports an error long before the counter could get that far.
    let deeper = self.q_nesting.saturating_add(1);
    self.q_nesting = deeper;
    self
}

/// `q`: Save the graphics state on the stack, refusing to go past the
/// nesting limit in PDF/A-2 clause 6.1.13.
///
/// # Errors
///
/// Returns [`PdfaError::OverlyNestedGraphicsState`] when the tracked
/// nesting depth is already 28 or more. Nothing is written in that case.
#[inline]
pub fn save_state_checked(&mut self) -> PdfaResult<&mut Self> {
    if self.q_nesting < 28 {
        Ok(self.save_state())
    } else {
        Err(PdfaError::OverlyNestedGraphicsState)
    }
}

/// `Q`: Restore the graphics state from the stack.
///
/// Also lowers the tracked `q` nesting depth by one. The counter stops at
/// zero, so an unmatched `Q` does not underflow it.
#[inline]
pub fn restore_state(&mut self) -> &mut Self {
    self.op("Q");

    let shallower = self.q_nesting.saturating_sub(1);
    self.q_nesting = shallower;
    self
}

Expand Down
31 changes: 31 additions & 0 deletions src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -939,17 +939,46 @@ where
self.pair_with_multiple(glyph, [codepoint]);
}

/// Map a glyph ID to a single codepoint, rejecting codepoints that are
/// invalid in some PDF/A profiles.
///
/// This is the one-codepoint convenience form of
/// `pair_with_multiple_pdfa` and returns whatever that method returns.
pub fn pair_pdfa(&mut self, glyph: G, codepoint: char) -> PdfaResult<()> {
    let single = std::iter::once(codepoint);
    self.pair_with_multiple_pdfa(glyph, single)
}

/// Add a mapping from a glyph ID to multiple codepoints.
///
/// No PDF/A codepoint validation is performed; use
/// `pair_with_multiple_pdfa` for the checked variant.
pub fn pair_with_multiple(
    &mut self,
    glyph: G,
    codepoints: impl IntoIterator<Item = char>,
) {
    // The only error the shared implementation is known to produce comes
    // from its PDF/A codepoint check, which is disabled here. State that
    // invariant in the panic message instead of using a bare `unwrap`.
    self.pair_with_multiple_impl(glyph, codepoints, false)
        .expect("pairing without PDF/A checks cannot fail");
}

/// Add a mapping from a glyph ID to multiple codepoints, checking for
/// codepoints that are invalid in some PDF/A profiles.
///
/// # Errors
///
/// Returns [`PdfaError::InvalidCMapCodepoint`] if any codepoint is U+0000,
/// U+FEFF, or U+FFFE. When that happens the mapping buffer is restored to
/// its previous length, so no partial entry for `glyph` is left behind.
pub fn pair_with_multiple_pdfa(
    &mut self,
    glyph: G,
    codepoints: impl IntoIterator<Item = char>,
) -> PdfaResult<()> {
    // The shared implementation appends `<glyph> <` and any codepoints seen
    // so far before it notices a forbidden one. Without a rollback the
    // half-written entry would stay in the buffer.
    // NOTE(review): assumes `mappings` is an append-only `Vec<u8>` — confirm.
    let checkpoint = self.mappings.len();

    let result = self.pair_with_multiple_impl(glyph, codepoints, true);
    if result.is_err() {
        self.mappings.truncate(checkpoint);
    }

    result
}

fn pair_with_multiple_impl(
&mut self,
glyph: G,
codepoints: impl IntoIterator<Item = char>,
check_pdfa: bool,
) -> PdfaResult<()> {
self.mappings.push(b'<');
glyph.push(&mut self.mappings);
self.mappings.extend(b"> <");

for c in codepoints {
if check_pdfa && (c == '\u{0}' || c == '\u{feff}' || c == '\u{fffe}') {
return Err(PdfaError::InvalidCMapCodepoint);
}

for &mut part in c.encode_utf16(&mut [0; 2]) {
self.mappings.push_hex_u16(part);
}
Expand All @@ -962,6 +991,8 @@ where
if self.count >= 100 {
self.flush_range();
}

Ok(())
}

/// Finish building the character map.
Expand Down
23 changes: 21 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,15 @@ pub mod types {
pub use object::Predictor;
pub use renditions::{MediaClipType, RenditionType, TempFileType};
pub use structure::{
Direction, NumberingStyle, OutlineItemFlags, PageLayout, PageMode, StructRole,
TabOrder, TrappingStatus,
Direction, NumberingStyle, OutlineItemFlags, PageLayout, PageMode, PdfaError,
PdfaResult, StructRole, TabOrder, TrappingStatus,
};
pub use transitions::{TransitionAngle, TransitionStyle};
pub use xobject::SMaskInData;
}

use structure::{PdfaError, PdfaResult};

pub use self::chunk::Chunk;
pub use self::content::Content;
pub use self::object::{
Expand Down Expand Up @@ -276,6 +278,23 @@ impl Pdf {
self.indirect(id).start()
}

/// Write the cross-reference table and file trailer and return the
/// underlying buffer, after checking two PDF/A requirements: the number of
/// indirect objects and the presence of a file ID.
///
/// # Errors
///
/// - [`PdfaError::TooManyIndirectObjects`] if more than 8388607 offsets
///   have been recorded; the error carries the actual count.
/// - [`PdfaError::MissingFileID`] if no file ID has been set.
///
/// # Panics
///
/// Panics if any indirect reference id was used twice.
pub fn finish_pdfa(self) -> PdfaResult<Vec<u8>> {
    let object_count = self.chunk.offsets.len();
    if object_count > 8388607 {
        return Err(PdfaError::TooManyIndirectObjects(object_count));
    }

    if self.file_id.is_some() {
        Ok(self.finish())
    } else {
        Err(PdfaError::MissingFileID)
    }
}

/// Write the cross-reference table and file trailer and return the
/// underlying buffer.
///
Expand Down
49 changes: 48 additions & 1 deletion src/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ use std::marker::PhantomData;
use std::mem::ManuallyDrop;
use std::num::NonZeroI32;

use structure::{PdfaError, PdfaResult};

use super::*;

/// A primitive PDF object.
Expand Down Expand Up @@ -52,7 +54,18 @@ impl Primitive for f32 {
// Thin wrapper around a borrowed byte slice. The field is public, so direct
// construction performs no validation; `Str::pdfa` is the constructor that
// enforces the 32767-byte PDF/A limit.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Str<'a>(pub &'a [u8]);

impl Str<'_> {
impl<'a> Str<'a> {
/// Construct a new string after verifying that it is at most 32767 bytes
/// long.
///
/// This helps to ensure compliance with Section 6.1.8 in the PDF/A-2 spec.
///
/// # Errors
///
/// Returns [`PdfaError::OverlongString`] carrying the actual byte length
/// when `bytes` is longer than 32767 bytes.
pub fn pdfa(bytes: &'a [u8]) -> PdfaResult<Self> {
    match bytes.len() {
        len if len > 32767 => Err(PdfaError::OverlongString(len)),
        _ => Ok(Self(bytes)),
    }
}

/// Whether the parentheses in the byte string are balanced.
fn is_balanced(self) -> bool {
let mut depth = 0;
Expand Down Expand Up @@ -149,6 +162,22 @@ impl Primitive for TextStr<'_> {
// Thin wrapper around a borrowed byte slice. The field is public, so direct
// construction performs no validation; `Name::pdfa` is the constructor that
// enforces the 127-byte limit and UTF-8 validity.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Name<'a>(pub &'a [u8]);

impl<'a> Name<'a> {
    /// Create a new name from a byte string, verifying that it is at most
    /// 127 bytes long and valid UTF-8.
    ///
    /// This helps to ensure compliance with Section 6.1.8 in the PDF/A
    /// specifications PDF/A-2, PDF/A-3, and PDF/A-4.
    ///
    /// # Errors
    ///
    /// - [`PdfaError::OverlongName`] with the actual byte length if `bytes`
    ///   is longer than 127 bytes. The length is checked first.
    /// - [`PdfaError::NameNotUtf8`] with the decoding error if `bytes` is
    ///   not valid UTF-8.
    pub fn pdfa(bytes: &'a [u8]) -> PdfaResult<Self> {
        let len = bytes.len();
        if len > 127 {
            return Err(PdfaError::OverlongName(len));
        }

        // Only validity matters; the decoded `&str` itself is not kept.
        match std::str::from_utf8(bytes) {
            Ok(_) => Ok(Self(bytes)),
            Err(err) => Err(PdfaError::NameNotUtf8(err)),
        }
    }
}

impl Primitive for Name<'_> {
fn write(self, buf: &mut Vec<u8>) {
buf.reserve(1 + self.0.len());
Expand Down Expand Up @@ -271,6 +300,24 @@ impl Rect {
Self { x1, y1, x2, y2 }
}

/// Create a new rectangle whose extents comply with the implementation
/// limits for page sizes.
///
/// Width and height are taken as the absolute differences of the x and y
/// coordinates, so the corner order does not matter for the check. The
/// coordinates are stored as given.
///
/// # Errors
///
/// - [`PdfaError::PageWidthOutOfRange`] with the computed width if it is
///   not within `3.0..=14400.0`. The width is checked first.
/// - [`PdfaError::PageHeightOutOfRange`] with the computed height if it is
///   not within `3.0..=14400.0`.
#[inline]
pub fn page(x1: f32, y1: f32, x2: f32, y2: f32) -> PdfaResult<Self> {
    let limits = 3.0..=14400.0;

    let width = (x2 - x1).abs();
    if !limits.contains(&width) {
        return Err(PdfaError::PageWidthOutOfRange(width));
    }

    let height = (y2 - y1).abs();
    if !limits.contains(&height) {
        return Err(PdfaError::PageHeightOutOfRange(height));
    }

    Ok(Self { x1, y1, x2, y2 })
}

/// Convert this rectangle into 8 floats describing the four corners of the
/// rectangle in counterclockwise order.
#[inline]
Expand Down
92 changes: 91 additions & 1 deletion src/structure.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::str::Utf8Error;

use crate::color::SeparationInfo;

use super::*;
Expand Down Expand Up @@ -141,9 +143,9 @@ impl<'a> Catalog<'a> {
///
/// Each entry in the array is an [output intent
/// dictionary.](writers::OutputIntent)
pub fn output_intents(&mut self) -> TypedArray<'_, Dict> {
///
/// Must be present in PDF/X documents, encouraged in PDF/A documents.
pub fn output_intents(&mut self) -> TypedArray<'_, OutputIntent> {
self.insert(Name(b"OutputIntents")).array().typed()
}
}
Expand Down Expand Up @@ -1570,3 +1572,91 @@ impl<'a> Metadata<'a> {
}

deref!('a, Metadata<'a> => Stream<'a>, stream);

/// A result type for operations that check for PDF/A compliance.
///
/// Shorthand for `Result<T, PdfaError>`.
pub type PdfaResult<T> = Result<T, PdfaError>;

/// Errors that pdf-writer can automatically detect when writing PDF/A files.
///
/// Please note that these errors only enforce provisions of clauses 6.1.8,
/// 6.1.13, 6.1.3, and 6.2.11.7.2 of the PDF/A-2 spec. They do not enforce the
/// entire spec, so additional attention needs to be paid to write compliant
/// files.
///
/// Integer and float implementation limits are not checked since they are
/// already enforced by the `i32` and `f32` types, respectively.
///
/// Where a variant carries a value, it is the offending measurement (length,
/// count, or extent) that failed the check.
#[derive(Debug, Clone, PartialEq)]
pub enum PdfaError {
/// A string contained more than 32767 bytes.
///
/// Carries the actual byte length of the string.
OverlongString(usize),
/// A name object contained more than 127 bytes.
///
/// Carries the actual byte length of the name.
OverlongName(usize),
/// A name object was not UTF-8 decodable.
///
/// Carries the error returned by `std::str::from_utf8`.
NameNotUtf8(Utf8Error),
/// The file has more than 8388607 indirect objects.
///
/// Carries the number of indirect objects that was counted.
TooManyIndirectObjects(usize),
/// The graphics state was nested more than 28 levels deep.
OverlyNestedGraphicsState,
/// A DeviceN color space had more than 8 colorants.
///
/// Carries the array length that was measured by the check.
TooManyColorants(usize),
/// The DeviceN array does not comply with clause 8.6.6.5 of ISO
/// 32000-1:2008.
MalformedDeviceNArray,
/// The file trailer is missing a file ID.
///
/// Call [`crate::Pdf::set_file_id`] before finishing the file.
MissingFileID,
/// The CMap maps to a codepoint 0, U+FFFE, or U+FEFF.
///
/// Only applicable to PDF/A-2u, PDF/A-2a, and similar profiles in other
/// parts.
InvalidCMapCodepoint,
/// The page width is out of range.
///
/// Carries the absolute width that was computed; the accepted range is
/// 3 to 14400 inclusive.
PageWidthOutOfRange(f32),
/// The page height is out of range.
///
/// Carries the absolute height that was computed; the accepted range is
/// 3 to 14400 inclusive.
PageHeightOutOfRange(f32),
}

impl std::fmt::Display for PdfaError {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Self::OverlongString(len) => {
write!(f, "string contained {} bytes but must not exceed 32767", len)
}
Self::OverlongName(len) => {
write!(f, "name contained {} bytes but must not exceed 127", len)
}
Self::NameNotUtf8(e) => write!(f, "name was not UTF-8 decodable ({})", e),
Self::TooManyIndirectObjects(count) => write!(
f,
"file has {} indirect objects but must not exceed 8388607",
count
),
Self::OverlyNestedGraphicsState => {
f.write_str("graphics state (q) was nested more than 28 levels deep")
}
Self::TooManyColorants(count) => write!(
f,
"DeviceN color space had {} colorants but must not exceed 8",
count
),
Self::MalformedDeviceNArray => f.write_str("DeviceN array is malformed"),
Self::MissingFileID => f.write_str("file trailer is missing a file ID"),
Self::InvalidCMapCodepoint => {
f.write_str("CMap maps to a forbidden codepoint")
}
Self::PageWidthOutOfRange(w) if *w < 3.0 => {
write!(f, "page width {} is too small (must be at least 3)", w)
}
Self::PageWidthOutOfRange(w) => {
write!(f, "page width {} is too large (must be at most 14400)", w)
}
Self::PageHeightOutOfRange(h) if *h < 3.0 => {
write!(f, "page height {} is too small (must be at least 3)", h)
}
Self::PageHeightOutOfRange(h) => {
write!(f, "page height {} is too large (must be at most 14400)", h)
}
}
}
}

0 comments on commit 6e96e11

Please sign in to comment.