From febef246a90c5c8e823c634f36847b56eb769a15 Mon Sep 17 00:00:00 2001 From: Ana Gelez Date: Wed, 17 Apr 2024 16:49:05 +0200 Subject: [PATCH] Support one byte characters in CMaps (#32) --- src/font.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/src/font.rs b/src/font.rs index de43f23..a864d88 100644 --- a/src/font.rs +++ b/src/font.rs @@ -1,3 +1,5 @@ +use std::marker::PhantomData; + use super::*; /// Writer for a _Type-1 font dictionary_. @@ -653,13 +655,17 @@ impl<'a> Cmap<'a> { deref!('a, Cmap<'a> => Stream<'a>, stream); /// A builder for a `/ToUnicode` character map stream. -pub struct UnicodeCmap { +pub struct UnicodeCmap { buf: Vec, mappings: Vec, count: i32, + glyph_id: PhantomData, } -impl UnicodeCmap { +impl UnicodeCmap +where + G: GlyphId, +{ /// Create a new, empty unicode character map. pub fn new(name: Name, info: SystemInfo) -> Self { // https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5014.CIDFont_Spec.pdf @@ -710,25 +716,34 @@ impl UnicodeCmap { // We just cover the whole unicode codespace. buf.extend(b"1 begincodespacerange\n"); - buf.extend(b"<0000> \n"); + buf.push(b'<'); + G::MIN.push(&mut buf); + buf.extend(b"> <"); + G::MAX.push(&mut buf); + buf.extend(b">\n"); buf.extend(b"endcodespacerange\n"); - Self { buf, mappings: vec![], count: 0 } + Self { + buf, + mappings: vec![], + count: 0, + glyph_id: PhantomData, + } } /// Add a mapping from a glyph ID to a codepoint. - pub fn pair(&mut self, glyph: u16, codepoint: char) { + pub fn pair(&mut self, glyph: G, codepoint: char) { self.pair_with_multiple(glyph, [codepoint]); } /// Add a mapping from a glyph ID to multiple codepoints. pub fn pair_with_multiple( &mut self, - glyph: u16, + glyph: G, codepoints: impl IntoIterator, ) { self.mappings.push(b'<'); - self.mappings.push_hex_u16(glyph); + glyph.push(&mut self.mappings); self.mappings.extend(b"> <"); for c in codepoints { @@ -775,6 +790,45 @@ impl UnicodeCmap { } } +/// Type3 fonts require (in Acrobat at least) IDs in CMaps to be encoded with +/// one byte only, whereas other font types use two bytes. +/// +/// This trait provides an abstraction to support both. +pub trait GlyphId: private::Sealed {} + +impl GlyphId for u8 {} + +impl GlyphId for u16 {} + +/// Module to seal the `GlyphId` trait. +mod private { + use crate::buf::BufExt; + + pub trait Sealed { + const MIN: Self; + const MAX: Self; + fn push(self, buf: &mut Vec); + } + + impl Sealed for u8 { + const MIN: Self = u8::MIN; + const MAX: Self = u8::MAX; + + fn push(self, buf: &mut Vec) { + buf.push_hex(self); + } + } + + impl Sealed for u16 { + const MIN: Self = u16::MIN; + const MAX: Self = u16::MAX; + + fn push(self, buf: &mut Vec) { + buf.push_hex_u16(self); + } + } +} + /// Specifics about a character collection. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub struct SystemInfo<'a> {