From e390fe1ffc9df1e370fee0cfb191f16066aa385f Mon Sep 17 00:00:00 2001 From: Mykola Bohdiuk Date: Tue, 25 Jun 2024 19:13:03 +0300 Subject: [PATCH] [PERFORMANCE] Improve FileSystemFontProvider.scanFonts() performance by adding 'only headers' mode to TTF parser: * only read tables needed for FSFontInfo ('name', 'head', 'OS/2', 'CFF ', 'gcid') * 'CFF ' and 'head' table parsers finish as soon as it has all needed headers --- .../org/apache/fontbox/cff/CFFParser.java | 42 ++++- .../java/org/apache/fontbox/ttf/CFFTable.java | 23 ++- .../org/apache/fontbox/ttf/FontHeaders.java | 154 ++++++++++++++++++ .../org/apache/fontbox/ttf/HeaderTable.java | 10 ++ .../org/apache/fontbox/ttf/NamingTable.java | 31 +++- .../org/apache/fontbox/ttf/TTFParser.java | 111 ++++++++++++- .../fontbox/ttf/TrueTypeCollection.java | 39 ++++- .../org/apache/fontbox/ttf/TrueTypeFont.java | 36 ++++ .../pdmodel/font/FileSystemFontProvider.java | 146 +++++++++-------- 9 files changed, 500 insertions(+), 92 deletions(-) create mode 100644 fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java b/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java index f2b08cd106f..2b3df719e76 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.ttf.FontHeaders; import org.apache.pdfbox.io.RandomAccessRead; @@ -48,7 +49,8 @@ public class CFFParser private String[] stringIndex = null; private ByteSource source; - + private FontHeaders loadOnlyHeaders; + // for debugging only private String debugFontName; @@ -66,6 +68,11 @@ public interface ByteSource byte[] getBytes() throws IOException; } + public void setLoadOnlyHeaders(FontHeaders loadOnlyHeaders) + { + this.loadOnlyHeaders = loadOnlyHeaders; + } + /** * Parse CFF font 
using byte array, also passing in a byte source for future use. * @@ -91,17 +98,21 @@ public List parse(byte[] bytes, ByteSource source) throws IOException public List parse(RandomAccessRead randomAccessRead) throws IOException { // TODO do we need to store the source data of the font? It isn't used at all - byte[] bytes = new byte[(int) randomAccessRead.length()]; + // definitely don't need 'source' in 'loadOnlyHeaders' mode randomAccessRead.seek(0); - int remainingBytes = bytes.length; - int amountRead; - while ((amountRead = randomAccessRead.read(bytes, bytes.length - remainingBytes, - remainingBytes)) > 0) + if (loadOnlyHeaders == null) { - remainingBytes -= amountRead; + byte[] bytes = new byte[(int) randomAccessRead.length()]; + int remainingBytes = bytes.length; + int amountRead; + while ((amountRead = randomAccessRead.read(bytes, bytes.length - remainingBytes, + remainingBytes)) > 0) + { + remainingBytes -= amountRead; + } + randomAccessRead.seek(0); + this.source = new CFFBytesource(bytes); } - randomAccessRead.seek(0); - this.source = new CFFBytesource(bytes); return parse(new DataInputRandomAccessRead(randomAccessRead)); } @@ -492,6 +503,15 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue()); font = cffCIDFont; + if (loadOnlyHeaders != null) + { + loadOnlyHeaders.setOtfROS( + cffCIDFont.getRegistry(), + cffCIDFont.getOrdering(), + cffCIDFont.getSupplement()); + // we just read (Registry, Ordering, Supplement) and don't need anything else + return font; + } } else { @@ -501,6 +521,10 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr // name debugFontName = name; font.setName(name); + if (loadOnlyHeaders != null) + { + return font; // not a 'CFFCIDFont' => cannot read properties needed by LoadOnlyHeaders anyway + } // top dict font.addValueToTopDict("version", getString(topDict, "version")); diff --git 
a/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java index 7d6d14cae0c..6f37ee9489f 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.fontbox.cff.CFFFont; import org.apache.fontbox.cff.CFFParser; +import org.apache.pdfbox.io.RandomAccessRead; /** * PostScript font program (compact font format). @@ -48,9 +49,27 @@ public class CFFTable extends TTFTable @Override void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { - byte[] bytes = data.read((int)getLength()); - +// assert data.getCurrentPosition() == getOffset(); CFFParser parser = new CFFParser(); + FontHeaders loadOnlyHeaders = ttf.getLoadOnlyHeaders(); + parser.setLoadOnlyHeaders(loadOnlyHeaders); + if (loadOnlyHeaders != null) + { + // TODO: measure performance and maybe use createSubView() for non-loadOnlyHeaders case + try (RandomAccessRead subReader = data.createSubView(getLength())) + { + if (subReader != null) + { + cffFont = parser.parse(subReader).get(0); + data.seek(getOffset() + getLength()); + initialized = true; + return; + } + assert loadOnlyHeaders != null + : "It is inefficient to read whole CFF table to parse only headers, please use RandomAccessReadUncachedDataStream"; + } + } + byte[] bytes = data.read((int)getLength()); cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0); initialized = true; diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java b/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java new file mode 100644 index 00000000000..6a50d8a43c6 --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.fontbox.ttf; + +import java.io.IOException; + +/** + * To improve performance of {@code FileSystemFontProvider.scanFonts(...)}, + * this class is used both as a marker (to skip unused data) and as a storage for collected data. + *

+ * Tables it needs:<ul>
+ * <li>NamingTable.TAG
+ * <li>HeaderTable.TAG
+ * <li>OS2WindowsMetricsTable.TAG
+ * <li>CFFTable.TAG (for OTF)
+ * <li>"gcid" (for non-OTF)
+ * </ul>
+ * + * @author Mykola Bohdiuk + */ +public final class FontHeaders +{ + static final int BYTES_GCID = 142; + + private IOException exception; + private String name; + private Integer headerMacStyle; + private OS2WindowsMetricsTable os2Windows; + private String fontFamily; + private String fontSubFamily; + private byte[] nonOtfGcid142; + // + private boolean isOTFAndPostScript; + private String otfRegistry; + private String otfOrdering; + private int otfSupplement; + + public IOException getException() + { + return exception; + } + + public String getName() + { + return name; + } + + /** + * null == no HeaderTable, {@code ttf.getHeader().getMacStyle()} + */ + public Integer getHeaderMacStyle() + { + return headerMacStyle; + } + + public OS2WindowsMetricsTable getOS2Windows() + { + return os2Windows; + } + + // only when LOGGER(FileSystemFontProvider).isTraceEnabled() tracing: FontFamily, FontSubfamily + public String getFontFamily() + { + return fontFamily; + } + + public String getFontSubFamily() + { + return fontSubFamily; + } + + public boolean isOpenTypePostScript() + { + return isOTFAndPostScript; + } + + public byte[] getNonOtfTableGCID142() + { + return nonOtfGcid142; + } + + public String getOtfRegistry() + { + return otfRegistry; + } + + public String getOtfOrdering() + { + return otfOrdering; + } + + public int getOtfSupplement() + { + return otfSupplement; + } + + void setException(IOException exception) + { + this.exception = exception; + } + + void setName(String name) + { + this.name = name; + } + + void setHeaderMacStyle(Integer headerMacStyle) + { + this.headerMacStyle = headerMacStyle; + } + + void setOs2Windows(OS2WindowsMetricsTable os2Windows) + { + this.os2Windows = os2Windows; + } + + void setFontFamily(String fontFamily, String fontSubFamily) + { + this.fontFamily = fontFamily; + this.fontSubFamily = fontSubFamily; + } + + void setNonOtfGcid142(byte[] nonOtfGcid142) + { + this.nonOtfGcid142 = nonOtfGcid142; + } + + void 
setIsOTFAndPostScript(boolean isOTFAndPostScript) + { + this.isOTFAndPostScript = isOTFAndPostScript; + } + + // public because CFFParser is in a different package + public void setOtfROS(String otfRegistry, String otfOrdering, int otfSupplement) + { + this.otfRegistry = otfRegistry; + this.otfOrdering = otfOrdering; + this.otfSupplement = otfSupplement; + } +} diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java index 4d19475db31..3f1073e79ce 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java @@ -74,6 +74,16 @@ public class HeaderTable extends TTFTable @Override void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { + FontHeaders outHeaders = ttf.getLoadOnlyHeaders(); + if (outHeaders != null) { + // 44 == 4 + 4 + 4 + 4 + 2 + 2 + 2*8 + 4*2 + data.seek(data.getCurrentPosition() + 44); + macStyle = data.readUnsignedShort(); + outHeaders.setHeaderMacStyle(macStyle); + initialized = true; + return; + } + version = data.read32Fixed(); fontRevision = data.read32Fixed(); checkSumAdjustment = data.readUnsignedInt(); diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java index 04ac0afe90a..00e7d3cd6fc 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java @@ -62,11 +62,15 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException int numberOfNameRecords = data.readUnsignedShort(); int offsetToStartOfStringStorage = data.readUnsignedShort(); nameRecords = new ArrayList<>(numberOfNameRecords); + FontHeaders onlyHeaders = ttf.getLoadOnlyHeaders(); for (int i=0; i< numberOfNameRecords; i++) { NameRecord nr = new NameRecord(); nr.initData(ttf, data); - nameRecords.add(nr); + if (onlyHeaders == null || 
isUsefulForOnlyHeaders(nr)) + { + nameRecords.add(nr); + } } for (NameRecord nr : nameRecords) @@ -86,7 +90,7 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException lookupTable = new HashMap<>(nameRecords.size()); fillLookupTable(); - readInterestingStrings(); + readInterestingStrings(onlyHeaders); initialized = true; } @@ -141,7 +145,7 @@ private void fillLookupTable() } } - private void readInterestingStrings() + private void readInterestingStrings(FontHeaders onlyHeaders) { // extract strings of interest fontFamily = getEnglishName(NameRecord.NAME_FONT_FAMILY_NAME); @@ -163,6 +167,27 @@ private void readInterestingStrings() { psName = psName.trim(); } + + if (onlyHeaders != null) + { + onlyHeaders.setName(psName); + onlyHeaders.setFontFamily(fontFamily, fontSubFamily); + } + } + + private static boolean isUsefulForOnlyHeaders(NameRecord nr) + { + int nameId = nr.getNameId(); + // see "psName =" and "getEnglishName()" + if (nameId == NameRecord.NAME_POSTSCRIPT_NAME + || nameId == NameRecord.NAME_FONT_FAMILY_NAME + || nameId == NameRecord.NAME_FONT_SUB_FAMILY_NAME) + { + int languageId = nr.getLanguageId(); + return languageId == NameRecord.LANGUAGE_UNICODE + || languageId == NameRecord.LANGUAGE_WINDOWS_EN_US; + } + return false; } /** diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java index 07942b64b8a..1a56fe590bd 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java @@ -20,6 +20,7 @@ import java.io.InputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; /** @@ -32,6 +33,7 @@ public class TTFParser private static final Log LOG = LogFactory.getLog(TTFParser.class); private boolean isEmbedded = false; + private FontHeaders loadOnlyHeaders; /** * Constructor. 
@@ -60,7 +62,9 @@ public TTFParser(boolean isEmbedded) */ public TrueTypeFont parse(RandomAccessRead randomAccessRead) throws IOException { - RandomAccessReadDataStream dataStream = new RandomAccessReadDataStream(randomAccessRead); + TTFDataStream dataStream = loadOnlyHeaders != null + ? new RandomAccessReadUnbufferedDataStream(randomAccessRead) + : new RandomAccessReadDataStream(randomAccessRead); try { return parse(dataStream); @@ -115,6 +119,7 @@ TrueTypeFont parse(TTFDataStream raf) throws IOException { TrueTypeFont font = newFont(raf); font.setVersion(raf.read32Fixed()); + font.setLoadOnlyHeaders(loadOnlyHeaders); int numberOfTables = raf.readUnsignedShort(); int searchRange = raf.readUnsignedShort(); int entrySelector = raf.readUnsignedShort(); @@ -141,8 +146,21 @@ TrueTypeFont parse(TTFDataStream raf) throws IOException } } // parse tables - parseTables(font); - return font; + if (loadOnlyHeaders == null) + { + parseTables(font); + return font; + } + else + { + parseTableHeaders(font, loadOnlyHeaders); + return null; + } + } + + public void setLoadOnlyHeaders(FontHeaders loadOnlyHeaders) + { + this.loadOnlyHeaders = loadOnlyHeaders; } TrueTypeFont newFont(TTFDataStream raf) @@ -227,6 +245,93 @@ else if (!isOTF) } } + /** + * Based on {@link #parseTables()}. + * Parse all table headers and check if all needed tables are present. + * + * This method can be optimized further by skipping unused portions inside each individual table parser + * + * @param font the TrueTypeFont instance holding the parsed data. + * @throws IOException If there is an error parsing the TrueType font. 
+ */ + private void parseTableHeaders(TrueTypeFont font, FontHeaders outHeaders) throws IOException + { + try + { + font.getNaming(); // calls NamingTable.readTable(); + } + catch (IOException ex) + { + return; // ignore, empty name is reported differently than exception + } + try { + font.getHeader(); // calls HeaderTable.readTable(); + + // only these 5 are used + // sFamilyClass = os2WindowsMetricsTable.getFamilyClass(); + // usWeightClass = os2WindowsMetricsTable.getWeightClass(); + // ulCodePageRange1 = (int) os2WindowsMetricsTable.getCodePageRange1(); + // ulCodePageRange2 = (int) os2WindowsMetricsTable.getCodePageRange2(); + // panose = os2WindowsMetricsTable.getPanose(); + outHeaders.setOs2Windows(font.getOS2Windows()); + + boolean isOTFAndPostScript; + if (font instanceof OpenTypeFont && ((OpenTypeFont) font).isPostScript()) + { + isOTFAndPostScript = true; + if (((OpenTypeFont) font).isSupportedOTF()) + { + ((OpenTypeFont) font).getCFF(); // calls CFFTable.readTable(); + } + } + else + { + isOTFAndPostScript = false; + TTFTable gcid = font.getTableMap().get("gcid"); + if (gcid != null && gcid.getLength() >= FontHeaders.BYTES_GCID) + { + outHeaders.setNonOtfGcid142(font.getTableNBytes(gcid, FontHeaders.BYTES_GCID)); + } + } + outHeaders.setIsOTFAndPostScript(isOTFAndPostScript); + + boolean isOTF = font instanceof OpenTypeFont; + boolean isPostScript = isOTF ? isOTFAndPostScript : font.tables.containsKey(CFFTable.TAG); + + if (isPostScript && !isOTF) + { + loadOnlyHeaders.setException(new IOException("True Type fonts using CFF outlines are not supported")); + return; + } + + // list taken from parseTables(), detect them, but don't spend time parsing + final String[] mandatoryTables = { + HeaderTable.TAG, + HorizontalHeaderTable.TAG, + MaximumProfileTable.TAG, + isEmbedded ? null : PostScriptTable.TAG, // in an embedded font this table is optional + isPostScript ? null : IndexToLocationTable.TAG, + isPostScript ? null : GlyphTable.TAG, + isEmbedded ? 
null : NamingTable.TAG, + HorizontalMetricsTable.TAG, + isEmbedded ? null : CmapTable.TAG, + }; + + for (String tag : mandatoryTables) + { + if (tag != null && !font.tables.containsKey(tag)) + { + loadOnlyHeaders.setException(new IOException("'" + tag + "' table is mandatory")); + return; + } + } + } catch (IOException ex) { + loadOnlyHeaders.setException(ex); + } finally { + IOUtils.closeQuietly(font); + } + } + protected boolean allowCFF() { return false; diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java index 24833712683..4e799e439a0 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java @@ -144,12 +144,31 @@ public void processAllFonts(TrueTypeFontProcessor trueTypeFontProcessor) throws { for (int i = 0; i < numFonts; i++) { - TrueTypeFont font = getFontAtIndex(i); + TrueTypeFont font = getFontAtIndex(i, null); trueTypeFontProcessor.process(font); } } - - private TrueTypeFont getFontAtIndex(int idx) throws IOException + + /** + * Run the callback for each TT font in the collection. + * + * @param trueTypeFontProcessor the object with the callback method. 
+ * @throws IOException if something went wrong when calling the TrueTypeFontProcessor + */ + public void processAllFontHeaders(TrueTypeFontHeadersProcessor trueTypeFontProcessor) throws IOException + { + assert stream instanceof RandomAccessReadUnbufferedDataStream + : "For efficiency, we do not read whole file to byte[]"; + for (int i = 0; i < numFonts; i++) + { + FontHeaders headers = new FontHeaders(); + TrueTypeFont ttf = getFontAtIndex(i, headers); + IOUtils.closeQuietly(ttf); // all data is already saved in 'headers' + trueTypeFontProcessor.process(headers); + } + } + + private TrueTypeFont getFontAtIndex(int idx, FontHeaders onlyHeaders) throws IOException { stream.seek(fontOffsets[idx]); TTFParser parser; @@ -161,6 +180,7 @@ private TrueTypeFont getFontAtIndex(int idx) throws IOException { parser = new TTFParser(false); } + parser.setLoadOnlyHeaders(onlyHeaders); stream.seek(fontOffsets[idx]); return parser.parse(new TTCDataStream(stream)); } @@ -176,7 +196,7 @@ public TrueTypeFont getFontByName(String name) throws IOException { for (int i = 0; i < numFonts; i++) { - TrueTypeFont font = getFontAtIndex(i); + TrueTypeFont font = getFontAtIndex(i, null); if (font.getName().equals(name)) { return font; @@ -193,7 +213,16 @@ public interface TrueTypeFontProcessor { void process(TrueTypeFont ttf) throws IOException; } - + + /** + * Implement the callback method to call {@link TrueTypeCollection#processAllFontHeaders(TrueTypeFontHeadersProcessor)}. 
+ */ + @FunctionalInterface + public interface TrueTypeFontHeadersProcessor + { + void process(FontHeaders ttf) throws IOException; + } + @Override public void close() throws IOException { diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java index c5fd653fe51..dc0bbf525bc 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java @@ -55,6 +55,7 @@ public class TrueTypeFont implements FontBoxFont, Closeable private final Object lockReadtable = new Object(); private final Object lockPSNames = new Object(); private final List enabledGsubFeatures = new ArrayList<>(); + private FontHeaders loadOnlyHeaders; /** * Constructor. Clients should use the TTFParser to create a new TrueTypeFont object. @@ -178,6 +179,32 @@ protected TTFTable getTable(String tag) throws IOException return table; } + /** + * Returns the raw bytes of the given table, no more than {@code limit} bytes. + * + * @param table the table to read. + * @param limit maximum length of array to return + * @return the raw bytes of the given table + * + * @throws IOException if there was an error accessing the table. + */ + public byte[] getTableNBytes(TTFTable table, int limit) throws IOException + { + synchronized (lockReadtable) + { + // save current position + long currentPosition = data.getCurrentPosition(); + data.seek(table.getOffset()); + + // read all data + byte[] bytes = data.read(Math.min(limit, (int) table.getLength())); + + // restore current position + data.seek(currentPosition); + return bytes; + } + } + /** * This will get the naming table for the true type font. 
* @@ -790,6 +817,15 @@ public void enableVerticalSubstitutions() enableGsubFeature("vert"); } + void setLoadOnlyHeaders(FontHeaders loadOnlyHeaders) { + this.loadOnlyHeaders = loadOnlyHeaders; + } + + /** Used by table parsers to detect 'only headers' mode */ + FontHeaders getLoadOnlyHeaders() { + return loadOnlyHeaders; + } + @Override public String toString() { diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java index cd09e36b98a..3b9181bb8af 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java @@ -37,9 +37,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.FontBoxFont; -import org.apache.fontbox.cff.CFFCIDFont; -import org.apache.fontbox.cff.CFFFont; -import org.apache.fontbox.ttf.NamingTable; +import org.apache.fontbox.ttf.FontHeaders; import org.apache.fontbox.ttf.OS2WindowsMetricsTable; import org.apache.fontbox.ttf.OTFParser; import org.apache.fontbox.ttf.OpenTypeFont; @@ -59,6 +57,12 @@ final class FileSystemFontProvider extends FontProvider { private static final Log LOG = LogFactory.getLog(FileSystemFontProvider.class); + /** + * This option changes publicly visible behaviour: ".pdfbox.cache" file will have hash="-" for all files. + * After implementing {@link FontHeaders}, parsing font headers is faster than checksumming anyway. 
+ */ + private static final boolean SKIP_CHECKSUMS = "true".equals(System.getProperty("pdfbox.fontcache.skipchecksums")); + private static final String CHECKSUM_PLACEHOLDER = "-"; private final List fontInfoList = new ArrayList<>(); private final FontCache cache; @@ -315,7 +319,7 @@ private FSFontInfo createFSIgnored(File file, FontFormat format, String postScri String hash; try { - hash = computeHash(Files.newInputStream(file.toPath())); + hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(file.toPath())); } catch (IOException ex) { @@ -537,6 +541,11 @@ private List loadDiskCache(List files) { try (BufferedReader reader = new BufferedReader(new FileReader(diskCacheFile))) { + // consequent lines usually share the same font file (e.g. "Courier", "Courier-Bold", "Courier-Oblique"). + // unused if SKIP_CHECKSUMS + File lastFile = null; + String lastHash = null; + // String line; while ((line = reader.readLine()) != null) { @@ -599,23 +608,36 @@ private List loadDiskCache(List files) } if (fontFile.exists()) { - boolean keep = false; // if the file exists, find out whether it's the same file. 
// first check whether time is different and if yes, whether hash is different - if (fontFile.lastModified() != lastModified) + boolean keep = fontFile.lastModified() == lastModified; + if (!keep && !SKIP_CHECKSUMS) { - String newHash = computeHash(Files.newInputStream(fontFile.toPath())); - if (newHash.equals(hash)) + String newHash; + if (hash.equals(lastHash) && fontFile.equals(lastFile)) + { + newHash = lastHash; // already computed + } + else + { + try + { + newHash = computeHash(Files.newInputStream(fontFile.toPath())); + lastFile = fontFile; + lastHash = newHash; + } + catch (IOException ex) + { + LOG.debug("Error reading font file " + fontFile.getAbsolutePath(), ex); + newHash = ""; + } + } + if (hash.equals(newHash)) { keep = true; lastModified = fontFile.lastModified(); - hash = newHash; } } - else - { - keep = true; - } if (keep) { FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, @@ -658,9 +680,10 @@ private List loadDiskCache(List files) */ private void addTrueTypeCollection(final File ttcFile) throws IOException { - try (TrueTypeCollection ttc = new TrueTypeCollection(ttcFile)) + try (TrueTypeCollection ttc = TrueTypeCollection.createUnbuffered(ttcFile)) { - ttc.processAllFonts(ttf -> addTrueTypeFontImpl(ttf, ttcFile)); + String hash = SKIP_CHECKSUMS ? 
CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(ttcFile.toPath())); + ttc.processAllFontHeaders(ttf -> addTrueTypeFontImpl(ttf, ttcFile, hash)); } catch (IOException e) { @@ -677,20 +700,22 @@ private void addTrueTypeFont(File ttfFile) throws IOException FontFormat fontFormat = null; try { + TTFParser parser; if (ttfFile.getPath().toLowerCase().endsWith(".otf")) { fontFormat = FontFormat.OTF; - OTFParser parser = new OTFParser(false); - OpenTypeFont otf = parser.parse(new RandomAccessReadBufferedFile(ttfFile)); - addTrueTypeFontImpl(otf, ttfFile); + parser = new OTFParser(false); } else { fontFormat = FontFormat.TTF; - TTFParser parser = new TTFParser(false); - TrueTypeFont ttf = parser.parse(new RandomAccessReadBufferedFile(ttfFile)); - addTrueTypeFontImpl(ttf, ttfFile); + parser = new TTFParser(false); } + FontHeaders headers = new FontHeaders(); + parser.setLoadOnlyHeaders(headers); + IOUtils.closeQuietly(parser.parse(new RandomAccessReadBufferedFile(ttfFile))); + addTrueTypeFontImpl(headers, ttfFile, + SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(ttfFile.toPath()))); } catch (IOException e) { @@ -702,25 +727,27 @@ private void addTrueTypeFont(File ttfFile) throws IOException /** * Adds an OTF or TTF font to the file cache. To reduce memory, the parsed font is not cached. 
*/ - private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws IOException + private void addTrueTypeFontImpl(FontHeaders ttf, File file, String fileHash) throws IOException { - try + final IOException exception = ttf.getException(); + if (exception == null) { // read PostScript name, if any - if (ttf.getName() != null && ttf.getName().contains("|")) + final String name = ttf.getName(); + if (name != null && name.contains("|")) { fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skippipeinname*")); - LOG.warn("Skipping font with '|' in name " + ttf.getName() + " in file " + file); + LOG.warn("Skipping font with '|' in name " + name + " in file " + file); } - else if (ttf.getName() != null) + else if (name != null) { // ignore bitmap fonts - if (ttf.getHeader() == null) + Integer macStyle = ttf.getHeaderMacStyle(); + if (macStyle == null) { - fontInfoList.add(createFSIgnored(file, FontFormat.TTF, ttf.getName())); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, name)); return; } - int macStyle = ttf.getHeader().getMacStyle(); int sFamilyClass = -1; int usWeightClass = -1; @@ -738,36 +765,24 @@ else if (ttf.getName() != null) panose = os2WindowsMetricsTable.getPanose(); } - String hash = computeHash(ttf.getOriginalData()); - String format; - if (ttf instanceof OpenTypeFont && ((OpenTypeFont) ttf).isPostScript()) + FontFormat format; + CIDSystemInfo ros = null; + if (ttf.isOpenTypePostScript()) { - format = "OTF"; - CIDSystemInfo ros = null; - OpenTypeFont otf = (OpenTypeFont) ttf; - if (otf.isSupportedOTF() && otf.getCFF() != null) + format = FontFormat.OTF; + String registry = ttf.getOtfRegistry(); + String ordering = ttf.getOtfOrdering(); + if (registry != null || ordering != null) { - CFFFont cff = otf.getCFF().getFont(); - if (cff instanceof CFFCIDFont) - { - CFFCIDFont cidFont = (CFFCIDFont) cff; - String registry = cidFont.getRegistry(); - String ordering = cidFont.getOrdering(); - int supplement = cidFont.getSupplement(); - ros = new 
CIDSystemInfo(registry, ordering, supplement); - } + ros = new CIDSystemInfo(registry, ordering, ttf.getOtfSupplement()); } - fontInfoList.add(new FSFontInfo(file, FontFormat.OTF, ttf.getName(), ros, - usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this, hash, file.lastModified())); } else { - CIDSystemInfo ros = null; - if (ttf.getTableMap().containsKey("gcid")) + byte[] bytes = ttf.getNonOtfTableGCID142(); + if (bytes != null) { // Apple's AAT fonts have a "gcid" table with CID info - byte[] bytes = ttf.getTableBytes(ttf.getTableMap().get("gcid")); String reg = new String(bytes, 10, 64, StandardCharsets.US_ASCII); String registryName = reg.substring(0, reg.indexOf('\0')); String ord = new String(bytes, 76, 64, StandardCharsets.US_ASCII); @@ -775,22 +790,17 @@ else if (ttf.getName() != null) int supplementVersion = bytes[140] << 8 & (bytes[141] & 0xFF); ros = new CIDSystemInfo(registryName, orderName, supplementVersion); } - - format = "TTF"; - fontInfoList.add(new FSFontInfo(file, FontFormat.TTF, ttf.getName(), ros, - usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this, hash, file.lastModified())); + format = FontFormat.TTF; } + fontInfoList.add(new FSFontInfo(file, format, name, ros, + usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, + macStyle, panose, this, fileHash, file.lastModified())); if (LOG.isTraceEnabled()) { - NamingTable name = ttf.getNaming(); - if (name != null) - { - LOG.trace(format +": '" + name.getPostScriptName() + "' / '" + - name.getFontFamily() + "' / '" + - name.getFontSubFamily() + "'"); - } + LOG.trace(format.name() +": '" + name + "' / '" + + ttf.getFontFamily() + "' / '" + + ttf.getFontSubFamily() + "'"); } } else @@ -799,14 +809,10 @@ else if (ttf.getName() != null) LOG.warn("Missing 'name' entry for PostScript name in font " + file); } } - catch (IOException e) + else { fontInfoList.add(createFSIgnored(file, FontFormat.TTF, 
"*skipexception*")); - LOG.warn("Could not load font file: " + file, e); - } - finally - { - ttf.close(); + LOG.warn("Could not load font file: " + file, exception); } } @@ -830,7 +836,7 @@ private void addType1Font(File pfbFile) throws IOException LOG.warn("Skipping font with '|' in name " + type1.getName() + " in file " + pfbFile); return; } - String hash = computeHash(Files.newInputStream(pfbFile.toPath())); + String hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(pfbFile.toPath())); fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, type1.getName(), null, -1, -1, 0, 0, -1, null, this, hash, pfbFile.lastModified()));