diff --git a/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java b/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java index f2b08cd106f..0507e3a707c 100644 --- a/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.fontbox.ttf.FontHeaders; import org.apache.pdfbox.io.RandomAccessRead; @@ -106,13 +107,61 @@ public List parse(RandomAccessRead randomAccessRead) throws IOException } /** - * Parse CFF font using a DataInput as input. + * Extract "Registry", "Ordering" and "Supplement" properties from the first CFF subfont. * - * @param input the source to be parsed + * @param randomAccessRead the source to be parsed + * @param outHeaders where to put results * @return the parsed CFF fonts * @throws IOException If there is an error reading from the stream */ - private List parse(DataInput input) throws IOException + public void parseFirstSubFontROS(RandomAccessRead randomAccessRead, FontHeaders outHeaders) throws IOException + { + // this method is a simplified and merged version of parse(RandomAccessRead) > parse(DataInput) > parseFont(...) + + // start code from parse(RandomAccessRead) + randomAccessRead.seek(0); + DataInput input = new DataInputRandomAccessRead(randomAccessRead); + + // start code from parse(DataInput) + input = skipHeader(input); + String[] nameIndex = readStringIndexData(input); + if (nameIndex.length == 0) + { + outHeaders.setError("Name index missing in CFF font"); + return; + } + byte[][] topDictIndex = readIndexData(input); + if (topDictIndex.length == 0) + { + outHeaders.setError("Top DICT INDEX missing in CFF font"); + return; + } + + // 'stringIndex' is required by 'parseROS() > readString()' + stringIndex = readStringIndexData(input); + + // start code from parseFont(...) 
+ DataInputByteArray topDictInput = new DataInputByteArray(topDictIndex[0]); + DictData topDict = readDictData(topDictInput); + + DictData.Entry syntheticBaseEntry = topDict.getEntry("SyntheticBase"); + if (syntheticBaseEntry != null) + { + outHeaders.setError("Synthetic Fonts are not supported"); + return; + } + + CFFCIDFont cffCIDFont = parseROS(topDict); + if (cffCIDFont != null) + { + outHeaders.setOtfROS( + cffCIDFont.getRegistry(), + cffCIDFont.getOrdering(), + cffCIDFont.getSupplement()); + } + } + + private DataInput skipHeader(DataInput input) throws IOException { String firstTag = readTagName(input); // try to determine which kind of font we have @@ -132,6 +181,19 @@ private List parse(DataInput input) throws IOException @SuppressWarnings("unused") Header header = readHeader(input); + return input; + } + + /** + * Parse CFF font using a DataInput as input. + * + * @param input the source to be parsed + * @return the parsed CFF fonts + * @throws IOException If there is an error reading from the stream + */ + private List parse(DataInput input) throws IOException + { + input = skipHeader(input); String[] nameIndex = readStringIndexData(input); if (nameIndex.length == 0) { @@ -463,6 +525,28 @@ private static Double readRealNumber(DataInput input) throws IOException } } + /** + * Extracts Registry, Ordering and Supplement from {@code topDict["ROS"]}. 
+ */ + private CFFCIDFont parseROS(DictData topDict) throws IOException + { + // determine if this is a Type 1-equivalent font or a CIDFont + DictData.Entry rosEntry = topDict.getEntry("ROS"); + if (rosEntry != null) + { + if (rosEntry.size() < 3) + { + throw new IOException("ROS entry must have 3 elements"); + } + CFFCIDFont cffCIDFont = new CFFCIDFont(); + cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue())); + cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue())); + cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue()); + return cffCIDFont; + } + return null; + } + private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) throws IOException { // top dict @@ -476,21 +560,12 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr throw new IOException("Synthetic Fonts are not supported"); } - // determine if this is a Type 1-equivalent font or a CIDFont CFFFont font; - boolean isCIDFont = topDict.getEntry("ROS") != null; - if (isCIDFont) + CFFCIDFont cffCIDFont = parseROS(topDict); + // determine if this is a Type 1-equivalent font or a CIDFont + boolean isCIDFont = cffCIDFont != null; + if (cffCIDFont != null) { - CFFCIDFont cffCIDFont = new CFFCIDFont(); - DictData.Entry rosEntry = topDict.getEntry("ROS"); - if (rosEntry == null || rosEntry.size() < 3) - { - throw new IOException("ROS entry must have 3 elements"); - } - cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue())); - cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue())); - cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue()); - font = cffCIDFont; } else diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java index 7d6d14cae0c..930561d57d3 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java @@ -20,6 +20,8 @@ import 
java.io.IOException; import org.apache.fontbox.cff.CFFFont; import org.apache.fontbox.cff.CFFParser; +import org.apache.pdfbox.io.RandomAccessRead; +import org.apache.pdfbox.io.RandomAccessReadBuffer; /** * PostScript font program (compact font format). @@ -56,6 +58,27 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException initialized = true; } + /** {@inheritDoc} */ + @Override + void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException + { + try (RandomAccessRead subReader = data.createSubView(getLength())) + { + RandomAccessRead reader; + if (subReader != null) + { + reader = subReader; + } + else + { + assert false : "It is inefficient to read TTFDataStream into an array"; + byte[] bytes = data.read((int)getLength()); + reader = new RandomAccessReadBuffer(bytes); + } + new CFFParser().parseFirstSubFontROS(reader, outHeaders); + } + } + /** * Returns the CFF font, which is a compact representation of a PostScript Type 1, or CIDFont * diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java b/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java new file mode 100644 index 00000000000..ea37cc87f36 --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.fontbox.ttf; + +/** + * To improve performance of {@code FileSystemFontProvider.scanFonts(...)}, + * this class is used both as a marker (to skip unused data) and as a storage for collected data. + *

+ * Tables it needs:<ul>
+ * <li>NamingTable.TAG
+ * <li>HeaderTable.TAG
+ * <li>OS2WindowsMetricsTable.TAG
+ * <li>CFFTable.TAG (for OTF)
+ * <li>"gcid" (for non-OTF)
+ * </ul>
+ * + * @author Mykola Bohdiuk + */ +public final class FontHeaders +{ + static final int BYTES_GCID = 142; + + private String error; + private String name; + private Integer headerMacStyle; + private OS2WindowsMetricsTable os2Windows; + private String fontFamily; + private String fontSubFamily; + private byte[] nonOtfGcid142; + // + private boolean isOTFAndPostScript; + private String otfRegistry; + private String otfOrdering; + private int otfSupplement; + + public String getError() + { + return error; + } + + public String getName() + { + return name; + } + + /** + * null == no HeaderTable, {@code ttf.getHeader().getMacStyle()} + */ + public Integer getHeaderMacStyle() + { + return headerMacStyle; + } + + public OS2WindowsMetricsTable getOS2Windows() + { + return os2Windows; + } + + // only when LOGGER(FileSystemFontProvider).isTraceEnabled() tracing: FontFamily, FontSubfamily + public String getFontFamily() + { + return fontFamily; + } + + public String getFontSubFamily() + { + return fontSubFamily; + } + + public boolean isOpenTypePostScript() + { + return isOTFAndPostScript; + } + + public byte[] getNonOtfTableGCID142() + { + return nonOtfGcid142; + } + + public String getOtfRegistry() + { + return otfRegistry; + } + + public String getOtfOrdering() + { + return otfOrdering; + } + + public int getOtfSupplement() + { + return otfSupplement; + } + + public void setError(String exception) + { + this.error = exception; + } + + void setName(String name) + { + this.name = name; + } + + void setHeaderMacStyle(Integer headerMacStyle) + { + this.headerMacStyle = headerMacStyle; + } + + void setOs2Windows(OS2WindowsMetricsTable os2Windows) + { + this.os2Windows = os2Windows; + } + + void setFontFamily(String fontFamily, String fontSubFamily) + { + this.fontFamily = fontFamily; + this.fontSubFamily = fontSubFamily; + } + + void setNonOtfGcid142(byte[] nonOtfGcid142) + { + this.nonOtfGcid142 = nonOtfGcid142; + } + + void setIsOTFAndPostScript(boolean isOTFAndPostScript) 
+ { + this.isOTFAndPostScript = isOTFAndPostScript; + } + + // public because CFFParser is in a different package + public void setOtfROS(String otfRegistry, String otfOrdering, int otfSupplement) + { + this.otfRegistry = otfRegistry; + this.otfOrdering = otfOrdering; + this.otfSupplement = otfSupplement; + } +} diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java index 4d19475db31..105c3393475 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java @@ -64,6 +64,16 @@ public class HeaderTable extends TTFTable super(); } + /** {@inheritDoc} */ + @Override + void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException + { + // 44 == 4 + 4 + 4 + 4 + 2 + 2 + 2*8 + 4*2, see read() + data.seek(data.getCurrentPosition() + 44); + macStyle = data.readUnsignedShort(); + outHeaders.setHeaderMacStyle(macStyle); + } + /** * This will read the required data from the stream. 
* diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java index bc8fe05a349..3cd50524b5c 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/NamingTable.java @@ -57,6 +57,21 @@ public class NamingTable extends TTFTable */ @Override void read(TrueTypeFont ttf, TTFDataStream data) throws IOException + { + read(ttf, data, false); + initialized = true; + } + + /** {@inheritDoc} */ + @Override + void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException + { + read(ttf, data, true); + outHeaders.setName(psName); + outHeaders.setFontFamily(fontFamily, fontSubFamily); + } + + private void read(TrueTypeFont ttf, TTFDataStream data, boolean onlyHeaders) throws IOException { int formatSelector = data.readUnsignedShort(); int numberOfNameRecords = data.readUnsignedShort(); @@ -66,7 +81,10 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { NameRecord nr = new NameRecord(); nr.initData(ttf, data); - nameRecords.add(nr); + if (!onlyHeaders || isUsefulForOnlyHeaders(nr)) + { + nameRecords.add(nr); + } } for (NameRecord nr : nameRecords) @@ -87,8 +105,6 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException lookupTable = new HashMap<>(nameRecords.size()); fillLookupTable(); readInterestingStrings(); - - initialized = true; } private Charset getCharset(NameRecord nr) @@ -162,6 +178,21 @@ private void readInterestingStrings() } } + private static boolean isUsefulForOnlyHeaders(NameRecord nr) + { + int nameId = nr.getNameId(); + // see "psName =" and "getEnglishName()" + if (nameId == NameRecord.NAME_POSTSCRIPT_NAME + || nameId == NameRecord.NAME_FONT_FAMILY_NAME + || nameId == NameRecord.NAME_FONT_SUB_FAMILY_NAME) + { + int languageId = nr.getLanguageId(); + return languageId == NameRecord.LANGUAGE_UNICODE + || languageId == 
NameRecord.LANGUAGE_WINDOWS_EN_US; + } + return false; + } + /** * Helper to get English names by best effort. */ diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java index 953fc4082ec..2034e3d7e0e 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadDataStream.java @@ -19,9 +19,12 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; +import org.apache.pdfbox.io.RandomAccessReadBuffer; /** * An implementation of the TTFDataStream using RandomAccessRead as source. @@ -30,6 +33,8 @@ */ class RandomAccessReadDataStream extends TTFDataStream { + private static final Log LOG = LogFactory.getLog(RandomAccessReadDataStream.class); + private final long length; private final byte[] data; private int currentPosition = 0; @@ -174,6 +179,20 @@ public int read(byte[] b, int off, int len) throws IOException return bytesToRead; } + @Override + public RandomAccessRead createSubView(long length) + { + try + { + return new RandomAccessReadBuffer(data).createView(currentPosition, length); + } + catch (IOException e) + { + LOG.warn("Could not create a SubView", e); + return null; + } + } + /** * {@inheritDoc} */ diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java new file mode 100644 index 00000000000..3e1d1616a93 --- /dev/null +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/RandomAccessReadUnbufferedDataStream.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.fontbox.ttf; + +import java.io.IOException; +import java.io.InputStream; +import org.apache.pdfbox.io.RandomAccessRead; +import org.apache.pdfbox.io.RandomAccessReadView; + +/** + * In contrast to {@link RandomAccessReadDataStream}, + * this class doesn't pre-load {@code RandomAccessRead} into a {@code byte[]}, + * it works with {@link RandomAccessRead} directly. + * + * Performance: it is much faster if most of the buffer is skipped, and slower if whole buffer is read() + */ +class RandomAccessReadUnbufferedDataStream extends TTFDataStream +{ + private final long length; + private final RandomAccessRead randomAccessRead; + + /** + * @throws IOException If there is a problem reading the source length. + */ + RandomAccessReadUnbufferedDataStream(RandomAccessRead randomAccessRead) throws IOException + { + this.length = randomAccessRead.length(); + this.randomAccessRead = randomAccessRead; + } + + /** + * {@inheritDoc} + */ + @Override + public long getCurrentPosition() throws IOException + { + return randomAccessRead.getPosition(); + } + + /** + * Close the underlying resources. + * + * @throws IOException If there is an error closing the resources. 
+ */ + @Override + public void close() throws IOException + { + randomAccessRead.close(); + } + + /** + * {@inheritDoc} + */ + @Override + public int read() throws IOException + { + return randomAccessRead.read(); + } + + /** + * {@inheritDoc} + */ + @Override + public final long readLong() throws IOException + { + return ((long) readInt() << 32) | (readInt() & 0xFFFFFFFFL); + } + + /** + * {@inheritDoc} + */ + private int readInt() throws IOException + { + int b1 = read(); + int b2 = read(); + int b3 = read(); + int b4 = read(); + return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4; + } + + /** + * {@inheritDoc} + */ + @Override + public void seek(long pos) throws IOException + { + randomAccessRead.seek(pos); + } + + /** + * {@inheritDoc} + */ + @Override + public int read(byte[] b, int off, int len) throws IOException + { + return randomAccessRead.read(b, off, len); + } + + /** + * Lifetime of returned InputStream is bound by {@code this} lifetime, it won't close underlying {@code RandomAccessRead}. 
+ * + * {@inheritDoc} + */ + @Override + public InputStream getOriginalData() throws IOException + { + return new RandomAccessReadNonClosingInputStream(randomAccessRead.createView(0, length)); + } + + /** + * {@inheritDoc} + */ + @Override + public long getOriginalDataSize() + { + return length; + } + + @Override + public RandomAccessRead createSubView(long length) + { + try + { + return randomAccessRead.createView(randomAccessRead.getPosition(), length); + } + catch (IOException ex) + { + assert false : "Please implement " + randomAccessRead.getClass() + ".createView()"; + return null; + } + } + + private static final class RandomAccessReadNonClosingInputStream extends InputStream + { + + private final RandomAccessReadView randomAccessRead; + + public RandomAccessReadNonClosingInputStream(RandomAccessReadView randomAccessRead) + { + this.randomAccessRead = randomAccessRead; + } + + @Override + public int read() throws IOException + { + return randomAccessRead.read(); + } + + @Override + public int read(byte[] b) throws IOException + { + return randomAccessRead.read(b); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException + { + return randomAccessRead.read(b, off, len); + } + + @Override + public long skip(long n) throws IOException + { + randomAccessRead.seek(randomAccessRead.getPosition() + n); + return n; + } + + @Override + public void close() throws IOException + { + // WARNING: .close() will close RandomAccessReadMemoryMappedFile if this View was based on it +// randomAccessRead.close(); + } + } +} diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java index bcdae654406..dff61d8977b 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTCDataStream.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.InputStream; +import 
org.apache.pdfbox.io.RandomAccessRead; /** * A wrapper for a TTF stream inside a TTC file, does not close the underlying shared stream. @@ -83,4 +84,9 @@ public long getOriginalDataSize() return stream.getOriginalDataSize(); } + @Override + public RandomAccessRead createSubView(long length) + { + return stream.createSubView(length); + } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java index 4f6f96e3926..3ea024ee252 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java @@ -24,6 +24,7 @@ import java.nio.charset.StandardCharsets; import java.util.Calendar; import java.util.TimeZone; +import org.apache.pdfbox.io.RandomAccessRead; /** * An abstract class to read a data stream. @@ -279,6 +280,17 @@ public byte[] read(int numberOfBytes) throws IOException */ public abstract int read(byte[] b, int off, int len) throws IOException; + /** + * Creates a view from current position to {@code pos + length}. + * It can be faster than {@code read(length)} if you only need a few bytes. + * {@code SubView.close()} should never close {@code TTFDataStream.this}, only itself. + * + * @return A view or null (caller can use {@link #read} instead). Please close() the result + */ + public RandomAccessRead createSubView(long length) { + return null; + } + /** * Get the current position in the stream. * diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java index 07942b64b8a..154216f3c9a 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java @@ -104,6 +104,22 @@ public TrueTypeFont parseEmbedded(InputStream inputStream) throws IOException } } + /** + * Parse a RandomAccessRead and return a TrueType font. 
+ * + * @param randomAccessRead The RandomAccessRead to be read from. It will be closed before returning. + * @return TrueType font headers. + * @throws IOException If there is an error parsing the TrueType font. + */ + public FontHeaders parseTableHeaders(RandomAccessRead randomAccessRead) throws IOException + { + try (TTFDataStream dataStream = new RandomAccessReadUnbufferedDataStream(randomAccessRead)) + { + return parseTableHeaders(dataStream); + // dataStream closes randomAccessRead + } + } + /** * Parse a file and get a true type font. * @@ -111,7 +127,7 @@ public TrueTypeFont parseEmbedded(InputStream inputStream) throws IOException * @return A TrueType font. * @throws IOException If there is an error parsing the TrueType font. */ - TrueTypeFont parse(TTFDataStream raf) throws IOException + private TrueTypeFont createFontWithTables(TTFDataStream raf) throws IOException { TrueTypeFont font = newFont(raf); font.setVersion(raf.read32Fixed()); @@ -140,7 +156,12 @@ TrueTypeFont parse(TTFDataStream raf) throws IOException } } } - // parse tables + return font; + } + + TrueTypeFont parse(TTFDataStream raf) throws IOException + { + TrueTypeFont font = createFontWithTables(raf); parseTables(font); return font; } @@ -227,6 +248,81 @@ else if (!isOTF) } } + /** + * Based on {@link #parseTables()}. + * Parse all table headers and check if all needed tables are present. + * + * This method can be optimized further by skipping unused portions inside each individual table parser + * + * @param raf the data stream to read the font's table headers from. + * @throws IOException If there is an error parsing the TrueType font. 
+ */ + FontHeaders parseTableHeaders(TTFDataStream raf) throws IOException + { + FontHeaders outHeaders = new FontHeaders(); + try (TrueTypeFont font = createFontWithTables(raf)) + { + font.readTableHeaders(NamingTable.TAG, outHeaders); // calls NamingTable.readHeaders(); + font.readTableHeaders(HeaderTable.TAG, outHeaders); // calls HeaderTable.readHeaders(); + + // only these 5 are used + // sFamilyClass = os2WindowsMetricsTable.getFamilyClass(); + // usWeightClass = os2WindowsMetricsTable.getWeightClass(); + // ulCodePageRange1 = (int) os2WindowsMetricsTable.getCodePageRange1(); + // ulCodePageRange2 = (int) os2WindowsMetricsTable.getCodePageRange2(); + // panose = os2WindowsMetricsTable.getPanose(); + outHeaders.setOs2Windows(font.getOS2Windows()); + + boolean isOTFAndPostScript; + if (font instanceof OpenTypeFont && ((OpenTypeFont) font).isPostScript()) + { + isOTFAndPostScript = true; + if (((OpenTypeFont) font).isSupportedOTF()) + { + font.readTableHeaders(CFFTable.TAG, outHeaders); // calls CFFTable.readHeaders(); + } + } + else if (!(font instanceof OpenTypeFont) && font.tables.containsKey(CFFTable.TAG)) + { + outHeaders.setError("True Type fonts using CFF outlines are not supported"); + return outHeaders; + } + else + { + isOTFAndPostScript = false; + TTFTable gcid = font.getTableMap().get("gcid"); + if (gcid != null && gcid.getLength() >= FontHeaders.BYTES_GCID) + { + outHeaders.setNonOtfGcid142(font.getTableNBytes(gcid, FontHeaders.BYTES_GCID)); + } + } + outHeaders.setIsOTFAndPostScript(isOTFAndPostScript); + + // list taken from parseTables(), detect them, but don't spend time parsing + final String[] mandatoryTables = { + HeaderTable.TAG, + HorizontalHeaderTable.TAG, + MaximumProfileTable.TAG, + isEmbedded ? null : PostScriptTable.TAG, // in an embedded font this table is optional + isOTFAndPostScript ? null : IndexToLocationTable.TAG, + isOTFAndPostScript ? null : GlyphTable.TAG, + isEmbedded ? 
null : NamingTable.TAG, + HorizontalMetricsTable.TAG, + isEmbedded ? null : CmapTable.TAG, + }; + + for (String tag : mandatoryTables) + { + if (tag != null && !font.tables.containsKey(tag)) + { + outHeaders.setError("'" + tag + "' table is mandatory"); + return outHeaders; + } + } + } + return outHeaders; + } + protected boolean allowCFF() { return false; diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java index b917fd84cd9..cde6e932e84 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TTFTable.java @@ -126,4 +126,16 @@ public boolean getInitialized() void read(TrueTypeFont ttf, TTFDataStream data) throws IOException { } + + /** + * This will read required headers from the stream into outHeaders. + * + * @param ttf The font that is being read. + * @param data The stream to read the data from. + * @param outHeaders The class to write the data to. + * @throws IOException If there is an error reading the data. 
+ */ + void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException + { + } } diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java index afb2df7a56a..6ddad4dcc0a 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeCollection.java @@ -47,7 +47,7 @@ public class TrueTypeCollection implements Closeable */ public TrueTypeCollection(File file) throws IOException { - this(new RandomAccessReadBufferedFile(file), true); + this(createBufferedDataStream(new RandomAccessReadBufferedFile(file), true)); } /** @@ -58,7 +58,7 @@ public TrueTypeCollection(File file) throws IOException */ public TrueTypeCollection(InputStream stream) throws IOException { - this(new RandomAccessReadBuffer(stream), false); + this(createBufferedDataStream(new RandomAccessReadBuffer(stream), false)); } /** @@ -66,21 +66,12 @@ public TrueTypeCollection(InputStream stream) throws IOException * * @param randomAccessRead * @param closeAfterReading {@code true} to close randomAccessRead + * @param buffered {@code true} to use {@link RandomAccessReadDataStream}, {@code false} to use {@link RandomAccessReadUnbufferedDataStream} * @throws IOException If the font could not be parsed. 
*/ - private TrueTypeCollection(RandomAccessRead randomAccessRead, boolean closeAfterReading) throws IOException + private TrueTypeCollection(TTFDataStream stream) throws IOException { - try - { - this.stream = new RandomAccessReadDataStream(randomAccessRead); - } - finally - { - if (closeAfterReading) - { - IOUtils.closeQuietly(randomAccessRead); - } - } + this.stream = stream; // TTC header String tag = stream.readTag(); @@ -107,12 +98,27 @@ private TrueTypeCollection(RandomAccessRead randomAccessRead, boolean closeAfter int ulDsigOffset = stream.readUnsignedShort(); } } - + + private static TTFDataStream createBufferedDataStream(RandomAccessRead randomAccessRead, boolean closeAfterReading) throws IOException + { + try + { + return new RandomAccessReadDataStream(randomAccessRead); + } + finally + { + if (closeAfterReading) + { + IOUtils.closeQuietly(randomAccessRead); + } + } + } + /** * Run the callback for each TT font in the collection. * * @param trueTypeFontProcessor the object with the callback method. - * @throws IOException if something went wrong when calling the TrueTypeFontProcessor + * @throws IOException if something went wrong when parsing any font or calling the TrueTypeFontProcessor */ public void processAllFonts(TrueTypeFontProcessor trueTypeFontProcessor) throws IOException { @@ -122,8 +128,37 @@ public void processAllFonts(TrueTypeFontProcessor trueTypeFontProcessor) throws trueTypeFontProcessor.process(font); } } - + + /** + * Run the callback for each TT font in the collection. + * + * @param trueTypeFontProcessor the object with the callback method. 
+ * @throws IOException if something went wrong when parsing any font + */ + public static void processAllFontHeaders(File ttcFile, TrueTypeFontHeadersProcessor trueTypeFontProcessor) throws IOException + { + try ( + RandomAccessRead read = new RandomAccessReadBufferedFile(ttcFile); + TTFDataStream stream = new RandomAccessReadUnbufferedDataStream(read); + TrueTypeCollection ttc = new TrueTypeCollection(stream) + ) + { + for (int i = 0; i < ttc.numFonts; i++) + { + TTFParser parser = ttc.createFontParserAtIndexAndSeek(i); + FontHeaders headers = parser.parseTableHeaders(new TTCDataStream(ttc.stream)); + trueTypeFontProcessor.process(headers); + } + } + } + private TrueTypeFont getFontAtIndex(int idx) throws IOException + { + TTFParser parser = createFontParserAtIndexAndSeek(idx); + return parser.parse(new TTCDataStream(stream)); + } + + private TTFParser createFontParserAtIndexAndSeek(int idx) throws IOException { stream.seek(fontOffsets[idx]); TTFParser parser; @@ -136,7 +171,7 @@ private TrueTypeFont getFontAtIndex(int idx) throws IOException parser = new TTFParser(false); } stream.seek(fontOffsets[idx]); - return parser.parse(new TTCDataStream(stream)); + return parser; } /** @@ -167,7 +202,16 @@ public interface TrueTypeFontProcessor { void process(TrueTypeFont ttf) throws IOException; } - + + /** + * Implement the callback method to call {@link TrueTypeCollection#processAllFontHeaders(File, TrueTypeFontHeadersProcessor)}. 
+ */ + @FunctionalInterface + public interface TrueTypeFontHeadersProcessor + { + void process(FontHeaders ttf); + } + @Override public void close() throws IOException { diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java index c5fd653fe51..276f1cef2b2 100644 --- a/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java +++ b/fontbox/src/main/java/org/apache/fontbox/ttf/TrueTypeFont.java @@ -178,6 +178,32 @@ protected TTFTable getTable(String tag) throws IOException return table; } + /** + * Returns the raw bytes of the given table, no more than {@code limit} bytes. + * + * @param table the table to read. + * @param limit maximum length of array to return + * @return the raw bytes of the given table + * + * @throws IOException if there was an error accessing the table. + */ + public byte[] getTableNBytes(TTFTable table, int limit) throws IOException + { + synchronized (lockReadtable) + { + // save current position + long currentPosition = data.getCurrentPosition(); + data.seek(table.getOffset()); + + // read all data + byte[] bytes = data.read(Math.min(limit, (int) table.getLength())); + + // restore current position + data.seek(currentPosition); + return bytes; + } + } + /** * This will get the naming table for the true type font. * @@ -385,6 +411,28 @@ void readTable(TTFTable table) throws IOException data.seek(currentPosition); } + /** + * Read the given table headers. Package-private, used by TTFParser only. + * + * @param tag the name of the table to be read + * @param outHeaders consumes headers + * + * @throws IOException if there was an error reading the table. 
+ */ + void readTableHeaders(String tag, FontHeaders outHeaders) throws IOException + { + TTFTable table = tables.get(tag); + if (table != null) + { + // save current position + long currentPosition = data.getCurrentPosition(); + data.seek(table.getOffset()); + table.readHeaders(this, data, outHeaders); + // restore current position + data.seek(currentPosition); + } + } + /** * Returns the number of glyphs (MaximumProfile.numGlyphs). * diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java index cd09e36b98a..27104f7b77c 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/FileSystemFontProvider.java @@ -37,9 +37,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.fontbox.FontBoxFont; -import org.apache.fontbox.cff.CFFCIDFont; -import org.apache.fontbox.cff.CFFFont; -import org.apache.fontbox.ttf.NamingTable; +import org.apache.fontbox.ttf.FontHeaders; import org.apache.fontbox.ttf.OS2WindowsMetricsTable; import org.apache.fontbox.ttf.OTFParser; import org.apache.fontbox.ttf.OpenTypeFont; @@ -59,6 +57,12 @@ final class FileSystemFontProvider extends FontProvider { private static final Log LOG = LogFactory.getLog(FileSystemFontProvider.class); + /** + * This option changes publicly visible behaviour: ".pdfbox.cache" file will have hash="-" for all files. + * After implementing {@link FontHeaders}, parsing font headers is faster than checksumming anyway. 
+ */ + private static final boolean SKIP_CHECKSUMS = "true".equals(System.getProperty("pdfbox.fontcache.skipchecksums")); + private static final String CHECKSUM_PLACEHOLDER = "-"; private final List fontInfoList = new ArrayList<>(); private final FontCache cache; @@ -315,7 +319,7 @@ private FSFontInfo createFSIgnored(File file, FontFormat format, String postScri String hash; try { - hash = computeHash(Files.newInputStream(file.toPath())); + hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(file.toPath())); } catch (IOException ex) { @@ -382,25 +386,18 @@ private void scanFonts(List files) for (File file : files) { - try + String filePath = file.getPath().toLowerCase(); + if (filePath.endsWith(".ttf") || filePath.endsWith(".otf")) { - String filePath = file.getPath().toLowerCase(); - if (filePath.endsWith(".ttf") || filePath.endsWith(".otf")) - { - addTrueTypeFont(file); - } - else if (filePath.endsWith(".ttc") || filePath.endsWith(".otc")) - { - addTrueTypeCollection(file); - } - else if (filePath.endsWith(".pfb")) - { - addType1Font(file); - } + addTrueTypeFont(file); } - catch (IOException e) + else if (filePath.endsWith(".ttc") || filePath.endsWith(".otc")) + { + addTrueTypeCollection(file); + } + else if (filePath.endsWith(".pfb")) { - LOG.warn("Error parsing font " + file.getPath(), e); + addType1Font(file); } } } @@ -537,6 +534,11 @@ private List loadDiskCache(List files) { try (BufferedReader reader = new BufferedReader(new FileReader(diskCacheFile))) { + // consecutive lines usually share the same font file (e.g. "Courier", "Courier-Bold", "Courier-Oblique"). + // unused if SKIP_CHECKSUMS + File lastFile = null; + String lastHash = null; + // String line; while ((line = reader.readLine()) != null) { @@ -599,23 +601,36 @@ private List loadDiskCache(List files) } if (fontFile.exists()) { - boolean keep = false; // if the file exists, find out whether it's the same file. 
// first check whether time is different and if yes, whether hash is different - if (fontFile.lastModified() != lastModified) + boolean keep = fontFile.lastModified() == lastModified; + if (!keep && !SKIP_CHECKSUMS) { - String newHash = computeHash(Files.newInputStream(fontFile.toPath())); - if (newHash.equals(hash)) + String newHash; + if (hash.equals(lastHash) && fontFile.equals(lastFile)) + { + newHash = lastHash; // already computed + } + else + { + try + { + newHash = computeHash(Files.newInputStream(fontFile.toPath())); + lastFile = fontFile; + lastHash = newHash; + } + catch (IOException ex) + { + LOG.debug("Error reading font file " + fontFile.getAbsolutePath(), ex); + newHash = ""; + } + } + if (hash.equals(newHash)) { keep = true; lastModified = fontFile.lastModified(); - hash = newHash; } } - else - { - keep = true; - } if (keep) { FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, @@ -656,11 +671,13 @@ private List loadDiskCache(List files) /** * Adds a TTC or OTC to the file cache. To reduce memory, the parsed font is not cached. */ - private void addTrueTypeCollection(final File ttcFile) throws IOException + private void addTrueTypeCollection(final File ttcFile) { - try (TrueTypeCollection ttc = new TrueTypeCollection(ttcFile)) + try { - ttc.processAllFonts(ttf -> addTrueTypeFontImpl(ttf, ttcFile)); + String hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(ttcFile.toPath())); + TrueTypeCollection.processAllFontHeaders(ttcFile, + ttf -> addTrueTypeFontImpl(ttf, ttcFile, hash)); } catch (IOException e) { @@ -672,25 +689,25 @@ private void addTrueTypeCollection(final File ttcFile) throws IOException /** * Adds an OTF or TTF font to the file cache. To reduce memory, the parsed font is not cached. 
*/ - private void addTrueTypeFont(File ttfFile) throws IOException + private void addTrueTypeFont(File ttfFile) { FontFormat fontFormat = null; try { + TTFParser parser; if (ttfFile.getPath().toLowerCase().endsWith(".otf")) { fontFormat = FontFormat.OTF; - OTFParser parser = new OTFParser(false); - OpenTypeFont otf = parser.parse(new RandomAccessReadBufferedFile(ttfFile)); - addTrueTypeFontImpl(otf, ttfFile); + parser = new OTFParser(false); } else { fontFormat = FontFormat.TTF; - TTFParser parser = new TTFParser(false); - TrueTypeFont ttf = parser.parse(new RandomAccessReadBufferedFile(ttfFile)); - addTrueTypeFontImpl(ttf, ttfFile); + parser = new TTFParser(false); } + FontHeaders headers = parser.parseTableHeaders(new RandomAccessReadBufferedFile(ttfFile)); + addTrueTypeFontImpl(headers, ttfFile, + SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(ttfFile.toPath()))); } catch (IOException e) { @@ -702,25 +719,27 @@ private void addTrueTypeFont(File ttfFile) throws IOException /** * Adds an OTF or TTF font to the file cache. To reduce memory, the parsed font is not cached. 
*/ - private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws IOException + private void addTrueTypeFontImpl(FontHeaders ttf, File file, String fileHash) { - try + final String error = ttf.getError(); + if (error == null) { // read PostScript name, if any - if (ttf.getName() != null && ttf.getName().contains("|")) + final String name = ttf.getName(); + if (name != null && name.contains("|")) { fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skippipeinname*")); - LOG.warn("Skipping font with '|' in name " + ttf.getName() + " in file " + file); + LOG.warn("Skipping font with '|' in name " + name + " in file " + file); } - else if (ttf.getName() != null) + else if (name != null) { // ignore bitmap fonts - if (ttf.getHeader() == null) + Integer macStyle = ttf.getHeaderMacStyle(); + if (macStyle == null) { - fontInfoList.add(createFSIgnored(file, FontFormat.TTF, ttf.getName())); + fontInfoList.add(createFSIgnored(file, FontFormat.TTF, name)); return; } - int macStyle = ttf.getHeader().getMacStyle(); int sFamilyClass = -1; int usWeightClass = -1; @@ -738,36 +757,24 @@ else if (ttf.getName() != null) panose = os2WindowsMetricsTable.getPanose(); } - String hash = computeHash(ttf.getOriginalData()); - String format; - if (ttf instanceof OpenTypeFont && ((OpenTypeFont) ttf).isPostScript()) + FontFormat format; + CIDSystemInfo ros = null; + if (ttf.isOpenTypePostScript()) { - format = "OTF"; - CIDSystemInfo ros = null; - OpenTypeFont otf = (OpenTypeFont) ttf; - if (otf.isSupportedOTF() && otf.getCFF() != null) + format = FontFormat.OTF; + String registry = ttf.getOtfRegistry(); + String ordering = ttf.getOtfOrdering(); + if (registry != null || ordering != null) { - CFFFont cff = otf.getCFF().getFont(); - if (cff instanceof CFFCIDFont) - { - CFFCIDFont cidFont = (CFFCIDFont) cff; - String registry = cidFont.getRegistry(); - String ordering = cidFont.getOrdering(); - int supplement = cidFont.getSupplement(); - ros = new CIDSystemInfo(registry, ordering, 
supplement); - } + ros = new CIDSystemInfo(registry, ordering, ttf.getOtfSupplement()); } - fontInfoList.add(new FSFontInfo(file, FontFormat.OTF, ttf.getName(), ros, - usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this, hash, file.lastModified())); } else { - CIDSystemInfo ros = null; - if (ttf.getTableMap().containsKey("gcid")) + byte[] bytes = ttf.getNonOtfTableGCID142(); + if (bytes != null) { // Apple's AAT fonts have a "gcid" table with CID info - byte[] bytes = ttf.getTableBytes(ttf.getTableMap().get("gcid")); String reg = new String(bytes, 10, 64, StandardCharsets.US_ASCII); String registryName = reg.substring(0, reg.indexOf('\0')); String ord = new String(bytes, 76, 64, StandardCharsets.US_ASCII); @@ -775,22 +782,17 @@ else if (ttf.getName() != null) int supplementVersion = bytes[140] << 8 & (bytes[141] & 0xFF); ros = new CIDSystemInfo(registryName, orderName, supplementVersion); } - - format = "TTF"; - fontInfoList.add(new FSFontInfo(file, FontFormat.TTF, ttf.getName(), ros, - usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, - macStyle, panose, this, hash, file.lastModified())); + format = FontFormat.TTF; } + fontInfoList.add(new FSFontInfo(file, format, name, ros, + usWeightClass, sFamilyClass, ulCodePageRange1, ulCodePageRange2, + macStyle, panose, this, fileHash, file.lastModified())); if (LOG.isTraceEnabled()) { - NamingTable name = ttf.getNaming(); - if (name != null) - { - LOG.trace(format +": '" + name.getPostScriptName() + "' / '" + - name.getFontFamily() + "' / '" + - name.getFontSubFamily() + "'"); - } + LOG.trace(format.name() +": '" + name + "' / '" + + ttf.getFontFamily() + "' / '" + + ttf.getFontSubFamily() + "'"); } } else @@ -799,21 +801,17 @@ else if (ttf.getName() != null) LOG.warn("Missing 'name' entry for PostScript name in font " + file); } } - catch (IOException e) + else { fontInfoList.add(createFSIgnored(file, FontFormat.TTF, "*skipexception*")); - LOG.warn("Could not load font 
file: " + file, e); - } - finally - { - ttf.close(); + LOG.warn("Could not load font file: " + file + ": " + error); } } /** * Adds a Type 1 font to the file cache. To reduce memory, the parsed font is not cached. */ - private void addType1Font(File pfbFile) throws IOException + private void addType1Font(File pfbFile) { try (InputStream input = new FileInputStream(pfbFile)) { @@ -830,7 +828,7 @@ private void addType1Font(File pfbFile) throws IOException LOG.warn("Skipping font with '|' in name " + type1.getName() + " in file " + pfbFile); return; } - String hash = computeHash(Files.newInputStream(pfbFile.toPath())); + String hash = SKIP_CHECKSUMS ? CHECKSUM_PLACEHOLDER : computeHash(Files.newInputStream(pfbFile.toPath())); fontInfoList.add(new FSFontInfo(pfbFile, FontFormat.PFB, type1.getName(), null, -1, -1, 0, 0, -1, null, this, hash, pfbFile.lastModified()));