Skip to content

Commit

Permalink
[FIX] Memory leak: RandomAccessRead passed to TrueTypeCollection cons…
Browse files Browse the repository at this point in the history
…tructor

[PERFORMANCE] Improve FileSystemFontProvider.scanFonts() performance by adding 'only headers' mode to TTF parser:
* only read tables needed for FSFontInfo ('name', 'head', 'OS/2', 'CFF ', 'gcid')
* 'CFF ' and 'head' table parsers finish as soon as it has all needed data (in 'only headers' mode)
* streamline I/O: replace readByte() with read(array), avoid allocating byte[] where possible
* NamingTable: use sorted list instead of multilevel HashMap, delay-load Strings
* skip checksumming as it is now faster to simply re-parse (gated with "pdfbox.fontcache.skipchecksums" for backward compatibility)
[DEV] Breaking change: NameRecord.getString() is now package-private and lazy, renamed to getStringLazy().
[DEV] Breaking change: new abstract method TTFDataStream.getSubReader()
  • Loading branch information
bogdiuk committed Apr 10, 2024
1 parent ee47441 commit 5ec36b8
Show file tree
Hide file tree
Showing 25 changed files with 957 additions and 272 deletions.
40 changes: 20 additions & 20 deletions fontbox/src/main/java/org/apache/fontbox/cff/CFFFont.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public abstract class CFFFont implements FontBoxFont
{
private String fontName;
private CFFCharset charset;
private CFFParser.ByteSource source;
// private CFFParser.ByteSource source;
protected final Map<String, Object> topDict = new LinkedHashMap<>();
protected byte[][] charStrings;
protected byte[][] globalSubrIndex;
Expand Down Expand Up @@ -140,25 +140,25 @@ public final List<byte[]> getCharStringBytes()
return Arrays.asList(charStrings);
}

/**
* Sets a byte source to re-read the CFF data in the future.
*/
final void setData(CFFParser.ByteSource source)
{
this.source = source;
}

/**
* Returns the CFF data.
*
* @return the cff data as byte array
*
* @throws IOException if the data could not be read
*/
public byte[] getData() throws IOException
{
return source.getBytes();
}
// /**
// * Sets a byte source to re-read the CFF data in the future.
// */
// final void setData(CFFParser.ByteSource source)
// {
// this.source = source;
// }
//
// /**
// * Returns the CFF data.
// *
// * @return the cff data as byte array
// *
// * @throws IOException if the data could not be read
// */
// public byte[] getData() throws IOException
// {
// return source.getBytes();
// }

/**
* Returns the number of charstrings in the font.
Expand Down
39 changes: 26 additions & 13 deletions fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.ttf.LoadOnlyHeaders;
import org.apache.pdfbox.io.RandomAccessRead;


Expand All @@ -47,7 +48,8 @@ public class CFFParser
private static final String TAG_TTFONLY = "\u0000\u0001\u0000\u0000";

private String[] stringIndex = null;
private ByteSource source;
// private ByteSource source;
private LoadOnlyHeaders loadOnlyHeaders;

// for debugging only
private String debugFontName;
Expand All @@ -66,6 +68,11 @@ public interface ByteSource
byte[] getBytes() throws IOException;
}

public void setLoadOnlyHeaders(LoadOnlyHeaders loadOnlyHeaders)
{
this.loadOnlyHeaders = loadOnlyHeaders;
}

/**
* Parse CFF font using byte array, also passing in a byte source for future use.
*
Expand All @@ -77,7 +84,7 @@ public interface ByteSource
public List<CFFFont> parse(byte[] bytes, ByteSource source) throws IOException
{
// TODO do we need to store the source data of the font? It isn't used at all
this.source = source;
// this.source = source;
return parse(new DataInputByteArray(bytes));
}

Expand All @@ -91,17 +98,10 @@ public List<CFFFont> parse(byte[] bytes, ByteSource source) throws IOException
public List<CFFFont> parse(RandomAccessRead randomAccessRead) throws IOException
{
// TODO do we need to store the source data of the font? It isn't used at all
byte[] bytes = new byte[(int) randomAccessRead.length()];
randomAccessRead.seek(0);
int remainingBytes = bytes.length;
int amountRead;
while ((amountRead = randomAccessRead.read(bytes, bytes.length - remainingBytes,
remainingBytes)) > 0)
{
remainingBytes -= amountRead;
}
randomAccessRead.seek(0);
this.source = new CFFBytesource(bytes);
// byte[] bytes = randomAccessRead.readNBytes((int) randomAccessRead.length());
// randomAccessRead.seek(0);
// this.source = new CFFBytesource(bytes);
return parse(new DataInputRandomAccessRead(randomAccessRead));
}

Expand Down Expand Up @@ -151,7 +151,7 @@ private List<CFFFont> parse(DataInput input) throws IOException
{
CFFFont font = parseFont(input, nameIndex[i], topDictIndex[i]);
font.setGlobalSubrIndex(globalSubrIndex);
font.setData(source);
// font.setData(source);
fonts.add(font);
}
return fonts;
Expand Down Expand Up @@ -492,6 +492,15 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());

font = cffCIDFont;
if (loadOnlyHeaders != null)
{
loadOnlyHeaders.setOtfROS(
cffCIDFont.getRegistry(),
cffCIDFont.getOrdering(),
cffCIDFont.getSupplement());
// we just read (Registry, Ordering, Supplement) and don't need anything else
return font;
}
}
else
{
Expand All @@ -501,6 +510,10 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
// name
debugFontName = name;
font.setName(name);
if (loadOnlyHeaders != null)
{
return font; // not a 'CFFCIDFont' => cannot read properties needed by LoadOnlyHeaders
}

// top dict
font.addValueToTopDict("version", getString(topDict, "version"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,7 @@ public byte[] readBytes(int length) throws IOException
{
throw new IOException("length is negative");
}
if (randomAccessRead.length() - randomAccessRead.getPosition() < length)
{
throw new IOException("Premature end of buffer reached");
}
byte[] bytes = new byte[length];
for (int i = 0; i < length; i++)
{
bytes[i] = readByte();
}
return bytes;
return randomAccessRead.readExact(length);
}

@Override
Expand Down
21 changes: 18 additions & 3 deletions fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.fontbox.cff.CFFFont;
import org.apache.fontbox.cff.CFFParser;
import org.apache.pdfbox.io.RandomAccessRead;

/**
* PostScript font program (compact font format).
Expand Down Expand Up @@ -48,10 +49,24 @@ public class CFFTable extends TTFTable
@Override
void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
{
byte[] bytes = data.read((int)getLength());

CFFParser parser = new CFFParser();
cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0);
parser.setLoadOnlyHeaders(ttf.getLoadOnlyHeaders());
// assert data.getCurrentPosition() == getOffset();
try (RandomAccessRead subReader = data.getSubReader(getLength()))
{
if (subReader != null)
{
cffFont = parser.parse(subReader).get(0);
data.seek(getOffset() + getLength());
}
else
{
assert ttf.getLoadOnlyHeaders() == null
: "It is inefficient to read whole CFF table to parse only headers, please use RandomAccessReadUncachedDataStream";
byte[] bytes = data.read((int)getLength());
cffFont = parser.parse(bytes, new CFFBytesource(ttf)).get(0);
}
}

initialized = true;
}
Expand Down
9 changes: 9 additions & 0 deletions fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ public class HeaderTable extends TTFTable
@Override
void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
{
LoadOnlyHeaders outHeaders = ttf.getLoadOnlyHeaders();
if (outHeaders != null) {
data.skip(44);
macStyle = data.readUnsignedShort();
outHeaders.setHeaderMacStyle(macStyle);
initialized = true;
return;
}

version = data.read32Fixed();
fontRevision = data.read32Fixed();
checkSumAdjustment = data.readUnsignedInt();
Expand Down
155 changes: 155 additions & 0 deletions fontbox/src/main/java/org/apache/fontbox/ttf/LoadOnlyHeaders.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.fontbox.ttf;

import java.io.IOException;

/**
* To improve performance of {@code FileSystemFontProvider.scanFonts(...)},
* this class is used both as a marker to skip
* unused data and as a storage for collected data.
* <p>
* Tables it needs:<ul>
* <li>NamingTable.TAG,
* <li>HeaderTable.TAG,
* <li>OS2WindowsMetricsTable.TAG,
* <li>OTF: CFFTable.TAG
* <li>non-OTF: "gcid"
* </ul>
*
* @author Mykola Bohdiuk
*/
public final class LoadOnlyHeaders
{
static final int BYTES_GCID = 142;

private IOException exception;
private String name;
private Integer headerMacStyle;
private OS2WindowsMetricsTable os2Windows;
private String fontFamily;
private String fontSubFamily;
private byte[] nonOtfGcid142;
//
private boolean isOTFAndPostScript;
private String otfRegistry;
private String otfOrdering;
private int otfSupplement;

public IOException getException()
{
return exception;
}

public String getName()
{
return name;
}

/**
* null == no HeaderTable, {@code ttf.getHeader().getMacStyle()}
*/
public Integer getHeaderMacStyle()
{
return headerMacStyle;
}

public OS2WindowsMetricsTable getOS2Windows()
{
return os2Windows;
}

// only when LOGGER(FileSystemFontProvider).isTraceEnabled() tracing: FontFamily, FontSubfamily
public String getFontFamily()
{
return fontFamily;
}

public String getFontSubFamily()
{
return fontSubFamily;
}

public boolean isOpenTypePostScript()
{
return isOTFAndPostScript;
}

public byte[] getNonOtfTableGCID142()
{
return nonOtfGcid142;
}

public String getOtfRegistry()
{
return otfRegistry;
}

public String getOtfOrdering()
{
return otfOrdering;
}

public int getOtfSupplement()
{
return otfSupplement;
}

void setException(IOException exception)
{
this.exception = exception;
}

void setName(String name)
{
this.name = name;
}

void setHeaderMacStyle(Integer headerMacStyle)
{
this.headerMacStyle = headerMacStyle;
}

void setOs2Windows(OS2WindowsMetricsTable os2Windows)
{
this.os2Windows = os2Windows;
}

void setFontFamily(String fontFamily, String fontSubFamily)
{
this.fontFamily = fontFamily;
this.fontSubFamily = fontSubFamily;
}

void setNonOtfGcid142(byte[] nonOtfGcid142)
{
this.nonOtfGcid142 = nonOtfGcid142;
}

void setIsOTFAndPostScript(boolean isOTFAndPostScript)
{
this.isOTFAndPostScript = isOTFAndPostScript;
}

// public because CFFParser is in a different package
public void setOtfROS(String otfRegistry, String otfOrdering, int otfSupplement)
{
this.otfRegistry = otfRegistry;
this.otfOrdering = otfOrdering;
this.otfSupplement = otfSupplement;
}
}
6 changes: 4 additions & 2 deletions fontbox/src/main/java/org/apache/fontbox/ttf/NameRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ void initData( TrueTypeFont ttf, TTFDataStream data ) throws IOException
*
* @return A string for this class.
*/
@Override
public String toString()
{
return
Expand All @@ -190,9 +191,10 @@ public String toString()
" " + string;
}
/**
* @return Returns the string.
* Use {@link NamingTable#getString(NameRecord)}
* @return Returns the string, if it was pre-loaded.
*/
public String getString()
String getStringLazy()
{
return string;
}
Expand Down
Loading

0 comments on commit 5ec36b8

Please sign in to comment.