diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
index afcab567af6..e384314d41f 100644
--- a/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
+++ b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java
@@ -21,6 +21,9 @@
import org.apache.fontbox.ttf.model.GsubData;
import org.apache.fontbox.ttf.model.Language;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
/**
* Gets a {@link Language} specific instance of a {@link GsubWorker}
*
@@ -29,19 +32,25 @@
*/
public class GsubWorkerFactory
{
+ private static final Logger LOG = LogManager.getLogger(GsubWorkerFactory.class);
public GsubWorker getGsubWorker(CmapLookup cmapLookup, GsubData gsubData)
{
+ // TODO: redesign this selection. GsubData reports a single language, so for a font that
+ // supports several languages one of them is used, and it may not be the one expected.
+ LOG.debug("Language: {}", gsubData.getLanguage()); // trace which language selects the worker
switch (gsubData.getLanguage())
{
case BENGALI:
return new GsubWorkerForBengali(cmapLookup, gsubData);
+ case DEVANAGARI:
+ return new GsubWorkerForDevanagari(cmapLookup, gsubData);
+ // NOTE(review): Gujarati stays disabled; GsubWorkerForGujarati is not visible in this change.
+ // case GUJARATI: return new GsubWorkerForGujarati(cmapLookup, gsubData);
case LATIN:
return new GsubWorkerForLatin(cmapLookup, gsubData);
default:
return new DefaultGsubWorker();
}
-
}
-
}
diff --git a/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDevanagari.java b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDevanagari.java
new file mode 100644
index 00000000000..3e35988873f
--- /dev/null
+++ b/fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerForDevanagari.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fontbox.ttf.gsub;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.fontbox.ttf.CmapLookup;
+import org.apache.fontbox.ttf.model.GsubData;
+import org.apache.fontbox.ttf.model.ScriptFeature;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+/**
+ *
+ * Devanagari-specific implementation of GSUB system
+ *
+ * @author JAVAUSER
+ *
+ */
+public class GsubWorkerForDevanagari implements GsubWorker
+{
+ private static final Logger LOG = LogManager.getLogger(GsubWorkerForDevanagari.class);
+
+ private static final String RKRF_FEATURE = "rkrf";
+ private static final String VATU_FEATURE = "vatu";
+
+ /**
+ * This sequence is very important. This has been taken from https://docs.microsoft.com/en-us/typography/script-development/devanagari
+ */
+ private static final List FEATURES_IN_ORDER = Arrays.asList("locl", "nukt", "akhn",
+ "rphf", RKRF_FEATURE,"blwf", "half", VATU_FEATURE, "cjct", "pres", "abvs", "blws",
+ "psts", "haln", "calt");
+
+ // Reph glyphs
+ private static final char[] REPH_CHARS = {'\u0930', '\u094D'};
+ // Glyphs to precede reph
+ private static final char[] BEFORE_REPH_CHARS={'\u093E','\u0940'};
+
+ // Devanagari vowel sign I
+ private static final char BEFORE_HALF_CHAR = '\u093F';
+
+ private final CmapLookup cmapLookup;
+ private final GsubData gsubData;
+
+ private final List rephGlyphIds;
+ private final List beforeRephGlyphIds;
+ private final List beforeHalfGlyphIds;
+
+ GsubWorkerForDevanagari(CmapLookup cmapLookup, GsubData gsubData)
+ {
+ this.cmapLookup = cmapLookup;
+ this.gsubData = gsubData;
+ beforeHalfGlyphIds = getBeforeHalfGlyphIds();
+ rephGlyphIds = getRephGlyphIds();
+ beforeRephGlyphIds = getbeforeRephGlyphIds();
+ }
+
+ @Override
+ public List applyTransforms(List originalGlyphIds)
+ {
+ List intermediateGlyphsFromGsub = adjustRephPosition(originalGlyphIds);
+ intermediateGlyphsFromGsub = repositionGlyphs(intermediateGlyphsFromGsub);
+ for (String feature : FEATURES_IN_ORDER)
+ {
+ if (!gsubData.isFeatureSupported(feature))
+ {
+ if (feature.equals(RKRF_FEATURE) && gsubData.isFeatureSupported(VATU_FEATURE))
+ {
+ // Create your own rkrf feature from vatu feature
+ intermediateGlyphsFromGsub = applyRKRFFeature(
+ gsubData.getFeature(VATU_FEATURE),
+ intermediateGlyphsFromGsub);
+ }
+ LOG.debug("the feature {} was not found", feature);
+ continue;
+ }
+
+ LOG.debug("applying the feature {}", feature);
+ ScriptFeature scriptFeature = gsubData.getFeature(feature);
+ intermediateGlyphsFromGsub = applyGsubFeature(scriptFeature,
+ intermediateGlyphsFromGsub);
+ }
+ return Collections.unmodifiableList(intermediateGlyphsFromGsub);
+ }
+
+ private List applyRKRFFeature(ScriptFeature rkrfGlyphsForSubstitution,
+ List originalGlyphIds)
+ {
+ Set> rkrfGlyphIds = rkrfGlyphsForSubstitution.getAllGlyphIdsForSubstitution();
+ if (rkrfGlyphIds.isEmpty())
+ {
+ LOG.debug("Glyph substitution list for {} is empty.", rkrfGlyphsForSubstitution.getName());
+ return originalGlyphIds;
+ }
+ // Replace this with better implementation to get second GlyphId from rkrfGlyphIds
+ int rkrfReplacement = 0;
+ for (List firstList : rkrfGlyphIds)
+ {
+ if (firstList.size() > 1)
+ {
+ rkrfReplacement = firstList.get(1);
+ break;
+ }
+ }
+
+ if (rkrfReplacement == 0)
+ {
+ LOG.debug("Cannot find rkrf candidate. The rkrfGlyphIds doesn't contain lists of two elements.");
+ return originalGlyphIds;
+ }
+
+ List rkrfList = new ArrayList<>(originalGlyphIds);
+ for (int index = originalGlyphIds.size() - 1; index > 1; index--)
+ {
+ int raGlyph = originalGlyphIds.get(index);
+ if (raGlyph == rephGlyphIds.get(0))
+ {
+ int viramaGlyph = originalGlyphIds.get(index - 1);
+ if (viramaGlyph == rephGlyphIds.get(1))
+ {
+ rkrfList.set(index - 1, rkrfReplacement);
+ rkrfList.remove(index);
+ }
+ }
+ }
+ return rkrfList;
+ }
+
+ private List adjustRephPosition(List originalGlyphIds)
+ {
+ List rephAdjustedList = new ArrayList<>(originalGlyphIds);
+ for (int index = 0; index < originalGlyphIds.size() - 2; index++)
+ {
+ int raGlyph = originalGlyphIds.get(index);
+ int viramaGlyph = originalGlyphIds.get(index + 1);
+ if (raGlyph == rephGlyphIds.get(0) && viramaGlyph == rephGlyphIds.get(1))
+ {
+ int nextConsonantGlyph = originalGlyphIds.get(index + 2);
+ rephAdjustedList.set(index, nextConsonantGlyph);
+ rephAdjustedList.set(index + 1, raGlyph);
+ rephAdjustedList.set(index + 2, viramaGlyph);
+
+ if (index + 3 < originalGlyphIds.size())
+ {
+ int matraGlyph = originalGlyphIds.get(index + 3);
+ if (beforeRephGlyphIds.contains(matraGlyph))
+ {
+ rephAdjustedList.set(index + 1, matraGlyph);
+ rephAdjustedList.set(index + 2, raGlyph);
+ rephAdjustedList.set(index + 3, viramaGlyph);
+ }
+ }
+ }
+ }
+ return rephAdjustedList;
+ }
+
+ private List repositionGlyphs(List originalGlyphIds)
+ {
+ List repositionedGlyphIds = new ArrayList<>(originalGlyphIds);
+ int listSize = repositionedGlyphIds.size();
+ int foundIndex = listSize - 1;
+ int nextIndex = listSize - 2;
+ while (nextIndex > -1)
+ {
+ int glyph = repositionedGlyphIds.get(foundIndex);
+ int prevIndex = foundIndex + 1;
+ if (beforeHalfGlyphIds.contains(glyph))
+ {
+ repositionedGlyphIds.remove(foundIndex);
+ repositionedGlyphIds.add(nextIndex--, glyph);
+ }
+ else if (rephGlyphIds.get(1).equals(glyph) && prevIndex < listSize)
+ {
+ int prevGlyph = repositionedGlyphIds.get(prevIndex);
+ if (beforeHalfGlyphIds.contains(prevGlyph))
+ {
+ repositionedGlyphIds.remove(prevIndex);
+ repositionedGlyphIds.add(nextIndex--, prevGlyph);
+ }
+ }
+ foundIndex = nextIndex--;
+ }
+ return repositionedGlyphIds;
+ }
+
+ private List applyGsubFeature(ScriptFeature scriptFeature, List originalGlyphs)
+ {
+ Set> allGlyphIdsForSubstitution = scriptFeature.getAllGlyphIdsForSubstitution();
+ if (allGlyphIdsForSubstitution.isEmpty())
+ {
+ LOG.debug("getAllGlyphIdsForSubstitution() for {} is empty", scriptFeature.getName());
+ return originalGlyphs;
+ }
+ GlyphArraySplitter glyphArraySplitter = new GlyphArraySplitterRegexImpl(
+ allGlyphIdsForSubstitution);
+ List> tokens = glyphArraySplitter.split(originalGlyphs);
+ List gsubProcessedGlyphs = new ArrayList<>(tokens.size());
+ tokens.forEach(chunk ->
+ {
+ if (scriptFeature.canReplaceGlyphs(chunk))
+ {
+ Integer glyphId = scriptFeature.getReplacementForGlyphs(chunk);
+ gsubProcessedGlyphs.add(glyphId);
+ }
+ else
+ {
+ gsubProcessedGlyphs.addAll(chunk);
+ }
+ });
+ LOG.debug("originalGlyphs: {}, gsubProcessedGlyphs: {}", originalGlyphs, gsubProcessedGlyphs);
+ return gsubProcessedGlyphs;
+ }
+
+ private List getBeforeHalfGlyphIds()
+ {
+ List glyphIds = new ArrayList<>();
+ glyphIds.add(getGlyphId(BEFORE_HALF_CHAR));
+ return Collections.unmodifiableList(glyphIds);
+ }
+
+ private List getRephGlyphIds()
+ {
+ List result = new ArrayList<>();
+ for (char character : REPH_CHARS)
+ {
+ result.add(getGlyphId(character));
+ }
+ return Collections.unmodifiableList(result);
+ }
+
+ private List getbeforeRephGlyphIds()
+ {
+ List glyphIds = new ArrayList<>();
+ for (char character : BEFORE_REPH_CHARS)
+ {
+ glyphIds.add(getGlyphId(character));
+ }
+ return Collections.unmodifiableList(glyphIds);
+ }
+
+ private Integer getGlyphId(char character)
+ {
+ return cmapLookup.getGlyphId(character);
+ }
+}
diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java
index a6dd621e50f..74326e47fc1 100644
--- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java
+++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestFontEmbedding.java
@@ -19,8 +19,11 @@
import java.io.ByteArrayOutputStream;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@@ -45,6 +48,7 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
@@ -245,6 +249,52 @@ void testBengali() throws IOException
//assertEquals(expectedExtractedtext, extracted.replaceAll("\r", "").trim());
}
+    /**
+     * Writes a line of Devanagari text with an embedded Lohit-Devanagari font, compares the
+     * rendering of the resulting PDF with the reference in the test resources and dumps the
+     * extracted text to a file.
+     *
+     * @throws IOException if the PDF or the text dump cannot be written or read
+     */
+    @Test
+    void testDevanagari() throws IOException
+    {
+        String devanagariText = "प्रदेश ग्रामीण व्यवसायिक, लक्ष्मिपति, लक्षित, मक्खि उपलब्धि, प्रसिद्धि";
+        File pdf = new File(OUT_DIR, "Devanagari.pdf");
+
+        try (PDDocument document = new PDDocument())
+        {
+            PDPage page = new PDPage(PDRectangle.A4);
+            document.addPage(page);
+            PDFont font = PDType0Font.load(document,
+                    this.getClass().getResourceAsStream("/org/apache/pdfbox/ttf/Lohit-Devanagari.ttf"));
+
+            try (PDPageContentStream contentStream = new PDPageContentStream(document, page))
+            {
+                contentStream.beginText();
+                contentStream.setFont(font, 20);
+                contentStream.newLineAtOffset(50, 700);
+                contentStream.showText(devanagariText);
+                contentStream.endText();
+            }
+
+            document.save(pdf);
+        }
+
+        File inDir = new File("src/test/resources/org/apache/pdfbox/ttf");
+
+        // compare rendering
+        if (!TestPDFToImage.doTestFile(pdf, inDir.getAbsolutePath(), OUT_DIR.getAbsolutePath()))
+        {
+            // A mismatch fails the test. The original author noted that rendering differs
+            // between systems, so a failure here may need a manual look at the output.
+            fail("Rendering of " + pdf + " failed or is not identical to expected rendering in " + inDir + " directory");
+        }
+
+        // Check text extraction
+        String extracted = getUnicodeText(pdf);
+
+        // The extracted text is only written out for inspection; the comparison with the
+        // input text is still disabled.
+        try (OutputStream os = new FileOutputStream(new File(OUT_DIR, "Devanagari.txt")))
+        {
+            os.write(extracted.getBytes(StandardCharsets.UTF_8));
+            //assertEquals(devanagariText, extracted.replaceAll("\r", "").trim());
+        }
+    }
+
/**
* Test corner case of PDFBOX-4302.
*