Merge pull request #274 from anthrotype/duplicate-prod-names

Handle production names that are duplicated, that contain characters not valid in PostScript glyph names, or that are too long (> 63 chars)
googlefonts · Sep 10, 2018 · ccad4b7 · ccad4b7
2 parents 70a7382 + 6dd6113
commit ccad4b7
Show file tree

Hide file tree

Showing 3 changed files with 89 additions and 16 deletions.
diff --git a/Lib/ufo2ft/__init__.py b/Lib/ufo2ft/__init__.py
@@ -93,7 +93,7 @@ def compileOTF(
         featureCompilerClass=featureCompilerClass,
     )
 
-    postProcessor = PostProcessor(otf, ufo)
+    postProcessor = PostProcessor(otf, ufo, glyphSet=glyphSet)
     otf = postProcessor.process(useProductionNames, optimizeCFF)
 
     return otf

diff --git a/Lib/ufo2ft/postProcessor.py b/Lib/ufo2ft/postProcessor.py
@@ -8,6 +8,7 @@
     GLYPHS_DONT_USE_PRODUCTION_NAMES
 )
 import logging
+import re
 
 
 logger = logging.getLogger(__name__)
@@ -18,8 +19,12 @@ class PostProcessor(object):
     info from the source UFO where necessary.
     """
 
-    def __init__(self, otf, ufo):
+    GLYPH_NAME_INVALID_CHARS = re.compile("[^0-9a-zA-Z_.]")
+    MAX_GLYPH_NAME_LENGTH = 63
+
+    def __init__(self, otf, ufo, glyphSet=None):
         self.ufo = ufo
+        self.glyphSet = glyphSet if glyphSet is not None else ufo
         stream = BytesIO()
         otf.save(stream)
         stream.seek(0)
@@ -59,15 +64,10 @@ def process(self, useProductionNames=None, optimizeCFF=True):
 
     def _rename_glyphs_from_ufo(self):
         """Rename glyphs using ufo.lib.public.postscriptNames in UFO."""
-
-        rename_map = {
-            g.name: self._build_production_name(g) for g in self.ufo}
-        # .notdef may not be present in the original font
-        rename_map[".notdef"] = ".notdef"
-        rename = lambda names: [rename_map[n] for n in names]
+        rename_map = self._build_production_names()
 
         otf = self.otf
-        otf.setGlyphOrder(rename(otf.getGlyphOrder()))
+        otf.setGlyphOrder([rename_map.get(n, n) for n in otf.getGlyphOrder()])
 
         # we need to compile format 2 'post' table so that the 'extraNames'
         # attribute is updated with the list of the names outside the
@@ -82,7 +82,43 @@ def _rename_glyphs_from_ufo(self):
             char_strings = cff.CharStrings.charStrings
             cff.CharStrings.charStrings = {
                 rename_map.get(n, n): v for n, v in char_strings.items()}
-            cff.charset = rename(cff.charset)
+            cff.charset = [rename_map.get(n, n) for n in cff.charset]
+
+    def _build_production_names(self):
+        seen = {}
+        rename_map = {}
+        for name in self.otf.getGlyphOrder():
+            prod_name = self._build_production_name(self.glyphSet[name])
+
+            # strip invalid characters not allowed in postscript glyph names
+            if name != prod_name:
+                valid_name = self.GLYPH_NAME_INVALID_CHARS.sub("", prod_name)
+                if len(valid_name) > self.MAX_GLYPH_NAME_LENGTH:
+                    # if the length of the generated production name is too
+                    # long, try to fall back to the original name
+                    valid_name = self.GLYPH_NAME_INVALID_CHARS.sub("", name)
+            else:
+                valid_name = self.GLYPH_NAME_INVALID_CHARS.sub("", name)
+
+            if len(valid_name) > self.MAX_GLYPH_NAME_LENGTH:
+                logger.warning(
+                    "glyph name length exceeds 63 characters: '%s'", valid_name
+                )
+            # add a suffix to make the production names unique
+            rename_map[name] = self._unique_name(valid_name, seen)
+        return rename_map
+
+    @staticmethod
+    def _unique_name(name, seen):
+        """Append incremental '.N' suffix if glyph is a duplicate."""
+        if name in seen:
+            n = seen[name]
+            while (name + ".%d" % n) in seen:
+                n += 1
+            seen[name] = n + 1
+            name += ".%d" % n
+        seen[name] = 1
+        return name
 
     def _build_production_name(self, glyph):
         """Build a production name for a single glyph."""
@@ -100,21 +136,21 @@ def _build_production_name(self, glyph):
 
         # use production name + last (non-script) suffix if possible
         parts = glyph.name.rsplit('.', 1)
-        if len(parts) == 2 and parts[0] in self.ufo:
+        if len(parts) == 2 and parts[0] in self.glyphSet:
             return '%s.%s' % (
-                self._build_production_name(self.ufo[parts[0]]), parts[1])
+                self._build_production_name(self.glyphSet[parts[0]]), parts[1])
 
         # use ligature name, making sure to look up components with suffixes
         parts = glyph.name.split('.', 1)
         if len(parts) == 2:
             liga_parts = ['%s.%s' % (n, parts[1]) for n in parts[0].split('_')]
         else:
             liga_parts = glyph.name.split('_')
-        if len(liga_parts) > 1 and all(n in self.ufo for n in liga_parts):
-            unicode_vals = [self.ufo[n].unicode for n in liga_parts]
+        if len(liga_parts) > 1 and all(n in self.glyphSet for n in liga_parts):
+            unicode_vals = [self.glyphSet[n].unicode for n in liga_parts]
             if all(v and v <= 0xffff for v in unicode_vals):
                 return 'uni' + ''.join('%04X' % v for v in unicode_vals)
             return '_'.join(
-                self._build_production_name(self.ufo[n]) for n in liga_parts)
+                self._build_production_name(self.glyphSet[n]) for n in liga_parts)
 
         return glyph.name
diff --git a/tests/outlineCompiler_test.py b/tests/outlineCompiler_test.py
@@ -16,6 +16,7 @@
 )
 from ufo2ft import compileTTF
 import os
+import logging
 import pytest
 
 
@@ -642,7 +643,7 @@ def test_compile_with_custom_postscript_names_notdef_preserved(
         self, testufo
     ):
         custom_names = dict(self.CUSTOM_POSTSCRIPT_NAMES)
-        custom_names[".notdef"] = "defnot"
+        del custom_names[".notdef"]
         testufo.lib["public.postscriptNames"] = custom_names
         result = compileTTF(testufo, useProductionNames=True)
         assert result.getGlyphOrder() == [
@@ -662,6 +663,42 @@ def test_compile_with_custom_postscript_names_notdef_preserved(
             "lll",
         ]
 
+    def test_warn_name_exceeds_max_length(self, testufo, caplog):
+        long_name = 64 * "a"
+        testufo.newGlyph(long_name)
+
+        with caplog.at_level(logging.WARNING, logger="ufo2ft.postProcessor"):
+            result = compileTTF(testufo, useProductionNames=True)
+
+        assert "length exceeds 63 characters" in caplog.text
+        assert long_name in result.getGlyphOrder()
+
+    def test_duplicate_glyph_names(self, testufo):
+        order = ["ab", "ab.1", "a-b", "a/b", "ba"]
+        testufo.lib["public.glyphOrder"] = order
+        testufo.lib["public.postscriptNames"] = {"ba": "ab"}
+        for name in order:
+            if name not in testufo:
+                testufo.newGlyph(name)
+
+        result = compileTTF(testufo, useProductionNames=True).getGlyphOrder()
+
+        assert result[1] == "ab"
+        assert result[2] == "ab.1"
+        assert result[3] == "ab.2"
+        assert result[4] == "ab.3"
+        assert result[5] == "ab.4"
+
+    def test_too_long_production_name(self, testufo):
+        name = "_".join(("a",) * 16)
+        testufo.newGlyph(name)
+
+        result = compileTTF(testufo, useProductionNames=True).getGlyphOrder()
+
+        # the production name uniXXXX would exceed the max length so the
+        # original name is used
+        assert name in result
+
 
 ASCII = [unichr(c) for c in range(0x20, 0x7E)]