prawnpdf · diaconu-andrei · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026
diff --git a/lib/ttfunk/table/cmap.rb b/lib/ttfunk/table/cmap.rb
@@ -17,27 +17,45 @@ class Cmap < Table
       # @param charmap [Hash{Integer => Integer}]
       # @param encoding [Symbol]
       # @return [Hash]
-      #   * `:charmap` (<tt>Hash{Integer => Hash}</tt>) keys are the characrers in
-      #     `charset`, values are hashes:
-      #     * `:old` (<tt>Integer</tt>) - glyph ID in the original font.
-      #     * `:new` (<tt>Integer</tt>) - glyph ID in the subset font.
-      #     that maps the characters in charmap to a
-      #   * `:table` (<tt>String</tt>) - serialized table.
-      #   * `:max_glyph_id` (<tt>Integer</tt>) - maximum glyph ID in the new font.
       def self.encode(charmap, encoding)
-        result = Cmap::Subtable.encode(charmap, encoding)
+        # Builds a cmap table holding the requested subtable plus, unless it is
+        # already the requested one, a Mac Roman (platform 1, encoding 0) one.
+        # Subtable.encode returns each subtable prefixed with an 8-byte encoding
+        # record: platformID (n), encodingID (n), offset (N).
+        result = Cmap::Subtable.encode(charmap, encoding)
+        subtables = [result.delete(:subtable)]
 
-        # pack 'version' and 'table-count'
-        result[:table] = [0, 1, result.delete(:subtable)].pack('nnA*')
-        result
+        # Illustrator needs a Mac Roman subtable to map glyph IDs back to
+        # characters when editing embedded font text.
+        # NOTE(review): keys are filtered by value only (<= 0xFF); confirm that
+        # callers' codes above 0x7F really are Mac Roman codes, not Unicode.
+        unless encoding == :mac_roman
+          mac_charmap = charmap.select { |code, _| code <= 0xFF }
+          mac_result = Cmap::Subtable.encode(mac_charmap, :mac_roman)
+          # Platform 1 sorts before platform 3, as record ordering requires.
+          subtables.unshift(mac_result[:subtable])
+        end
+
+        # cmap header: version(2) + numTables(2) = 4 bytes, followed by one
+        # 8-byte encoding record per subtable, followed by the subtable data.
+        data_start = 4 + (8 * subtables.length)
+        records = ''.b
+        data = ''.b
+        subtables.each do |subtable|
+          # Reuse the platform/encoding IDs Subtable.encode wrote, so that e.g.
+          # :unicode_ucs4 is labelled (3, 10) rather than a hard-coded (3, 1).
+          records << subtable[0, 4] << [data_start + data.bytesize].pack('N')
+          data << subtable[8..]
+        end
+
+        result[:table] = [0, subtables.length].pack('nn') + records + data
+        result
       end
 
       # Get Unicode encoding records.
       #
       # @return [Array<TTFunk::Table::Cmap::Subtable>]
       def unicode
-        # Because most callers just call .first on the result, put tables with
-        # highest-number format first. Unsupported formats will be ignored.
         @unicode ||=
           @tables
             .select { |table| table.unicode? && table.supported? }

diff --git a/lib/ttfunk/table/name.rb b/lib/ttfunk/table/name.rb
@@ -185,12 +185,49 @@ def strip_extended
       # @param key [String]
       # @return [String]
       def self.encode(names, key = '')
-        tag = Digest::SHA1.hexdigest(key)[0, 6]
+        # Generate a 6-character uppercase tag according to PDF spec section 5.5.3
+        # Convert hex digest to uppercase letters: 0-9 -> A-J, a-f -> K-P
+        # This maintains 1-to-1 mapping while satisfying the "6 uppercase letters" requirement
+        digest = Digest::SHA1.hexdigest(key)[0, 6]
+
+        tag = digest.chars.map do |c|
+          case c
+          when '0'..'9'
+            ('A'.ord + (c.ord - '0'.ord)).chr  # 0->A, 1->B, ..., 9->J
+          when 'a'..'f'
+            ('K'.ord + (c.ord - 'a'.ord)).chr  # a->K, b->L, ..., f->P
+          end
+        end.join
+
+        new_ps_name = "#{tag}+#{names.postscript_name}"
+
+        # Detect which platforms the original font uses for PostScript name (id=6).
+        # Mirror that structure: keep the platforms already present; emit both if none is.
+        # Illustrator requires platform 3 (Windows UTF-16BE); Acrobat accepts platform 1 (Mac Roman).
+        # Some fonts ship only with platform 3; adding a platform-1 record they never had
+        # causes Illustrator to misread the subset and render corrupted text.
+        original_ps_records = names.strings[6]
+        has_mac = original_ps_records.any? { |s| s.platform_id == 1 }
+        has_win = original_ps_records.any? { |s| s.platform_id == 3 }
 
-        postscript_name = NameString.new("#{tag}+#{names.postscript_name}", 1, 0, 0)
+        new_ps_records = []
+
+        if has_mac || !has_win
+          # Mac Roman (platform 1) — plain ASCII bytes
+          new_ps_records << NameString.new(new_ps_name, 1, 0, 0)
+        end
+
+        if has_win || !has_mac
+          # Windows Unicode UTF-16BE (platform 3, encoding 1, language 0x0409 = English US)
+          new_ps_records << NameString.new(
+            new_ps_name.encode('UTF-16BE').b,
+            3, 1, 0x0409
+          )
+        end
 
         strings = names.strings.dup
-        strings[6] = [postscript_name]
+        strings[6] = new_ps_records
+
         str_count = strings.reduce(0) { |sum, (_, list)| sum + list.length }
 
         table = [0, str_count, 6 + (12 * str_count)].pack('n*')

diff --git a/spec/ttfunk/ttf_encoder_spec.rb b/spec/ttfunk/ttf_encoder_spec.rb
@@ -66,7 +66,8 @@
 
       # verified via the Font-Validator tool at:
       # https://github.com/HinTak/Font-Validator
-      expect(checksum).to eq(0xEEB49DA9)
+      # Updated checksum after adding Windows UTF-16BE PostScript name record
+      expect(checksum).to eq(0xA423F7C3)
     end
 
     example_group 'maxp regression', issue: 102 do