Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 31 additions & 13 deletions lib/ttfunk/table/cmap.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,45 @@ class Cmap < Table
# @param charmap [Hash{Integer => Integer}]
# @param encoding [Symbol]
# @return [Hash]
# * `:charmap` (<tt>Hash{Integer => Hash}</tt>) keys are the characrers in
# `charset`, values are hashes:
# * `:old` (<tt>Integer</tt>) - glyph ID in the original font.
# * `:new` (<tt>Integer</tt>) - glyph ID in the subset font.
# that maps the characters in charmap to a
# * `:table` (<tt>String</tt>) - serialized table.
# * `:max_glyph_id` (<tt>Integer</tt>) - maximum glyph ID in the new font.
def self.encode(charmap, encoding)
result = Cmap::Subtable.encode(charmap, encoding)
# Encode the primary (unicode) subtable
unicode_result = Cmap::Subtable.encode(charmap, encoding)

# pack 'version' and 'table-count'
result[:table] = [0, 1, result.delete(:subtable)].pack('nnA*')
result
# Also encode Mac Roman (platform 1, encoding 0) so Illustrator can
# resolve glyph IDs back to characters when editing embedded font text.
# Without this subtable, Illustrator falls back to treating glyph IDs
# as raw character codes, producing gobbledygook.
# Only codepoints <= 0xFF can be represented in Mac Roman.
mac_charmap = charmap.select { |code, _| code <= 0xFF }
mac_result = Cmap::Subtable.encode(mac_charmap, :mac_roman)

# Strip the 8-byte record header (platformID nn + encodingID nn + offset N)
# that Subtable.encode prepends, leaving only the raw cmap format data.
mac_raw = mac_result[:subtable][8..]
unicode_raw = unicode_result[:subtable][8..]

# cmap header: version(2) + numTables(2) = 4 bytes
# Each encoding record: platformID(2) + encodingID(2) + offset(4) = 8 bytes
# Two records = 16 bytes
# Total before subtable data = 4 + 16 = 20 bytes
header_and_records_size = 4 + (2 * 8)

mac_offset = header_and_records_size
unicode_offset = header_and_records_size + mac_raw.bytesize

table = [0, 2].pack('nn') # version=0, numTables=2
table += [1, 0, mac_offset].pack('nnN') # Mac Roman record
table += [3, 1, unicode_offset].pack('nnN') # Windows Unicode record
table += mac_raw # Format 0 data
table += unicode_raw # Format 4 data

unicode_result.merge(table: table)
end

# Get Unicode encoding records.
#
# @return [Array<TTFunk::Table::Cmap::Subtable>]
def unicode
# Because most callers just call .first on the result, put tables with
# highest-number format first. Unsupported formats will be ignored.
@unicode ||=
@tables
.select { |table| table.unicode? && table.supported? }
Expand Down
43 changes: 40 additions & 3 deletions lib/ttfunk/table/name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,49 @@ def strip_extended
# @param key [String]
# @return [String]
def self.encode(names, key = '')
tag = Digest::SHA1.hexdigest(key)[0, 6]
# Generate a 6-character uppercase tag according to PDF spec section 5.5.3
# Convert hex digest to uppercase letters: 0-9 -> A-J, a-f -> K-P
# This maintains 1-to-1 mapping while satisfying the "6 uppercase letters" requirement
digest = Digest::SHA1.hexdigest(key)[0, 6]

tag = digest.chars.map do |c|
case c
when '0'..'9'
('A'.ord + (c.ord - '0'.ord)).chr # 0->A, 1->B, ..., 9->J
when 'a'..'f'
('K'.ord + (c.ord - 'a'.ord)).chr # a->K, b->L, ..., f->P
end
end.join

new_ps_name = "#{tag}+#{names.postscript_name}"

# Detect which platforms the original font uses for PostScript name (id=6).
# Mirror that structure exactly: don't add or remove platforms.
# Illustrator requires platform 3 (Windows UTF-16BE); Acrobat accepts platform 1 (Mac Roman).
# Some fonts ship only with platform 3; adding a platform-1 record they never had
# causes Illustrator to misread the subset and render corrupted text.
original_ps_records = names.strings[6]
has_mac = original_ps_records.any? { |s| s.platform_id == 1 }
has_win = original_ps_records.any? { |s| s.platform_id == 3 }

postscript_name = NameString.new("#{tag}+#{names.postscript_name}", 1, 0, 0)
new_ps_records = []

if has_mac || !has_win
# Mac Roman (platform 1) — plain ASCII bytes
new_ps_records << NameString.new(new_ps_name, 1, 0, 0)
end

if has_win || !has_mac
# Windows Unicode UTF-16BE (platform 3, encoding 1, language 0x0409 = English US)
new_ps_records << NameString.new(
new_ps_name.encode('UTF-16BE').b,
3, 1, 0x0409
)
end

strings = names.strings.dup
strings[6] = [postscript_name]
strings[6] = new_ps_records

str_count = strings.reduce(0) { |sum, (_, list)| sum + list.length }

table = [0, str_count, 6 + (12 * str_count)].pack('n*')
Expand Down
3 changes: 2 additions & 1 deletion spec/ttfunk/ttf_encoder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@

# verified via the Font-Validator tool at:
# https://github.com/HinTak/Font-Validator
expect(checksum).to eq(0xEEB49DA9)
# Updated checksum after adding Windows UTF-16BE PostScript name record
expect(checksum).to eq(0xA423F7C3)
end

example_group 'maxp regression', issue: 102 do
Expand Down
Loading