Skip to content

Instantly share code, notes, and snippets.

@updateing
Created September 21, 2018 15:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save updateing/a8b546df19c5ff429103093668557d40 to your computer and use it in GitHub Desktop.
Save updateing/a8b546df19c5ff429103093668557d40 to your computer and use it in GitHub Desktop.
subset_noto_cjk.py in multiple processes
#!/usr/bin/python
# coding=UTF-8
#
# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a curated subset of Noto CJK for Android."""
from multiprocessing import Process
import os
from fontTools import ttLib
from nototools import font_data
from nototools import tool_utils
from nototools import ttc_utils
# Characters supported in Noto CJK fonts that UTR #51 recommends default to
# emoji-style.
# Each entry is annotated with its glyph and Unicode character name.
EMOJI_IN_CJK = {
    0x26BD,  # ⚽ SOCCER BALL
    0x26BE,  # ⚾ BASEBALL
    0x1F18E,  # 🆎 NEGATIVE SQUARED AB
    0x1F191,  # 🆑 SQUARED CL
    0x1F192,  # 🆒 SQUARED COOL
    0x1F193,  # 🆓 SQUARED FREE
    0x1F194,  # 🆔 SQUARED ID
    0x1F195,  # 🆕 SQUARED NEW
    0x1F196,  # 🆖 SQUARED NG
    0x1F197,  # 🆗 SQUARED OK
    0x1F198,  # 🆘 SQUARED SOS
    0x1F199,  # 🆙 SQUARED UP WITH EXCLAMATION MARK
    0x1F19A,  # 🆚 SQUARED VS
    0x1F201,  # 🈁 SQUARED KATAKANA KOKO
    0x1F21A,  # 🈚 SQUARED CJK UNIFIED IDEOGRAPH-7121
    0x1F22F,  # 🈯 SQUARED CJK UNIFIED IDEOGRAPH-6307
    0x1F232,  # 🈲 SQUARED CJK UNIFIED IDEOGRAPH-7981
    0x1F233,  # 🈳 SQUARED CJK UNIFIED IDEOGRAPH-7A7A
    0x1F234,  # 🈴 SQUARED CJK UNIFIED IDEOGRAPH-5408
    0x1F235,  # 🈵 SQUARED CJK UNIFIED IDEOGRAPH-6E80
    0x1F236,  # 🈶 SQUARED CJK UNIFIED IDEOGRAPH-6709
    0x1F238,  # 🈸 SQUARED CJK UNIFIED IDEOGRAPH-7533
    0x1F239,  # 🈹 SQUARED CJK UNIFIED IDEOGRAPH-5272
    0x1F23A,  # 🈺 SQUARED CJK UNIFIED IDEOGRAPH-55B6
    0x1F250,  # 🉐 CIRCLED IDEOGRAPH ADVANTAGE
    0x1F251,  # 🉑 CIRCLED IDEOGRAPH ACCEPT
}
# Characters we have decided we are doing as emoji-style in Android,
# despite UTR #51's recommendation.
# Each entry is annotated with its glyph and Unicode character name.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}
# We don't want support for ASCII control chars.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Every codepoint to strip from the fonts' cmap tables, as a sorted list:
# the union of both emoji sets above and the control characters.
EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)
def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Removes a set of characters from a font file's cmap table.

    Args:
        infile: Path of the font file to load.
        outfile: Path the modified font is saved to. Callers in this file
            pass the same path as infile to rewrite the font in place.
        exclude: Codepoints to remove; handed unchanged to
            font_data.delete_from_cmap. Defaults to an empty frozenset.
    """
    font = ttLib.TTFont(infile)
    try:
        font_data.delete_from_cmap(font, exclude)
        font.save(outfile)
    finally:
        # Release the reader TTFont opened on infile, including when
        # deleting or saving raises.
        font.close()
# Directory the TTC members are extracted into and processed in.
TEMP_DIR = 'subsetted'


def remove_codepoints_from_ttc(ttc_name):
    """Strips EXCLUDED_CODEPOINTS from every font of a TTC, one process each.

    The fonts are extracted with ttc_utils.ttcfile_extract into TEMP_DIR.
    With TEMP_DIR as the working directory, each extracted font is rewritten
    in place by remove_from_cmap in its own process, the collection is
    rebuilt with ttc_utils.ttcfile_build, and the extracted fonts are
    deleted.

    Args:
        ttc_name: Name of the TTC file to process. The same name is passed
            to ttcfile_build while the working directory is TEMP_DIR.

    Raises:
        RuntimeError: If any subsetting process exits with a non-zero code.
            The collection is not rebuilt and the extracted fonts are left
            in TEMP_DIR in that case.
    """
    otf_names = ttc_utils.ttcfile_extract(ttc_name, TEMP_DIR)

    with tool_utils.temp_chdir(TEMP_DIR):
        workers = []
        for otf_name in otf_names:
            # Single-argument call form prints the same text on Python 2
            # and Python 3.
            print('Subsetting %s...' % otf_name)
            worker = Process(
                target=remove_from_cmap,
                args=(otf_name, otf_name),
                kwargs={'exclude': EXCLUDED_CODEPOINTS})
            worker.start()
            workers.append((otf_name, worker))

        # Wait for every worker before deciding anything, so no child is
        # left running if one of them failed.
        failed = []
        for otf_name, worker in workers:
            worker.join()
            if worker.exitcode != 0:
                failed.append(otf_name)
        if failed:
            # An exception in a child only shows up as its exit code;
            # rebuilding here would silently ship fonts that were not
            # subsetted.
            raise RuntimeError(
                'Subsetting failed for: %s' % ', '.join(failed))

        ttc_utils.ttcfile_build(ttc_name, otf_names)
        for otf_name in otf_names:
            os.remove(otf_name)
#remove_codepoints_from_ttc('NotoSansCJK-Thin.ttc')
#remove_codepoints_from_ttc('NotoSansCJK-Light.ttc')
#remove_codepoints_from_ttc('NotoSansCJK-Regular.ttc')
#remove_codepoints_from_ttc('NotoSansCJK-Medium.ttc')
#remove_codepoints_from_ttc('NotoSansCJK-Bold.ttc')
#remove_codepoints_from_ttc('NotoSansCJK-Black.ttc')
#remove_codepoints_from_ttc('NotoSerifCJK-Light.ttc')
#remove_codepoints_from_ttc('NotoSerifCJK-Medium.ttc')
#remove_codepoints_from_ttc('NotoSerifCJK-Regular.ttc')
#remove_codepoints_from_ttc('NotoSerifCJK-SemiBold.ttc')
#remove_codepoints_from_ttc('NotoSerifCJK-Bold.ttc')
#remove_codepoints_from_ttc('NotoSerifCJK-Black.ttc')
# TTC files processed when this script is run.
FONT_LIST = [
    'NotoSerifCJK-SemiBold.ttc',
    'NotoSerifCJK-Light.ttc',
    'NotoSerifCJK-Bold.ttc',
    'NotoSerifCJK-Black.ttc',
    'NotoSerifCJK-Medium.ttc',
]


def main():
    """Runs remove_codepoints_from_ttc for each FONT_LIST entry in parallel.

    One process is started per TTC file and all of them are joined before
    returning.

    Raises:
        SystemExit: With a message naming the TTC files whose process exited
            with a non-zero code, so the script's exit status reflects the
            failure.
    """
    workers = []
    for ttc_name in FONT_LIST:
        worker = Process(target=remove_codepoints_from_ttc, args=(ttc_name,))
        worker.start()
        workers.append((ttc_name, worker))

    failed = []
    for ttc_name, worker in workers:
        worker.join()
        if worker.exitcode != 0:
            failed.append(ttc_name)
    if failed:
        raise SystemExit('Failed to subset: %s' % ', '.join(failed))


# Child processes may re-import this module (the "spawn" start method does);
# without this guard each import would start the whole job again.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment