Skip to content

Instantly share code, notes, and snippets.

@shyang
Created May 5, 2012 18:27
Show Gist options
  • Save shyang/2604584 to your computer and use it in GitHub Desktop.
Save shyang/2604584 to your computer and use it in GitHub Desktop.
Create Softbank and Unified Emoji mapping for iOS.
#! /usr/bin/env python3
'''Create Softbank and Unified Emoji mapping for iOS.'''
# download full.html to same directory first
# curl -O http://www.unicode.org/%7Escherer/emoji4unicode/snapshot/full.html
import re
import sys
def convert(code):
    r'''Turn a ``U+XXXX`` code point label into a C-style escape sequence.

    Args:
        code: Text such as ``'U+1F604'``. Text that does not start with
            ``'U+'`` is handed back unchanged.

    Returns:
        A string holding a literal backslash escape with upper-case hex
        digits: ``\xHH`` for values up to 127, ``\uHHHH`` for values up to
        65535 and ``\UHHHHHHHH`` for anything larger.

    Raises:
        ValueError: if the text after the ``'U+'`` prefix is not hexadecimal.
    '''
    prefix = 'U+'
    if not code.startswith(prefix):
        return code
    # Slice off only the leading prefix; stripping every 'U+' would silently
    # merge a malformed "U+xxxxU+yyyy" label into one meaningless number.
    code_point = int(code[len(prefix):], 16)
    if code_point <= 0x7F:
        return '\\x%02X' % code_point
    if code_point <= 0xFFFF:
        return '\\u%04X' % code_point
    return '\\U%08X' % code_point
# Patterns are compiled once, as raw strings so that "\+" reaches the regex
# engine instead of being treated as an (invalid) string escape.
_EMOJI_ROW = re.compile(r'<tr id=e-...')
_TABLE_CELL = re.compile(r'<td.*?</td>')
_UNIFIED_CODES = re.compile(r'U\+[0-9A-F]{4,5}(?: U\+[0-9A-F]{4,5})?')
_ROUND_TRIP_CODE = re.compile(r"class='round_trip'.*?(U\+[0-9A-F]{4,5})")

# 1:1 mapping
all_unified = []
all_softbank = []
# 2:1 mapping
multi_unified = []
multi_softbank = []

# Only ASCII markup is matched below, so undecodable bytes are replaced
# rather than allowed to abort the run on an unexpected platform encoding.
with open('full.html', encoding='utf-8', errors='replace') as html:
    for line in html:
        # Rows of interest start with <tr id=e-XXX>; skip everything else.
        if not _EMOJI_ROW.match(line):
            continue

        # Cells: id, representation, name/annotation, then the remaining
        # columns, of which the third is read as the Softbank code.
        _row_id, rep, _name_anno, *carriers = _TABLE_CELL.findall(line)

        sequences = _UNIFIED_CODES.findall(rep)
        if not sequences:
            continue

        # Keep only rows whose Softbank cell carries a round-trip code point.
        softbank_hit = _ROUND_TRIP_CODE.search(carriers[2])
        if softbank_hit is None:
            continue
        softbank = convert(softbank_hit.group(1))

        unified = sequences[0]
        if ' ' in unified:
            # Two unified code points map onto a single Softbank code point.
            first, second = unified.split(' ')
            multi_unified.append(convert(first) + convert(second))
            multi_softbank.append(softbank)
        else:
            all_unified.append(convert(unified))
            all_softbank.append(softbank)
def assert_unique(array):
    '''Check that no element of *array* occurs more than once.

    Args:
        array: Iterable of hashable items.

    Raises:
        AssertionError: if any item is repeated; the message lists the
            repeated items. Raised explicitly rather than through an
            ``assert`` statement so the check still runs under ``python -O``.
    '''
    seen = set()
    repeated = []
    for item in array:
        if item in seen:
            repeated.append(item)
        else:
            seen.add(item)
    if repeated:
        raise AssertionError('duplicate entries: %r' % (repeated,))
def print_array(name, array):
    '''Print *array* as an Objective-C ``NSString`` constant array.

    The output is a single line of the form
    ``static NSString *const NAME[] = {@"a", @"b", };``.

    Args:
        name: Identifier to use for the generated array.
        array: Strings to emit, in order; they are written verbatim between
            the ``@"`` and ``"`` delimiters.

    Raises:
        AssertionError: propagated from ``assert_unique`` when *array*
            contains a repeated element.
    '''
    assert_unique(array)
    # Every element, including the last, is followed by ", ".
    elements = ''.join('@"' + item + '", ' for item in array)
    print(('static NSString *const %s[] = {' % name) + elements + '};')
def print_string(name, array):
    '''Print the items of *array* joined into one Objective-C ``NSString``.

    The output is a single line of the form
    ``static NSString *const NAME = @"...";`` where ``...`` is every item of
    *array* written back to back with no separator.

    Args:
        name: Identifier to use for the generated constant.
        array: Items to concatenate, in order; each is converted with
            ``str`` before joining.
    '''
    joined = ''.join(str(item) for item in array)
    print(('static NSString *const %s = @"' % name) + joined + '";')
# One Objective-C array per mapping list built above, in a fixed order.
for table_name, table in (
    ('all_unified', all_unified),
    ('all_softbank', all_softbank),
    ('multi_unified', multi_unified),
    ('multi_softbank', multi_softbank),
):
    print_array(table_name, table)

# Flat strings holding every code point of a list back to back.
print('// just all_unified')
print_string('all_unified_set', all_unified)
print('// all_softbank + multi_softbank')
combined_softbank = all_softbank + multi_softbank
print_string('softbank_set', combined_softbank)

# Element counts of the 1:1 and 2:1 unified lists.
single_count = len(all_unified)
multi_count = len(multi_unified)
print('static const NSInteger kNumOfEmoji = %d;' % single_count)
print('static const NSInteger kNumOfMultiUnicodeEmoji = %d;' % multi_count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment