fulcrum6378/emoji-sequences-resolver.py

## emoji-sequences-resolver.py
import codecs


def uz(s: str, maxim: int) -> str:
    """Left-pad ``s`` with ``'0'`` characters until it is ``maxim`` long.

    A string that already has ``maxim`` or more characters comes back
    unchanged.
    """
    return s.rjust(maxim, '0')


def ucToChar(unicode: str):
    r"""Decode a hexadecimal code point string into the text it denotes.

    The digits are zero-padded with ``uz`` and spliced into a Python escape
    sequence: ``\uXXXX`` when the input has at most four characters,
    ``\UXXXXXXXX`` otherwise. The ``unicode_escape`` codec then turns that
    escape sequence into the resulting string.
    """
    is_short = len(unicode) <= 4
    escape = ('\\u' + uz(unicode, 4)) if is_short else ('\\U' + uz(unicode, 8))
    return codecs.decode(escape.encode('utf-8'), 'unicode_escape')


# Read both data files and pool their raw lines into a single list.
with open('emoji-sequences.txt', 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')

with open('emoji-zwj-sequences.txt', 'r', encoding='utf-8') as f:
    lines.extend(f.read().split('\n'))

# Every emoji string decoded from the data lines, in the order encountered.
emojis: list = []
for ln in lines:
    # Skip blank lines, whole-line comments, and lines without a '# E' marker.
    if ln == '' or ln.startswith('#') or '# E' not in ln:
        continue

    # The first ';'-separated field holds the code point(s); drop the
    # trailing spaces left before the separator.
    spl = ln.split(';')[0].rstrip(' ')

    if '..' not in spl and ' ' not in spl:
        # A single code point.
        try:
            emojis.append(ucToChar(spl))
        except UnicodeDecodeError as err:
            # Report the offending field while keeping the decoder error
            # attached as the cause.
            raise Exception("<" + spl + ">") from err  # ln
    elif ' ' in spl:
        # A space-separated sequence of code points forming one emoji.
        multi = [ucToChar(x) for x in spl.split(' ')]
        emojis.append(''.join(multi))
        if multi[-1] == '\ufe0f':  # Android compatibility (which adds 10 more kilobytes)
            # Also emit the sequence without its final U+FE0F.
            emojis.append(''.join(multi[:-1]))
    else:
        # An inclusive 'first..last' range of code points. A reversed range
        # yields nothing instead of walking upward until decoding fails.
        a, b = spl.split('..')
        for code in range(int(a, 16), int(b, 16) + 1):
            emojis.append(ucToChar(format(code, 'x')))

# Write all collected emojis to one file, separated by single spaces.
with open('emojis.txt', 'w', encoding='utf-8') as f:
    f.write(' '.join(emojis))

# For more info: https://www.unicode.org/reports/tr51/
# How-to: https://stackoverflow.com/questions/51585291/how-to-interpret-unicode-notation-in-python
	import codecs


	def uz(s: str, maxim: int) -> str:
	    """Left-pad ``s`` with ``'0'`` characters until it is ``maxim`` long.

	    A string that already has ``maxim`` or more characters comes back
	    unchanged.
	    """
	    return s.rjust(maxim, '0')


	def ucToChar(unicode: str):
	    r"""Decode a hexadecimal code point string into the text it denotes.

	    The digits are zero-padded with ``uz`` and spliced into a Python escape
	    sequence: ``\uXXXX`` when the input has at most four characters,
	    ``\UXXXXXXXX`` otherwise. The ``unicode_escape`` codec then turns that
	    escape sequence into the resulting string.
	    """
	    if len(unicode) <= 4:
	        return codecs.decode(bytes('\\u' + uz(unicode, 4), 'utf-8'), 'unicode_escape')
	    else:
	        return codecs.decode(bytes('\\U' + uz(unicode, 8), 'utf-8'), 'unicode_escape')


	# Read both data files and pool their raw lines into a single list.
	with open('emoji-sequences.txt', 'r', encoding='utf-8') as f:
	    lines = f.read().split('\n')

	with open('emoji-zwj-sequences.txt', 'r', encoding='utf-8') as f:
	    lines.extend(f.read().split('\n'))

	# Every emoji string decoded from the data lines, in the order encountered.
	emojis: list = []
	for ln in lines:
	    # Skip blank lines, whole-line comments, and lines without a '# E' marker.
	    if ln == '' or ln.startswith('#') or '# E' not in ln:
	        continue

	    # The first ';'-separated field holds the code point(s); drop the
	    # trailing spaces left before the separator.
	    spl = ln.split(';')[0].rstrip(' ')

	    if '..' not in spl and ' ' not in spl:
	        # A single code point.
	        try:
	            emojis.append(ucToChar(spl))
	        except UnicodeDecodeError as err:
	            # Report the offending field while keeping the decoder error
	            # attached as the cause.
	            raise Exception("<" + spl + ">") from err  # ln
	    elif ' ' in spl:
	        # A space-separated sequence of code points forming one emoji.
	        multi = [ucToChar(x) for x in spl.split(' ')]
	        emojis.append(''.join(multi))
	        if multi[-1] == '\ufe0f':  # Android compatibility (which adds 10 more kilobytes)
	            # Also emit the sequence without its final U+FE0F.
	            emojis.append(''.join(multi[:-1]))
	    else:
	        # An inclusive 'first..last' range of code points. A reversed range
	        # yields nothing instead of walking upward until decoding fails.
	        a, b = spl.split('..')
	        for code in range(int(a, 16), int(b, 16) + 1):
	            emojis.append(ucToChar(format(code, 'x')))

	# Write all collected emojis to one file, separated by single spaces.
	with open('emojis.txt', 'w', encoding='utf-8') as f:
	    f.write(' '.join(emojis))

	# For more info: https://www.unicode.org/reports/tr51/
	# How-to: https://stackoverflow.com/questions/51585291/how-to-interpret-unicode-notation-in-python