alexott/emojis-test.txt

## emojis-test.txt
test ♈ up ☝️ light skin ☝🏻 Zimbabwe 🇿🇼 England 🏴 keycap0  0️⃣end 0

## emojis.py
"""Module to work with emojis in text"""
import pickle

# TODO: add the function that will load all emojis, with their names, etc

def add_emoji(emojis, ch1, ch2=''):
    """Register one emoji in the lookup table ``emojis`` (modified in place).

    ``ch1`` is the first character of the emoji and ``ch2`` holds the
    remaining characters ('' for a single-character emoji).  The entry
    ``emojis[ch1]`` is True while only the single-character form is known;
    once a longer sequence is added it becomes a dictionary keyed by the
    known continuations, where the '' key records that ``ch1`` is also an
    emoji on its own.
    """
    existing = emojis.get(ch1)
    if isinstance(existing, dict):
        existing[ch2] = True
    elif existing:
        if ch2:
            # Promote the single-character entry to a dictionary and store it
            # back into the table; rebinding a local name alone would
            # silently drop the new sequence.
            emojis[ch1] = {ch2: True, '': True}
    elif ch2:
        emojis[ch1] = {ch2: True}
    else:
        emojis[ch1] = True

# emoji-all.txt is the concatenation of emoji-zwj-sequences.txt, emoji-data.txt, and
# emoji-sequences.txt downloaded from https://unicode.org/Public/emoji/12.0/,
# with the entries for '#', numbers, and the copyright/trademark/registered marks removed by hand

# TODO: make a list of exclusions, and load all files without manual editing

def load_emojis(fname="emoji-all.txt"):
    """Build the emoji lookup table from a Unicode emoji data file.

    The file is expected to use the layout of the emoji-*.txt files from
    https://unicode.org/Public/emoji/12.0/: the text before the first ';' on
    a line is either a single hexadecimal code point, a ``first..last`` code
    point range, or a space-separated sequence of code points.  Empty lines,
    lines starting with '#', and lines without ';' are ignored.

    Returns a dictionary keyed by the first character of every emoji.  The
    value is True when the emoji consists of that one character only, or a
    dictionary keyed by the remaining characters of each known sequence; in
    that dictionary the '' key means the first character is an emoji by
    itself as well.

    Raises OSError if the file cannot be opened and ValueError if a field
    is not valid hexadecimal.
    """
    emojis = {}
    # The Unicode data files carry emoji glyphs in their trailing comments,
    # so decode explicitly instead of relying on the platform default.
    with open(fname, "r", encoding="utf-8") as f:
        for line in f:
            if line.startswith('#'):
                continue
            spec, sep, _ = line.partition(';')
            spec = spec.strip()
            if not sep or not spec:
                continue
            if '..' in spec:
                first, _, last = spec.partition('..')
                for code in range(int(first, 16), int(last, 16) + 1):
                    add_emoji(emojis, chr(code))
            else:
                # split() tolerates repeated spaces/tabs between code points.
                chars = [chr(int(code, 16)) for code in spec.split()]
                add_emoji(emojis, chars[0], ''.join(chars[1:]))
    return emojis

def generate_pickle(pickle_file="emojis.pickle", emoji_file="emoji-all.txt"):
    """Parse ``emoji_file`` with load_emojis() and pickle the table into ``pickle_file``."""
    table = load_emojis(emoji_file)
    # The table is fully built before the output file is opened for writing.
    with open(pickle_file, "wb") as out:
        pickle.dump(table, out)


def load_pickle(pickle_file="emojis.pickle"):
    """Load the emoji table stored in ``pickle_file``.

    Returns the unpickled object, or an empty dictionary when the file
    cannot be opened or does not contain a complete, valid pickle (a
    message is printed in both cases).

    NOTE: unpickling can execute arbitrary code; only load pickle files
    that come from a trusted source.
    """
    emojis = {}
    try:
        with open(pickle_file, "rb") as f:
            emojis = pickle.load(f)
    except (pickle.PickleError, EOFError) as ex:
        # EOFError is what pickle raises for an empty or truncated file.
        print('Pickling error: {}'.format(ex))
    except IOError:
        print('Cannot open ' + pickle_file)

    return emojis

def strip_emojis(emojis, txt):
    """Return ``txt`` with every emoji known to ``emojis`` removed.

    ``emojis`` maps the first character of an emoji either to a non-dict
    value such as True (single-character emoji) or to a dictionary whose
    keys are the possible continuations, with '' meaning that the first
    character alone is an emoji too.

    At each position only the longest matching continuation is consumed.
    A character that merely starts multi-character emojis is kept when
    none of its continuations follows it.
    """
    kept = []
    pos = 0
    txt_len = len(txt)
    while pos < txt_len:
        c = txt[pos]
        pos += 1
        if c not in emojis:
            kept.append(c)
            continue
        entry = emojis[c]
        if not isinstance(entry, dict):
            # Single-character emoji: drop it.
            continue
        # Sequences frequently share prefixes; skipping the length of every
        # matching continuation would swallow the text after the emoji, so
        # consume the longest match only.  startswith(k, pos) avoids copying
        # the tail of the text.
        longest = max(
            (len(k) for k in entry if k and txt.startswith(k, pos)),
            default=0,
        )
        if longest:
            pos += longest
        elif '' not in entry:
            kept.append(c)
    return ''.join(kept)

# Test:
# with open('emoji-test.txt', encoding='utf8') as f:
#    emoji_test = f.read().strip()
	"""Module to work with emojis in text"""
	import pickle

	# TODO: add the function that will load all emojis, with their names, etc

	def add_emoji(emojis, ch1, ch2=''):
		"""Register one emoji in the lookup table ``emojis`` (modified in place).

		``ch1`` is the first character of the emoji and ``ch2`` holds the
		remaining characters ('' for a single-character emoji).  The entry
		``emojis[ch1]`` is True while only the single-character form is known;
		once a longer sequence is added it becomes a dictionary keyed by the
		known continuations, where the '' key records that ``ch1`` is also an
		emoji on its own.
		"""
		existing = emojis.get(ch1)
		if isinstance(existing, dict):
			existing[ch2] = True
		elif existing:
			if ch2:
				# Promote the single-character entry to a dictionary and store
				# it back into the table; rebinding a local name alone would
				# silently drop the new sequence.
				emojis[ch1] = {ch2: True, '': True}
		elif ch2:
			emojis[ch1] = {ch2: True}
		else:
			emojis[ch1] = True

	# emoji-all.txt consists of files emoji-zwj-sequences.txt, emoji-data.txt, and
	# emoji-sequences.txt downloaded from https://unicode.org/Public/emoji/12.0/,
	# and with manually removed entries for #, numbers, and copyright/trademark/regmark

	# TODO: make a list of exclusions, and load all files without manual editing

	def load_emojis(fname="emoji-all.txt"):
		"""Build the emoji lookup table from a Unicode emoji data file.

		The file is expected to use the layout of the emoji-*.txt files from
		https://unicode.org/Public/emoji/12.0/: the text before the first ';'
		on a line is either a single hexadecimal code point, a ``first..last``
		code point range, or a space-separated sequence of code points.  Empty
		lines, lines starting with '#', and lines without ';' are ignored.

		Returns a dictionary keyed by the first character of every emoji.  The
		value is True when the emoji consists of that one character only, or a
		dictionary keyed by the remaining characters of each known sequence;
		in that dictionary the '' key means the first character is an emoji by
		itself as well.

		Raises OSError if the file cannot be opened and ValueError if a field
		is not valid hexadecimal.
		"""
		emojis = {}
		# The Unicode data files carry emoji glyphs in their trailing comments,
		# so decode explicitly instead of relying on the platform default.
		with open(fname, "r", encoding="utf-8") as f:
			for line in f:
				if line.startswith('#'):
					continue
				spec, sep, _ = line.partition(';')
				spec = spec.strip()
				if not sep or not spec:
					continue
				if '..' in spec:
					first, _, last = spec.partition('..')
					for code in range(int(first, 16), int(last, 16) + 1):
						add_emoji(emojis, chr(code))
				else:
					# split() tolerates repeated spaces/tabs between code points.
					chars = [chr(int(code, 16)) for code in spec.split()]
					add_emoji(emojis, chars[0], ''.join(chars[1:]))
		return emojis

	def generate_pickle(pickle_file="emojis.pickle", emoji_file="emoji-all.txt"):
		"""Parse ``emoji_file`` with load_emojis() and pickle the table into ``pickle_file``."""
		table = load_emojis(emoji_file)
		# The table is fully built before the output file is opened for writing.
		with open(pickle_file, "wb") as out:
			pickle.dump(table, out)


	def load_pickle(pickle_file="emojis.pickle"):
		"""Load the emoji table stored in ``pickle_file``.

		Returns the unpickled object, or an empty dictionary when the file
		cannot be opened or does not contain a complete, valid pickle (a
		message is printed in both cases).

		NOTE: unpickling can execute arbitrary code; only load pickle files
		that come from a trusted source.
		"""
		emojis = {}
		try:
			with open(pickle_file, "rb") as f:
				emojis = pickle.load(f)
		except (pickle.PickleError, EOFError) as ex:
			# EOFError is what pickle raises for an empty or truncated file.
			print('Pickling error: {}'.format(ex))
		except IOError:
			print('Cannot open ' + pickle_file)

		return emojis

	def strip_emojis(emojis, txt):
		"""Return ``txt`` with every emoji known to ``emojis`` removed.

		``emojis`` maps the first character of an emoji either to a non-dict
		value such as True (single-character emoji) or to a dictionary whose
		keys are the possible continuations, with '' meaning that the first
		character alone is an emoji too.

		At each position only the longest matching continuation is consumed.
		A character that merely starts multi-character emojis is kept when
		none of its continuations follows it.
		"""
		kept = []
		pos = 0
		txt_len = len(txt)
		while pos < txt_len:
			c = txt[pos]
			pos += 1
			if c not in emojis:
				kept.append(c)
				continue
			entry = emojis[c]
			if not isinstance(entry, dict):
				# Single-character emoji: drop it.
				continue
			# Sequences frequently share prefixes; skipping the length of
			# every matching continuation would swallow the text after the
			# emoji, so consume the longest match only.  startswith(k, pos)
			# avoids copying the tail of the text.
			longest = max(
				(len(k) for k in entry if k and txt.startswith(k, pos)),
				default=0,
			)
			if longest:
				pos += longest
			elif '' not in entry:
				kept.append(c)
		return ''.join(kept)

	# Test:
	# with open('emoji-test.txt', encoding='utf8') as f:
	# emoji_test = f.read().strip()