serif/morse_vs_ascii.py

## morse_vs_ascii.py
#!/usr/bin/env python3


import re
from typing import Dict, List
from os import path
from urllib.request import urlopen
from string import ascii_uppercase
# from itertools import product
# from collections import Counter


def main():
    """Run the Morse-versus-ASCII size comparison and print the results.

    Steps, in output order:
      1. print both lookup tables of a fresh ``Translator``;
      2. print one table row per letter for two letter groups;
      3. print the full report for two sample texts, followed by the
         corpus-weighted bit total and the 5/6/8-bit width comparison;
      4. replace the Morse table with two generated tables (default
         ordering, then the ordering of ``Corpus.freq``) and repeat the
         report for the second sample text.
    """
    translator: Translator = Translator()
    print('\nGenerating corpus data')
    corpus: Corpus = Corpus()

    # Show dict contents (same text the ``=`` f-string specifier would emit
    # for a variable named ``t``).
    print(f't.d_morse={translator.d_morse!r}')
    print(f't.d_morse_bin={translator.d_morse_bin!r}')
    print()

    # One table for the most common letters, one for the least common.
    for letters in ('ETAOINSHR', 'JXQZ'):
        translator.header()
        for letter in letters:
            translator.test(letter, True)
        print()

    # Text test 1
    translator.test('HELLO WORLD.')

    # Text test 2, Article 1 of the Universal Declaration of Human Rights
    article_1: str = (
        'All human beings are born free and equal in dignity and rights. '
        'They are endowed with reason and conscience '
        'and should act towards one another in a spirit of brotherhood.'
    ).upper()
    translator.test(article_1)
    translator.fast_test(corpus.freq)

    # Bit width comparison
    translator.test_widths(article_1)

    # Generated tables: first the Generator's default ordering, then the
    # ordering observed in the corpus.  Each Generator is created only after
    # the previous stage has been reported, exactly as in a straight-line
    # script.
    stages = (
        ('\n-- Optimized by Wikipedia\'s letter frequency\n', {}),
        ('\n-- Optimized by frequency of corpus with CW abbreviations\n',
         {'etaoin': ''.join(corpus.freq.keys())}),
    )
    for banner, generator_kwargs in stages:
        print(banner)
        generated: Generator = Generator(**generator_kwargs)
        translator.d_morse = generated.morse
        translator.gen_morse_bin()
        translator.test(article_1)
        translator.fast_test(corpus.freq)


class Translator:
    """Translate text to Morse code, to a bit stream of that Morse code, and
    to fixed-width binary, and print size comparisons between them.

    Attributes:
        d_morse: character -> Morse code written with ``.`` and ``–``
            (en dash, U+2013).  The space character maps to a single space.
        d_morse_bin: character -> bit string derived from ``d_morse`` by
            ``gen_morse_bin``.  Call ``gen_morse_bin`` again after replacing
            ``d_morse``.
    """

    d_morse: Dict[str, str]
    d_morse_bin: Dict[str, str]

    # Bit pattern emitted for each symbol that may appear in a Morse code.
    # NOTE(review): this looks like on/off keying in dot-length units
    # (dot = 1 on + 1 off, dash = 3 on + 1 off) -- confirm.
    _SYMBOL_BITS = {' ': '00', '.': '10', '–': '1110'}
    # Appended once after every character's pattern.
    _CHAR_GAP = '00'

    def __init__(self):
        """Load the built-in Morse table and derive the binary table."""
        # Dict: Morse
        self.d_morse = {
            'A': '.–',
            'B': '–...',
            'C': '–.–.',
            'D': '–..',
            'E': '.',
            'F': '..–.',
            'G': '––.',
            'H': '....',
            'I': '..',
            'J': '.–––',
            'K': '–.–',
            'L': '.–..',
            'M': '––',
            'N': '–.',
            'O': '–––',
            'P': '.––.',
            'Q': '––.–',
            'R': '.–.',
            'S': '...',
            'T': '–',
            'U': '..–',
            'V': '...–',
            'W': '.––',
            'X': '–..–',
            'Y': '–.––',
            'Z': '––..',
            '1': '.––––',
            '2': '..–––',
            '3': '...––',
            '4': '....–',
            '5': '.....',
            '6': '–....',
            '7': '––...',
            '8': '–––..',
            '9': '––––.',
            '0': '–––––',
            '.': '.–.–.–',
            ',': '––..––',
            '?': '..––..',
            '\'': '.––––.',
            '!': '–.–.––',
            '/': '–..–.',
            '(': '–.––.',
            ')': '–.––.–',
            '&': '.–...',
            ':': '–––...',
            ';': '–.–.–.',
            '=': '–...–',
            '+': '.–.–.',
            '-': '–....–',
            '_': '..––.–',
            '"': '.–..–.',
            '$': '...–..–',
            '@': '.––.–.',
            'À': '.––.–',
            'Ä': '.–.–',
            'Ć': '–.–..',
            'Š': '––––',
            'Ð': '..––.',
            'È': '.–..–',
            # Accented e is ..–.. ; it used to repeat the code of 'È'.
            'É': '..–..',
            'Ĝ': '––.–.',
            'Ĵ': '.–––.',
            'Ñ': '––.––',
            # 'Ń' shares the code of 'Ñ'.  It is listed separately because
            # Corpus and Generator in this file use 'Ń', and a missing key
            # makes to_morse_bin raise KeyError.
            'Ń': '––.––',
            'Ó': '–––.',
            'Ś': '...–...',
            'Ŝ': '...–.',
            'Þ': '.––..',
            'Ü': '..––',
            'Ź': '––..–.',
            'Ż': '––..–',
            ' ': ' ',
        }
        self.gen_morse_bin()

    def gen_morse_bin(self):
        """Rebuild ``d_morse_bin`` from the current ``d_morse``.

        Every symbol of a code is replaced by its bit pattern and a
        two-bit gap is appended per character.

        Raises:
            SystemExit: if a code contains a symbol other than space,
                ``.`` or ``–`` (same behaviour as calling ``exit`` with the
                message, without depending on the ``site`` module).
        """
        table: Dict[str, str] = {}
        for char, code in self.d_morse.items():
            pieces: List[str] = []
            for symbol in code:
                if symbol not in self._SYMBOL_BITS:
                    raise SystemExit(
                        f'unexpected char "{symbol}" in d_morse["{char}"]')
                pieces.append(self._SYMBOL_BITS[symbol])
            pieces.append(self._CHAR_GAP)
            table[char] = ''.join(pieces)
        self.d_morse_bin = table

    def to_morse(self, abc: str, space: bool = True) -> str:
        """Return the Morse code of ``abc``.

        Characters without an entry in ``d_morse`` contribute no code.  When
        ``space`` is true a single space follows every input character,
        known or not.
        """
        separator: str = ' ' if space else ''
        # Look up the *character*; the previous code tested the output
        # accumulated so far, so almost nothing was ever translated.
        return ''.join(self.d_morse.get(c, '') + separator for c in abc)

    def to_morse_bin(self, abc: str, space: bool = True) -> str:
        """Return the bit-string form of ``abc`` using ``d_morse_bin``.

        When ``space`` is true a single space follows every character.

        Raises:
            KeyError: if a character has no entry in ``d_morse_bin``.
        """
        separator: str = ' ' if space else ''
        return ''.join(self.d_morse_bin[c] + separator for c in abc)

    @staticmethod
    def to_bin(abc: str, bits: int = 5, space: bool = True) -> str:
        """Return ``abc`` as fixed-width binary, ``bits`` digits per character.

        A space is encoded as all zeros, ``.`` as the value 1, and any other
        character as the low ``bits`` bits of its code point.  When ``space``
        is true a single space follows every character.

        Raises:
            SystemExit: if ``bits`` is not in the range 1..8.
        """
        if not 0 < bits < 9:
            raise SystemExit(f'invalid bit width: {bits}')
        separator: str = ' ' if space else ''
        mask: int = (1 << bits) - 1
        chunks: List[str] = []
        for c in abc:
            if c == ' ':
                value = 0
            elif c == '.':
                value = 1
            else:
                # Masking keeps the width exact even for code points above
                # 255, which previously produced more than `bits` digits.
                value = ord(c) & mask
            chunks.append(format(value, f'0{bits}b') + separator)
        return ''.join(chunks)

    @staticmethod
    def header() -> None:
        """Print the header rows of the per-letter Markdown table."""
        print('Letter | Morse | Morse binary | Morse duration | 5-ASCII binary')
        print('---|---|----|----|----')

    def test(self, text: str, one_line: bool = False) -> None:
        """Print the Morse, Morse-binary and 5-bit encodings of ``text``.

        With ``one_line`` the result is a single ``|``-separated table row
        (matching ``header``); otherwise it is a bullet list that also
        reports the bit counts.
        """
        morse_str: str = self.to_morse(text)
        morse_bin_str: str = self.to_morse_bin(text)
        morse_bin_len: str = self.bit_count(morse_bin_str)
        ascii_str: str = self.to_bin(text)
        ascii_str_len: str = self.bit_count(ascii_str)
        if one_line:
            line: str = ' | '.join(
                [text, morse_str, morse_bin_str, morse_bin_len, ascii_str])
        else:
            line = f'"{text}"\n\n* **Morse:** {morse_str}\n* **Morse binary, '
            line += f'{morse_bin_len} bits:** {morse_bin_str}\n'
            line += f'* **ASCII, {ascii_str_len} bits:** {ascii_str}\n'
        print(line)

    def fast_test(self, freq: Dict[str, int]) -> None:
        """Print the total Morse-binary bit count of a frequency table.

        ``freq`` maps a string to its number of occurrences; each key's bit
        count is multiplied by its occurrences.  Keys that are empty or
        whitespace-only are skipped.
        """
        count: int = 0
        for key, occurrences in freq.items():
            if key.strip() == '':
                continue
            count += int(self.bit_count(self.to_morse_bin(key))) * occurrences
        print(f'Library test, {count:,} bits')

    def test_widths(self, text: str) -> None:
        """Print the bit count of ``text`` at 5, 6 and 8 bits per character
        and as Morse binary."""
        out: str = ''
        for width in [5, 6, 8]:
            length = self.bit_count(self.to_bin(text, bits=width))
            out += f'\n* {width}-ASCII: {length} bits'
        morse_bin_len: str = self.bit_count(self.to_morse_bin(text))
        out += f'\n*  Morse : {morse_bin_len} bits'
        print(out)

    @staticmethod
    def bit_count(stream) -> str:
        """Return, as a string, the length of ``stream`` without spaces."""
        return str(len(stream.replace(' ', '')))


class Generator:
    """Build a Morse table that gives the shortest codes to the characters
    listed first.

    All codes of one to ``MAX_SYMBOLS`` dots/dashes are sorted by a duration
    score, and the characters of ``etaoin`` receive them in that order.

    Attributes:
        slots: code -> duration score, ordered from shortest to longest.
        morse: character -> assigned code, plus ``' ' -> ' '``.
        etaoin: the character ordering passed to the constructor.

    Each instance owns its ``slots`` and ``morse`` dicts.  They used to be
    class-level dicts mutated in place, so a second instance rewrote the
    table of the first and inherited its stale entries.
    """

    DEFAULT_ETAOIN: str = 'ETAOINSHRDLCUMWFGYPBVKJXQZ1234567890.,?!/ÀÄĆŠÉÈŃÓ'
    MAX_SYMBOLS: int = 5

    # Class-level defaults are kept for backward compatibility; they are
    # never mutated -- instances always rebind their own dicts.
    slots: Dict[str, int] = {}
    morse: Dict[str, str] = {}
    etaoin: str = ''

    def __init__(self, etaoin: str = DEFAULT_ETAOIN):
        """Create the sorted code list and assign codes to ``etaoin``.

        Args:
            etaoin: characters ordered from most to least frequent.

        Raises:
            IndexError: if ``etaoin`` has more characters than there are
                codes.
        """
        self.etaoin = etaoin
        self.slots = {}
        self.morse = {}
        self.create_sorted_slots()
        self.fill_slots()

    def create_sorted_slots(self) -> None:
        """Fill ``slots`` with every code of 1..``MAX_SYMBOLS`` symbols,
        sorted by duration score.

        The score is 21 per dot and 40 per dash (ten times the 2/4 weights
        plus one extra point per dot).  Codes with equal scores keep their
        generation order: shorter first, then ``.`` before ``–``.
        """
        durations: Dict[str, int] = {}
        level: List[str] = ['']
        for _ in range(self.MAX_SYMBOLS):
            # Extend every code of the previous length by one symbol.
            level = [prefix + symbol for prefix in level for symbol in '.–']
            for code in level:
                durations[code] = 21 * code.count('.') + 40 * code.count('–')
        # sorted() is stable, so ties keep the generation order above.
        self.slots = dict(sorted(durations.items(), key=lambda item: item[1]))

    def fill_slots(self):
        """Assign the sorted codes to the characters of ``etaoin``.

        The i-th character of ``etaoin`` receives the i-th code.  Characters
        of ``DEFAULT_ETAOIN`` that ``etaoin`` does not mention receive the
        following codes (as long as codes remain), so punctuation stays
        encodable when ``etaoin`` lists only letters and digits.  A space is
        always mapped to a space.

        Raises:
            IndexError: if ``etaoin`` has more characters than there are
                codes.
        """
        codes: List[str] = list(self.slots.keys())
        if len(self.etaoin) > len(codes):
            raise IndexError(
                f'{len(self.etaoin)} characters but only {len(codes)} codes')
        table: Dict[str, str] = {}
        for i, c in enumerate(self.etaoin):
            table[c] = codes[i]
        # Unlisted default characters take the next unused codes; zip()
        # stops quietly if the codes run out.
        leftovers = [c for c in self.DEFAULT_ETAOIN if c not in table]
        for c, code in zip(leftovers, codes[len(self.etaoin):]):
            table[c] = code
        table[' '] = ' '
        self.morse = table


class Corpus:
    """Upper-case text corpus with CW abbreviations applied, plus the
    frequency of each countable character in it.

    Attributes:
        text: the corpus text.
        freq: character -> occurrences, ordered from most to least frequent.

    The corpus is cached in a file named ``corpus`` in the current working
    directory; when that file is missing the texts are downloaded.
    """

    text: str = ''
    freq: Dict[str, int] = {}

    # Word or phrase -> abbreviation.  Matching is on whole words.
    _ABBREVIATIONS: Dict[str, str] = {
        'ADDRESS': 'ADRS',
        'AGAIN': 'AGN',
        'ANTENNA': 'ANT',
        'BETTER': 'BTR',
        'CALLED': 'CLD',
        'CALLING': 'CLG',
        'CAN\'T': 'CNT',
        'COME': 'CUM',
        'CONDITION': 'CONDX',
        'CONDITIONS': 'CONDX',
        'YES': 'C',
        'CORRECT': 'C',
        'AFFIRMATIVE': 'C',
        'CONFIRM': 'CFM',
        'CHECK': 'CK',
        'SEE YOU LATER': 'CUL',
        'SEE YOU': 'CU',
        'DAY': 'DA',
        'DELIVERED': 'DLVD',
        'DIFFERENCE': 'DIFF',
        'DIFFERENT': 'DIFF',
        'DOWN': 'DN',
        'DEAR': 'DR',
        'FROM': 'DE',
        'LONG DISTANCE': 'DX',
        'LONG': 'LNG',
        'DISTANCE': 'DX',
        'DISTANT': 'DX',
        'FOREIGN': 'DX',
        'ELEMENT': 'EL',
        'FOR': 'FER',
        'FINE BUSINESS': 'FB',
        'EXCELLENT': 'FB',
        'WONDERFUL': 'FB',
        'GUESS': 'GESS',
        'GOOD AFTERNOON': 'GA',
        'GOOD EVENING': 'GE',
        'GOOD MORNING': 'GM',
        'GOOD NIGHT': 'GN',
        'GOODNIGHT': 'GN',
        'GOOD': 'GD',
        'GOING': 'GG',
        'GIVE': 'GV',
        'GIVING': 'GVG',
        'HERE': 'HR',
        'HEAR': 'HR',
        'HOPE': 'HP',
        'LEAVE': 'LV',
        'LEAVING': 'LVG',
        'MESSAGE': 'MSG',
        'MY NAME': 'MN',
        'NO MORE': 'NM',
        'NO': 'N',
        'NOTHING': 'NIL',
        'NOW': 'NW',
        'NUMBER': 'NR',
        'OKAY': 'OK',
        'OLD BOY': 'OB',
        'OLD CHAP': 'OC',
        'OLD MAN': 'OM',
        'OPERATOR': 'OP',
        'HUSBAND': 'OM',
        'PACKAGE': 'PKG',
        'PLEASE': 'PLS',
        'PAPER': 'PPR',
        'PREFIX': 'PX',
        'PRESS': 'PX',
        'POWER': 'PWR',
        'REFER TO': 'RFR',
        'REFERRING TO': 'RFR',
        'REGARDING': 'RE',
        'CONCERNING': 'RE',
        'I AM IN': 'QTH',
        'RECEIVED': 'R',
        'RECEIVE': 'RX',
        'RECEIVER': 'RX',
        'REPORT': 'RPT',
        'REPEAT': 'RPT',
        'SAID': 'SED',
        'SAYS': 'SEZ',
        'SIGNED': 'SGD',
        'SERVICE': 'SVC',
        'SO FAR': 'SFR',
        'SIGNATURE': 'SIG',
        'SIGNAL': 'SIG',
        'DEAD': 'SK',
        'DECEASED': 'SK',
        'GHOST': 'SK',
        'SORRY': 'SRI',
        'STATION': 'STN',
        'SOME': 'SUM',
        'THAT': 'TT',
        'THANK YOU': 'TU',
        'THANKS': 'TU',
        'TRANSMIT': 'TX',
        'TRANSMITTER': 'TX',
        'TRAFFIC': 'TFC',
        'TEXT': 'TXT',
        'TOMORROW': 'TMW',
        'TRICKS': 'TRIX',
        'THAT IS': 'TTS',
        'WORD': 'W',
        'WORDS': 'W',
        'WORD AFTER': 'WA',
        'WORD BEFORE': 'WB',
        'TRANSCEIVER': 'XCVR',
        'WELL': 'WL',
        'WILL': 'WL',
        'WITH': 'WID',
        'WIFE': 'XYL',
        'WORKED': 'WKD',
        'WORKING': 'WKG',
        'WOULD': 'WUD',
        'YOURS': 'URS',
        'WOMAN': 'YL',
        'GIRL': 'YL',
        'YEAR': 'YR',
        'YEARS': 'YRS',
        'YOU': 'U',
        'YOUR': 'UR',
        'YOU\'RE': 'UR',
        'LATER': 'LTR',
        'LETTER': 'LTR',
        'GOODBYE': '73',
        'FAREWELL': '73',
    }

    # Single character -> replacement text ('' deletes the character).
    _MERGES: Dict[str, str] = {
        '[': '(',
        ']': ')',
        '{': '(',
        '}': ')',
        '_': ' ',
        '“': '"',
        '”': '"',
        '‘': '\'',
        '’': '\'',
        # Upper-case: the corpus is upper-cased and count() only counts
        # upper-case letters, so the former 'x' was silently dropped.
        '×': 'X',
        '*': '',
        '\\': '/',
        ';': ':',
        '—': '-',
        '<': '(',
        '>': ')',
        '$': '',
        '£': '',
        '%': '0/0',
        'Å': 'À',
        'Ą': 'Ä',
        'Æ': 'Ä',
        'Ĉ': 'Ć',
        'Ç': 'Ć',
        'Ĥ': 'Š',
        'Ę': 'É',
        'Ł': 'È',
        'Ñ': 'Ń',
        'Ö': 'Ó',
        'Ø': 'Ó',
    }

    def __init__(self) -> None:
        """Load the corpus (from the cache file or by download), shorten it
        and count its characters.

        The text is shortened in both cases; the cache file is only written
        after a download.
        """
        cached: bool = path.isfile('corpus')
        if cached:
            self.read()
        else:
            self.download()
        self.shorten()
        if not cached:
            self.write()
        self.count()

    def download(self):
        """Append the upper-cased text of four Project Gutenberg books to
        ``text``.  Requires network access."""
        prefix: str = 'https://www.gutenberg.org/'
        urls: Dict[str, str] = {
            'Dracula': 'cache/epub/345/pg345.txt',
            'Frankenstein': 'cache/epub/84/pg84.txt',
            'Moby Dick': 'files/2701/2701-0.txt',
            'Alice in Wonderland': 'cache/epub/11/pg11.txt'
        }
        for title, location in urls.items():
            print('Downloading', title)
            # The context manager closes the HTTP response after reading.
            with urlopen(prefix + location) as response:
                self.text += response.read().decode('utf-8').upper()

    def shorten(self):
        """Normalise characters in ``text`` and replace words and phrases by
        their CW abbreviations.

        Characters are merged first so that words wrapped in ``_`` or
        written with curly apostrophes are seen by the abbreviation step.
        Abbreviations are then applied in one pass that prefers the longest
        key, so a phrase such as ``THAT IS`` is not pre-empted by its first
        word ``THAT``.
        """
        print('Merging characters')
        # All merge keys are single characters, so one translate() pass is
        # equivalent to replacing them one after another.
        self.text = self.text.translate(str.maketrans(self._MERGES))

        print('Abbreviating words')
        longest_first = sorted(self._ABBREVIATIONS, key=len, reverse=True)
        pattern = re.compile(
            r'\b(?:' + '|'.join(re.escape(k) for k in longest_first) + r')\b')
        self.text = pattern.sub(
            lambda match: self._ABBREVIATIONS[match.group(0)], self.text)

    def write(self) -> None:
        """Write ``text`` to the cache file ``corpus`` as UTF-8."""
        # Explicit encoding: the text contains non-ASCII letters and the
        # platform default encoding may not be able to represent them.
        with open('corpus', 'w', encoding='utf-8') as f:
            f.write(self.text)

    def read(self) -> None:
        """Load ``text`` from the UTF-8 cache file ``corpus``."""
        with open('corpus', 'r', encoding='utf-8') as f:
            self.text = f.read()

    def count(self) -> None:
        """Count the countable characters of ``text`` into ``freq``.

        Only A-Z, 0-9 and the accented letters ``ÀÄĆŠÉÈŃÓ`` are counted.
        ``freq`` is ordered by descending count; characters with equal
        counts keep the order of their first appearance in the text.
        """
        valid = frozenset(ascii_uppercase + '1234567890' + 'ÀÄĆŠÉÈŃÓ')
        counts: Dict[str, int] = {}
        for c in self.text:
            if c in valid:
                counts[c] = counts.get(c, 0) + 1

        self.freq = dict(
            sorted(counts.items(), key=lambda item: item[1], reverse=True))


# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python3


	import re
	from typing import Dict, List
	from os import path
	from urllib.request import urlopen
	from string import ascii_uppercase
	# from itertools import product
	# from collections import Counter


	def main():
	t: Translator = Translator()
	print('\nGenerating corpus data')
	x: Corpus = Corpus()

	# Show dict contents
	print(f'{t.d_morse=}')
	print(f'{t.d_morse_bin=}')
	print()

	# Most common letters
	t.header()
	for c in 'ETAOINSHR':
	t.test(c, True)
	print()

	# Least common letters
	t.header()
	for c in 'JXQZ':
	t.test(c, True)
	print()

	# Text test 1
	t.test('HELLO WORLD.')

	# Text test 2, Article 1 of the Universal Declaration of Human Rights
	a1: str = 'All human beings are born free and equal in dignity and rights. '
	a1 += 'They are endowed with reason and conscience '
	a1 += 'and should act towards one another in a spirit of brotherhood.'
	a1 = a1.upper()
	t.test(a1)
	t.fast_test(x.freq)

	# Bit width comparison
	t.test_widths(a1)

	# Generate naively optimized Morse
	print('\n-- Optimized by Wikipedia\'s letter frequency\n')
	g: Generator = Generator()
	t.d_morse = g.morse
	t.gen_morse_bin()
	t.test(a1)
	t.fast_test(x.freq)

	# Generate better optimized Morse
	print('\n-- Optimized by frequency of corpus with CW abbreviations\n')
	g: Generator = Generator(etaoin=''.join(x.freq.keys()))
	t.d_morse = g.morse
	t.gen_morse_bin()
	t.test(a1)
	t.fast_test(x.freq)


	class Translator:
	d_morse: Dict[str, str]
	d_morse_bin: Dict[str, str]

	def __init__(self):
	# Dict: Morse
	self.d_morse = {
	'A': '.–',
	'B': '–...',
	'C': '–.–.',
	'D': '–..',
	'E': '.',
	'F': '..–.',
	'G': '––.',
	'H': '....',
	'I': '..',
	'J': '.–––',
	'K': '–.–',
	'L': '.–..',
	'M': '––',
	'N': '–.',
	'O': '–––',
	'P': '.––.',
	'Q': '––.–',
	'R': '.–.',
	'S': '...',
	'T': '–',
	'U': '..–',
	'V': '...–',
	'W': '.––',
	'X': '–..–',
	'Y': '–.––',
	'Z': '––..',
	'1': '.––––',
	'2': '..–––',
	'3': '...––',
	'4': '....–',
	'5': '.....',
	'6': '–....',
	'7': '––...',
	'8': '–––..',
	'9': '––––.',
	'0': '–––––',
	'.': '.–.–.–',
	',': '––..––',
	'?': '..––..',
	'\'': '.––––.',
	'!': '–.–.––',
	'/': '–..–.',
	'(': '–.––.',
	')': '–.––.–',
	'&': '.–...',
	':': '–––...',
	';': '–.–.–.',
	'=': '–...–',
	'+': '.–.–.',
	'-': '–....–',
	'_': '..––.–',
	'"': '.–..–.',
	'$': '...–..–',
	'@': '.––.–.',
	'À': '.––.–',
	'Ä': '.–.–',
	'Ć': '–.–..',
	'Š': '––––',
	'Ð': '..––.',
	'È': '.–..–',
	'É': '.–..–',
	'Ĝ': '––.–.',
	'Ĵ': '.–––.',
	'Ñ': '––.––',
	'Ó': '–––.',
	'Ś': '...–...',
	'Ŝ': '...–.',
	'Þ': '.––..',
	'Ü': '..––',
	'Ź': '––..–.',
	'Ż': '––..–',
	' ': ' ',
	}
	self.gen_morse_bin()

	def gen_morse_bin(self):
	# Dict: Morse Bin
	self.d_morse_bin = {}
	for k in self.d_morse.keys():
	morse: str = self.d_morse[k]
	bin_str: str = ''
	for c in morse:
	if c == ' ':
	bin_str += '00'
	elif c == '.':
	bin_str += '10'
	elif c == '–':
	bin_str += '1110'
	else:
	exit(f'unexpected char "{c}" in d_morse["{k}"]')
	bin_str += '00'
	self.d_morse_bin[k] = bin_str

	def to_morse(self, abc: str, space: bool = True) -> str:
	out: str = ''
	for c in abc:
	if out in self.d_morse:
	out += self.d_morse[c]
	if space:
	out += ' '
	return out

	def to_morse_bin(self, abc: str, space: bool = True) -> str:
	out: str = ''
	for c in abc:
	out += self.d_morse_bin[c]
	if space:
	out += ' '
	return out

	@staticmethod
	def to_bin(abc: str, bits: int = 5, space: bool = True) -> str:
	if not 0 < bits < 9:
	exit(f'invalid bit width: {bits}')
	out: str = ''
	for c in abc:
	if c == ' ':
	out += '0' * bits
	elif c == '.':
	out += '0' * (bits-1) + '1'
	else:
	ascii_val = ord(c)
	bin_val = bin(ascii_val)[2:].zfill(8)[8-bits:]
	out += bin_val
	if space:
	out += ' '
	return out

	@staticmethod
	def header() -> None:
	print('Letter \| Morse \| Morse binary \| Morse duration \| 5-ASCII binary')
	print('---\|---\|----\|----\|----')

	def test(self, text: str, one_line: bool = False) -> None:
	morse_str: str = self.to_morse(text)
	morse_bin_str: str = self.to_morse_bin(text)
	morse_bin_len: str = self.bit_count(morse_bin_str)
	ascii_str: str = self.to_bin(text)
	ascii_str_len = self.bit_count(ascii_str)
	if one_line:
	line: str = ' \| '.join([text, morse_str, morse_bin_str, morse_bin_len, ascii_str])
	else:
	line: str = f'"{text}"\n\n* Morse: {morse_str}\n* **Morse binary, '
	line += f'{morse_bin_len} bits:** {morse_bin_str}\n'
	line += f'* ASCII, {ascii_str_len} bits: {ascii_str}\n'
	print(line)

	def fast_test(self, freq: Dict[str, int]) -> None:
	count: int = 0
	for k, v in freq.items():
	if k.strip() == '':
	continue
	count += int(self.bit_count(self.to_morse_bin(k))) * v
	print(f'Library test, {count:,} bits')

	def test_widths(self, text: str) -> None:
	out: str = ''
	for width in [5, 6, 8]:
	ascii_str = self.to_bin(text, bits=width)
	length = self.bit_count(ascii_str)
	out += f'\n* {width}-ASCII: {length} bits'
	morse_bin_str: str = self.to_morse_bin(text)
	morse_bin_len: str = self.bit_count(morse_bin_str)
	out += f'\n* Morse : {morse_bin_len} bits'
	print(out)

	@staticmethod
	def bit_count(stream) -> str:
	return str(len(stream.replace(' ', '')))


	class Generator:
	slots: Dict[str, int] = {}
	morse: Dict[str, str] = {}
	etaoin: str = ''

	def __init__(self, etaoin: str = 'ETAOINSHRDLCUMWFGYPBVKJXQZ1234567890.,?!/ÀÄĆŠÉÈŃÓ'):
	self.etaoin = etaoin
	self.create_sorted_slots()
	self.fill_slots()

	def create_sorted_slots(self) -> None:
	# cx = [''.join(c).replace(' ', '') for c in product(' 24', repeat=5)]
	# print(f'{cx=}')
	# return
	# for i, _ in enumerate(cx):
	# cx[i] = cx[i].replace(' ', '')
	# for x in cx:
	# print(x)
	# return
	for c1 in [0, 2, 4]:
	for c2 in [0, 2, 4]:
	for c3 in [0, 2, 4]:
	for c4 in [0, 2, 4]:
	for c5 in [0, 2, 4]:
	out: str = ''.join(
	[str(c) for c in [c1, c2, c3, c4, c5] if c != 0])
	if out == '':
	continue
	duration: int = sum(int(c) for c in out) * 10
	duration += sum([1 for c in out if c == '2'])
	out = out.replace('2', '.').replace('4', '–')
	self.slots[out] = duration
	self.slots = dict(sorted(self.slots.items(), key=lambda x: x[1]))
	# for k, v in self.slots.items():
	# print(str(k).ljust(8), v)
	# print(len(self.slots))

	def fill_slots(self):
	neo: List[str] = list(self.slots.keys())
	for i, c in enumerate(self.etaoin):
	self.morse[c] = neo[i]
	# print(c, neo[i])
	self.morse[' '] = ' '


	class Corpus:
	text: str = ''
	freq: Dict[str, int] = {}

	def __init__(self) -> None:
	if path.isfile('corpus'):
	self.read()
	self.shorten()
	else:
	self.download()
	self.shorten()
	self.write()
	self.count()

	def download(self):
	prefix: str = 'https://www.gutenberg.org/'
	urls: Dict[str, str] = {
	'Dracula': 'cache/epub/345/pg345.txt',
	'Frankenstein': 'cache/epub/84/pg84.txt',
	'Moby Dick': 'files/2701/2701-0.txt',
	'Alice in Wonderland': 'cache/epub/11/pg11.txt'
	}
	for k, v in urls.items():
	print('Downloading', k)
	self.text += urlopen(prefix+v).read().decode('utf-8').upper()

	def shorten(self):
	abbrev: Dict[str, str] = {
	'ADDRESS': 'ADRS',
	'AGAIN': 'AGN',
	'ANTENNA': 'ANT',
	'BETTER': 'BTR',
	'CALLED': 'CLD',
	'CALLING': 'CLG',
	'CAN\'T': 'CNT',
	'COME': 'CUM',
	'CONDITION': 'CONDX',
	'CONDITIONS': 'CONDX',
	'YES': 'C',
	'CORRECT': 'C',
	'AFFIRMATIVE': 'C',
	'CONFIRM': 'CFM',
	'CHECK': 'CK',
	'SEE YOU LATER': 'CUL',
	'SEE YOU': 'CU',
	'DAY': 'DA',
	'DELIVERED': 'DLVD',
	'DIFFERENCE': 'DIFF',
	'DIFFERENT': 'DIFF',
	'DOWN': 'DN',
	'DEAR': 'DR',
	'FROM': 'DE',
	'LONG DISTANCE': 'DX',
	'LONG': 'LNG',
	'DISTANCE': 'DX',
	'DISTANT': 'DX',
	'FOREIGN': 'DX',
	'ELEMENT': 'EL',
	'FOR': 'FER',
	'FINE BUSINESS': 'FB',
	'EXCELLENT': 'FB',
	'WONDERFUL': 'FB',
	'GUESS': 'GESS',
	'GOOD AFTERNOON': 'GA',
	'GOOD EVENING': 'GE',
	'GOOD MORNING': 'GM',
	'GOOD NIGHT': 'GN',
	'GOODNIGHT': 'GN',
	'GOOD': 'GD',
	'GOING': 'GG',
	'GIVE': 'GV',
	'GIVING': 'GVG',
	'HERE': 'HR',
	'HEAR': 'HR',
	'HOPE': 'HP',
	'LEAVE': 'LV',
	'LEAVING': 'LVG',
	'MESSAGE': 'MSG',
	'MY NAME': 'MN',
	'NO MORE': 'NM',
	'NO': 'N',
	'NOTHING': 'NIL',
	'NOW': 'NW',
	'NUMBER': 'NR',
	'OKAY': 'OK',
	'OLD BOY': 'OB',
	'OLD CHAP': 'OC',
	'OLD MAN': 'OM',
	'OPERATOR': 'OP',
	'HUSBAND': 'OM',
	'PACKAGE': 'PKG',
	'PLEASE': 'PLS',
	'PAPER': 'PPR',
	'PREFIX': 'PX',
	'PRESS': 'PX',
	'POWER': 'PWR',
	'REFER TO': 'RFR',
	'REFERRING TO': 'RFR',
	'REGARDING': 'RE',
	'CONCERNING': 'RE',
	'I AM IN': 'QTH',
	'RECEIVED': 'R',
	'RECEIVE': 'RX',
	'RECEIVER': 'RX',
	'REPORT': 'RPT',
	'REPEAT': 'RPT',
	'SAID': 'SED',
	'SAYS': 'SEZ',
	'SIGNED': 'SGD',
	'SERVICE': 'SVC',
	'SO FAR': 'SFR',
	'SIGNATURE': 'SIG',
	'SIGNAL': 'SIG',
	'DEAD': 'SK',
	'DECEASED': 'SK',
	'GHOST': 'SK',
	'SORRY': 'SRI',
	'STATION': 'STN',
	'SOME': 'SUM',
	'THAT': 'TT',
	'THANK YOU': 'TU',
	'THANKS': 'TU',
	'TRANSMIT': 'TX',
	'TRANSMITTER': 'TX',
	'TRAFFIC': 'TFC',
	'TEXT': 'TXT',
	'TOMORROW': 'TMW',
	'TRICKS': 'TRIX',
	'THAT IS': 'TTS',
	'WORD': 'W',
	'WORDS': 'W',
	'WORD AFTER': 'WA',
	'WORD BEFORE': 'WB',
	'TRANSCEIVER': 'XCVR',
	'WELL': 'WL',
	'WILL': 'WL',
	'WITH': 'WID',
	'WIFE': 'XYL',
	'WORKED': 'WKD',
	'WORKING': 'WKG',
	'WOULD': 'WUD',
	'YOURS': 'URS',
	'WOMAN': 'YL',
	'GIRL': 'YL',
	'YEAR': 'YR',
	'YEARS': 'YRS',
	'YOU': 'U',
	'YOUR': 'UR',
	'YOU\'RE': 'UR',
	'LATER': 'LTR',
	'LETTER': 'LTR',
	'GOODBYE': '73',
	'FAREWELL': '73',
	}
	merge: Dict[str, str] = {
	'[': '(',
	']': ')',
	'{': '(',
	'}': ')',
	'_': ' ',
	'“': '"',
	'”': '"',
	'‘': '\'',
	'’': '\'',
	'×': 'x',
	'*': '',
	'\\': '/',
	';': ':',
	'—': '-',
	'<': '(',
	'>': ')',
	'$': '',
	'£': '',
	'%': '0/0',
	'Å': 'À',
	'Ą': 'Ä',
	'Æ': 'Ä',
	'Ĉ': 'Ć',
	'Ç': 'Ć',
	'Ĥ': 'Š',
	'Ę': 'É',
	'Ł': 'È',
	'Ñ': 'Ń',
	'Ö': 'Ó',
	'Ø': 'Ó',
	}
	print('Abbreviating words')
	for k, v in abbrev.items():
	replace = v
	find = f'\\b{k}\\b'
	self.text = re.sub(find, replace, self.text)

	print('Merging characters')
	for k, v in merge.items():
	find, replace = k, v
	self.text = self.text.replace(find, replace)

	def write(self) -> None:
	with open('corpus', 'w') as f:
	f.write(self.text)

	def read(self) -> None:
	with open('corpus', 'r') as f:
	self.text = f.read()

	def count(self) -> None:
	valid: str = ascii_uppercase + '1234567890' + 'ÀÄĆŠÉÈŃÓ'
	counts: Dict[str, int] = {}
	for line in self.text:
	for c in line:
	if c in valid:
	if c in counts.keys():
	counts[c] += 1
	else:
	counts[c] = 1

	self.freq = dict(sorted(counts.items(), key=lambda x: x[1], reverse=True))


	if __name__ == '__main__':
	main()