snowwm/multitable-helper.py

## multitable-helper.py
#!/usr/bin/env python3

import sys, random, textwrap, re
from collections import defaultdict

# Vocabulary source parsed by read_dict().
SRC_FILE = './multitab_vcb.src.ini'
# Output file produced by write_dict(), encoded as DST_ENCODING.
DST_FILE = './multitab_vcb.ini'
DST_ENCODING = 'cp1251'

# Upper bound on translations stored per word (checked in Dictionary.add_entry).
MAX_TRANSLATIONS = 2
# Upper bound on the number of entries emitted by write_dict().
MAX_ENTRIES = 25  # max 100
# Words longer than this are not used as dictionary keys by read_dict().
MAX_WORDLEN = 11


class Dictionary(defaultdict):
    """Map each word to its list of translations, with a per-word weight.

    Missing keys default to an empty list.  ``weights`` is a second mapping
    from word to weight; a word without an explicit weight defaults to 1.
    """

    def __init__(self):
        super().__init__(list)
        self.weights = defaultdict(lambda: 1)

    def add_entry(self, word, translations, weight=1):
        """Append *translations* to *word* and raise its weight if needed.

        The stored weight is the maximum of every weight the word has been
        added with.

        Raises:
            AssertionError: if the word would end up with more than
                MAX_TRANSLATIONS translations.  The dictionary is left
                unchanged in that case.
        """
        translations = list(translations)
        # Validate before touching any state, and raise explicitly instead of
        # using ``assert`` so the limit is still enforced under ``python -O``.
        # ``get`` is used because plain indexing would create the key.
        if len(self.get(word, ())) + len(translations) > MAX_TRANSLATIONS:
            raise AssertionError(f'MAX_TRANSLATIONS exceeded for "{word}"')

        self[word].extend(translations)
        self.weights[word] = max(self.weights[word], weight)

    def del_entry(self, word):
        """Remove *word* together with its weight.

        Raises:
            KeyError: if *word* is missing from the translations or from
                the weights.
        """
        self.pop(word)
        self.weights.pop(word)


def read_dict():
    """Parse SRC_FILE into English->Russian and Russian->English dictionaries.

    Format, as handled by this parser:
      * everything after ';' on a line is a comment;
      * a line starting with '[' is a section header; a header of the form
        ``[<digits> ...`` sets the weight for the entries that follow, any
        other header resets the weight to 1;
      * any other line is ``en1, en2 = ru1, ru2``: comma-separated words on
        both sides of the first '='.

    Every word on one side maps to all words on the other side.  Words longer
    than MAX_WORDLEN are not used as keys (they can still appear as
    translations).  Empty items are dropped, and a line lacking a word on
    either side of '=' is skipped, so no empty-string word or translation is
    ever stored.

    Returns:
        tuple: ``(e2r, r2e)`` Dictionary objects.
    """
    weight_re = re.compile(r'\[(\d+) ')
    e2r = Dictionary()  # Eng to Rus
    r2e = Dictionary()  # Rus to Eng
    cur_weight = 1

    # NOTE(review): the file is read with the platform default encoding --
    # confirm the encoding of the source file.
    with open(SRC_FILE, 'r') as src:
        for line in src:
            line = line.partition(';')[0].strip()  # ignore comments
            if not line:
                continue

            if line.startswith('['):
                match = weight_re.match(line)
                cur_weight = int(match.group(1)) if match else 1
                continue

            en_part, _, ru_part = line.partition('=')
            en = [w for w in (p.strip() for p in en_part.split(',')) if w]
            ru = [w for w in (p.strip() for p in ru_part.split(',')) if w]
            if not en or not ru:
                # No '=' or one side empty: nothing usable on this line.
                continue

            for ew in en:
                if len(ew) <= MAX_WORDLEN:
                    e2r.add_entry(ew, ru, cur_weight)

            for rw in ru:
                if len(rw) <= MAX_WORDLEN:
                    r2e.add_entry(rw, en, cur_weight)

    return e2r, r2e


def write_dict(d):
    """Write a weighted random selection of entries from *d* to DST_FILE.

    Up to MAX_ENTRIES distinct words are drawn without replacement, each draw
    weighted by ``d.weights``.  Every selected word is written as
    ``word = t1,t2`` with the translations lower-cased.  The file is encoded
    as DST_ENCODING and uses CRLF line endings.

    *d* is not modified: the selection works on local copies of its keys and
    weights.
    """
    words = list(d)
    # Look weights up by key rather than relying on ``d`` and ``d.weights``
    # sharing length and order; ``get`` avoids inserting missing keys.
    weights = [d.weights.get(w, 1) for w in words]

    with open(DST_FILE, 'w', encoding=DST_ENCODING, newline='\r\n') as dst:
        for _ in range(min(MAX_ENTRIES, len(words))):
            idx = random.choices(range(len(words)), weights)[0]
            word = words.pop(idx)
            del weights[idx]

            translations = ','.join(w.lower() for w in d[word])
            dst.write(f'{word} = {translations}\n')


if __name__ == '__main__':
    e2r, r2e = read_dict()

    # Optional first CLI argument: probability of writing the rus->eng
    # direction (defaults to 0.5).
    if len(sys.argv) > 1:
        threshold = float(sys.argv[1])
    else:
        threshold = 0.5
    use_r2e = random.random() < threshold

    if use_r2e:
        direction, chosen = 'rus->eng', r2e
    else:
        direction, chosen = 'eng->rus', e2r

    print('Dictionary Statistics:')
    print(f'    English words: {len(e2r)}')
    print(f'    Russian words: {len(r2e)}')
    print(f'Writing {direction} translation')

    write_dict(chosen)
	#!/usr/bin/env python3

	import sys, random, textwrap, re
	from collections import defaultdict

	# Vocabulary source parsed by read_dict().
	SRC_FILE = './multitab_vcb.src.ini'
	# Output file produced by write_dict(), encoded as DST_ENCODING.
	DST_FILE = './multitab_vcb.ini'
	DST_ENCODING = 'cp1251'

	# Upper bound on translations stored per word (checked in Dictionary.add_entry).
	MAX_TRANSLATIONS = 2
	# Upper bound on the number of entries emitted by write_dict().
	MAX_ENTRIES = 25  # max 100
	# Words longer than this are not used as dictionary keys by read_dict().
	MAX_WORDLEN = 11


	class Dictionary(defaultdict):
	    """Map each word to its list of translations, with a per-word weight.

	    Missing keys default to an empty list.  ``weights`` is a second mapping
	    from word to weight; a word without an explicit weight defaults to 1.
	    """

	    def __init__(self):
	        super().__init__(list)
	        self.weights = defaultdict(lambda: 1)

	    def add_entry(self, word, translations, weight=1):
	        """Append *translations* to *word* and raise its weight if needed.

	        The stored weight is the maximum of every weight the word has been
	        added with.

	        Raises:
	            AssertionError: if the word would end up with more than
	                MAX_TRANSLATIONS translations.  The dictionary is left
	                unchanged in that case.
	        """
	        translations = list(translations)
	        # Validate before touching any state, and raise explicitly instead
	        # of using ``assert`` so the limit still holds under ``python -O``.
	        # ``get`` is used because plain indexing would create the key.
	        if len(self.get(word, ())) + len(translations) > MAX_TRANSLATIONS:
	            raise AssertionError(f'MAX_TRANSLATIONS exceeded for "{word}"')

	        self[word].extend(translations)
	        self.weights[word] = max(self.weights[word], weight)

	    def del_entry(self, word):
	        """Remove *word* together with its weight.

	        Raises:
	            KeyError: if *word* is missing from the translations or from
	                the weights.
	        """
	        self.pop(word)
	        self.weights.pop(word)


	def read_dict():
	    """Parse SRC_FILE into English->Russian and Russian->English dictionaries.

	    Format, as handled by this parser:
	      * everything after ';' on a line is a comment;
	      * a line starting with '[' is a section header; a header of the form
	        ``[<digits> ...`` sets the weight for the entries that follow, any
	        other header resets the weight to 1;
	      * any other line is ``en1, en2 = ru1, ru2``: comma-separated words on
	        both sides of the first '='.

	    Every word on one side maps to all words on the other side.  Words
	    longer than MAX_WORDLEN are not used as keys (they can still appear as
	    translations).  Empty items are dropped, and a line lacking a word on
	    either side of '=' is skipped, so no empty-string word or translation
	    is ever stored.

	    Returns:
	        tuple: ``(e2r, r2e)`` Dictionary objects.
	    """
	    weight_re = re.compile(r'\[(\d+) ')
	    e2r = Dictionary()  # Eng to Rus
	    r2e = Dictionary()  # Rus to Eng
	    cur_weight = 1

	    # NOTE(review): the file is read with the platform default encoding --
	    # confirm the encoding of the source file.
	    with open(SRC_FILE, 'r') as src:
	        for line in src:
	            line = line.partition(';')[0].strip()  # ignore comments
	            if not line:
	                continue

	            if line.startswith('['):
	                match = weight_re.match(line)
	                cur_weight = int(match.group(1)) if match else 1
	                continue

	            en_part, _, ru_part = line.partition('=')
	            en = [w for w in (p.strip() for p in en_part.split(',')) if w]
	            ru = [w for w in (p.strip() for p in ru_part.split(',')) if w]
	            if not en or not ru:
	                # No '=' or one side empty: nothing usable on this line.
	                continue

	            for ew in en:
	                if len(ew) <= MAX_WORDLEN:
	                    e2r.add_entry(ew, ru, cur_weight)

	            for rw in ru:
	                if len(rw) <= MAX_WORDLEN:
	                    r2e.add_entry(rw, en, cur_weight)

	    return e2r, r2e


	def write_dict(d):
	    """Write a weighted random selection of entries from *d* to DST_FILE.

	    Up to MAX_ENTRIES distinct words are drawn without replacement, each
	    draw weighted by ``d.weights``.  Every selected word is written as
	    ``word = t1,t2`` with the translations lower-cased.  The file is
	    encoded as DST_ENCODING and uses CRLF line endings.

	    *d* is not modified: the selection works on local copies of its keys
	    and weights.
	    """
	    words = list(d)
	    # Look weights up by key rather than relying on ``d`` and ``d.weights``
	    # sharing length and order; ``get`` avoids inserting missing keys.
	    weights = [d.weights.get(w, 1) for w in words]

	    with open(DST_FILE, 'w', encoding=DST_ENCODING, newline='\r\n') as dst:
	        for _ in range(min(MAX_ENTRIES, len(words))):
	            idx = random.choices(range(len(words)), weights)[0]
	            word = words.pop(idx)
	            del weights[idx]

	            translations = ','.join(w.lower() for w in d[word])
	            dst.write(f'{word} = {translations}\n')


	if __name__ == '__main__':
	    e2r, r2e = read_dict()

	    # Optional first CLI argument: probability of writing the rus->eng
	    # direction (defaults to 0.5).
	    threshold = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
	    use_r2e = random.random() < threshold

	    # Printed line by line so the indentation of the statistics does not
	    # depend on the whitespace inside a multi-line string literal.
	    print('Dictionary Statistics:')
	    print(f'    English words: {len(e2r)}')
	    print(f'    Russian words: {len(r2e)}')
	    print(f"Writing {'rus->eng' if use_r2e else 'eng->rus'} translation")

	    write_dict(r2e if use_r2e else e2r)