rillian/analysis.py

## analysis.py
#!/usr/bin/env python3

import collections
import unicodedata

# Tally of every character seen in the word list; populated by main().
histogram = collections.Counter()

# Word list that is analysed when the script is run directly.
WORDLIST_PATH = 'cop_wordlist.combined'


def count_characters(lines):
    """Count the characters of every word in an iterable of word-list lines.

    Each entry is expected to look like ``<key>=<word>,<frequency>``.  Lines
    that start with ``dictionary=`` are skipped.  Any other line that does
    not consist of exactly two comma-separated fields, or whose first field
    contains no ``=``, is reported on stdout as a bad line and skipped.

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        A ``collections.Counter`` mapping each character to the number of
        times it occurs across all accepted words.
    """
    counts = collections.Counter()
    for line in lines:
        # Skip dictionary header.
        if line.startswith('dictionary='):
            continue
        fields = line.strip().split(',')
        if len(fields) != 2:
            print('Bad line:', fields)
            continue
        # partition() never raises: a first field without '=' is reported
        # like any other malformed line, and a word that itself contains
        # '=' is kept whole instead of aborting the run.
        _, separator, word = fields[0].partition('=')
        if not separator:
            print('Bad line:', fields)
            continue
        # Updating a Counter with a string counts its individual characters.
        counts.update(word)
    return counts


def combining_report(counts):
    """Return one report line per combining character in *counts*.

    A character is reported when ``unicodedata.combining`` gives it a
    non-zero canonical combining class.  Each line holds the code point in
    hexadecimal, the Unicode character name and the occurrence count.

    Args:
        counts: Mapping from single characters to occurrence counts.

    Returns:
        A list of formatted strings, in the mapping's iteration order.
    """
    return [
        f'u+{ord(char):06x} {unicodedata.name(char)} {count}'
        for char, count in counts.items()
        if unicodedata.combining(char)
    ]


def main():
    """Read the word list and print the combining characters it uses."""
    # Decode explicitly instead of relying on the locale's default encoding,
    # so non-ASCII text is read the same way on every platform.
    # NOTE(review): assumes the word list is UTF-8 encoded -- confirm.
    with open(WORDLIST_PATH, encoding='utf-8') as f:
        # Iterating the file object streams it line by line.
        histogram.update(count_characters(f))
    for line in combining_report(histogram):
        print(line)


if __name__ == '__main__':
    main()
	#!/usr/bin/env python3

	import collections
	import unicodedata

	# Tally of every character seen in the word list; populated by main().
	histogram = collections.Counter()

	# Word list that is analysed when the script is run directly.
	WORDLIST_PATH = 'cop_wordlist.combined'


	def count_characters(lines):
	    """Count the characters of every word in an iterable of word-list lines.

	    Each entry is expected to look like ``<key>=<word>,<frequency>``.  Lines
	    that start with ``dictionary=`` are skipped.  Any other line that does
	    not consist of exactly two comma-separated fields, or whose first field
	    contains no ``=``, is reported on stdout as a bad line and skipped.

	    Args:
	        lines: Iterable of text lines (an open text file works).

	    Returns:
	        A ``collections.Counter`` mapping each character to the number of
	        times it occurs across all accepted words.
	    """
	    counts = collections.Counter()
	    for line in lines:
	        # Skip dictionary header.
	        if line.startswith('dictionary='):
	            continue
	        fields = line.strip().split(',')
	        if len(fields) != 2:
	            print('Bad line:', fields)
	            continue
	        # partition() never raises: a first field without '=' is reported
	        # like any other malformed line, and a word that itself contains
	        # '=' is kept whole instead of aborting the run.
	        _, separator, word = fields[0].partition('=')
	        if not separator:
	            print('Bad line:', fields)
	            continue
	        # Updating a Counter with a string counts its individual characters.
	        counts.update(word)
	    return counts


	def combining_report(counts):
	    """Return one report line per combining character in *counts*.

	    A character is reported when ``unicodedata.combining`` gives it a
	    non-zero canonical combining class.  Each line holds the code point in
	    hexadecimal, the Unicode character name and the occurrence count.

	    Args:
	        counts: Mapping from single characters to occurrence counts.

	    Returns:
	        A list of formatted strings, in the mapping's iteration order.
	    """
	    return [
	        f'u+{ord(char):06x} {unicodedata.name(char)} {count}'
	        for char, count in counts.items()
	        if unicodedata.combining(char)
	    ]


	def main():
	    """Read the word list and print the combining characters it uses."""
	    # Decode explicitly instead of relying on the locale's default encoding,
	    # so non-ASCII text is read the same way on every platform.
	    # NOTE(review): assumes the word list is UTF-8 encoded -- confirm.
	    with open(WORDLIST_PATH, encoding='utf-8') as f:
	        # Iterating the file object streams it line by line.
	        histogram.update(count_characters(f))
	    for line in combining_report(histogram):
	        print(line)


	if __name__ == '__main__':
	    main()