Skip to content

Instantly share code, notes, and snippets.

@ptbrowne
Last active July 13, 2018 15:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ptbrowne/05d8231bde5b6e963278e6ec451a2fbf to your computer and use it in GitHub Desktop.
Save ptbrowne/05d8231bde5b6e963278e6ec451a2fbf to your computer and use it in GitHub Desktop.
alphabet.py
from collections import defaultdict
import sys
from copy import deepcopy
def find_common_prefix(word1, word2):
"""Find the longest common prefix between two words"""
i = 0
l1 = len(word1)
l2 = len(word2)
while l1 > i and l2 > i and word1[i] == word2[i]:
i += 1
return word1[0:i]
def test_find_common_prefix():
assert find_common_prefix('ABB', 'AB') == 'AB'
assert find_common_prefix('A', 'B') == ''
assert find_common_prefix('ABCDE', 'ABCDEF') == 'ABCDE'
assert find_common_prefix('DEHIO', '') == ''
def learn_one(rules, word_before, word_after):
"""From two words, will learn a rule of precedence and add
it to the rule repository.
Also adds the letters from both words to the rules repository
(there may be some letters for which we do not have any precedence
knowledge but have at least the knowledge that they exist.
> rules = defaultdict(set)
> learn_one(rules, 'AB', 'AC')
> rules
{"A": set(), "C": {"B"}, "B": set() }
"""
if not word_before or not word_after:
return
common = find_common_prefix(word_before, word_after)
lcom = len(common)
before = word_before[lcom] if lcom < len(word_before) else None
after = word_after[lcom] if lcom < len(word_after) else None
if before and after and not after in rules[before]:
rules[after].add(before)
for l in word_before:
rules[l] # acknowledge the letter
for l in word_after:
rules[l] # acknowledge the letter
def test_learn():
rules = defaultdict(set)
learn_one(rules, 'AB', 'AC')
learn_one(rules, 'B', 'D')
learn_one(rules, 'ABC', 'ABDE')
assert 'B' in rules['C']
assert 'B' in rules['D']
assert 'C' in rules['D']
assert 'D' in rules
assert 'E' in rules
def learn_precedence_rules(words):
"""From a sorted list of words, learn the rules of precedence.
Output is a defaultdict where keys are letters ("A", "B") and
values are all the letters before the key.
For the latin alphabet :
A -> []
B -> ["A"]
C -> ["B", "C"]
"""
rules = defaultdict(set)
word_before = None
l = len(words) + 0.0
for word in words:
learn_one(rules, word_before, word)
word_before = word
return rules
def remove_letter(rules, letter):
"""Remove all occurences of a letter from the rules of precedence"""
del rules[letter]
for s in rules.values():
if letter in s:
s.remove(letter)
def find_possible_order(rules):
"""From the rules of precedence, outputs a possible order"""
rules = deepcopy(rules) # going to mutate rules
result = []
while len(rules):
min_len = min(len(s) for s in rules.values())
for letter, letters_before in rules.items():
if len(letters_before) == min_len:
result.append(letter)
remove_letter(rules, letter)
break
return "".join(result)
def test_find_possible_order():
res = find_possible_order({
"A": set(),
"B": set(['A']),
"C": set(['B']),
"D": set(['B', 'C'])
})
assert res == 'ABCD'
def find_alphabet(words):
"""Given a list of words in lexicographic order, but formed from characters of an
unknown alphabet, returns the list of characters in the correct order:
the "alphabet" for these words."""
rules = learn_precedence_rules(words)
return find_possible_order(rules)
def main():
with open('/usr/share/dict/words') as f:
words = [l.strip().lower() for l in f.readlines()]
alphabet = find_alphabet(words)
print("".join(alphabet))
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment