Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Collation order used by compare_esperanto_strings.
# Space is first so that 'a b' comes before 'ab'; the hyphen is second so
# that affixes (e.g. '-ist') come first.  The punctuation and the Greek
# small sigma (U+03C3) after 'z' are there purely as defensive programming.
_ESPERANTO_ALPHABET = (
    u' ', u'-', u'a', u'b', u'c', u'\u0109', u'd', u'e', u'f', u'g',
    u'\u011d', u'h', u'\u0125', u'i', u'j', u'\u0135', u'k', u'l', u'm',
    u'n', u'o', u'p', u'q', u'r', u's', u'\u015d', u't', u'u', u'\u016d',
    u'v', u'w', u'x', u'y', u'z', u'\'', u'(', u')', u'.', u'*', u',',
    u'\u03c3',
)

# Character -> position in the alphabet, built once so each lookup is a
# dictionary access rather than a linear scan of the alphabet.
_ESPERANTO_RANK = {ch: pos for pos, ch in enumerate(_ESPERANTO_ALPHABET)}

# Characters missing from the alphabet are ranked from here upwards, so they
# always sort after every listed character.
_UNKNOWN_RANK_BASE = len(_ESPERANTO_ALPHABET)


def _normalise_for_esperanto_sort(text):
    """Return *text* as lower-cased, stripped text; bytes are read as UTF-8."""
    # On Python 2 ``bytes`` is ``str``, so this matches plain byte strings
    # there and real ``bytes`` objects on Python 3.
    if isinstance(text, bytes):
        text = text.decode('utf8')
    return text.lower().strip()


def _esperanto_rank(ch):
    """Return the sort rank of a single character."""
    # Unlisted characters are ordered among themselves by code point, which
    # keeps the comparison a consistent total order.
    return _ESPERANTO_RANK.get(ch, _UNKNOWN_RANK_BASE + ord(ch))


def compare_esperanto_strings(x_mixed_case, y_mixed_case):
    """Compare two strings in case-insensitive Esperanto alphabetical order.

    The whole Latin alphabet is permitted, with the Esperanto letters placed
    directly after their base letters.  Both arguments are lower-cased and
    stripped of surrounding whitespace before being compared.

    Arguments may be text strings or UTF-8 encoded byte strings.  Characters
    that are not part of the collation alphabet sort after all characters
    that are, instead of raising ``ValueError``.

    Returns -1 if the first string sorts before the second, 1 if it sorts
    after, and 0 if the two are equal after normalisation.

    Raises ``UnicodeDecodeError`` if a byte string is not valid UTF-8.
    """
    x = _normalise_for_esperanto_sort(x_mixed_case)
    y = _normalise_for_esperanto_sort(y_mixed_case)

    # zip() stops at the end of the shorter string, i.e. the common prefix.
    for x_char, y_char in zip(x, y):
        x_rank = _esperanto_rank(x_char)
        y_rank = _esperanto_rank(y_char)
        if x_rank != y_rank:
            return -1 if x_rank < y_rank else 1

    # One string is a prefix of the other: longer strings come afterwards.
    return (len(x) > len(y)) - (len(x) < len(y))
if __name__ == '__main__':
    # Script entry point: print the lines of dump.txt in Esperanto
    # alphabetical order, one per line.
    import functools

    # The with-block closes the file even if reading fails.
    with open('dump.txt', 'r') as dump:
        lines = dump.readlines()

    # cmp_to_key adapts the two-argument comparison function to the key=
    # interface; the cmp= keyword of list.sort() no longer exists on Python 3.
    lines.sort(key=functools.cmp_to_key(compare_esperanto_strings))

    for line in lines:
        # readlines() keeps each line's trailing newline and print adds its
        # own, so strip the line to avoid blank lines in the output.
        print(line.strip())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment