Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Collation order used by compare_esperanto_strings.
# Space is first so that 'a b' sorts before 'ab'; '-' is second so that
# affixes come first; then the Latin alphabet with the Esperanto letters
# placed right after their base letter; then some punctuation and a
# lowercase sigma.
_ESPERANTO_ALPHABET = (
    u' ', u'-', u'a', u'b', u'c', u'\u0109', u'd', u'e', u'f', u'g',
    u'\u011d', u'h', u'\u0125', u'i', u'j', u'\u0135', u'k', u'l', u'm',
    u'n', u'o', u'p', u'q', u'r', u's', u'\u015d', u't', u'u', u'\u016d',
    u'v', u'w', u'x', u'y', u'z', u'\'', u'(', u')', u'.', u'*', u',',
    u'\u03c3',
)

# Character -> position in the collation order; built once so that each
# comparison is a dictionary lookup instead of a linear list scan.
_ESPERANTO_RANK = dict(
    (char, rank) for rank, char in enumerate(_ESPERANTO_ALPHABET)
)


def _esperanto_sort_key(text):
    """Return the list of collation ranks for *text*.

    Byte strings are decoded as UTF-8; the text is then lowercased and
    stripped of surrounding whitespace.  Characters missing from the
    alphabet are ranked after every known character, ordered among
    themselves by code point, instead of raising ValueError.
    """
    if isinstance(text, bytes):
        text = text.decode('utf8')
    text = text.lower().strip()
    unknown_base = len(_ESPERANTO_ALPHABET)
    return [_ESPERANTO_RANK.get(char, unknown_base + ord(char))
            for char in text]


def compare_esperanto_strings(x_mixed_case, y_mixed_case):
    """Compare two strings case-insensitively in Esperanto alphabetical order.

    The whole Latin alphabet is permitted.  Each argument may be a UTF-8
    encoded byte string or a text (unicode) string; surrounding whitespace
    is ignored.

    Returns -1 if x sorts before y, 1 if it sorts after, and 0 if they are
    equal.  When one string is a prefix of the other, the longer string
    comes afterwards.
    """
    x_key = _esperanto_sort_key(x_mixed_case)
    y_key = _esperanto_sort_key(y_mixed_case)
    # Lists of ints compare element by element, and a strict prefix sorts
    # first, which is exactly the ordering wanted here.
    return (x_key > y_key) - (x_key < y_key)
if __name__ == '__main__':
    # Read dump.txt from the current directory, sort its lines in Esperanto
    # alphabetical order and print them one per line.
    from functools import cmp_to_key

    # The with-block closes the file even if reading fails.
    with open('dump.txt', 'r') as dump:
        lines = dump.readlines()
    # cmp_to_key adapts the comparison function to the key= interface;
    # the cmp= argument of list.sort() only exists on Python 2.
    lines.sort(key=cmp_to_key(compare_esperanto_strings))
    for line in lines:
        # readlines() keeps each line's trailing newline and print adds
        # its own, so strip the line to avoid blank lines in the output.
        print(line.strip())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.