Skip to content

Instantly share code, notes, and snippets.

@jone
Last active December 19, 2015 13:39
Show Gist options
  • Save jone/5963880 to your computer and use it in GitHub Desktop.
Save jone/5963880 to your computer and use it in GitHub Desktop.
Sorting strings diacritic-insensitively and case-insensitively. Diacritics are combining marks such as ^, ¨ and `.
import unicodedata
def make_sortable(text):
    """Return a case- and diacritic-insensitive sort key for ``text``.

    Byte strings are decoded as UTF-8 first. Text is lowercased,
    decomposed with NFKD and stripped of combining marks, so that
    e.g. ``u'H\\xe4user'`` and ``'hauser'`` produce the same key.

    Values that are neither byte strings nor text (``None``, numbers,
    ...) are returned unchanged.

    The key is always a native ``str``: a UTF-8 encoded byte string on
    Python 2, a text string on Python 3.

    Raises:
        UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3

    if isinstance(text, bytes):  # ``bytes`` is an alias of ``str`` on Python 2
        text = text.decode('utf-8')
    if not isinstance(text, text_type):
        return text

    # NFKD splits an accented character into its base character followed
    # by combining marks; dropping the marks leaves the bare base letters.
    decomposed = unicodedata.normalize('NFKD', text.lower())
    stripped = u''.join(
        char for char in decomposed if not unicodedata.combining(char))

    if str is bytes:
        # Python 2: the native string type is a byte string, so encode.
        return stripped.encode('utf-8')
    return stripped
# Usage example: with make_sortable as the key, case and diacritics are
# ignored. Entries whose keys are equal ('fff' / 'FFF') keep their input
# order because sorted() is stable.
sorted(['fff', 'zzz', 'ööö', 'ZZZ', 'FFF'], key=make_sortable)
# --> ['fff', 'FFF', 'ööö', 'zzz', 'ZZZ']
# For comparison, the default ordering puts the uppercase entries first
# and the accented entry last.
sorted(['fff', 'zzz', 'ööö', 'ZZZ', 'FFF'])
# --> ['FFF', 'ZZZ', 'fff', 'zzz', 'ööö']
# -*- coding: utf-8 -*-
# import make_sortable
from unittest2 import TestCase
class TestMakeSortable(TestCase):
    """Unit tests for ``make_sortable``."""

    def test_accepts_strings(self):
        # Plain lowercase ASCII passes through unchanged.
        self.assertEqual('foo', make_sortable('foo'))

    def test_converts_unicodes_to_strings(self):
        result = make_sortable(u'foo')
        self.assertEqual('foo', result)
        # On Python 2 'foo' == u'foo' is true, so the equality check alone
        # cannot detect a missing conversion; verify the type explicitly.
        self.assertIsInstance(result, str)

    def test_returns_nonstrings_without_modifications(self):
        # Non-string values must come back untouched.
        self.assertEqual(
            [None,
             5,
             True],
            [make_sortable(None),
             make_sortable(5),
             make_sortable(True)])

    def test_lowercases_everything(self):
        self.assertEqual('foo', make_sortable('FoO'))

    def test_removes_diacritics(self):
        # Accented letters are reduced to their base letters.
        self.assertEqual(
            ['francais',
             'hauser'],
            [make_sortable('français'),
             make_sortable('Häuser')])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment