Skip to content

Instantly share code, notes, and snippets.

@turicas
Created December 3, 2011 23:22
Show Gist options
  • Star 16 You must be signed in to star a gist
  • Fork 10 You must be signed in to fork a gist
  • Save turicas/1428479 to your computer and use it in GitHub Desktop.
Save turicas/1428479 to your computer and use it in GitHub Desktop.
Create slugs and abbreviate names using Python
# Run the unit tests and print a coverage report for the name_utils module.
test:
	clear
	nosetests --with-coverage --cover-package name_utils test_name_utils.py

# Remove compiled bytecode and editor backup files below the current
# directory.  The explicit "." start path is required by non-GNU find
# implementations (e.g. BSD/macOS), which do not default to it.
clean:
	find . -regex '.*\.pyc' -exec rm {} \;
	find . -regex '.*~' -exec rm {} \;

.PHONY: test clean
#!/usr/bin/env python
#coding: utf-8
from unicodedata import normalize
def slug(text, encoding=None,
         permitted_chars='abcdefghijklmnopqrstuvwxyz0123456789-'):
    """Return a lowercase, ASCII-only slug built from ``text``.

    Surrounding whitespace is stripped, spaces become dashes, the text is
    lowercased, accented characters are reduced to their ASCII base letter
    (NFKD decomposition, then every non-ASCII code point is dropped) and
    any character not listed in ``permitted_chars`` is removed.  Runs of
    dashes left in the result are collapsed into a single dash.

    Args:
        text: Text to slugify, either a text (unicode) string or a byte
            string.
        encoding: Codec used to decode ``text`` when it is a byte string;
            ``'ascii'`` is used when this is ``None``.
        permitted_chars: Characters allowed to appear in the result.

    Returns:
        The slug as a native ``str``.

    Raises:
        UnicodeDecodeError: If a byte string cannot be decoded with the
            selected codec.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this single check
    # decodes byte strings on both Python 2 and Python 3.
    if isinstance(text, bytes):
        text = text.decode(encoding or 'ascii')
    clean_text = text.strip().replace(' ', '-').lower()
    ascii_text = normalize('NFKD', clean_text).encode('ascii', 'ignore')
    # On Python 3 the encoded value is ``bytes`` (iterating it yields
    # ints); turn it back into a native ``str`` so the membership test
    # below compares characters.  On Python 2 it already is a ``str``.
    if not isinstance(ascii_text, str):
        ascii_text = ascii_text.decode('ascii')
    strict_text = ''.join(
        char for char in ascii_text if char in permitted_chars)
    # Collapse dashes only after filtering: dropping a forbidden character
    # that sat between two dashes would otherwise leave '--' behind.
    while '--' in strict_text:
        strict_text = strict_text.replace('--', '-')
    return strict_text
def abbreviate(name, pretty=False):
    """Abbreviate the middle words of a personal name.

    The first and last words are always kept in full.  Each middle word
    longer than three characters is reduced to its initial followed by a
    dot; words of three characters or fewer (e.g. "de", "da", "dos") are
    kept as they are.

    Args:
        name: Full name, with words separated by whitespace.
        pretty: When true, a long word that immediately follows a short
            word is kept in full instead of being abbreviated
            (``'de Abreu'`` rather than ``'de A.'``).

    Returns:
        The abbreviated name joined by single spaces.  Names made of two
        words or fewer (including an empty string) are returned unchanged.
    """
    names = name.split()
    # Nothing to abbreviate without a middle word.  Returning early also
    # avoids duplicating a single-word name and indexing an empty list.
    if len(names) <= 2:
        return name

    result = [names[0]]
    tiny_name = False  # True while the previous middle word was a short one
    for surname in names[1:-1]:
        if len(surname) <= 3:
            result.append(surname)
            tiny_name = True
        else:
            if pretty and tiny_name:
                result.append(surname)
            else:
                result.append(surname[0] + '.')
            tiny_name = False
    result.append(names[-1])
    return ' '.join(result)
#!/usr/bin/env python
# coding: utf-8
import unittest
from name_utils import slug, abbreviate
class TestSlug(unittest.TestCase):
    """Unit tests for ``name_utils.slug``."""

    def test_should_always_return_lowercase_words(self):
        self.assertEqual(slug('ALVAROJUSTEN'), 'alvarojusten')

    def test_should_replace_space_with_dash(self):
        self.assertEqual(slug('Alvaro Justen'), 'alvaro-justen')

    def test_should_ignore_unecessary_spaces(self):
        self.assertEqual(slug(' alvaro justen '), 'alvaro-justen')

    def test_should_replace_nonascii_chars_with_corresponding_ascii_chars(self):
        # Unicode literals instead of ``'...'.decode('utf8')`` so the same
        # source runs on Python 2 and Python 3.
        self.assertEqual(slug(u'áÁàÀãÃâÂäÄ'), 'aaaaaaaaaa')
        self.assertEqual(slug(u'éÉèÈẽẼêÊëË'), 'eeeeeeeeee')
        self.assertEqual(slug(u'íÍìÌĩĨîÎïÏ'), 'iiiiiiiiii')
        self.assertEqual(slug(u'óÓòÒõÕôÔöÖ'), 'oooooooooo')
        self.assertEqual(slug(u'úÚùÙũŨûÛüÜ'), 'uuuuuuuuuu')
        self.assertEqual(slug(u'ćĆĉĈçÇ'), 'cccccc')

    def test_should_accept_unicode_text(self):
        self.assertEqual(slug(u'Álvaro Justen'), 'alvaro-justen')

    def test_should_accept_other_input_encodings(self):
        slugged_text = slug(u'Álvaro Justen'.encode('utf16'), 'utf16')
        self.assertEqual(slugged_text, 'alvaro-justen')

    def test_should_accept_only_ascii_letters_and_numbers(self):
        # The backslash is escaped explicitly; the string value is the same
        # as with the former (invalid) ``\|`` escape sequence.
        slugged_text = slug('''qwerty123456"'@#$%*()_+\\|<>,.;:/?]~[`{}^ ''')
        self.assertEqual(slugged_text, 'qwerty123456')

    def test_should_accept_only_chars_in_permitted_chars_parameter(self):
        slugged_text = slug('''0987654321gfdsazxcvb''',
                            permitted_chars='abc123')
        self.assertEqual(slugged_text, '321acb')
class TestAbbreviate(unittest.TestCase):
    """Unit tests for ``name_utils.abbreviate``."""

    def test_name_and_last_name_should_return_equal(self):
        name = 'Álvaro Justen'
        expected = 'Álvaro Justen'
        self.assertEqual(abbreviate(name), expected)

    def test_name_with_two_surnames_should_abbreviate_the_middle_one(self):
        name = 'Álvaro Fernandes Justen'
        expected = 'Álvaro F. Justen'
        self.assertEqual(abbreviate(name), expected)

    def test_three_surnames_should_abbreviate_the_two_in_the_middle(self):
        name = 'Álvaro Fernandes Abreu Justen'
        expected = 'Álvaro F. A. Justen'
        self.assertEqual(abbreviate(name), expected)

    def test_should_not_abbreviate_tiny_words(self):
        name = 'Álvaro Fernandes de Abreu Justen'
        expected = 'Álvaro F. de A. Justen'
        self.assertEqual(abbreviate(name), expected)

        name = 'Fulano da Costa e Silva'
        expected = 'Fulano da C. e Silva'
        self.assertEqual(abbreviate(name), expected)

        name = 'Fulano dos Santos'
        expected = 'Fulano dos Santos'
        self.assertEqual(abbreviate(name), expected)

    def test_should_not_abbreviate_next_surname_if_pretty_is_True(self):
        name = 'Álvaro Fernandes de Abreu Justen'
        expected = 'Álvaro F. de Abreu Justen'
        self.assertEqual(abbreviate(name, pretty=True), expected)

        # Only the word right after a short word is spared; the following
        # long word is abbreviated again.
        name = 'Rafael da Costa Rodrigues Silva'
        expected = 'Rafael da Costa R. Silva'
        self.assertEqual(abbreviate(name, pretty=True), expected)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment