public
Last active

Create slugs and abbreviate names using Python

  • Download Gist
Makefile
Makefile
1 2 3 4 5 6 7 8 9
# Run the test suite: clear the terminal first, then run nosetests on
# test_name_utils.py with coverage reporting limited to the name_utils package.
test:
clear
nosetests --with-coverage --cover-package name_utils test_name_utils.py
 
# Remove compiled Python files (*.pyc) and editor backup files (*~).
# NOTE(review): find is invoked without a start path; GNU find defaults to the
# current directory, other find implementations may require one -- confirm.
clean:
find -regex '.*\.pyc' -exec rm {} \;
find -regex '.*~' -exec rm {} \;
 
# Neither target produces a file named after itself, so always run them.
.PHONY: test clean
name_utils.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
#!/usr/bin/env python
#coding: utf-8
 
from unicodedata import normalize
 
 
def slug(text, encoding=None,
         permitted_chars='abcdefghijklmnopqrstuvwxyz0123456789-'):
    """Return a lowercase, ASCII-only slug built from *text*.

    Surrounding whitespace is stripped, spaces become dashes, accented
    characters are reduced to their base ASCII letter (NFKD decomposition
    followed by dropping everything that is not ASCII), every character not
    listed in *permitted_chars* is removed, and runs of dashes are collapsed
    into a single dash.

    Args:
        text: Text to slugify. A byte string is decoded with *encoding*
            first; a text (unicode) string is used as is.
        encoding: Codec used to decode *text* when it is a byte string.
            Defaults to ``'ascii'`` when omitted.
        permitted_chars: Characters allowed to appear in the result.

    Returns:
        The slug as a native ``str``.

    Raises:
        UnicodeDecodeError: If a byte string cannot be decoded with
            *encoding*.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this check decodes
    # byte strings on both Python 2 and Python 3.
    if isinstance(text, bytes):
        text = text.decode(encoding or 'ascii')
    dashed = text.strip().replace(' ', '-').lower()
    # Decode back to text so iteration yields 1-char strings (iterating
    # bytes on Python 3 yields ints, which never match permitted_chars).
    ascii_text = normalize('NFKD', dashed).encode('ascii', 'ignore').decode('ascii')
    kept = ''.join(char for char in ascii_text if char in permitted_chars)
    # Collapse dashes only after filtering: removing a forbidden character
    # that sat between two dashes would otherwise leave '--' in the slug.
    while '--' in kept:
        kept = kept.replace('--', '-')
    # Native ``str`` on both major versions (no-op on Python 3).
    return str(kept)
 
def abbreviate(name, pretty=False):
    """Abbreviate the middle words of a full name.

    The first and last words are always kept in full. Each middle word
    longer than three characters is reduced to its initial followed by a
    dot; middle words of three characters or fewer (such as "de", "da",
    "dos", "e") are kept unchanged.

    Args:
        name: Full name; words are separated by whitespace.
        pretty: When true, a long middle word that immediately follows a
            short one is kept in full instead of being abbreviated
            ("F. de Abreu" rather than "F. de A.").

    Returns:
        The abbreviated name with words joined by single spaces. Names with
        two words or fewer (including an empty string) are returned
        unchanged.
    """
    names = name.split()
    # Without a middle word there is nothing to shorten. Checking "<= 2"
    # (not "== 2") also avoids an IndexError on empty input and stops a
    # single-word name from being emitted twice.
    if len(names) <= 2:
        return name
    result = [names[0]]
    after_tiny = False
    for surname in names[1:-1]:
        if len(surname) <= 3:
            result.append(surname)
            after_tiny = True
            continue
        if pretty and after_tiny:
            result.append(surname)
        else:
            result.append(surname[0] + '.')
        # Only the word directly after a short word gets the pretty pass.
        after_tiny = False
    result.append(names[-1])
    return ' '.join(result)
test_name_utils.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
#!/usr/bin/env python
# coding: utf-8
 
import unittest
from name_utils import slug, abbreviate
 
 
class TestSlug(unittest.TestCase):
    """Behavioural tests for ``slug``."""

    def test_should_always_return_lowercase_words(self):
        self.assertEqual(slug('ALVAROJUSTEN'), 'alvarojusten')

    def test_should_replace_space_with_dash(self):
        self.assertEqual(slug('Alvaro Justen'), 'alvaro-justen')

    def test_should_ignore_unecessary_spaces(self):
        self.assertEqual(slug(' alvaro justen '), 'alvaro-justen')

    def test_should_replace_nonascii_chars_with_corresponding_ascii_chars(self):
        # Unicode literals give the same text on Python 2 and Python 3;
        # calling .decode() on a plain literal only works on Python 2.
        self.assertEqual(slug(u'áÁàÀãÃâÂäÄ'), 'aaaaaaaaaa')
        self.assertEqual(slug(u'éÉèÈẽẼêÊëË'), 'eeeeeeeeee')
        self.assertEqual(slug(u'íÍìÌĩĨîÎïÏ'), 'iiiiiiiiii')
        self.assertEqual(slug(u'óÓòÒõÕôÔöÖ'), 'oooooooooo')
        self.assertEqual(slug(u'úÚùÙũŨûÛüÜ'), 'uuuuuuuuuu')
        self.assertEqual(slug(u'ćĆĉĈçÇ'), 'cccccc')

    def test_should_accept_unicode_text(self):
        self.assertEqual(slug(u'Álvaro Justen'), 'alvaro-justen')

    def test_should_accept_other_input_encodings(self):
        slugged_text = slug(u'Álvaro Justen'.encode('utf16'), 'utf16')
        self.assertEqual(slugged_text, 'alvaro-justen')

    def test_should_accept_only_ascii_letters_and_numbers(self):
        # The backslash is escaped explicitly; the resulting string is the
        # same as with the unrecognised escape sequence "\|".
        slugged_text = slug('''qwerty123456"'@#$%*()_+\\|<>,.;:/?]~[`{}^ ''')
        self.assertEqual(slugged_text, 'qwerty123456')

    def test_should_accept_only_chars_in_permitted_chars_parameter(self):
        slugged_text = slug('''0987654321gfdsazxcvb''',
                            permitted_chars='abc123')
        self.assertEqual(slugged_text, '321acb')
 
class TestAbbreviate(unittest.TestCase):
    """Behavioural tests for ``abbreviate``."""

    def test_name_and_last_name_should_return_equal(self):
        name = 'Álvaro Justen'
        expected = 'Álvaro Justen'
        self.assertEqual(abbreviate(name), expected)

    def test_name_with_two_surnames_should_abbreviate_the_middle_one(self):
        name = 'Álvaro Fernandes Justen'
        expected = 'Álvaro F. Justen'
        self.assertEqual(abbreviate(name), expected)

    def test_three_surnames_should_abbreviate_the_two_in_the_middle(self):
        name = 'Álvaro Fernandes Abreu Justen'
        expected = 'Álvaro F. A. Justen'
        self.assertEqual(abbreviate(name), expected)

    def test_should_not_abbreviate_tiny_words(self):
        # Each (input, expected) pair is checked in the original order.
        cases = [
            ('Álvaro Fernandes de Abreu Justen', 'Álvaro F. de A. Justen'),
            ('Fulano da Costa e Silva', 'Fulano da C. e Silva'),
            ('Fulano dos Santos', 'Fulano dos Santos'),
        ]
        for name, expected in cases:
            self.assertEqual(abbreviate(name), expected)

    def test_should_not_abbreviate_next_surname_if_pretty_is_True(self):
        cases = [
            ('Álvaro Fernandes de Abreu Justen', 'Álvaro F. de Abreu Justen'),
            ('Rafael da Costa Rodrigues Silva', 'Rafael da Costa R. Silva'),
        ]
        for name, expected in cases:
            self.assertEqual(abbreviate(name, pretty=True), expected)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.