Skip to content

Instantly share code, notes, and snippets.

@seven1m
Last active December 18, 2015 16:09
Show Gist options
  • Save seven1m/5809211 to your computer and use it in GitHub Desktop.
Save seven1m/5809211 to your computer and use it in GitHub Desktop.
A Python function and a String-based class for capitalizing a proper name. There are a handful of tests; save the snippet to a .py file and run it from a terminal.
# -*- coding: iso-8859-1 -*-
# The code here is based loosely on John Cardinal's notes found at:
# http://www.johncardinal.com/tmgutil/capitalizenames.htm
# 2006-03-16
# Thanks to David Kern <kernd@reasonspace.com> for fixing some bugs.
# Generational suffixes, each in a bare and a parenthesized form.  They are
# written here in the exact spelling the capitalizer should emit.
suffixes = [
    "II", "(II)",
    "III", "(III)",
    "IV", "(IV)",
    "VI", "(VI)",
    "VII", "(VII)",
    "2nd", "(2nd)",
    "3rd", "(3rd)",
    "4th", "(4th)",
    "5th", "(5th)",
]
# The names listed here are included by permission from John Cardinal's TMG Utility.
# http://www.johncardinal.com/tmgutil/index.htm
# John Cardinal maintains the copyright for this list of names.
# Surnames whose capitalization differs from plain "first letter upper-case".
# Each entry is spelled exactly as it should appear in the output; entries are
# '|'-separated inside the string pieces below and split into a list.
# NOTE(review): the literal previously contained raw line breaks after "du "
# and "vander ", which is a syntax error inside a double-quoted string; the two
# affected entries are restored as "du Hurst" and "vander Zanden".
surnames = (
    "ApShaw|d'Albini|d'Aubigney|d'Aubigné|d'Autry|d'Entremont|d'Hurst|"
    "D'ovidio|da Graça|DaSilva|DeAnda|deAnnethe|deAubigne|deAubigny|"
    "DeBardelaben|DeBardeleben|DeBaugh|deBeauford|DeBerry|deBethune|"
    "DeBetuile|DeBoard|DeBoer|DeBohun|DeBord|DeBose|DeBrouwer|DeBroux|"
    "DeBruhl|deBruijn|deBrus|deBruse|deBrusse|DeBruyne|DeBusk|DeCamp|"
    "deCastilla|DeCello|deClare|DeClark|DeClerck|DeCoste|deCote|"
    "DeCoudres|DeCoursey|DeCredico|deCuire|DeCuyre|DeDominicios|"
    "DeDuyster|DeDuytscher|DeDuytser|deFiennes|DeFord|DeForest|DeFrance|"
    "DeFriece|DeGarmo|deGraaff|DeGraff|DeGraffenreid|DeGraw|DeGrenier|"
    "DeGroats|DeGroft|DeGrote|DeHaan|DeHaas|DeHaddeclive|deHannethe|"
    "DeHatclyf|DeHaven|DeHeer|DeJager|DeJarnette|DeJean|DeJong|deJonge|"
    "deKemmeter|deKirketon|DeKroon|deKype|del-Rosario|dela Chamotte|"
    "DeLa Cuadra|DeLa Force|dela Fountaine|dela Greña|dela Place|"
    "DeLa Ward|DeLaci|DeLacy|DeLaet|DeLalonde|DelAmarre|DeLancey|"
    "DeLascy|DelAshmutt|DeLassy|DeLattre|DeLaughter|DeLay|deLessine|"
    "DelGado|DelGaudio|DeLiberti|DeLoache|DeLoatch|DeLoch|DeLockwood|"
    "DeLong|DeLozier|DeLuca|DeLucenay|deLucy|DeMars|DeMartino|deMaule|"
    "DeMello|DeMinck|DeMink|DeMoree|DeMoss|DeMott|DeMuynck|deNiet|"
    "DeNise|DeNure|DePalma|DePasquale|dePender|dePercy|DePoe|DePriest|"
    "DePu|DePui|DePuis|DeReeper|deRochette|deRose|DeRossett|DeRover|"
    "deRuggele|deRuggle|DeRuyter|deSaint-Sauveur|DeSantis|desCuirs|"
    "DeSentis|DeShane|DeSilva|DesJardins|DesMarest|deSoleure|DeSoto|"
    "DeSpain|DeStefano|deSwaert|deSwart|DeVall|DeVane|DeVasher|"
    "DeVasier|DeVaughan|DeVaughn|DeVault|DeVeau|DeVeault|deVilleneuve|"
    "DeVilliers|DeVinney|DeVito|deVogel|DeVolder|DeVolld|DeVore|deVos|"
    "DeVries|deVries|DeWall|DeWaller|DeWalt|deWashington|deWerly|"
    "deWessyngton|DeWet|deWinter|DeWitt|DeWolf|DeWolfe|DeWolff|DeWoody|"
    "DeYager|DeYarmett|DeYoung|DiCicco|DiCredico|DiFillippi|DiGiacomo|"
    "DiMarco|DiMeo|DiMonte|DiNonno|DiPietro|diPilato|DiPrima|DiSalvo|"
    "du Bosc|du Hurst|DuFort|DuMars|DuPre|DuPue|DuPuy|FitzUryan|kummel|"
    "LaBarge|LaBarr|LaBauve|LaBean|LaBelle|LaBerteaux|LaBine|LaBonte|"
    "LaBorde|LaBounty|LaBranche|LaBrash|LaCaille|LaCasse|LaChapelle|"
    "LaClair|LaComb|LaCoste|LaCount|LaCour|LaCroix|LaFarlett|LaFarlette|"
    "LaFerry|LaFlamme|LaFollette|LaForge|LaFortune|LaFoy|LaFramboise|"
    "LaFrance|LaFuze|LaGioia|LaGrone|LaLiberte|LaLonde|LaLone|LaMaster|"
    "LaMay|LaMere|LaMont|LaMotte|LaPeer|LaPierre|LaPlante|LaPoint|"
    "LaPointe|LaPorte|LaPrade|LaRocca|LaRochelle|LaRose|LaRue|LaVallee|"
    "LaVaque|LaVeau|LeBleu|LeBoeuf|LeBoiteaux|LeBoyteulx|LeCheminant|"
    "LeClair|LeClerc|LeCompte|LeCroy|LeDuc|LeFevbre|LeFever|LeFevre|"
    "LeFlore|LeGette|LeGrand|LeGrave|LeGro|LeGros|LeJeune|LeMaistre|"
    "LeMaitre|LeMaster|LeMesurier|LeMieux|LeMoe|LeMoigne|LeMoine|LeNeve|"
    "LePage|LeQuire|LeQuyer|LeRou|LeRoy|LeSuer|LeSueur|LeTardif|LeVally|"
    "LeVert|LoMonaco|Macabe|Macaluso|MacaTasney|Macaulay|Macchitelli|"
    "Maccoone|Maccurry|Macdermattroe|Macdiarmada|Macelvaine|Macey|"
    "Macgraugh|Machan|Machann|Machum|Maciejewski|Maciel|Mackaben|"
    "Mackall|Mackartee|Mackay|Macken|Mackert|Mackey|Mackie|Mackin|"
    "Mackins|Macklin|Macko|Macksey|Mackwilliams|Maclean|Maclinden|"
    "Macomb|Macomber|Macon|Macoombs|Macraw|Macumber|Macurdy|"
    "Macwilliams|MaGuinness|MakCubyn|MakCumby|Mcelvany|Mcsherry|"
    "Op den Dyck|Op den Graeff|regory|Schweißguth|StElmo|StGelais|"
    "StJacques|te Boveldt|VanAernam|VanAken|VanAlstine|VanAmersfoort|"
    "VanAntwerp|VanArlem|VanArnam|VanArnem|VanArnhem|VanArnon|"
    "VanArsdale|VanArsdalen|VanArsdol|vanAssema|vanAsten|VanAuken|"
    "VanAwman|VanBaucom|VanBebber|VanBeber|VanBenschoten|VanBibber|"
    "VanBilliard|vanBlare|vanBlaricom|VanBuren|VanBuskirk|VanCamp|"
    "VanCampen|VanCleave|VanCleef|VanCleve|VanCouwenhoven|"
    "VanCovenhoven|VanCowenhoven|VanCuren|VanDalsem|VanDam|VanDe Poel|"
    "vanden Dijkgraaf|vanden Kommer|VanDer Aar|vander Gouwe|"
    "VanDer Honing|VanDer Hooning|vander Horst|vander Kroft|"
    "vander Krogt|VanDer Meer|vander Meulen|vander Putte|"
    "vander Schooren|VanDer Veen|VanDer Ven|VanDer Wal|VanDer Weide|"
    "VanDer Willigen|vander Wulp|vander Zanden|vander Zwan|"
    "VanDer Zweep|VanDeren|VanDerlaan|VanDerveer|VanderWoude|"
    "VanDeursen|VanDeusen|vanDijk|VanDoren|VanDorn|VanDort|VanDruff|"
    "VanDryer|VanDusen|VanDuzee|VanDuzen|VanDuzer|VanDyck|VanDyke|"
    "VanEman|VanEmmen|vanEmmerik|VanEngen|vanErp|vanEssen|VanFleet|"
    "VanGalder|VanGelder|vanGerrevink|VanGog|vanGogh|VanGorder|"
    "VanGordon|VanGroningen|VanGuilder|VanGundy|VanHaaften|VanHaute|"
    "VanHees|vanHeugten|VanHise|VanHoeck|VanHoek|VanHook|vanHoorn|"
    "VanHoornbeeck|VanHoose|VanHooser|VanHorn|VanHorne|VanHouten|"
    "VanHoye|VanHuijstee|VanHuss|VanImmon|VanKersschaever|VanKeuren|"
    "VanKleeck|VanKoughnet|VanKouwenhoven|VanKuykendaal|vanLeeuwen|"
    "vanLent|vanLet|VanLeuven|vanLingen|VanLoozen|VanLopik|VanLuven|"
    "vanMaasdijk|VanMele|VanMeter|vanMoorsel|VanMoorst|VanMossevelde|"
    "VanNaarden|VanNamen|VanNemon|VanNess|VanNest|VanNimmen|vanNobelen|"
    "VanNorman|VanNormon|VanNostrunt|VanNote|VanOker|vanOosten|"
    "VanOrden|VanOrder|VanOrma|VanOrman|VanOrnum|VanOstrander|"
    "VanOvermeire|VanPelt|VanPool|VanPoole|VanPoorvliet|VanPutten|"
    "vanRee|VanRhijn|vanRijswijk|VanRotmer|VanSchaick|vanSchelt|"
    "VanSchoik|VanSchoonhoven|VanSciver|VanScoy|VanScoyoc|vanSeters|"
    "VanSickle|VanSky|VanSnellenberg|vanStaveren|VanStraten|VanSuijdam|"
    "VanTassel|VanTassell|VanTessel|VanTexel|VanTuyl|VanValckenburgh|"
    "vanValen|VanValkenburg|VanVelsor|VanVelzor|VanVlack|VanVleck|"
    "VanVleckeren|VanWaard|VanWart|VanWassenhove|VanWinkle|VanWoggelum|"
    "vanWordragen|VanWormer|VanZuidam|VanZuijdam|VonAdenbach|vonAllmen|"
    "vonBardeleben|vonBerckefeldt|VonBergen|vonBreyman|VonCannon|"
    "vonFreymann|vonHeimburg|VonHuben|vonKramer|vonKruchenburg|"
    "vonPostel|VonRohr|VonRohrbach|VonSass|VonSasse|vonSchlotte|"
    "VonSchneider|VonSeldern|VonSpringer|VonVeyelmann|VonZweidorff"
).split('|')
#"# fix syntax highlighting in vim
import re
mc = re.compile( r"^Mc(\w)(?=\w)", re.I )
mac = re.compile( r"^Mac(\w)(?=\w)", re.I )
class Name(str):
    """A ``str`` subclass whose value is the properly capitalized name.

    The text passed to the constructor is run through ``Capitalize`` to
    produce the string value; the untouched input stays available on the
    ``original`` attribute.
    """

    def __new__(cls, value=''):
        instance = str.__new__(cls, Capitalize(value))
        # Keep the caller's raw input alongside the capitalized value.
        instance.original = value
        return instance
# Matches a leading "Mc" or "Mac" (any case) followed by at least two word
# characters; group 2 is the character that must be upper-cased.  A word
# cannot start with both prefixes, so one combined pass gives the same result
# as a separate "Mc" pass followed by a "Mac" pass.
_MC_MAC_PREFIX = re.compile(r"^(Mc|Mac)(\w)(?=\w)", re.I)


def _capitalize_word(word):
    """Capitalize one word and upper-case the letter after a Mc/Mac prefix."""
    return _MC_MAC_PREFIX.sub(
        lambda m: m.group(1).capitalize() + m.group(2).upper(),
        word.capitalize())


def _apply_special_cases(name, special_cases):
    """Rewrite every space-delimited occurrence of a special case verbatim."""
    # Replacements only change letter case, so the lowered copy stays aligned
    # with `name` and can be computed once instead of once per entry.
    lowered = name.lower()
    for special in special_cases:
        needle = special.lower()
        start = lowered.find(needle)
        # Check every occurrence: the first hit may sit inside another word
        # (e.g. "iv" in "Sullivan") while a later one is a real suffix.
        while start != -1:
            end = start + len(needle)
            starts_word = start == 0 or lowered[start - 1] == ' '
            ends_word = end == len(lowered) or lowered[end] == ' '
            if starts_word and ends_word:
                name = name[:start] + special + name[end:]
            start = lowered.find(needle, start + 1)
    return name


def Capitalize(name, special_cases=None):
    """Return `name` (a single name or a full name) with proper-name capitalization.

    Steps, in order:
      1. Whitespace is collapsed to single spaces and trimmed.
      2. Every space- or hyphen-separated part is capitalized; a part starting
         with "Mc"/"Mac" also gets the following letter upper-cased.
      3. The particles " Dit ", " Van " and " De " (surrounded by spaces) are
         lower-cased.
      4. Every entry of `special_cases` that appears as a whole,
         space-delimited chunk (compared case-insensitively) is replaced by
         its exact spelling; later entries win over earlier ones.

    Parameters:
      name          -- the text to capitalize.
      special_cases -- optional iterable of exact spellings for step 4;
                       defaults to the module-level `surnames + suffixes`.

    Returns the capitalized name as a plain string.
    """
    if special_cases is None:
        special_cases = surnames + suffixes

    # Normalize whitespace *before* looking at hyphens so that hyphens are
    # kept exactly where they belong, whatever spacing the input used.
    collapsed = ' '.join(name.split())
    name = ' '.join(
        '-'.join(_capitalize_word(part) for part in chunk.split('-'))
        for chunk in collapsed.split(' '))

    # funky stuff (no capitalization)
    for particle in ("Dit", "Van", "De"):
        name = name.replace(" %s " % particle, " %s " % particle.lower())

    # special surnames and suffixes
    return _apply_special_cases(name, special_cases)
import unittest
class TestCapitalize(unittest.TestCase):
    # Every case feeds one raw name to Capitalize() and compares the result
    # with the expected capitalization.

    def _check(self, raw, expected):
        self.assertEqual(Capitalize(raw), expected)

    def test_simple(self):
        self._check("john smith", "John Smith")

    def test_mc(self):
        self._check("BOB MCELROY", "Bob McElroy")

    def test_mac(self):
        self._check("josh macelvany", "Josh MacElvany")

    def test_surname(self):
        self._check("BILL VANWINKLE", "Bill VanWinkle")

    def test_van_space(self):
        self._check("joe van buren", "Joe van Buren")

    def test_suffix(self):
        self._check("bob jones, iii", "Bob Jones, III")

    def test_hyphenated(self):
        self._check("mary johnson-smith", "Mary Johnson-Smith")

    def test_iv_not_in_suffix(self):
        # "iv" inside "Sullivan" must not be treated as the suffix "IV".
        self._check("DAVID SULLIVAN", "David Sullivan")

    def test_mc_in_middle(self):
        # "mc" that is not at the start of a word is left alone.
        self._check("jason bumcorn", "Jason Bumcorn")
# Run the unit tests when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment