Skip to content

Instantly share code, notes, and snippets.

@hdon
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hdon/34938a1b55c96ea69178 to your computer and use it in GitHub Desktop.
Save hdon/34938a1b55c96ea69178 to your computer and use it in GitHub Desktop.
Generate non-colliding, intuitive usernames of at most 8 and 16 characters from domain names
import re
# Read the system word list once, closing the file as soon as it has been
# consumed. Both word lists below are derived from the same stripped lines.
with open('/usr/share/dict/words') as dictFile:
    dictLines = [line.strip() for line in dictFile]

# Dictionary entries made only of 7-bit ASCII characters.
# NOTE(review): nothing in the visible script reads this list before the main
# loop rebinds ``words``; it is kept so the module-level name still exists.
words = [s for s in dictLines if all(ord(c) < 128 for c in s)]

# A dictionary entry is usable only if, once lower-cased, it consists solely
# of lower-case letters, digits and underscores.
domainExp = re.compile('^[a-z0-9_]+$')
# A run of decimal digits.
numbersExp = re.compile('[0-9]+')

# Lower-cased dictionary entries accepted by domainExp. This is a real list
# (not a lazy filter object) because splitWords() scans it repeatedly.
englishWords = [s for s in (line.lower() for line in dictLines)
                if domainExp.match(s)]

names = {}         # domain name -> (16-character username, 8-character username)
rnames = {}        # username -> domain name that owns it
nameSources = {}   # username -> number of the pass that produced it
TLDs = {}          # every top-level domain seen so far -> True
numCollisions = 0
def splitWords(name):
    """Split a domain label into dictionary words, digit runs and leftovers.

    The label is consumed from left to right. On each step the entry of the
    module-level ``englishWords`` list that starts earliest in the remaining
    text is chosen; when several start at the same position the longest one
    wins. A run of digits at the very start of the remaining text (matched
    with the module-level ``numbersExp``) competes as a candidate at
    position 0. Any text in front of the chosen word is emitted as its own
    fragment. A ``-`` at the start of the remaining text is dropped. When
    nothing matches, the remaining text is emitted as the final fragment.

    Returns the list of fragments in their original order.
    """
    foundWords = []
    while name:
        # A hyphen at the front only separates words; skip it.
        if name[0] == '-':
            name = name[1:]
            continue

        foundWord = None
        foundWordPos = -1

        numbers = numbersExp.match(name)
        if numbers:
            # group() is the matched digit run. (Match.string would be the
            # whole text passed to match(), which made the rest of the label
            # get swallowed together with the digits.)
            foundWord = numbers.group()
            # match() is anchored at the start, so this is position 0.
            foundWordPos = numbers.start()

        for word in englishWords:
            wordPos = name.find(word)
            if wordPos < 0:
                continue
            # Prefer the earliest match, then the longest at that position.
            if (not foundWord
                    or wordPos < foundWordPos
                    or (wordPos == foundWordPos
                        and len(word) > len(foundWord))):
                foundWord = word
                foundWordPos = wordPos

        if not foundWord:
            # Nothing recognisable is left: keep the remainder as one piece.
            foundWords.append(name)
            break

        if foundWordPos > 0:
            # Unrecognised text in front of the word becomes its own fragment.
            foundWords.append(name[:foundWordPos])
        wordEnd = foundWordPos + len(foundWord)
        foundWords.append(name[foundWordPos:wordEnd])
        name = name[wordEnd:]
    return foundWords
def _appendTld(base, tld, limit):
    """Return ``base`` followed by ``tld``, cutting ``base`` to make it fit.

    ``base`` is truncated to ``limit - len(tld)`` characters before ``tld``
    is appended, so a ``base`` that is already short enough is kept whole.
    NOTE(review): assumes len(tld) <= limit; a longer TLD makes the slice
    end negative -- confirm that such TLDs never occur in the input.
    """
    return base[0:limit - len(tld)] + tld


def _abbreviate(words, limit):
    """Join the leading characters of each word into at most ``limit`` chars.

    Every word contributes its first ``chunkSize`` characters, where
    ``chunkSize`` is at least 3 and grows when there are few words.
    ``words`` must not be empty.
    """
    # Floor division keeps the slice bound an integer.
    chunkSize = max(3, limit // len(words) + 1)
    abbreviated = ''
    for word in words:
        abbreviated += word[0:chunkSize]
        if len(abbreviated) >= limit:
            abbreviated = abbreviated[0:limit]
            break
    return abbreviated


def _candidateNames(name0, tld):
    """Yield ``(pass number, (16-char name, 8-char name))`` candidates lazily.

    Pass 0: the label plus TLD, with the label truncated to fit.
    Pass 1: the label abbreviated word by word (see splitWords), no TLD.
    Pass 2: the pass 1 names, trimmed to make room for the TLD.
    Pass 3: the pass 1 names with their last three characters replaced by
            the first three characters of the last word.
    Pass 4: the pass 3 names, trimmed to make room for the TLD.
    """
    yield 0, (_appendTld(name0, tld, 16), _appendTld(name0, tld, 8))

    # The dictionary scan is expensive, so it only runs once pass 0 collided.
    words = splitWords(name0)
    if not words:
        # Nothing to abbreviate (e.g. a label made only of hyphens).
        return

    short = (_abbreviate(words, 16), _abbreviate(words, 8))
    yield 1, short
    yield 2, (_appendTld(short[0], tld, 16), _appendTld(short[1], tld, 8))

    tail = words[-1][0:3]
    short = (short[0][:-3] + tail, short[1][:-3] + tail)
    yield 3, short
    yield 4, (_appendTld(short[0], tld, 16), _appendTld(short[1], tld, 8))


# Read the list of domains up front so the file is closed before processing.
with open('domains') as domainFile:
    domainNames = [line.strip() for line in domainFile]

for name in domainNames:
    if name in names:
        raise KeyError('Duplicate name "%s"' % name)
    components = name.split('.')
    if len(components) != 2:
        # Only "<label>.<tld>" entries are supported.
        raise ValueError('Expected "<name>.<tld>", got "%s"' % name)
    name0, tld = components
    TLDs[tld] = True

    # Take the first candidate pair whose two usernames are both unused.
    for namePass, newnames in _candidateNames(name0, tld):
        if newnames[0] not in rnames and newnames[1] not in rnames:
            break  # No collision
    else:
        # Every pass collided: report who owns the last candidates and stop.
        print('collision:')
        if newnames[0] in rnames:
            print('\t16-character username: %s already taken by %s in pass %s'
                  % (newnames[0], rnames[newnames[0]],
                     nameSources[newnames[0]]))
        if newnames[1] in rnames:
            print('\t8-character username: %s already taken by %s in pass %s'
                  % (newnames[1], rnames[newnames[1]],
                     nameSources[newnames[1]]))
        raise OverflowError('permanent collision for %s' % name)

    names[name] = newnames
    rnames[newnames[0]] = name
    rnames[newnames[1]] = name
    nameSources[newnames[0]] = namePass
    nameSources[newnames[1]] = namePass

# Nothing in this loop increments numCollisions (a permanent collision raises
# instead), so this guard only fires if other code changes the counter.
if numCollisions:
    print('%d collisions found' % numCollisions)
    raise SystemExit

# Emit one CSV row per domain: domain, 16-character name, 8-character name.
for name in names:
    print('"%s","%s","%s"' % (name, names[name][0], names[name][1]))
"homesforsaleinsunnycalifornia.us","homesforsaleinus","homesfus"
"homesforsaleinsunnyflorida.us","homforsalinsunus","homforus"
"homesforsaleinsunnyflorida.net","homforsalinsunet","homfonet"
"homesforsaleinsunnycalifornia.com","homesforsaleicom","homescom"
"homesforsaleinsunnycalifornia.org","homesforsaleiorg","homesorg"
"homesforsaleinsunnycalifornia.net","homesforsaleinet","homesnet"
"homesforsaleinsunnyflorida.org","homforsalinsuorg","homfoorg"
"homesforsaleinsunnyflorida.com","homforsalinsunny","homforsa"
"srtwireless.com","srtwirelesscom","srtwicom"
"mymmsmail.com","mymmsmailcom","mymmscom"
"helio.com","heliocom","heliocom"
"comspeco.net","comspeconet","comspnet"
"myboostmobile.com","myboostmobilecom","myboocom"
"southernlinc.com","southernlinccom","srtwireless.com","srtwirelesscom","srtwicom"
"mymmsmail.com","mymmsmailcom","mymmscom"
"helio.com","heliocom","heliocom"
"comspeco.net","comspeconet","comspnet"
"myboostmobile.com","myboostmobilecom","myboocom"
"southernlinc.com","southernlinccom","southcom"
"myblue.com","mybluecom","myblucom"
"my2way.com","my2waycom","my2wacom"
"radiopaging.com","radiopaging","radiopag"
"skearthlink.com","skearthlinkcom","skearcom"
"pageme.net","pagemenet","pagemnet"
"cellonemail.com","cellonemailcom","cellocom"
"breweloquiwireless.net","breweloquiwirnet","brewenet"
"advmms.com","advmmscom","advmmcom"
"iwspcs.com","iwspcscom","iwspccom"
"archwireless.net","archwirelessnet","archwnet"
"gocbw.com","gocbwcom","gocbwcom"
"mymobile.com","mymobilecom","mymobcom"
"pageoneinc.com","pageoneinccom","pageocom"
"vtext.com","vtextcom","vtextcom"
"airlinkmobile.com","airlinkmobilecom","airlicom"
"smseloquiwireless.net","smseloquiwirenet","smselnet"
"attmobility.com","attmobilitycom","attmocom"
"tmail.com","tmailcom","tmailcom"
"mobilemessage.com","mobilemessage","mobilmes"
"simmetrypcs.net","simmetrypcsnet","simmenet"
"mycingularblackberry.com","mycingularblacom","mycincom"
"cell1net.net","cell1netnet","cell1net"
"mymetropcs.com","mymetropcscom","mymetcom"
"edgehiptop.com","edgehiptopcom","edgehcom"
"sprintpcs.com","sprintpcscom","sprincom"
"mymmode.com","mymmodecom","mymmocom"
"blswe.com","blswecom","blswecom"
"iwarn.com","iwarncom","iwarncom"
"mobilcom.net","mobilcomnet","mobilnet"
"edgesend.com","edgesendcom","edgescom"
"radiocommpaging.com","radiocommpagicom","radiocom"
"mycellonephone.com","mycellonephoncom","mycelcom"
"2pageme.net","2pagemenet","2pagenet"
"csouth1.net","csouth1net","csoutnet"
"cookmail.com","cookmailcom","cookmcom"
"attmobile.net","attmobilenet","attmonet"
"redicall.com","redicallcom","rediccom"
"mytelepage.com","mytelepagecom","mytelcom"
"csouth1.com","csouth1com","csoutcom"
"sbcpaging.com","sbcpagingcom","sbcpacom"
"allbeep.com","allbeepcom","allbecom"
"cwemail.com","cwemailcom","cwemacom"
"tmomail.net","tmomailnet","tmomanet"
"ivcdata.com","ivcdatacom","ivcdacom"
"racomm.com","racommcom","racomcom"
"eloqui.net","eloquinet","eloqunet"
"rcsepager.com","rcsepagercom","rcsepcom"
"ivcel.net","ivcelnet","ivcelnet"
"bellsouthwirelessemail.net","bellouthwireenet","belounet"
"cingularme.net","cingularmenet","cingunet"
"pacbellpcs.net","pacbellpcsnet","pacbenet"
"sk-earthlink.com","sk-earthlinkcom","sk-eacom"
"myhelio.com","myheliocom","myhelcom"
"rccpaging.com","rccpagingcom","rccpacom"
"iwspcs.net","iwspcsnet","iwspcnet"
"ipnpaging.com","ipnpagingcom","ipnpacom"
"page-us.com","page-uscom","page-com"
"ccc3g.net","ccc3gnet","ccc3gnet"
"mid-texcellular.com","mid-texcellulcom","mid-tcom"
"ccc3g.info","ccc3ginfo","ccc3info"
"corrwireless.net","corrwirelessnet","corrwnet"
"dcswi.com","dcswicom","dcswicom"
"cellcomsms.com","cellcomsms","celcomsm"
"cwemail.net","cwemailnet","cwemanet"
"vzwpix.com","vzwpixcom","vzwpicom"
"pagerpeople.com","pagerpeoplecom","pagercom"
"sbc2way.com","sbc2waycom","sbc2wcom"
"cwwsms.com","cwwsmscom","cwwsmcom"
"centennialwireless.com","centenniawireles","centewir"
"blswe.net","blswenet","blswenet"
"cingularme.us","cingularmeus","cingulus"
"edgesend.net","edgesendnet","edgesnet"
"c1usa.com","c1usacom","c1usacom"
"ccc3g.us","ccc3gus","ccc3gus"
"skytel.com","skytelcom","skytecom"
"leaco.net","leaconet","leaconet"
"mpot.com","mpotcom","mpotcom"
"mylyrix.net","mylyrixnet","mylyrnet"
"minncommpaging.com","minncommpagincom","minnccom"
"mycingular.com","mycingular","mycingul"
"bellsouthwirelessemail.com","bellouthwireemai","belouthw"
"info2go.com","info2gocom","info2com"
"myawi.com","myawicom","myawicom"
"myairmail.com","myairmailcom","myaircom"
"page-all.com","pageall","pageall"
"zsend.com","zsendcom","zsendcom"
"simmetrypcs.com","simmetrypcscom","simmecom"
"edgehiptopmail.com","edgehiptopmail","edghipto"
"awsms.com","awsmscom","awsmscom"
"bellsouthtms.net","bellsouthtmsnet","bellsnet"
"radiocontrolit.com","radiocontroit","radconit"
"attmobility.net","attmobility","attmobil"
"sbcemail.com","sbcemailcom","sbcemcom"
"cingularme.com","cingularmecom","cingucom"
"ccc3g.biz","ccc3gbiz","ccc3gbiz"
"airmessage.net","airmessagenet","airmenet"
"mmseloquiwireless.net","mmseloquiwirenet","mmselnet"
"corrwireless.com","corrwirelesscom","corrwcom"
"satellink.net","satellinknet","satelnet"
"omnicom-paging.com","omnicom-pagincom","omniccom"
"cccomm.biz","cccommbiz","cccombiz"
"tuyomail.com","tuyomailcom","tuyomcom"
"txtpage.com","txtpagecom","txtpacom"
"prestopaging.com","prestopagingcom","prestcom"
"onelinkpcs.com","onelinkpcscom","onelicom"
"wwtext.net","wwtextnet","wwtexnet"
"alaskadigitel.com","alaskadigitelcom","alaskcom"
"cell1wv.net","cell1wv","cell1wv"
"mywireless.com","mywirelesscom","mywircom"
"quiktxt.com","quiktxtcom","quiktcom"
"skymail.com","skymailcom","skymacom"
"viaeromail.com","viaeromailcom","viaercom"
"pagemci.com","pagemcicom","pagemcom"
"firstcellular.net","firstcellularnet","firstnet"
"mycingular.net","mycingur","mycinr"
"bellsouthtms.com","bellsouthtmscom","bellscom"
"gwteletech.com","gwteletechcom","gwtelcom"
"ubetwireless.com","ubetwirelesscom","ubetwcom"
"mycellone.net","mycellonenet","mycelnet"
"mycricket.com","mycricketcom","mycricom"
"mycingularblackberry.net","mycingularblanet","mycinnet"
"cellcompage.com","cellcompagecom","cellccom"
"union-tel.com","union-telcom","unioncom"
"utext.com","utextcom","utextcom"
"pagenet.net","pagenetnet","pagennet"
"sbycomm.com","sbycommcom","sbycocom"
"usamobility.net","usamobilitynet","usamonet"
"midwestwireless.net","midwestwirelenet","midwenet"
"pioneerenidcellular.com","pioneerenidcecom","pionecom"
"myc29.net","myc29net","myc29net"
"leaco.org","leacoorg","leacoorg"
"mohavewireless.com","mohavewirelescom","mohavcom"
"epageme.com","epagemecom","epagecom"
"mobilcom.com","mobilcomcom","mobilcom"
"sccvmail.com","sccvmailcom","sccvmcom"
"edgewireless.com","edgewirelesscom","edgewcom"
"c1wi.com","c1wicom","c1wicom"
"imcingular.com","imcingularcom","imcincom"
"ivctext.com","ivctextcom","ivctecom"
"pageallcom.com","pageallcomcom","pageacom"
"mobilecomm.net","mobilecomm","mobcomm"
"mydcs1.com","mydcs1com","mydcscom"
"guamcell.com","guamcellcom","guamccom"
"airpage.net","airpagenet","airpanet"
"viaerosms.com","viaerosms","viaeroms"
"2pagemci.com","2pagemcicom","2pagecom"
"calnorth.net","calnorthnet","calnonet"
"corrcomm.com","corrcommcom","corrccom"
"edgemobile.net","edgemobilenet","edgemnet"
"clearlydigital.com","clearlydigitacom","clearcom"
"centennialcom.com","centennialcomcom","centecom"
"mmode.com","mmodecom","mmodecom"
"viaeromms.com","viaeromms","viaeromm"
"southcom"
"myblue.com","mybluecom","myblucom"
"my2way.com","my2waycom","my2wacom"
"radiopaging.com","radiopaging","radiopag"
"skearthlink.com","skearthlinkcom","skearcom"
"pageme.net","pagemenet","pagemnet"
"cellonemail.com","cellonemailcom","cellocom"
"breweloquiwireless.net","breweloquiwirnet","brewenet"
"advmms.com","advmmscom","advmmcom"
"iwspcs.com","iwspcscom","iwspccom"
"archwireless.net","archwirelessnet","archwnet"
"gocbw.com","gocbwcom","gocbwcom"
"mymobile.com","mymobilecom","mymobcom"
"pageoneinc.com","pageoneinccom","pageocom"
"vtext.com","vtextcom","vtextcom"
"airlinkmobile.com","airlinkmobilecom","airlicom"
"smseloquiwireless.net","smseloquiwirenet","smselnet"
"attmobility.com","attmobilitycom","attmocom"
"tmail.com","tmailcom","tmailcom"
"mobilemessage.com","mobilemessage","mobilmes"
"simmetrypcs.net","simmetrypcsnet","simmenet"
"mycingularblackberry.com","mycingularblacom","mycincom"
"cell1net.net","cell1netnet","cell1net"
"mymetropcs.com","mymetropcscom","mymetcom"
"edgehiptop.com","edgehiptopcom","edgehcom"
"sprintpcs.com","sprintpcscom","sprincom"
"mymmode.com","mymmodecom","mymmocom"
"blswe.com","blswecom","blswecom"
"iwarn.com","iwarncom","iwarncom"
"mobilcom.net","mobilcomnet","mobilnet"
"edgesend.com","edgesendcom","edgescom"
"radiocommpaging.com","radiocommpagicom","radiocom"
"mycellonephone.com","mycellonephoncom","mycelcom"
"2pageme.net","2pagemenet","2pagenet"
"csouth1.net","csouth1net","csoutnet"
"cookmail.com","cookmailcom","cookmcom"
"attmobile.net","attmobilenet","attmonet"
"redicall.com","redicallcom","rediccom"
"mytelepage.com","mytelepagecom","mytelcom"
"csouth1.com","csouth1com","csoutcom"
"sbcpaging.com","sbcpagingcom","sbcpacom"
"allbeep.com","allbeepcom","allbecom"
"cwemail.com","cwemailcom","cwemacom"
"tmomail.net","tmomailnet","tmomanet"
"ivcdata.com","ivcdatacom","ivcdacom"
"racomm.com","racommcom","racomcom"
"eloqui.net","eloquinet","eloqunet"
"rcsepager.com","rcsepagercom","rcsepcom"
"ivcel.net","ivcelnet","ivcelnet"
"bellsouthwirelessemail.net","bellouthwireenet","belounet"
"cingularme.net","cingularmenet","cingunet"
"pacbellpcs.net","pacbellpcsnet","pacbenet"
"sk-earthlink.com","sk-earthlinkcom","sk-eacom"
"myhelio.com","myheliocom","myhelcom"
"rccpaging.com","rccpagingcom","rccpacom"
"iwspcs.net","iwspcsnet","iwspcnet"
"ipnpaging.com","ipnpagingcom","ipnpacom"
"page-us.com","page-uscom","page-com"
"ccc3g.net","ccc3gnet","ccc3gnet"
"mid-texcellular.com","mid-texcellulcom","mid-tcom"
"ccc3g.info","ccc3ginfo","ccc3info"
"corrwireless.net","corrwirelessnet","corrwnet"
"dcswi.com","dcswicom","dcswicom"
"cellcomsms.com","cellcomsms","celcomsm"
"cwemail.net","cwemailnet","cwemanet"
"vzwpix.com","vzwpixcom","vzwpicom"
"pagerpeople.com","pagerpeoplecom","pagercom"
"sbc2way.com","sbc2waycom","sbc2wcom"
"cwwsms.com","cwwsmscom","cwwsmcom"
"centennialwireless.com","centenniawireles","centewir"
"blswe.net","blswenet","blswenet"
"cingularme.us","cingularmeus","cingulus"
"edgesend.net","edgesendnet","edgesnet"
"c1usa.com","c1usacom","c1usacom"
"ccc3g.us","ccc3gus","ccc3gus"
"skytel.com","skytelcom","skytecom"
"leaco.net","leaconet","leaconet"
"mpot.com","mpotcom","mpotcom"
"mylyrix.net","mylyrixnet","mylyrnet"
"minncommpaging.com","minncommpagincom","minnccom"
"mycingular.com","mycingular","mycingul"
"bellsouthwirelessemail.com","bellouthwireemai","belouthw"
"info2go.com","info2gocom","info2com"
"myawi.com","myawicom","myawicom"
"myairmail.com","myairmailcom","myaircom"
"page-all.com","pageall","pageall"
"zsend.com","zsendcom","zsendcom"
"simmetrypcs.com","simmetrypcscom","simmecom"
"edgehiptopmail.com","edgehiptopmail","edghipto"
"awsms.com","awsmscom","awsmscom"
"bellsouthtms.net","bellsouthtmsnet","bellsnet"
"radiocontrolit.com","radiocontroit","radconit"
"attmobility.net","attmobility","attmobil"
"sbcemail.com","sbcemailcom","sbcemcom"
"cingularme.com","cingularmecom","cingucom"
"ccc3g.biz","ccc3gbiz","ccc3gbiz"
"airmessage.net","airmessagenet","airmenet"
"mmseloquiwireless.net","mmseloquiwirenet","mmselnet"
"corrwireless.com","corrwirelesscom","corrwcom"
"satellink.net","satellinknet","satelnet"
"omnicom-paging.com","omnicom-pagincom","omniccom"
"cccomm.biz","cccommbiz","cccombiz"
"tuyomail.com","tuyomailcom","tuyomcom"
"txtpage.com","txtpagecom","txtpacom"
"prestopaging.com","prestopagingcom","prestcom"
"onelinkpcs.com","onelinkpcscom","onelicom"
"wwtext.net","wwtextnet","wwtexnet"
"alaskadigitel.com","alaskadigitelcom","alaskcom"
"cell1wv.net","cell1wv","cell1wv"
"mywireless.com","mywirelesscom","mywircom"
"quiktxt.com","quiktxtcom","quiktcom"
"skymail.com","skymailcom","skymacom"
"viaeromail.com","viaeromailcom","viaercom"
"pagemci.com","pagemcicom","pagemcom"
"firstcellular.net","firstcellularnet","firstnet"
"mycingular.net","mycingur","mycinr"
"bellsouthtms.com","bellsouthtmscom","bellscom"
"gwteletech.com","gwteletechcom","gwtelcom"
"ubetwireless.com","ubetwirelesscom","ubetwcom"
"mycellone.net","mycellonenet","mycelnet"
"mycricket.com","mycricketcom","mycricom"
"mycingularblackberry.net","mycingularblanet","mycinnet"
"cellcompage.com","cellcompagecom","cellccom"
"union-tel.com","union-telcom","unioncom"
"utext.com","utextcom","utextcom"
"pagenet.net","pagenetnet","pagennet"
"sbycomm.com","sbycommcom","sbycocom"
"usamobility.net","usamobilitynet","usamonet"
"midwestwireless.net","midwestwirelenet","midwenet"
"pioneerenidcellular.com","pioneerenidcecom","pionecom"
"myc29.net","myc29net","myc29net"
"leaco.org","leacoorg","leacoorg"
"mohavewireless.com","mohavewirelescom","mohavcom"
"epageme.com","epagemecom","epagecom"
"mobilcom.com","mobilcomcom","mobilcom"
"sccvmail.com","sccvmailcom","sccvmcom"
"edgewireless.com","edgewirelesscom","edgewcom"
"c1wi.com","c1wicom","c1wicom"
"imcingular.com","imcingularcom","imcincom"
"ivctext.com","ivctextcom","ivctecom"
"pageallcom.com","pageallcomcom","pageacom"
"mobilecomm.net","mobilecomm","mobcomm"
"mydcs1.com","mydcs1com","mydcscom"
"guamcell.com","guamcellcom","guamccom"
"airpage.net","airpagenet","airpanet"
"viaerosms.com","viaerosms","viaeroms"
"2pagemci.com","2pagemcicom","2pagecom"
"calnorth.net","calnorthnet","calnonet"
"corrcomm.com","corrcommcom","corrccom"
"edgemobile.net","edgemobilenet","edgemnet"
"clearlydigital.com","clearlydigitacom","clearcom"
"centennialcom.com","centennialcomcom","centecom"
"mmode.com","mmodecom","mmodecom"
"viaeromms.com","viaeromms","viaeromm"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment