Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hdon/34938a1b55c96ea69178 to your computer and use it in GitHub Desktop.
Save hdon/34938a1b55c96ea69178 to your computer and use it in GitHub Desktop.
Generate non-colliding, intuitive usernames of at most 8 and at most 16 characters from domain names
import re
# Module-level setup: load the system dictionary, compile the patterns used by
# splitWords, and create the registries filled in by the main loop.

# Dictionary entries, one per line, restricted to pure-ASCII words.  The file
# is opened in a `with` block so the handle is closed as soon as it is read.
with open('/usr/share/dict/words') as wordFile:
    words = [
        entry for entry in map(str.strip, wordFile)
        if all(ord(c) < 128 for c in entry)
    ]

# A dictionary entry is usable only if it consists solely of lowercase
# letters, digits and underscores.
domainExp = re.compile(r'^[a-z0-9_]+$')
# A run of decimal digits; splitWords applies it with match(), i.e. only at
# the start of the remaining name.
numbersExp = re.compile(r'[0-9]+')

# NOTE(review): the original expression is truncated in this copy of the file
# (only `lambda s: domainExp.match(s),` survives).  Filtering `words` through
# domainExp is the presumed intent -- confirm against the upstream gist.
# Kept as a list because splitWords iterates it once per word it extracts.
englishWords = [entry for entry in words if domainExp.match(entry)]

names = {}        # domain name -> (16-character username, 8-character username)
rnames = {}       # username -> domain name that owns it
nameSources = {}  # username -> number of the pass that produced it
TLDs = {}         # every TLD seen so far, stored as keys with value True
numCollisions = 0  # checked by the final report; never incremented in this file
def splitWords(name, wordList=None, numberPattern=None):
    """Greedily split *name* into known words and digit runs.

    The name is consumed left to right.  On every step the candidate that
    starts earliest in the remaining text wins; among candidates starting at
    the same position the longest one wins.  A digit run is only considered
    at the very start of the remaining text.  A single leading ``-`` is
    dropped before each step.

    Parameters:
        name: the string to split (for example the label of a domain name).
        wordList: iterable of candidate words; defaults to the module-level
            ``englishWords``.
        numberPattern: compiled pattern for digit runs, applied with
            ``match``; defaults to the module-level ``numbersExp``.

    Returns:
        The list of extracted pieces, in order of appearance.  Splitting
        stops at the first point where nothing matches, so the list is empty
        when nothing can be matched at all.
    """
    if wordList is None:
        wordList = englishWords
    if numberPattern is None:
        numberPattern = numbersExp

    foundWords = []
    while name:
        if name[0] == '-':
            name = name[1:]

        foundWord = None
        foundWordPos = -1

        numbers = numberPattern.match(name)
        if numbers:
            # group() is the matched digits.  The previous code used
            # Match.string, which is the *entire* input, so one leading digit
            # swallowed everything that followed it.
            foundWord = numbers.group()
            foundWordPos = numbers.start()

        for word in wordList:
            wordPos = name.find(word)
            if wordPos < 0:
                continue
            earlier = wordPos < foundWordPos
            longer = wordPos == foundWordPos and len(word) > len(foundWord or '')
            if not foundWord or earlier or longer:
                foundWord = word
                foundWordPos = wordPos

        if not foundWord:
            # Nothing recognisable is left; without this exit the loop could
            # never terminate.
            break

        # NOTE(review): text preceding the match (foundWordPos > 0) is
        # discarded.  The original had an `if foundWordPos > 0:` branch whose
        # body is not visible in this copy -- confirm that dropping the
        # prefix is what was intended.
        foundWords.append(foundWord)
        name = name[foundWordPos + len(foundWord):]
    return foundWords
def _appendTld(base, tld, limit):
    """Return ``base`` cut short enough that ``base + tld`` fits ``limit``."""
    return base[0:limit - len(tld)] + tld


def _fitName(name0, tld, limit):
    """Return ``name0 + tld`` unchanged if it fits ``limit``, else shortened."""
    joined = name0 + tld
    if len(joined) <= limit:
        return joined
    return _appendTld(name0, tld, limit)


def _abbreviate(words, limit, endWithLastWord=False):
    """Concatenate a prefix of every word, capped at ``limit`` characters.

    Each word contributes at most ``max(3, limit // len(words) + 1)``
    characters.  With ``endWithLastWord`` the final three characters of the
    result are replaced by the first three characters of the last word.
    """
    # Floor division keeps the Python 2 meaning of the original `/`.
    chunkSize = max(3, limit // len(words) + 1)
    short = ''
    for word in words:
        short += word[0:chunkSize]
        if len(short) >= limit:
            short = short[0:limit]
            break
    if endWithLastWord:
        # NOTE(review): indentation was lost in this copy of the file, so it
        # is unclear whether the original applied this only after truncation.
        # It is applied unconditionally here -- confirm.
        short = short[:-3] + words[-1][0:3]
    return short


def _candidateNames(name0, tld):
    """Yield ``(pass number, (name16, name8))`` in order of preference.

    Pass 0 is the plain name plus TLD, passes 1 and 3 are word-based
    abbreviations, and passes 2 and 4 are those abbreviations with the TLD
    appended.  The generator is lazy, so the expensive word split only
    happens when pass 0 has already collided.
    """
    yield 0, (_fitName(name0, tld, 16), _fitName(name0, tld, 8))

    # An empty split would otherwise divide by zero; treat the whole label
    # as one word in that case.
    words = splitWords(name0) or [name0]
    for passNumber, endWithLastWord in ((1, False), (3, True)):
        name16 = _abbreviate(words, 16, endWithLastWord)
        name8 = _abbreviate(words, 8, endWithLastWord)
        yield passNumber, (name16, name8)
        yield passNumber + 1, (_appendTld(name16, tld, 16),
                               _appendTld(name8, tld, 8))


# Assign a (16-character, 8-character) username pair to every domain listed
# in the `domains` file, one domain per line.
with open('domains') as domainFile:
    for name in map(str.strip, domainFile):
        if name in names:
            raise KeyError('Duplicate name "%s"' % name)
        components = name.split('.')
        if len(components) != 2:
            # Raised explicitly because an assert disappears under `python -O`.
            raise ValueError('Expected "name.tld", got "%s"' % name)
        name0, tld = components
        TLDs[tld] = True

        for namePass, newnames in _candidateNames(name0, tld):
            if newnames[0] not in rnames and newnames[1] not in rnames:
                break  # No collision
        else:
            # Every pass collided; report against the last candidates tried.
            print('collision:')
            if newnames[0] in rnames:
                print('\t16-character username: %s already taken by %s in pass %s'
                      % (newnames[0], rnames[newnames[0]], nameSources[newnames[0]]))
            if newnames[1] in rnames:
                # This line used to be labelled "16-character" as well.
                print('\t8-character username: %s already taken by %s in pass %s'
                      % (newnames[1], rnames[newnames[1]], nameSources[newnames[1]]))
            raise OverflowError('permanent collision for %s' % name)

        names[name] = newnames
        rnames[newnames[0]] = name
        rnames[newnames[1]] = name
        nameSources[newnames[0]] = namePass
        nameSources[newnames[1]] = namePass
# Final report: stop early if any collisions were counted, otherwise print one
# quoted, comma-separated line per domain with its two usernames.
if numCollisions:
    print('%d collisions found' % numCollisions)
    raise SystemExit()

for name in names:
    name16, name8 = names[name][0], names[name][1]
    print('"%s","%s","%s"' % (name, name16, name8))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment