Skip to content

Instantly share code, notes, and snippets.

@adamneilson
Created October 30, 2013 10:31
Show Gist options
  • Save adamneilson/7230370 to your computer and use it in GitHub Desktop.
Save adamneilson/7230370 to your computer and use it in GitHub Desktop.
Generates available domain names
#!/usr/bin/python -O
import random
import string
import subprocess
import threading
import time
# get a list of words with only ASCII characters, and surround them with ^ and $ to demarcate the word boundaries
# (the with-block makes sure the dictionary file is closed once it has been read)
with open("/usr/share/dict/words") as word_file:
    words = [w.strip().lower() for w in word_file]
# the emptiness test is required because all() over an empty word is True
words = ["^" + w + "$" for w in words if w and all(c in string.ascii_lowercase for c in w)]
# construct a discrete-time markov chain of n-grams
n = 5 # this is the "n" in n-grams, try adjusting this for different results
transitions = {} # keys are n-grams, values are dicts mapping subsequent n-grams to probabilities
frequencies = {} # keys are n-grams, values are normalized frequencies [0, 1] of occurrence in the wordlist

# first pass: raw counts
for word in words:
    # count every n-gram in the word (words shorter than n contribute nothing)
    for i in range(len(word) + 1 - n):
        gram = word[i:i + n]
        frequencies[gram] = frequencies.get(gram, 0) + 1
    # count each n-gram -> following n-gram step (the window shifted right by one character);
    # the last n-gram of a word has no successor, hence the shorter range
    for i in range(len(word) - n):
        gram = word[i:i + n]
        successor = word[i + 1:i + n + 1]
        followers = transitions.setdefault(gram, {})
        followers[successor] = followers.get(successor, 0) + 1

# second pass: turn counts into probabilities
# divide by the total number of n-gram occurrences (not the number of distinct n-grams)
# so that the frequencies sum to 1
total_occurrences = float(sum(frequencies.values()))
for gram in frequencies:
    frequencies[gram] /= total_occurrences
# normalize each n-gram's outgoing transition counts so they sum to 1
for followers in transitions.values():
    outgoing = float(sum(followers.values()))
    for successor in followers:
        followers[successor] /= outgoing
def sample(pmf):
    """Draw one element at random from a probability mass function.

    pmf: dict mapping elements to probabilities.

    Walks the cumulative distribution in the dict's iteration order and
    returns the first element at which it reaches a uniform random threshold.
    Raises IndexError if pmf is empty.
    """
    threshold = random.random()
    cdf = 0.0
    for e in pmf:
        cdf += pmf[e]
        if cdf >= threshold:
            return e
    # The accumulated mass never reached the threshold (e.g. the probabilities
    # sum to slightly less than 1), so fall back to a uniform pick.
    # list(pmf) is used because random.choice needs an indexable sequence and
    # dict.keys() is only a view on Python 3.
    return random.choice(list(pmf))
def conditional(pmf, condition):
    """Restrict a probability mass function to the elements satisfying a predicate.

    pmf: dict mapping elements to probabilities.
    condition: boolean-valued function applied to each element.

    Returns a new dict holding only the accepted elements, with their
    probabilities rescaled by the total accepted mass. The result is empty
    when no element is accepted.
    """
    accepted = {}
    accepted_mass = 0.0
    for element, probability in pmf.items():
        if not condition(element):
            continue
        accepted[element] = probability
        accepted_mass += probability
    return {
        element: probability / accepted_mass
        for element, probability in accepted.items()
    }
# prefixes are the n-grams that open a word, i.e. those whose first character is the "^" marker
def _starts_word(gram):
    return gram[0] == "^"


# distribution over word-opening n-grams: the overall n-gram frequencies restricted to prefixes and renormalized
prefix_frequencies = conditional(frequencies, _starts_word)
def evolve(word):
    """Return the next character for word according to the markov chain.

    word: the string generated so far; it should be at least n characters long.

    The trailing n characters select the current state. If that n-gram is not
    in the model, a random lowercase letter or the "$" terminator is returned
    so that generation can keep going.
    """
    state = word[-n:]
    try:
        successors = transitions[state]
    except KeyError:
        # unknown state: improvise with a uniformly random character
        return random.choice(string.ascii_lowercase + "$")
    # the successor n-gram overlaps the current one; only its last character is new
    following_gram = sample(successors)
    return following_gram[-1:]
def gen_word():
    """Generate one word-like string by running the markov chain.

    Starts from a randomly sampled prefix n-gram and appends one character at
    a time until the "$" terminator shows up. When a terminator lands on a
    still-short word, a fresh prefix is sometimes appended so the result is
    made of several word-like pieces. Boundary markers are stripped from the
    returned string.
    """
    word = sample(prefix_frequencies)
    while not word.endswith("$"):
        word = word + evolve(word)
        if not word.endswith("$"):
            continue
        # the chain just terminated: roll the dice first, then check the length
        if random.random() > 0.7 and len(word) < 8:
            # start another lexeme; the loop carries on from the new prefix
            word = word + sample(prefix_frequencies)
    # drop every "^" and "$" boundary marker
    return "".join(ch for ch in word if ch not in "^$")
def check(domain, timeout=4.0):
    """Check whether a domain (e.g. "example.com") is taken, using the "whois" command.

    domain: the domain name to look up.
    timeout: wall-clock seconds to wait for whois before giving up.

    Returns True if the domain is taken, False if it is available (the whois
    output contains "No match for"), or None if the lookup timed out.
    Raises OSError if the whois command cannot be started.

    NOTE(review): the output is only read after whois exits; a response larger
    than the OS pipe buffer would presumably stall whois until the timeout.
    """
    process = subprocess.Popen(
        ["whois", domain], stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    try:
        # time.time() is wall-clock time; time.clock() counted CPU time on Unix
        # (so the deadline barely advanced while sleeping) and no longer exists
        # on Python 3.8+
        deadline = time.time() + timeout
        while time.time() < deadline:
            if process.poll() is not None:
                # the pipe yields bytes on Python 3, so compare against a bytes literal
                return b"No match for" not in process.stdout.read()
            time.sleep(0.1)
        # whois is hanging: kill it and reap it so no zombie process is left behind
        try:
            process.kill()
        except OSError:
            pass  # the process exited between the last poll and the kill
        process.wait()
        return None
    finally:
        # this runs in a tight loop forever, so release the pipe descriptors promptly
        process.stdout.close()
        process.stderr.close()
# generate domain names forever
while True:
    # generate a few candidates and keep the shortest (the earliest one wins ties)
    domain = min((gen_word() for _ in range(3)), key=len) + ".com"
    # check() returns True (taken), False (available) or None (timed out);
    # only an explicit False is reported, so timeouts are never mistaken for availability
    if check(domain) is False:
        # parenthesized form prints the same text on Python 2 and Python 3
        print(domain + " <-- Available!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment