Skip to content

Instantly share code, notes, and snippets.

@Oshuma
Created August 20, 2010 21:49
Show Gist options
  • Save Oshuma/541273 to your computer and use it in GitHub Desktop.
Save Oshuma/541273 to your computer and use it in GitHub Desktop.
#!/usr/bin/python-2.6
# Written by http://github.com/mdellavo and posted here, because he's too fucking lazy.
import itertools, string, random, re, time
import adns
# Top-level domains appended to every candidate name, in lookup order.
tlds = [
    'com', 'net', 'us', 'mobi', 'ws',
    'me', 'info', 'biz', 'org', 'ca',
    'jobs', 'am', 'be', 'de', 'es',
]
# Matches runs of characters outside the regex "word" class (\w), i.e.
# anything that is not a letter, digit or underscore. Raw string so the
# backslash reaches the regex engine untouched instead of relying on Python
# passing an unknown string escape through.
nonword_chars = re.compile(r'\W+')


def clean_word(w):
    '''
    Normalise a dictionary entry: lower-case it and drop non-word characters

    This removes the trailing newline of a line read from a word list as
    well as punctuation such as apostrophes and hyphens. Digits and
    underscores are kept because they belong to the \\w class.
    '''
    return nonword_chars.sub('', w.lower())
# Load the system word list once at import time. Each line is normalised with
# clean_word and duplicates collapse in the set. The with-block closes the
# file handle as soon as the set is built instead of leaving it to the
# garbage collector.
with open('/usr/share/dict/words') as word_file:
    dictionary = set(clean_word(word) for word in word_file)
def words(size):
    '''
    Generate every candidate domain built from a permutation of letters

    Yields 'name.tld' strings where name is a size-long arrangement of
    distinct letters a-z (permutations never repeat a letter, so 'aa' is
    not produced) combined with each entry of tlds in turn.
    '''
    # string.ascii_lowercase is always exactly a-z; string.lowercase depends
    # on the active locale and only exists on Python 2.
    for letters in itertools.permutations(string.ascii_lowercase, size):
        # Join once per permutation rather than once per TLD.
        name = ''.join(letters)
        for tld in tlds:
            yield name + '.' + tld
def dictionary_words():
    '''
    Generate domain hacks from dictionary words that end in a TLD

    For every word in dictionary whose tail spells one of tlds, yields the
    word with a dot inserted before that tail (e.g. 'awesome' -> 'aweso.me').
    '''
    for word in dictionary:
        for tld in tlds:
            # Require at least one character in front of the TLD: a word that
            # *is* the TLD (e.g. 'me') would otherwise yield '.me', which has
            # an empty label and is not a usable domain name.
            if len(word) > len(tld) and word.endswith(tld):
                yield word[:-len(tld)] + '.' + tld
def pairs(xs):
    '''
    Pair each element of a sliceable sequence with its successor
    '''
    successors = xs[1:]
    return zip(xs, successors)
def triples(xs):
    '''
    Group each element of a sliceable sequence with its next two neighbours
    '''
    second = xs[1:]
    third = xs[2:]
    return zip(xs, second, third)
def build_dist(words):
    '''
    Build the letter-transition table used for Markov word generation

    Returns a dict mapping every two-character prefix seen in words to a
    dict of {following_character: probability}; the probabilities for a
    given prefix sum to 1. Words shorter than three characters contribute
    nothing.
    '''
    table = {}
    for word in words:
        # Slide a three-character window over the word: the first two
        # characters form the prefix, the third is what followed it.
        for first, second, follower in zip(word, word[1:], word[2:]):
            followers = table.setdefault(first + second, {})
            followers[follower] = followers.get(follower, 0) + 1
    # Turn the raw counts into relative frequencies per prefix.
    for followers in table.values():
        total = float(sum(followers.values()))
        for follower in followers:
            followers[follower] /= total
    return table
def walk(dist, initial):
    '''
    Pick the next character after the prefix initial, weighted by dist

    Returns an empty string when dist has no entry for initial.
    '''
    if initial not in dist:
        return ''
    threshold = random.random()
    cumulative = 0.0
    choice = ''
    # Accumulate the weights until the running sum passes the random
    # threshold. If the loop runs out first, the last candidate visited is
    # returned.
    for choice, weight in dist[initial].items():
        cumulative += weight
        if threshold < cumulative:
            break
    return choice
def markov_word(dist, size):
    '''
    Build a single word by taking size steps through dist

    dist maps a two-character prefix to a {next_character: probability}
    table, as produced by build_dist. The word starts with a randomly
    chosen prefix and grows by one walk() result per step, so it is at most
    size + 2 characters long; it is shorter whenever walk() has no entry
    for the current prefix and contributes an empty string.

    Raises IndexError if dist is empty.
    '''
    # list(dist) rather than dist.keys(): random.choice needs an indexable
    # sequence, and keys() is only a list on Python 2.
    initial = random.choice(list(dist))
    chars = list(initial)
    for _ in range(size):
        # The last two entries form the prefix for the next step.
        chars.append(walk(dist, chars[-2] + chars[-1]))
    return ''.join(chars)
def markov_words(n, size):
    '''
    Generate candidate domains from Markov-chain words

    Builds the letter distribution from dictionary, then for each of n
    rounds yields one freshly generated word per entry of tlds
    (n * len(tlds) names in total). size is passed through to markov_word.
    '''
    model = build_dist(dictionary)
    for _ in range(n):
        for tld in tlds:
            yield markov_word(model, size) + '.' + tld
def main():
    '''
    Submit a DNS address lookup for each generated name and print the
    names whose answer carries no records

    Returns the number of names that were queried.
    '''
    resolver = adns.init()
    # Maps each submitted query handle back to the name it was created for.
    results = dict()
    total = 0
    for word in markov_words(10, 4):
        results[resolver.submit(word, adns.rr.ADDR)] = word
        total += 1
    # Keep polling while the resolver still reports outstanding queries.
    while resolver.allqueries():
        for result in resolver.completed():
            word = results[result]
            i = result.wait()
            # i[3] is presumably the record-set field of the adns answer
            # tuple; an empty one is treated as "name did not resolve"
            # -- TODO confirm against the adns-python documentation.
            if not i[3]:
                print word
            del results[result]
        else:
            # NOTE(review): indentation was lost in this copy of the source;
            # the else is read as belonging to the for loop. With no break in
            # that loop it runs after every polling pass, pausing one second
            # before the resolver is polled again.
            time.sleep(1)
    return total
if __name__ == '__main__':
    # Time the whole run and report how many names were queried.
    began = time.time()
    total = main()
    elapsed = time.time() - began
    print('Queried %d words in %0.2fs' % (total, elapsed))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment