Created
August 20, 2010 21:49
-
-
Save Oshuma/541273 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python-2.6 | |
# Written by http://github.com/mdellavo and posted here, because he's too fucking lazy.
import itertools, string, random, re, time | |
import adns | |
# Top-level domains appended to every generated label, in query order.
tlds = [
    'com', 'net', 'us', 'mobi', 'ws',
    'me', 'info', 'biz', 'org', 'ca',
    'jobs', 'am', 'be', 'de', 'es',
]
# Matches runs of non-word characters (anything other than letters, digits
# and underscore). Written as a raw string so the backslash reaches the regex
# engine literally instead of relying on '\W' not being a string escape.
nonword_chars = re.compile(r'\W+')


def clean_word(w):
    '''
    Normalise a raw word-list line into a bare lowercase word.

    w is lowercased and every run of non-word characters (whitespace,
    punctuation, the trailing newline) is removed. Underscores and digits
    are kept because the pattern treats them as word characters.

    Returns the cleaned string, which may be empty.
    '''
    return nonword_chars.sub('', w.lower())
# Every distinct cleaned entry of the system word list. The file is read
# inside a with-block so the handle is closed as soon as the set is built,
# rather than being left open until the interpreter collects it.
with open('/usr/share/dict/words') as _word_file:
    dictionary = set(clean_word(line) for line in _word_file)
def words(size):
    '''
    Generate every domain name whose label is a permutation of letters.

    The label is drawn from itertools.permutations over the 26 ASCII
    lowercase letters, so each letter appears at most once per label
    (no 'aa'). Every label is paired with each entry of tlds in turn.

    string.ascii_lowercase is used instead of string.lowercase because the
    latter is locale-dependent and can pick up non-ASCII letters, which do
    not belong in a plain DNS label.

    Yields strings of the form '<label>.<tld>'.
    '''
    for letters in itertools.permutations(string.ascii_lowercase, size):
        label = ''.join(letters)
        for tld in tlds:
            yield label + '.' + tld
def dictionary_words():
    '''
    Generate domain hacks from the dictionary.

    For every dictionary word that ends with one of the tlds, the TLD
    suffix is split off and re-attached after a dot, e.g. 'welcome'
    becomes 'wel.come' is NOT produced, but 'income' -> 'in.com' is.

    Yields strings of the form '<stem>.<tld>' with a non-empty stem.
    '''
    for word in dictionary:
        for tld in tlds:
            # A word that *is* the TLD ('be', 'me', 'us', ...) would leave an
            # empty stem and yield '.be', which is not a usable domain name.
            if len(word) > len(tld) and word.endswith(tld):
                yield word[:-len(tld)] + '.' + tld
def pairs(xs):
    '''
    Pair every element of xs with its immediate successor.

    xs must support slicing. A sequence of n items gives n - 1 overlapping
    pairs; fewer than two items gives none.
    '''
    successors = xs[1:]
    return zip(xs, successors)
def triples(xs):
    '''
    Group every element of xs with the two elements that follow it.

    xs must support slicing. A sequence of n items gives n - 2 overlapping
    triples; fewer than three items gives none.
    '''
    second = xs[1:]
    third = xs[2:]
    return zip(xs, second, third)
def build_dist(words):
    '''
    Build the letter-transition table used by the Markov generator.

    Each word is scanned as overlapping triples; the first two items of a
    triple are joined into a prefix and the third is recorded as a
    successor of that prefix. Words too short to form a triple add nothing.

    Returns a dict mapping prefix -> {successor: probability}, where the
    probabilities for each prefix sum to 1.
    '''
    table = {}

    # Pass 1: raw successor counts per prefix.
    for word in words:
        for first, second, following in triples(word):
            successors = table.setdefault(first + second, {})
            successors[following] = successors.get(following, 0) + 1

    # Pass 2: turn the counts into relative frequencies in place.
    for successors in table.values():
        norm = float(sum(successors.values()))
        for letter in successors:
            successors[letter] /= norm

    return table
def walk(dist, initial):
    '''
    Pick the next item to follow the key initial.

    dist maps a key to a {successor: probability} dict. One random number
    is drawn and the successors are scanned, accumulating probability,
    until the running sum exceeds it. If the sum never exceeds the draw
    the last successor scanned is returned.

    Returns '' when initial is not in dist or has no successors.
    '''
    if initial not in dist:
        return ''

    threshold = random.random()
    cumulative = 0.0
    choice = ''
    for choice, weight in dist[initial].items():
        cumulative += weight
        if threshold < cumulative:
            break
    return choice
def markov_word(dist, size):
    '''
    Build a single pseudo-word by walking the distribution dist.

    dist maps a prefix to a {successor: probability} dict, in the shape
    returned by build_dist. A prefix chosen uniformly at random seeds the
    word, then size further steps are taken, each keyed on the last two
    characters collected so far.

    Returns the seed followed by one walk() result per step, i.e. at most
    len(seed) + size characters. The word comes out shorter when the walk
    reaches a prefix with no entry in dist, because walk() then contributes
    the empty string.

    Raises IndexError (from random.choice) when dist is empty.
    '''
    # random.choice needs an indexable sequence; list(dist) provides one
    # whether or not dist.keys() returns a list.
    seed = random.choice(list(dist))
    chars = list(seed)
    for _ in range(size):
        chars.append(walk(dist, chars[-2] + chars[-1]))
    return ''.join(chars)
def markov_words(n, size):
    '''
    Generate Markov-chain domain names.

    The distribution is built once from the module-level dictionary. For
    each of n rounds, a fresh word is generated for every entry of tlds,
    so n * len(tlds) names are produced in total.

    Yields strings of the form '<word>.<tld>'; size is passed through to
    markov_word for each word.
    '''
    model = build_dist(dictionary)
    for _ in range(n):
        for tld in tlds:
            label = markov_word(model, size)
            yield label + '.' + tld
def main():
    '''
    Look up a batch of generated domain names and print the unanswered ones.

    An ADDR query is submitted to an adns resolver for every name from
    markov_words(10, 4). The resolver is then polled until it reports no
    outstanding queries; each name whose answer carries no records is
    printed.

    Returns the number of names submitted.
    '''
    resolver = adns.init()
    pending = {}
    submitted = 0

    # Fire off all lookups first; remember which name each query belongs to.
    for name in markov_words(10, 4):
        query = resolver.submit(name, adns.rr.ADDR)
        pending[query] = name
        submitted += 1

    while resolver.allqueries():
        for query in resolver.completed():
            name = pending.pop(query)
            answer = query.wait()
            # NOTE(review): answer[3] is presumably the record list of the
            # adns answer tuple, so an empty value means the name did not
            # resolve -- confirm against the adns-python documentation.
            if not answer[3]:
                print(name)
        # Pause between polling passes. NOTE(review): this corresponds to
        # the for-loop's else branch, which always runs since the loop has
        # no break.
        time.sleep(1)

    return submitted
if __name__ == '__main__':
    # Time the whole run and report how many names were queried.
    started = time.time()
    queried = main()
    elapsed = time.time() - started
    print('Queried %d words in %0.2fs' % (queried, elapsed))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment