Skip to content

Instantly share code, notes, and snippets.

@zupo
Created May 7, 2016 07:43
Show Gist options
  • Save zupo/f1ef04b524fea76b6597011e490a6c9b to your computer and use it in GitHub Desktop.
Save zupo/f1ef04b524fea76b6597011e490a6c9b to your computer and use it in GitHub Desktop.
#!/usr/bin/python
"""How many Alexa top 1 million sites have German (.de) nameservers?
top.csv is downloaded from Alexa: http://s3.amazonaws.com/alexa-static/top-1m.csv.zip
"""
import gevent.monkey
gevent.monkey.patch_all()
import gevent
from gevent import Greenlet
from gevent.pool import Pool
import dns.resolver
import random
# Shared tallies, updated by fetch() and reported by asynchronous():
#   count   - domains with at least one NS record ending in '.de.'
#   skipped - domains whose NS lookup gave no usable answer
#   failed  - domains whose lookup raised any other exception
count = skipped = failed = 0

# Greenlet pool bounding how many lookups run at the same time.
pool = Pool(100)
def fetch(id_, domain):
    """Query the NS records of ``domain`` and tally the outcome.

    Updates the module-level counters:

    * ``count``   -- incremented (once per domain) when any NS record's
      text form ends in ``.de.``; a progress line is printed as well.
    * ``skipped`` -- incremented when the resolver raises ``NoAnswer``,
      ``NXDOMAIN`` or ``NoNameservers``.
    * ``failed``  -- incremented for any other exception, which is then
      re-raised so it is not silently lost.

    :param id_: value from the first CSV column; only used in the
        progress message.
    :param domain: domain name to look up.
    """
    global count, skipped, failed
    try:
        for record in dns.resolver.query(domain, 'NS'):
            # DNS names are case-insensitive and the resolver reports them
            # in whatever case the server sent, so normalise before matching.
            if record.to_text().lower().endswith('.de.'):
                count += 1
                print('[{}/{}] Found domain with .de NS: {}'.format(
                    count, id_, domain))
                # One matching nameserver is enough; never count a domain twice.
                return
    except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers):
        skipped += 1
    except Exception:
        failed += 1
        raise
def asynchronous():
    """Look up every domain listed in ``top.csv`` through the pool and print totals.

    Each line of ``top.csv`` is expected to hold ``<id>,<domain>``. Blank or
    malformed lines (fewer than two comma-separated fields) are ignored.
    One ``fetch`` greenlet is spawned per domain; after all of them finish,
    a summary line with the module-level counters is printed.
    """
    domains = []
    with open('top.csv') as f:
        for line in f:
            # strip() also removes a trailing '\r' from CRLF files.
            fields = line.strip().split(',')
            if len(fields) < 2:
                continue
            domains.append((fields[0], fields[1]))

    # The pool tracks its own greenlets, so no separate list is kept.
    for id_, domain in domains:
        pool.spawn(fetch, id_, domain)
    pool.join()

    # "ALL" is the id of the last entry; fall back to 0 for an empty file
    # instead of failing with IndexError.
    total = domains[-1][0] if domains else 0
    print('ALL: {}, COUNT: {}, SKIPPED: {}, FAILED: {}'.format(
        total, count, skipped, failed))
# Script entry point: start the scan as soon as the module is executed.
asynchronous()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment