Created
February 15, 2015 20:42
-
-
Save aeris/1a1ba71264c9c1e49e03 to your computer and use it in GitHub Desktop.
Fetch NS records for the Alexa top 100k sites
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'dnsruby' | |
require 'set' | |
require 'thread' | |
require 'parallel' | |
# Counts one occurrence for every entry of +auth+ in the +dns+ tally.
#
# dns  - Hash mapping a name to the number of times it has been seen;
#        mutated in place.
# auth - collection of names to count; duplicates are counted once each.
#
# Returns +auth+ (the receiver of the iteration).
def merge(dns, auth)
  auth.each do |a|
    # fetch with a default of 0 covers both the "first seen" and "seen before" cases.
    dns[a] = dns.fetch(a, 0) + 1
  end
end
# Adds +n+ to the count stored for +a+ in the +dns+ tally.
#
# dns - Hash mapping a name to its running count; mutated in place.
# a   - the name whose count is increased.
# n   - amount to add (a missing name starts from 0).
#
# Returns the new count for +a+.
def merge_n(dns, a, n)
  dns[a] = dns.fetch(a, 0) + n
end
TLD = %w(co.at co.cr co.id co.il co.in co.jp co.ke co.kr com.ar com.au com.bd com.br com.cn com.co com.hk com.lb com.mk com.mx com.my com.np com.ph com.pl com.py com.sa com.sg com.tr com.tw com.ua com.ve com.vn co.nz co.th co.uk co.za edu.br edu.cn edu.hk edu.pl edu.sa edu.sg gouv.fr gov.au gov.br gov.cl gov.eg gov.in gov.ru gov.tr gov.tw gov.uk hc.ru ne.jp ne.kr net.ar net.au net.br net.cn net.co net.ec net.id net.il net.in net.mx net.my net.nz net.pe net.pk net.pl net.ru net.sa net.sg net.th net.tr net.tw net.ua net.uk net.ve net.vn or.jp or.kr or.us sh.cn) | |
def extract(hostname) | |
case hostname | |
when /\.awsdns-\d+\./ then 'awsdns' | |
when /\.ultradns\.co\.uk$/, /\.ultradns\.biz"/, /\.ultradns\.com$/, | |
/\.ultradns\.info$/, /\.ultradns\.net$/, /\.ultradns\.org$/ then 'ultradns' | |
when /\.akam\.net/, /\.akamai\.net$/ then 'akamai' | |
else hostname.split('.').reverse.take(TLD.any? { |tld| hostname.end_with? ".#{tld}" } ? 3 : 2).reverse.join('.') | |
end | |
end | |
# dns = {} | |
# File.open('top-10k-ns.csv', 'r') do |from| | |
# while line = from.gets | |
# line.strip! | |
# host, value = line.split ',' | |
# merge_n dns, extract(host), value.to_i | |
# end | |
# end | |
# File.open('top-10k-ns2.csv', 'w') do |to| | |
# dns.each do |host, n| | |
# to.puts "#{host},#{n}" | |
# end | |
# end | |
# exit | |
# Main script: tallies the name-server providers seen for the first 100,000
# hosts of top-1m.csv and writes the result to top-100k-ns.csv.
res = Dnsruby::Resolver.new
dns = {}
hosts = []

# Each input line is comma-separated; the host name is the second field.
File.open('top-1m.csv', 'r') do |file|
  file.each_line.with_index(1) do |line, i|
    hosts << line.strip.split(',')[1]
    puts i if (i % 100).zero?
    break if i == 100_000
  end
end

semaphore = Mutex.new
failures = Hash.new(0)

Parallel.each hosts, progress: 'Testing', in_threads: 8 do |host|
  begin
    answer = res.query host
    # Keep only the NS records of the authority section, as lower-case names.
    auth = answer.authority
                 .select { |rr| rr.is_a? Dnsruby::RR::IN::NS }
                 .map { |rr| rr.rdata.to_s.downcase }
    # dns is shared between the worker threads, so updates are serialized.
    semaphore.synchronize do
      merge dns, auth.map { |name| extract name }
    end
  rescue StandardError => e
    # A failed lookup must not abort the run, but it should not vanish either:
    # tally it by error class for the summary below. Rescuing StandardError
    # (not Exception) keeps Ctrl-C and exit working.
    semaphore.synchronize { failures[e.class.name] += 1 }
  end
end

failures.each { |error, count| warn "#{error}: #{count} failed lookups" }

# Output columns: provider key, count, last two labels of the provider key.
File.open('top-100k-ns.csv', 'w') do |file|
  dns.each { |a, n| file.puts "#{a},#{n},#{a.split('.').last(2).join('.')}" }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment