Skip to content

Instantly share code, notes, and snippets.

@aeris
Created February 15, 2015 20:42
Show Gist options
  • Save aeris/1a1ba71264c9c1e49e03 to your computer and use it in GitHub Desktop.
Save aeris/1a1ba71264c9c1e49e03 to your computer and use it in GitHub Desktop.
Fetch NS records for the Alexa top 100k domains
#!/usr/bin/env ruby
require 'dnsruby'
require 'set'
require 'thread'
require 'parallel'
# Adds one to the tally in +dns+ for every entry of +auth+.
#
# dns  - Hash mapping a key to its running count; mutated in place.
# auth - Enumerable of keys to count (duplicates are counted each time).
#
# Returns +auth+ (the value of the underlying +each+ call).
def merge(dns, auth)
  auth.each do |name|
    # A key that has not been seen yet starts from zero.
    dns[name] = dns.fetch(name, 0) + 1
  end
end
# Adds +n+ to the tally stored under key +a+ in +dns+, creating the entry
# when it does not exist yet.
#
# dns - Hash mapping a key to its running count; mutated in place.
# a   - key whose count is increased.
# n   - amount to add.
#
# Returns the new count stored for +a+.
def merge_n(dns, a, n)
  dns[a] = dns.key?(a) ? dns[a] + n : n
end
# Two-label suffixes (e.g. `co.uk`) for which `extract` keeps three labels
# instead of two, so that `ns1.example.co.uk` becomes `example.co.uk`.
TLD = %w(co.at co.cr co.id co.il co.in co.jp co.ke co.kr com.ar com.au com.bd com.br com.cn com.co com.hk com.lb com.mk com.mx com.my com.np com.ph com.pl com.py com.sa com.sg com.tr com.tw com.ua com.ve com.vn co.nz co.th co.uk co.za edu.br edu.cn edu.hk edu.pl edu.sa edu.sg gouv.fr gov.au gov.br gov.cl gov.eg gov.in gov.ru gov.tr gov.tw gov.uk hc.ru ne.jp ne.kr net.ar net.au net.br net.cn net.co net.ec net.id net.il net.in net.mx net.my net.nz net.pe net.pk net.pl net.ru net.sa net.sg net.th net.tr net.tw net.ua net.uk net.ve net.vn or.jp or.kr or.us sh.cn).freeze

# Reduces a hostname to the key under which it is tallied.
#
# Hostnames of a few providers that spread their servers over many domains
# are collapsed to a fixed label ('awsdns', 'ultradns', 'akamai'). Every
# other hostname is cut down to its last two labels, or its last three when
# it ends in one of the suffixes listed in TLD.
#
# hostname - String hostname (the caller passes it lower-cased).
#
# Returns the String key.
def extract(hostname)
  case hostname
  when /\.awsdns-\d+\./ then 'awsdns'
  # Single anchored pattern for all UltraDNS domains. The former `.biz`
  # pattern ended in a stray `"` instead of `$` and could never match.
  when /\.ultradns\.(?:co\.uk|biz|com|info|net|org)$/ then 'ultradns'
  when /\.akam\.net/, /\.akamai\.net$/ then 'akamai'
  else
    kept = TLD.any? { |tld| hostname.end_with?(".#{tld}") } ? 3 : 2
    hostname.split('.').last(kept).join('.')
  end
end
# dns = {}
# File.open('top-10k-ns.csv', 'r') do |from|
# while line = from.gets
# line.strip!
# host, value = line.split ','
# merge_n dns, extract(host), value.to_i
# end
# end
# File.open('top-10k-ns2.csv', 'w') do |to|
# dns.each do |host, n|
# to.puts "#{host},#{n}"
# end
# end
# exit
# Resolver used for every query below, and the hash that accumulates the
# per-key counts.
res = Dnsruby::Resolver.new
dns = {}

# Collect the second comma-separated field of each of the first 100000
# lines of top-1m.csv, printing the running line count every 100 lines.
hosts = []
File.open('top-1m.csv', 'r') do |file|
  file.each_line.with_index(1) do |row, count|
    hosts << row.strip.split(',')[1]
    puts count if (count % 100).zero?
    break if count == 100000
  end
end
# Serialises updates to `dns` and to the failure counter, both of which are
# written from the worker threads.
semaphore = Mutex.new
failures = 0

# Query every host on 8 threads and tally the NS records found in the
# authority section of each answer.
Parallel.each hosts, progress: 'Testing', in_threads: 8 do |host|
  begin
    answer = res.query host
    name_servers = answer.authority
                         .select { |rr| rr.is_a? Dnsruby::RR::IN::NS }
                         .map { |rr| rr.rdata.to_s.downcase }
    keys = name_servers.map { |name| extract name }
    semaphore.synchronize { merge dns, keys }
  rescue StandardError
    # Best effort: a host whose lookup fails is skipped. Only StandardError
    # is rescued so that Interrupt (Ctrl-C) and SystemExit still stop the run.
    semaphore.synchronize { failures += 1 }
  end
end

# Report skipped hosts once, on stderr, so the CSV output stays clean.
warn "#{failures} lookups failed and were skipped" if failures > 0
# Write one CSV row per tallied key: the key, its count, and the key's last
# two dot-separated labels.
File.open('top-100k-ns.csv', 'w') do |file|
  dns.each do |name, count|
    suffix = name.split('.').last(2).join('.')
    file.puts "#{name},#{count},#{suffix}"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment