Skip to content

Instantly share code, notes, and snippets.

@jvaill
Created March 24, 2012 17:47
Show Gist options
  • Save jvaill/2185540 to your computer and use it in GitHub Desktop.
Save jvaill/2185540 to your computer and use it in GitHub Desktop.
Finds available domain names
#
# Domain Squeeze PoC
# by Justin Vaillancourt - @jvaill
#
# usage: ruby domain_squeeze.rb "search query"
#
# dependencies:
# => whois
# => text-hyphen
# => text
# => colorize
#
# TODO:
# => "pomme" should yield results like "pomm.ee"
# .. add letters to match TLDs
# => "extraordinaire + camel" should yield results like "extraordinam.el"
# .. merge sounds together? will need to change the sorting algo as well
require 'whois'
require 'text/hyphen'
require 'text'
require 'colorize'
require 'resolv'
# Removes every character that the pattern /[^0-9a-z]/i matches, i.e. anything
# other than the digits 0-9 and the letters a-z in either case.
#
# @param string [String] text to clean up
# @return [String] a new string with the unwanted characters removed
def to_alphanumeric(string)
  unwanted = /[^0-9a-z]/i
  string.gsub(unwanted, '')
end
# Lists the TLD entries exposed by Whois::Server.definitions[:tld].
# The list is built on the first call and cached in @tlds afterwards.
#
# NOTE(review): assumes each definition is array-like with the TLD host as its
# first element - confirm against the whois gem version in use.
#
# @return [Array<Hash>] one hash per definition; :host is the definition's
#   first element, :alphanumeric is that value passed through to_alphanumeric
def tlds
  return @tlds if @tlds

  definitions = Whois::Server.definitions[:tld]
  @tlds = definitions.map do |definition|
    tld_host = definition[0]
    { host: tld_host, alphanumeric: to_alphanumeric(tld_host) }
  end
end
# Breaks a string into fragments ("sounds") at the hyphenation points reported
# by text-hyphen.
#
# The Text::Hyphen instance is created once (with left: 0, right: 0) and reused
# across calls via @text_hyphen.
#
# @param string [String] text to split
# @return [Array<String>] the output of Text::Hyphen#visualize split on '-'
def tokenize_sounds(string)
  @text_hyphen ||= Text::Hyphen.new(left: 0, right: 0)
  @text_hyphen.visualize(string).split('-')
end
# Expands every sound into the list of its possible abbreviations.
#
# An abbreviation always keeps the sound's first character and appends any
# non-empty, order-preserving selection of the remaining characters. The bare
# first character comes first, followed by the longer variants (duplicates
# removed). Sounds of length 0 or 1 are returned as a one-element list.
#
# The number of variants grows exponentially with the length of a sound.
#
# The argument is left untouched; a new array is returned. (An in-place
# `collect!` would overwrite the caller's array and fail on a frozen one.)
#
# @param sounds [Array<String>] fragments, e.g. from tokenize_sounds
# @return [Array<Array<String>>] one list of abbreviations per sound
def sound_combinations(sounds)
  sounds.map do |sound|
    next [sound] unless sound.size > 1

    head = sound[0]
    tail = sound[1..-1].chars

    longer = 1.upto(tail.size).flat_map do |count|
      tail.combination(count).map { |picked| head + picked.join }
    end

    [head] + longer.uniq
  end
end
# http://www.ruby-forum.com/topic/95519
# Builds every string obtainable by picking one element from each argument,
# in argument order, and joining the picks together.
#
# Results are ordered with the last argument varying fastest. With no
# arguments the result is [[]]; if any argument is empty the result is [].
#
# @param args [Array<Array<String>>] the lists to pick from
# @return [Array<String>] the joined combinations
def cartesian_product(*args)
  args.reduce([[]]) do |prefixes, choices|
    prefixes.flat_map do |prefix|
      choices.map { |choice| [prefix, choice].join }
    end
  end
end
# Generates domain names that "spell" an abbreviation of the query by ending
# in a known TLD.
#
# The query is tokenized into sounds, each sound is expanded into its
# abbreviations, and all abbreviations are combined into candidate strings.
# Whenever a candidate ends (case-insensitively) with a TLD's alphanumeric
# form, that suffix is replaced with the TLD's host.
# NOTE(review): the host presumably carries its leading dot (e.g. ".com") -
# verify against the whois gem version in use.
#
# Duplicate candidates are dropped so the same domain is not produced (and
# later looked up) more than once, and a candidate that consists of nothing
# but the TLD is skipped because it would leave an empty name in front of it.
#
# @param query [String] alphanumeric search term
# @return [Array<String>] candidate domains, in generation order
def possible_domains(query)
  # Compile each TLD's suffix pattern once rather than once per candidate.
  suffix_patterns = tlds.map do |tld|
    [/#{Regexp.escape(tld[:alphanumeric])}\z/i, tld[:host]]
  end

  abbreviations = sound_combinations(tokenize_sounds(query))
  candidates = cartesian_product(*abbreviations).uniq

  candidates.each_with_object([]) do |candidate, domains|
    suffix_patterns.each do |pattern, host|
      match = pattern.match(candidate)
      next if match.nil? || match.pre_match.empty?

      domains << match.pre_match + host
    end
  end
end
# Orders domains by how closely their alphanumeric form resembles the query,
# closest first.
#
# Closeness is the Levenshtein distance between the query and the domain with
# its non-alphanumeric characters removed. Each distance is computed once per
# domain (not once per comparison), and domains with equal distance keep their
# original relative order, so the result is deterministic.
#
# @param domains [Array<String>] domains to order; not modified
# @param query [String] alphanumeric search term
# @return [Array<String>] a new array sorted by ascending distance
def sort_domains(domains, query)
  ranked = domains.each_with_index.sort_by do |domain, position|
    # The position acts as a tiebreaker, which makes the sort stable.
    [Text::Levenshtein.distance(query, to_alphanumeric(domain)), position]
  end

  ranked.map(&:first)
end
# Asks whois whether a domain is available.
#
# @param domain [String] domain name to look up
# @return [Object, Symbol] whatever Whois.available? returns, or :error when
#   the lookup raises Whois::WebInterfaceError, Whois::NoInterfaceError or
#   Whois::ResponseIsThrottled; other exceptions propagate to the caller
def whois_available?(domain)
  Whois.available?(domain)
rescue Whois::WebInterfaceError, Whois::NoInterfaceError, Whois::ResponseIsThrottled
  # whois could not give an answer for this domain
  :error
end
# Returns the shared Resolv::DNS instance, creating it on first use and
# caching it in @resolv.
#
# @return [Resolv::DNS]
def resolv
  return @resolv if @resolv

  @resolv = Resolv::DNS.new
end
# Checks whether a DNS lookup for the domain yields an address.
#
# @param domain [String] host name to resolve
# @return [Object, false] the value returned by resolv.getaddress (truthy) on
#   success; false if the lookup raises any StandardError
def host_has_ip?(domain)
  resolv.getaddress(domain)
rescue StandardError
  # no information available
  false
end
# Yields the domains derived from the query that appear to be available,
# closest match to the query first.
#
# For every candidate a whois lookup is tried first:
# - whois says available      -> yielded with certain = true
# - whois could not answer    -> yielded with certain = false, but only if the
#                                host does not resolve to an address
# - anything else             -> skipped
#
# When called without a block an Enumerator is returned instead, so no lookup
# is performed until the caller iterates.
#
# @param query [String] search term; non-alphanumeric characters are stripped
# @yieldparam domain [String] a domain that appears to be available
# @yieldparam certain [Boolean] true when confirmed by whois, false when only
#   inferred from the failed address lookup
# @return [Array<String>, Enumerator] all candidate domains that were checked,
#   or an Enumerator when no block is given
def available_domains(query)
  return enum_for(:available_domains, query) unless block_given?

  query = to_alphanumeric(query)
  candidates = sort_domains(possible_domains(query), query)

  candidates.each do |domain|
    # The result is tri-state (true / false / :error), and :error is truthy,
    # so it has to be matched by value rather than by truthiness.
    case whois_available?(domain)
    when true
      yield domain, true
    when :error
      yield domain, false unless host_has_ip?(domain)
    end
  end
end
# Command-line entry point: takes the search query from the first argument,
# prints a usage line and exits when it is missing, otherwise prints each
# available domain (green when confirmed by whois, yellow when uncertain).
search_query = ARGV[0]

if search_query.nil?
  puts "usage: ruby #{__FILE__} \"search query\""
  exit
end

puts "Searching available domains with query \"#{search_query}\"..".light_red

available_domains(search_query) do |domain, certain|
  colored = certain ? domain.light_green : domain.light_yellow
  puts colored
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment