Skip to content

Instantly share code, notes, and snippets.

@just-digital
Created February 2, 2012 02:59
Show Gist options
  • Save just-digital/1721132 to your computer and use it in GitHub Desktop.
Save just-digital/1721132 to your computer and use it in GitHub Desktop.
Simple Python script that generates a list of candidate domain names from a keyword phrase/string. The script also checks each candidate with a DNS lookup to see whether it already resolves.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
# Words dropped when building the shorter name variants (space separated).
stopwords = " a to the ltd is for of on will with pty" # not definitive
# Characters kept by remove_chars(); anything else except a space is stripped.
# The alphabet previously ended in "wxyx", which silently removed every "z".
legalchars = "abcdefghijklmnopqrstuvwxyz1234567890"
# Transliteration table used by replace_chars(): each entry is
# (glyphs, replacement), and every glyph in the first tuple is swapped for
# the plain letter in the second position.  Both upper- and lower-case
# forms are listed.  The "a" row used to list "å" twice and had no "ã".
replace = (
    (("Ã", "Å", "Ä", "À", "Á", "Â", "ã", "å", "ä", "à", "á", "â"), "a"),
    (("Ç", "Č", "ç", "č"), "c"),
    (("É", "È", "Ê", "Ë", "Ĕ", "è", "ê", "ë", "ĕ", "é"), "e"),
    (("Ğ", "Ģ", "ģ", "ğ"), "g"),
    (("Ï", "Î", "Í", "Ì", "ï", "î", "í", "ì"), "i"),
    (("Ñ", "ñ"), "n"),
    (("Ö", "Ô", "Ō", "Ò", "Ó", "Ø", "ö", "ô", "ō", "ò", "ó", "ø"), "o"),
    (("Ŝ", "Ş", "ŝ", "ş"), "s"),
    (("Ü", "Ū", "Û", "Ù", "Ú", "ü", "ū", "û", "ù", "ú"), "u"),
    (("Ÿ", "ÿ"), "y"),
)
# Domain suffixes that manifest_domains() appends to every word variant.
domains = (
    ".co.nz", ".net.nz", ".org.nz",
    ".ac.nz", ".geek.nz", ".gen.nz",
    ".iwi.nz", ".maori.nz", ".school.nz",
)
def remove_blanks(lst):
    """Return a new list containing only the truthy items of ``lst``.

    Empty strings (and any other falsy values, such as ``None``) are
    dropped.  The order of the remaining items is preserved and ``lst``
    itself is not modified.
    """
    return [item for item in lst if item]
def replace_chars(subject, table=None):
    """Swap out glyphs in ``subject`` for their acceptable replacements.

    Args:
        subject: the text to transliterate.
        table: optional iterable of ``(glyphs, replacement)`` pairs, where
            ``glyphs`` is an iterable of strings to search for.  When
            omitted, the module-level ``replace`` table is used.

    Returns:
        ``subject`` with every occurrence of each listed glyph replaced;
        all other characters are left as they are.
    """
    if table is None:
        table = replace
    for glyphs, substitute in table:
        for glyph in glyphs:
            subject = subject.replace(glyph, substitute)
    return subject
def remove_chars(subject, allowed=None):
    """Remove every character that is neither a space nor an allowed one.

    Args:
        subject: the text to clean.
        allowed: optional string (or other container) of characters to
            keep.  When omitted, the module-level ``legalchars`` is used.

    Returns:
        ``subject`` with the disallowed characters dropped.  Spaces are
        always kept so the caller can still split the result into words.
    """
    if allowed is None:
        allowed = legalchars
    # One filtering pass instead of one str.replace() per offending character.
    return "".join(ch for ch in subject if ch == " " or ch in allowed)
def remove_stopwords(subjects, stops=None):
    """Return the words of ``subjects`` that are not stop words.

    Args:
        subjects: list of words to filter.
        stops: optional iterable of stop words (one word per item).  When
            omitted, the words of the module-level ``stopwords`` string
            are used.

    Returns:
        A new list with the stop words removed; the order and any
        duplicates of the remaining words are preserved.
    """
    if stops is None:
        # split() with no argument also discards the blank entries that
        # leading or repeated spaces would otherwise produce.
        stops = stopwords.split()
    blocked = set(stops)
    return [word for word in subjects if word not in blocked]
def wordvariants(name):
    """Return a list of word variants for ``name`` (no domain suffix added).

    The name is lower-cased, passed through replace_chars() and
    remove_chars(), and split on spaces.  The variants, in order, are:

    1. all words run together,
    2. all words joined by dashes,
    3. the words left by remove_stopwords() run together,
    4. those same words joined by dashes,
    5. each of those words on its own.

    Duplicates and empty strings are left out, so a single-word name gives
    one variant and a name with no usable characters gives an empty list.
    """
    name = remove_chars(replace_chars(name.lower()))
    words = remove_blanks(name.split(" "))
    key_words = remove_blanks(remove_stopwords(words))

    candidates = [
        "".join(words),       # all spaces removed
        "-".join(words),      # words joined by dashes
        "".join(key_words),   # stop words removed, spaces removed
        "-".join(key_words),  # stop words removed, joined by dashes
    ]
    candidates.extend(key_words)  # try just single words

    # No point in processing any dups, and an empty variant would turn into
    # a bare suffix such as ".co.nz" further down the line.
    variants = []
    for candidate in candidates:
        if candidate and candidate not in variants:
            variants.append(candidate)
    return variants
def manifest_domains(subject):
    """Build the candidate domain names for ``subject``.

    Every variant from wordvariants() is combined with every suffix in the
    module-level ``domains`` tuple; all suffixes for the first variant come
    first, then all suffixes for the second, and so on.
    """
    return [
        variant + suffix
        for variant in wordvariants(subject)
        for suffix in domains
    ]
def test_domains(domains):
    """Try to resolve each name in ``domains`` and sort them by outcome.

    Args:
        domains: iterable of host names to look up.

    Returns:
        A ``(passed, failed)`` tuple.  ``passed`` is a list of one-item
        dicts ``{name: ip}`` for names that resolved; ``failed`` is a list
        of the names that did not.  Each resolved name is also printed,
        together with its address, as soon as it is found.
    """
    import socket

    passed = []
    failed = []
    for d in domains:
        try:
            ip = socket.gethostbyname(d)
        except (socket.error, UnicodeError):
            # socket.error covers gaierror ("No address associated with
            # hostname") and herror; UnicodeError is raised for labels that
            # cannot be encoded (empty or too long).  In every case the name
            # simply did not resolve, so keep going with the next one.
            failed.append(d)
            continue
        if ip:
            passed.append({d: ip})
            print("%s %s" % (d, ip))
        else:
            failed.append(d)
    return passed, failed
def manifest_and_test(subject):
    """Generate the candidate domains for ``subject`` and look each one up.

    Returns whatever test_domains() returns for the names produced by
    manifest_domains(): a ``(passed, failed)`` pair.
    """
    candidates = manifest_domains(subject)
    return test_domains(candidates)
# Command-line entry point: the first argument is the name/phrase to check.
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        subject = sys.argv[1]
        passed, failed = manifest_and_test(subject)
        # "Available" here only means the DNS lookup found no address.
        print("Available:")
        for name in failed:
            print(name)
        print("Domains in use:")
        # Each entry in passed is a one-item {name: ip} dict.
        for entry in passed:
            for name, ip in entry.items():
                print("%s (%s)" % (name, ip))
    else:
        print("Usage: python manifest_url.py \"A name to check\"")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment