Skip to content

Instantly share code, notes, and snippets.

@pc
Created July 21, 2008 20:40
Show Gist options
  • Save pc/160 to your computer and use it in GitHub Desktop.
Save pc/160 to your computer and use it in GitHub Desktop.
import google
import time, csv, urllib, os
google.LICENSE_KEY = 'YOURKEY'
# Same usage: python search.py wp.conf
# And wp.conf should look like this (line 1: the site, line 2: a separator, then one search query per line):
# en.wikipedia.org
# --
# foo
# word
# encyclopedia
# larry sanger
# jimbo wales
# your mom
class SearchAnalyzer:
    """Track where a site ranks in Google results for a list of queries.

    Every call to ``analyze`` appends rows to ``<site>.csv`` (one per
    matching result, or a single rank ``-1`` row when the site is not
    found for a query) and then rewrites ``<site>.html`` with one Google
    Chart line graph per (query, URL) pair found in the CSV.

    NOTE: at runtime ``analyze`` still targets Python 2 -- it uses
    ``urllib.urlencode`` and opens the CSV in binary mode, as the Python 2
    ``csv`` module expects.  The syntax itself is valid on 2.6+ and 3.
    """

    # Heading written before each group of charts in the report.
    _SECTION_TEMPLATE = '<h1 style="clear: both; font-family: georgia; font-weight: normal; border-bottom: 1px solid #ccc;">%s</h1>'

    # One chart card; filled with the keys query, query_encoded, url, lst
    # and last_rank.
    _ENTRY_TEMPLATE = '''<div style="float: left; text-align: center; margin: 20px 10px; padding: 10px; overflow: hidden; width: 300px;"><div style="margin-bottom: 20px;"><a title="%(query)s" href="http://www.google.com/search?%(query_encoded)s">%(query)s</a><br /><small><a href="%(url)s" title="%(url)s">%(url)s</a><br />Last rank: %(last_rank)s</small></div>
<a href="http://chart.apis.google.com/chart?chs=500x500&cht=ls&chco=cc0000&chls=1,0,0&chf=bg,s,efefef&chd=t:%(lst)s&chxt=r&chxl=0:|100|90|80|70|60|50|40|30|20|10|1&chm=r,ccdff9,0,0.90,1.00|r,E5ECF9,0,0.80,0.90"><img border="0" src="http://chart.apis.google.com/chart?chs=300x300&cht=ls&chco=cc0000&chls=1,0,0&chf=bg,s,efefef&chd=t:%(lst)s&chxt=r&chxl=0:|100|90|80|70|60|50|40|30|20|10|1&chm=r,ccdff9,0,0.90,1.00|r,E5ECF9,0,0.80,0.90" title="Last rank: %(last_rank)s" /></a>
</div>
'''

    def __init__(self, site, searches):
        """Remember the site to look for and the queries to run.

        site     -- host name without scheme, e.g. ``'en.wikipedia.org'``;
                    also used as the base name of the CSV and HTML files.
        searches -- iterable of query strings.
        """
        self.site = site
        self.searches = searches

    def siteVariants(self):
        """Return the URL prefixes (http/https, with/without www.) for the site."""
        return ('http://' + self.site, 'https://' + self.site,
                'http://www.' + self.site, 'https://www.' + self.site)

    def matchSite(self, url):
        """Return True if *url* points at the tracked site.

        A plain prefix test would also accept look-alike hosts such as
        ``http://<site>.evil.com/``, so the prefix must be followed by the
        end of the URL or by a character that terminates the host part.
        """
        for prefix in self.siteVariants():
            if not url.startswith(prefix):
                continue
            rest = url[len(prefix):]
            # A site given with a trailing '/' already ends on a boundary.
            if not rest or prefix.endswith('/') or rest[0] in '/:?#':
                return True
        return False

    def csvFile(self):
        """Return the name of the CSV file holding the rank history."""
        return self.site + ".csv"

    def htmlFile(self):
        """Return the name of the generated HTML report."""
        return self.site + ".html"

    def analyze(self):
        """Run all searches, append the ranks to the CSV, rebuild the report."""
        self._append_rows(self._collect_rankings())
        self._write_report(self._load_history())

    def _collect_rankings(self):
        """Query Google for each search; return ``[time, url, search, rank]`` rows."""
        rows = []
        for search in self.searches:
            pages = []
            for start in range(0, 100, 10):
                try:
                    response = google.doGoogleSearch(search, start=start)
                except Exception as e:
                    # Best effort: report the failed page and carry on with
                    # the remaining ones.
                    print('google.py exception at: %s %s %s' % (search, start, e))
                else:
                    # Keep the offset so a failed page does not shift the
                    # rank of everything fetched after it.
                    pages.append((start, response))
                time.sleep(0.2)  # pause between requests
            found = False
            for start, response in pages:
                for position, item in enumerate(response.results):
                    if self.matchSite(item.URL):
                        rows.append([time.time(), item.URL, search,
                                     start + position + 1])
                        found = True
            if not found:
                # Rank -1 records "not found in the fetched results".
                rows.append([time.time(), self.site, search, -1])
        return rows

    def _append_rows(self, rows):
        """Append *rows* to the CSV history file."""
        with open(self.csvFile(), "ab") as f:
            csv.writer(f).writerows(rows)

    def _load_history(self):
        """Read the CSV; return ``{(search, url): [rank, ...]}`` in file order."""
        history = {}
        with open(self.csvFile(), "rb") as f:
            for row in csv.reader(f):
                if len(row) < 4:
                    continue  # blank or truncated line
                # Tuple key: a joined 'search,url' string cannot be split
                # back apart when either part contains a comma.
                history.setdefault((row[2], row[1]), []).append(int(row[3]))
        return history

    @staticmethod
    def _escape_html(text):
        """Escape the characters that are special in HTML text and attributes."""
        return (text.replace('&', '&amp;').replace('<', '&lt;')
                .replace('>', '&gt;').replace('"', '&quot;'))

    def _write_report(self, history):
        """Write the HTML report with one chart per (search, url) in *history*."""
        entries = []
        for (query, url), ranks in history.items():
            entries.append({
                'query': query,
                'query_encoded': urllib.urlencode({'q': query}),
                'url': url,
                # Chart values are 101 - rank so that rank 1 plots at 100.
                # NOTE(review): a "not found" rank of -1 becomes 102, above
                # the 100 at the top of the axis labels -- confirm how the
                # chart service renders it.
                'lst': ','.join(str(101 - rank) for rank in ranks),
                'last_rank': ranks[-1],
            })
        # The URL is a tie-breaker so the output order is deterministic.
        entries.sort(key=lambda entry: (entry['query'], entry['url']))

        akeys = ['',]  # list of primary keywords goes here
        bkeys = ['',]  # list of secondary keywords goes here
        primary, secondary, others = [], [], []
        for entry in entries:
            if entry['query'] in akeys:
                primary.append(entry)
            elif entry['query'] in bkeys:
                secondary.append(entry)
            else:
                others.append(entry)

        sections = (('Primary', primary), ('Secondary', secondary),
                    ('All', others))
        with open(self.htmlFile(), "w") as html:
            html.write('<html>')
            for title, group in sections:
                html.write(self._SECTION_TEMPLATE % title)
                for entry in group:
                    # Queries and result URLs are external text: escape them
                    # before placing them in markup.
                    html.write(self._ENTRY_TEMPLATE % dict(
                        entry,
                        query=self._escape_html(entry['query']),
                        url=self._escape_html(entry['url'])))
            html.write('</html>')
# Command-line entry: ``python search.py wp.conf``.  The conf file holds the
# site on its first line, a separator on its second line, and one search
# query on each following line.
argv = os.sys.argv
if len(argv) == 2:
    with open(argv[1]) as conf_file:
        # splitlines() copes with both "\n" and "\r\n" line endings.
        conf = conf_file.read().splitlines()
    site = conf[0].strip() if conf else ''
    if not site:
        raise SystemExit('%s: first line must name the site to analyze' % argv[1])
    # Take every non-blank line after the separator; slicing off the last
    # element would lose a keyword when the file has no trailing newline.
    keywords = [line.strip() for line in conf[2:] if line.strip()]
    print("Analyzing %s with %d keywords" % (site, len(keywords)))
    SearchAnalyzer(site, keywords).analyze()
# NOTE(review): this demo run happens on every script invocation, including
# after a conf-driven run above -- confirm that is intended.
if __name__ == '__main__':
    SearchAnalyzer('en.wikipedia.org', ['word']).analyze()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment