Skip to content

Instantly share code, notes, and snippets.

@NikolasTzimoulis
Created April 23, 2016 20:35
Show Gist options
  • Save NikolasTzimoulis/1d589828e610a6d889398be87a7daf3e to your computer and use it in GitHub Desktop.
Save NikolasTzimoulis/1d589828e610a6d889398be87a7daf3e to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import json
import urllib
import urlparse
import time
import os
# Job-search scraper: for every site in `websites`, query Google's legacy AJAX
# Web Search endpoint for `searchTerm` restricted to that site, print each hit
# and append it as a link to an HTML log, then open the log when finished.
#
# NOTE(review): this targets Python 2 (urllib.urlencode / urllib.urlopen).
searchTerm = "software engineer"
websites = ["indeed.com", "monster.com"]
maxResults = 8 # how many results to grab per website at most
waitSeconds = 30 # how many seconds to wait between requests
logFileName = "log.html" # file name where the results will be written
pageSize = 8 # results per request; sent as the rsz query parameter

# Ceiling division: a maxResults that is not a multiple of pageSize still
# fetches one last page, which is then truncated to the remaining budget.
pageCount = int((maxResults + pageSize - 1) // pageSize)

first = True
# `with` guarantees the log is flushed and closed even if an error escapes.
with open(logFileName, 'w') as logFile:
    # Entries are written UTF-8 encoded, so tell the browser which charset to use.
    logFile.write("<meta charset=\"utf-8\">")
    for w in websites:
        logFile.write("<h1>"+w+"</h1>")
        remaining = int(maxResults) # results still allowed for this site
        for request in range(pageCount):
            # Throttle every request except the very first one of the run.
            if not first:
                time.sleep(waitSeconds)
            first = False
            print("Results from " + w + ", page " + str(request+1) + ":")
            try:
                query = urllib.urlencode({'q': searchTerm+" site:"+w})
                # `start` is a result offset, not a page number, so it must
                # advance by pageSize per request or the pages overlap.
                response = urllib.urlopen(
                    'http://ajax.googleapis.com/ajax/services/search/web?rsz='
                    + str(pageSize) + '&v=1.0&' + query
                    + "&start=" + str(request * pageSize)
                ).read()
                results = json.loads(response)['responseData']['results']
            except (IOError, ValueError, KeyError, TypeError) as error:
                # IOError: network failure; ValueError: body is not JSON;
                # KeyError/TypeError: 'responseData' missing or null.
                # Report and move on instead of silently dropping the page.
                print("Request failed, skipping page: " + repr(error))
                continue
            page = results[:remaining]
            remaining -= len(page)
            for result in page:
                try:
                    title = result['title']
                    rawUrl = result['url']
                except (KeyError, TypeError):
                    continue # malformed entry; keep processing the others
                # Unquote the UTF-8 *bytes* and decode afterwards; unquoting a
                # unicode string and then calling .decode() makes Python 2 do
                # an implicit ASCII encode that fails on non-ASCII URLs.
                url = urllib.unquote(rawUrl.encode('utf8')).decode('utf8', 'replace')
                # The URL lands inside a double-quoted attribute: escape it.
                href = url.replace('&', '&amp;').replace('"', '&quot;')
                # NOTE(review): `title` is assumed to already be HTML-formatted
                # by the API, so it is written through unescaped - confirm.
                entry = "<p><a href=\""+href+"\">"+title+"</a></p>"
                # Log first so a console that cannot display the text does not
                # cost us the entry; encode explicitly for the byte-mode file.
                logFile.write(entry.encode('utf8'))
                try:
                    print(title + '; ' + url)
                except UnicodeError:
                    # Console encoding cannot represent the text.
                    print(repr(title + '; ' + url))

# os.startfile exists only on Windows; elsewhere just report where the log is.
if hasattr(os, 'startfile'):
    os.startfile(logFileName)
else:
    print("Results written to " + logFileName)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment