Skip to content

Instantly share code, notes, and snippets.

@young001
Created April 24, 2013 05:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save young001/5449751 to your computer and use it in GitHub Desktop.
Save young001/5449751 to your computer and use it in GitHub Desktop.
multi threads
import os
import Queue
from threading import Thread

import httpsqs
import MySQLdb
import requests
#gevent_profiler.print_percentages(True)
#gevent_profiler.time_blocking(True)
#gevent_profiler.set_stats_output('my-stats.txt')
# Browser-like User-Agent sent with every request.  The two pieces are joined
# by implicit string concatenation with an explicit separating space; a
# backslash continuation inside the literal would glue "rv:10.0)" directly to
# "Gecko" (or embed the next line's indentation in the header value).
user_agent = (
    'Mozilla/5.0 (Windows NT 6.1; rv:10.0) '
    'Gecko/20100101 Firefox/10.0'
)
# Default HTTP headers used by the module-level get() helper.
headers = {'User-Agent': user_agent}
def get(url):
    """Fetch *url* with the module-level ``headers`` and a 10 second timeout.

    Returns the response object produced by ``requests.get``; any exception
    raised by ``requests`` propagates to the caller.
    """
    return requests.get(url, headers=headers, timeout=10)
# Make sure the directory that save_disk() writes into exists before any
# worker thread is started.
if not os.path.exists("./urls_httpsqs"):
    os.makedirs("./urls_httpsqs")
class URLThread(Thread):
    """Worker thread that downloads URLs taken from a queue.

    Each iteration of :meth:`run` reads one URL from ``queue``, fetches it
    with ``requests``, records it in the ``avail_urls`` table and writes the
    response body to ``./urls_httpsqs/<row id>``.  If any of those steps
    fails, the URL is recorded in the ``fail_urls`` table instead.
    """

    def __init__(self, queue, timeout=10, allow_redirects=True):
        """Store the fetch settings.

        queue           -- object whose ``get("urls")`` call yields the next URL.
        timeout         -- ``timeout`` value forwarded to ``requests.get``.
        allow_redirects -- ``allow_redirects`` value forwarded to ``requests.get``.
        """
        super(URLThread, self).__init__()
        self.timeout = timeout
        self.allow_redirects = allow_redirects
        self.response = None  # most recent response fetched by run()
        self.headers = {'User-Agent': user_agent}
        self.queue = queue

    def _connect(self):
        """Open and return a new connection to the ``delicious`` database."""
        return MySQLdb.connect(host='*', user='root', passwd='***',
                               charset='utf8', db='delicious',
                               use_unicode=True)

    def _insert_url(self, db, query, url, url_label, id_label):
        """Run a one-parameter INSERT for *url*, commit, and return the row id.

        The cursor and the connection are always closed, even when the
        statement or the commit raises, so a failed insert cannot leak them.
        """
        try:
            cur = db.cursor()
            try:
                # DB-API drivers expect a sequence (or mapping) of parameters,
                # not a bare string.
                cur.execute(query, (url,))
                print("%s %s" % (url_label, url))
                lastrowid = cur.lastrowid
                print("%s %s" % (id_label, lastrowid))
                db.commit()
            finally:
                cur.close()
        finally:
            db.close()
        return lastrowid

    def save_disk(self, res, pid):
        """Write ``res.content`` to ``./urls_httpsqs/<pid>``.

        The file is opened in binary mode because ``res.content`` is the raw
        response body; text mode would apply newline translation on some
        platforms and rejects bytes on Python 3.
        """
        with open("./urls_httpsqs/%s" % pid, "wb") as datafile:
            datafile.write(res.content)

    def insert_into_avail(self, db, url):
        """Insert *url* into ``avail_urls`` and return the new row id.

        ``db`` is committed and closed before returning.
        """
        return self._insert_url(
            db, "insert into avail_urls(url) values (%s)", url,
            "good url", "last is")

    def insert_into_fail(self, db, url):
        """Insert *url* into ``fail_urls`` and return the new row id.

        ``db`` is committed and closed before returning.
        """
        return self._insert_url(
            db, "insert into fail_urls(url) values (%s)", url,
            "bad url", "bad last is")

    def run(self):
        """Process URLs from the queue forever.

        NOTE(review): what ``queue.get`` returns when the queue is empty is
        not visible from this file; the loop neither sleeps nor exits, so
        confirm the client blocks or handle the empty case in the caller.
        """
        while True:
            url = self.queue.get("urls")
            print("getting %s" % url)
            try:
                self.response = requests.get(
                    url, timeout=self.timeout, headers=self.headers,
                    allow_redirects=self.allow_redirects)
                pid = self.insert_into_avail(self._connect(), url)
                self.save_disk(self.response, pid)
            except Exception as what:
                print(what)
                # The insert helpers close the connection they are given, and
                # the failure may have been the connection attempt itself, so
                # the failure record always uses a fresh connection.
                try:
                    self.insert_into_fail(self._connect(), url)
                except Exception as db_error:
                    # Failing to record a failure must not kill the worker.
                    print(db_error)
#queue = Queue.Queue(50)
# Shared HTTPSQS client handed to every worker thread.
queue = httpsqs.Httpsqs("125.221.225.12")
#gevent_profiler.attach()
# Start 20 worker threads; each one is started as soon as it is created.
for _ in range(20):
    worker = URLThread(queue)
    worker.start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment