Created
April 24, 2013 05:04
-
-
Save young001/5449751 to your computer and use it in GitHub Desktop.
multi threads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from threading import Thread | |
import Queue | |
import httpsqs | |
import os | |
#gevent_profiler.print_percentages(True) | |
#gevent_profiler.time_blocking(True) | |
#gevent_profiler.set_stats_output('my-stats.txt') | |
# Browser-like User-Agent sent with every request.  It is built from adjacent
# string literals so the value does not depend on how a continuation line is
# indented: a backslash *inside* a literal splices the next line's leading
# whitespace (or the lack of it) straight into the string.
user_agent = ('Mozilla/5.0 (Windows NT 6.1; rv:10.0) '
              'Gecko/20100101 Firefox/10.0')

# Default request headers used by this module.
headers = {'User-Agent': user_agent}
def get(url):
    """Fetch ``url`` and return the ``requests`` response object.

    The request is sent with the module-level ``headers`` and a 10-second
    timeout; the response is returned as-is.
    """
    return requests.get(url, timeout=10, headers=headers)
# Make sure the download directory exists before anything writes into it.
# Creating first and tolerating "already there" avoids the check-then-create
# race of testing os.path.exists() beforehand.
try:
    os.makedirs("./urls_httpsqs")
except OSError:
    # Only swallow the error when the path is already present; any other
    # failure (e.g. permissions) is re-raised.
    if not os.path.exists("./urls_httpsqs"):
        raise
class URLThread(Thread):
    """Worker thread that downloads queued URLs and records each outcome.

    Each worker loops forever: it takes a URL from ``queue``, fetches it with
    ``requests``, inserts the URL into the ``avail_urls`` table and writes the
    response body to ``./urls_httpsqs/<row id>``.  If any of those steps
    raises, the URL is inserted into ``fail_urls`` instead.

    NOTE(review): ``MySQLdb`` is referenced by this class but no import of it
    is visible in this file -- confirm it is imported before running.
    """

    def __init__(self, queue, timeout=10, allow_redirects=True):
        """Set up a worker.

        queue -- object whose ``get("urls")`` call yields the next URL.
        timeout -- timeout passed to ``requests.get``.
        allow_redirects -- passed through to ``requests.get``.
        """
        super(URLThread, self).__init__()
        self.timeout = timeout
        self.allow_redirects = allow_redirects
        self.response = None  # most recent response fetched by run()
        self.headers = {'User-Agent': user_agent}
        self.queue = queue

    def save_disk(self, res, pid):
        """Write the body of ``res`` to ``./urls_httpsqs/<pid>``.

        res -- response object; its ``content`` attribute is written.
        pid -- value used as the file name (``run`` passes the row id
               returned by ``insert_into_avail``).
        """
        # Binary mode: ``content`` is the raw response body, and text mode
        # would translate newlines on some platforms.  ``with`` closes the
        # file even when the write fails.
        with open("./urls_httpsqs/%s" % pid, "wb") as datafile:
            datafile.write(res.content)

    def _insert_url(self, db, table, url, url_label, id_label):
        """Insert ``url`` into ``table``, commit, and return the new row id.

        The cursor and ``db`` are always closed, including when the insert
        or the commit raises.  ``table`` is an internal constant, never user
        input; the URL itself is passed as a bound parameter.
        """
        cur = db.cursor()
        try:
            # Parameters are given as a sequence, as DB-API ``execute``
            # expects, rather than as a bare string.
            cur.execute("insert into %s(url) values (%%s)" % table, (url,))
            print("%s %s" % (url_label, url))
            lastrowid = cur.lastrowid
            print("%s %s" % (id_label, lastrowid))
            db.commit()
        finally:
            try:
                cur.close()
            finally:
                db.close()
        return lastrowid

    def insert_into_avail(self, db, url):
        """Record ``url`` in ``avail_urls``; closes ``db``; returns the row id."""
        return self._insert_url(db, "avail_urls", url, "good url", "last is")

    def insert_into_fail(self, db, url):
        """Record ``url`` in ``fail_urls``; closes ``db``; returns the row id."""
        return self._insert_url(db, "fail_urls", url, "bad url", "bad last is")

    def _connect(self):
        """Open a new connection to the ``delicious`` MySQL database."""
        return MySQLdb.connect(host='*', user='root', passwd='***',
                               charset='utf8', db='delicious',
                               use_unicode=True)

    def run(self):
        """Process URLs from the queue until the process exits.

        If recording a failure itself raises (for example the database is
        unreachable), that exception propagates and ends this worker.
        """
        while True:
            # NOTE(review): what ``get`` returns when the queue is empty is
            # not visible here -- confirm the loop cannot spin on a sentinel.
            url = self.queue.get("urls")
            print("getting %s" % (url,))
            try:
                self.response = requests.get(
                    url,
                    timeout=self.timeout,
                    headers=self.headers,
                    allow_redirects=self.allow_redirects,
                )
                pid = self.insert_into_avail(self._connect(), url)
                self.save_disk(self.response, pid)
            except Exception as what:
                print(what)
                # Use a fresh connection: the one above may never have been
                # opened, or was already closed by insert_into_avail().
                self.insert_into_fail(self._connect(), url)
# Work queue: an httpsqs.Httpsqs client for 125.221.225.12.  A single client
# object is handed to every worker below.
queue = httpsqs.Httpsqs("125.221.225.12")

# Start 20 downloader threads.
for _ in range(20):
    worker = URLThread(queue)
    worker.start()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment