Skip to content

Instantly share code, notes, and snippets.

@helloqiu
Last active March 21, 2016 11:18
Show Gist options
  • Save helloqiu/f17c7a18908512d23a87 to your computer and use it in GitHub Desktop.
Save helloqiu/f17c7a18908512d23a87 to your computer and use it in GitHub Desktop.
A crawler for shapaozi.
# coding=utf-8
import requests
import threading
import queue
import time
# Number of Dog worker threads started by the main script.
thread_num = 5
# Value handed to every Dog as its ``wait_time`` (seconds passed to time.sleep).
dog_wait_time = 0.01
# Source list, one entry per line; the main script takes the text after the
# last ':' of each line as the infohash.
# NOTE: this name shadows the ``input`` builtin; it is kept because the rest
# of the file refers to it.
input = open('../magnet.txt', 'r')
# Result file shared by all Dog threads. The encoding is pinned to UTF-8 so
# that non-ASCII titles are written identically on every platform instead of
# depending on (and possibly failing under) the locale's default encoding.
output = open('result.txt', 'w', encoding='utf-8')
class Dog(threading.Thread):
    """Worker thread that drains URLs from a shared bag and records results.

    For every URL taken from the bag the dog performs an HTTP GET, and on a
    200 response writes the ``infohash`` and ``title`` fields of the JSON
    body to the module-level ``output`` file, one per line.

    Attributes:
        wait_time: Seconds to sleep after handling each URL.
        bag: Object exposing ``empty()`` and ``get()`` that supplies URLs.
        work: True while the dog is busy with a URL, False otherwise.
        id: Identifier used in the progress messages.
    """

    # Shared by all dogs. Makes "check the bag, then take one URL" a single
    # atomic step; without it another dog can take the last URL between the
    # two calls and the blocking ``get()`` would never return.
    _bag_lock = threading.Lock()
    # Shared by all dogs. Keeps the infohash line and the title line of one
    # result adjacent in the output file.
    _output_lock = threading.Lock()
    # Seconds to wait for the server before giving up on a URL, so that a
    # stalled connection cannot hang the thread forever.
    request_timeout = 30

    def __init__(self, dog_id, wait_time, bag):
        """Store the dog's id, its per-URL pause and the shared bag."""
        threading.Thread.__init__(self)
        self.wait_time = wait_time
        self.bag = bag
        self.work = False
        self.id = dog_id

    def _next_url(self):
        """Return the next URL from the bag, or None when the bag is empty."""
        with self._bag_lock:
            if self.bag.empty():
                return None
            return self.bag.get()

    def _fetch(self, url):
        """Download *url* and append its infohash and title to ``output``.

        Failures (network error, non-200 status, malformed body, write
        error) are reported on stdout and never propagate, so a single bad
        URL cannot kill the worker thread.
        """
        print("Dog %d is getting %s" % (self.id, url))
        try:
            r = requests.get(url, timeout=self.request_timeout)
        except requests.RequestException as e:
            print("Dog %d gets %s Failed!" % (self.id, url))
            print(e)
            return
        if r.status_code != 200:
            print("Dog %d gets %s Failed!" % (self.id, url))
            return
        try:
            # Parse the body once; a non-JSON body or a missing key is
            # handled below like any other per-URL failure.
            data = r.json()
            info = data["infohash"]
            title = data["title"]
            print("Dog %d gets %s" % (self.id, info + " " + title))
            with self._output_lock:
                output.write(info + "\n")
                output.write(title + "\n")
        except Exception as e:
            # Deliberately broad: this is best-effort per URL, report and
            # move on to the next one.
            print(e)

    def sniff(self):
        """Process URLs from the bag until it is empty."""
        while True:
            url = self._next_url()
            if url is None:
                break
            self.work = True
            try:
                self._fetch(url)
            finally:
                # Reset the busy flag on every path, including failures.
                self.work = False
            time.sleep(self.wait_time)

    def run(self):
        """Thread entry point: sniff until the bag is empty."""
        self.sniff()
class Bag:
    """First-in, first-out holder for URLs, backed by ``queue.Queue``."""

    def __init__(self):
        """Create an empty bag."""
        self._urls = queue.Queue()

    def put(self, url):
        """Append *url* to the end of the bag."""
        self._urls.put(url, block=True, timeout=None)

    def get(self):
        """Remove and return the oldest URL, blocking until one is available."""
        oldest = self._urls.get(block=True, timeout=None)
        return oldest

    def empty(self):
        """Return True when the bag currently holds no URLs."""
        return self._urls.qsize() == 0
def extract_infohash(line):
    """Return the upper-cased text after the last ':' of *line*.

    Surrounding whitespace (including the line terminator, if any) is
    stripped first, so the final line of a file is handled correctly even
    when it has no trailing newline. A blank line yields an empty string.
    """
    return line.strip().rpartition(':')[2].upper()


if __name__ == "__main__":
    try:
        bag = Bag()
        dogs = []
        print("Init ...")
        for line in input:
            infohash = extract_infohash(line)
            if not infohash:
                # Blank line: nothing to look up.
                continue
            url = "http://www.shapaozi.me/api/infohash/{}".format(infohash)
            bag.put(url)
        print("Ready")
        for i in range(thread_num):
            dog = Dog(i, dog_wait_time, bag)
            dog.start()
            dogs.append(dog)
        # Wait for the workers themselves rather than for the bag to become
        # empty: an empty bag only means the last URLs were handed out, the
        # dogs may still be downloading and writing their results.
        for dog in dogs:
            dog.join()
    finally:
        input.close()
        output.close()
@helloqiu
Copy link
Author

Added try/finally so that the input and output files are always closed, which ensures the results are actually written to disk.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment