Created
January 31, 2019 12:26
-
-
Save 01x01/a3e4d9218e286aea9e13d5a70c67e655 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
from datetime import datetime
from queue import Empty, Queue
from threading import Thread

import requests
from bs4 import BeautifulSoup

# Suppress urllib3 warnings, such as the InsecureRequestWarning that
# requests made with verify=False would otherwise emit.
requests.packages.urllib3.disable_warnings()
# Make sure the download directory exists. makedirs(exist_ok=True) does the
# check and the creation in one call, avoiding the race between a separate
# os.path.exists() test and os.mkdir().
os.makedirs('img', exist_ok=True)

# Shared queue of image URLs: filled by producer(), drained by worker().
Q = Queue()
def producer(pages):
    """Collect image URLs from the first ``pages`` browse pages into ``Q``.

    Pages are fetched one after another, numbered 1 through ``pages``
    inclusive. The ``src`` value of every ``<img>`` tag found on a page is
    put on the module-level queue ``Q``; no filtering by URL pattern is done
    here.

    Args:
        pages: Number of browse pages to scan.

    Raises:
        requests.RequestException: If a page cannot be fetched, including
            when the request times out.
    """
    for page in range(1, pages + 1):
        print("[-] 收集第 {} 页".format(str(page)))
        url = "http://simpledesktops.com/browse/" + str(page) + "/"
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(url, verify=False, timeout=30)
        soup = BeautifulSoup(r.text, 'html.parser')
        for img in soup.find_all('img'):
            # Tag.get() yields None for an <img> without a src attribute, so
            # one such tag no longer discards the rest of the page's images.
            img_url = img.get('src')
            if img_url:
                Q.put(img_url)
def worker(i):
    """Download queued images into ``img/`` until the queue ``Q`` is empty.

    Each URL taken from ``Q`` is matched against the desktop-image pattern.
    URLs that do not match are skipped. For a match, the image is fetched
    and written to ``img/<file name>``. Every item taken from the queue is
    marked done, whether it was downloaded, skipped, or failed.

    Args:
        i: Worker identifier, used only in the progress output.

    Raises:
        requests.RequestException: If a download fails or times out.
        OSError: If the image file cannot be written.
    """
    # Group 1 is the image URL up to and including the first "png" after the
    # dated directory; group 2 is the file name part of it. The trailing
    # ".*?png" requires a further "png" later in the URL.
    pattern = re.compile(
        r'(http://static.simpledesktops.com/uploads/desktops/\d+/\d+/\d+/(.*?png)).*?png'
    )
    while True:
        # get_nowait() instead of "check empty(), then get()": with several
        # workers another thread may take the last item between the check and
        # the get, leaving this thread blocked forever.
        try:
            img_url = Q.get_nowait()
        except Empty:
            return
        try:
            text = pattern.search(img_url)
            if text is None:
                # Not a desktop image URL; nothing to download.
                continue
            new_img_url = text.group(1)
            file_name = text.group(2)
            # Announce the download before it starts, as the message states.
            print("[-] 线程 {} 开始下载 {} 开始时间:{}".format(i, file_name, datetime.now()))
            # A timeout keeps a stalled connection from hanging this worker.
            r = requests.get(new_img_url, verify=False, timeout=30)
            with open("img/" + file_name, 'wb') as f:
                f.write(r.content)
        finally:
            # Balance every successful get, including skipped or failed items.
            Q.task_done()
if __name__ == "__main__":
    # The queue has to be filled before any worker starts: a worker stops as
    # soon as it finds the queue empty, so starting on an empty queue would
    # download nothing.
    producer(50)

    # Create all worker threads first, then start them, then wait for all.
    threads = []
    for index in range(50):
        threads.append(Thread(target=worker, args=(index,)))
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment