Created
March 12, 2019 04:44
-
-
Save akkuman/2221447f131292d3f869c28dabf703f1 to your computer and use it in GitHub Desktop.
[getwallpaper] get wallpaper from wall.alphacoders.com #crawler #Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding=utf-8 | |
import requests | |
import re | |
import os | |
import sys | |
# Proxy endpoints handed to every requests call in this script.
proxies = {
    "http": "http://127.0.0.1:1080",
    "https": "http://127.0.0.1:1080",
}

# Directory the pictures are written to; the __main__ block replaces it when
# a second command-line argument is supplied.
download_dir = './pic/'

# Progress counters: pictures fetched so far, and the result count parsed
# from the first search page.
downloaded_num = 0
total = 0
def download_pic(url, name, pic_type):
    """Download one picture and save it as ``<download_dir>/<name>.<pic_type>``.

    Args:
        url: URL the picture is fetched from.
        name: File name without extension.
        pic_type: File extension without the leading dot.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the download directory or the file cannot be created.

    Side effects:
        Creates ``download_dir`` when missing, writes the file, increments the
        module-level ``downloaded_num`` counter and prints a progress line.
    """
    # Only the counter is rebound here; the other module globals are read-only.
    global downloaded_num

    # Create the target directory, tolerating the case where it already
    # exists (avoids the check-then-create race of exists() + makedirs()).
    try:
        os.makedirs(download_dir)
    except OSError:
        if not os.path.isdir(download_dir):
            raise

    # Fail loudly on HTTP errors instead of saving an error page as a picture;
    # the timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(url, proxies=proxies, timeout=30)
    r.raise_for_status()

    target = os.path.join(download_dir, '%s.%s' % (name, pic_type))
    with open(target, 'wb') as f:
        f.write(r.content)

    # Count the picture only once it is actually on disk.
    downloaded_num += 1
    print('[{:5d}/{}] {}.{} Done!'.format(downloaded_num, total, name, pic_type))
def get_download_link(wallpaper_id, wallpaper_type, server, user_id):
    """Request a wallpaper's download URL and hand it to ``download_pic``.

    The four arguments are posted as form fields to the site's
    ``get_download_link.php`` endpoint; the response body is used as the
    download URL. The picture is saved under the wallpaper id, with the
    wallpaper type as its file extension.

    Args:
        wallpaper_id: Value sent as ``wallpaper_id``; also the saved file name.
        wallpaper_type: Value sent as ``type``; also the saved file extension.
        server: Value sent as ``server``.
        user_id: Value sent as ``user_id``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    post_data = {
        'wallpaper_id': wallpaper_id,
        'type': wallpaper_type,
        'server': server,
        'user_id': user_id,
    }
    r = requests.post(
        'https://wall.alphacoders.com/get_download_link.php',
        data=post_data,
        proxies=proxies,
        timeout=30,
    )
    # Do not treat the body of an error response as a download URL.
    r.raise_for_status()
    # Strip stray whitespace/newlines so the body is usable as a URL.
    download_pic(r.text.strip(), wallpaper_id, wallpaper_type)
def getwallpaper(keyword):
    """Search the site for ``keyword`` and download every wallpaper found.

    Result pages are fetched one after another starting at page 1. Every
    wallpaper entry matched on a page is passed to ``get_download_link``.
    The loop ends when a page has no ``next_page`` anchor (reported as "no
    results") or when that anchor's href is ``#`` (last page).

    Args:
        keyword: Search term; it is lower-cased before being sent.

    Raises:
        requests.RequestException: If a search request fails, times out, or
            the server answers with an HTTP error status.

    Side effects:
        Sets the module-level ``total`` from the first result page and prints
        status messages.
    """
    global total

    p_nextpage = re.compile(r"<a id='next_page' href=[\'\"](.+?)[\'\"]>")
    p_item = re.compile(r'data-id="(\d+?)" data-type="(\w+?)" data-server="(\w+?)" data-user-id="(\d+?)"')
    p_total = re.compile(r"<h1 class='center title'>\s+?(\d+)(.+?)\s+?</h1>")

    page_num = 1
    while True:
        # Pass the query via params so requests URL-encodes the keyword
        # (spaces, '&', non-ASCII text) instead of splicing it in raw.
        r_page = requests.get(
            'https://wall.alphacoders.com/search.php',
            params={'search': keyword.lower(), 'lang': 'Chinese', 'page': page_num},
            proxies=proxies,
            timeout=30,
        )
        r_page.raise_for_status()

        nextpage_link = p_nextpage.search(r_page.text)
        # Without a next_page anchor the page is treated as having no results.
        if nextpage_link is None:
            print("Sorry, we have no results for your search!")
            break

        if page_num == 1:
            total_match = p_total.search(r_page.text)
            if total_match is not None:
                total = int(total_match.group(1))
                print("the %s wallpaper's total is %d" % (keyword, total))
            else:
                # The title layout did not match; keep going rather than
                # crash. Progress lines will show the previous total.
                print("the %s wallpaper's total could not be determined" % keyword)

        for wallpaper_id, wallpaper_type, server, user_id in p_item.findall(r_page.text):
            get_download_link(wallpaper_id, wallpaper_type, server, user_id)

        # An href of '#' on the next_page anchor marks the last page.
        if nextpage_link.group(1) == '#':
            print("All wallpaper done!")
            break
        page_num += 1
if __name__ == '__main__':
    # Valid invocations carry one or two arguments after the script name:
    # the search keyword and, optionally, the download directory.
    argc = len(sys.argv)
    if 2 <= argc <= 3:
        if argc == 3:
            # Replace the default download directory before crawling starts.
            download_dir = str(sys.argv[2])
        getwallpaper(str(sys.argv[1]))
    else:
        usage_text = "Usage:\n\tpython getwallpaper.py miku [miki_pic]\nFirst param: the name of script\nSecond param: the wallpaper's keyword which you want to search\nThird param: the dir's name where you want to download in, optional, default in ./pic"
        print(usage_text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment