import os
import shutil

import requests

BASE = os.path.dirname(__file__)
DATA_PATH = os.path.join(BASE, 'data')


def _fetch(url):
    """Download a single image into the data directory, keeping its original file name."""
    name = url.split('/')[-1]
    path = os.path.join(DATA_PATH, name)
    r = requests.get(url, stream=True)
    if r.status_code != 200:
        return
    with open(path, 'wb') as f:
        # Let requests decode gzip/deflate before streaming the raw body to disk.
        r.raw.decode_content = True
        shutil.copyfileobj(r.raw, f)


def fetch(url):
    """Wrapper around _fetch that logs errors instead of aborting the crawl."""
    try:
        _fetch(url)
    except Exception as e:
        print('error:', url, e)


def get_urls(page=1):
    """Return the thumbnail URLs for one page of the site's WordPress REST API."""
    url = 'http://adr.meizitu.net/wp-json/wp/v2/posts?page={}&per_page=100'.format(page)
    try:
        r = requests.get(url)
        return [item['thumb_src'] for item in r.json()]
    except Exception as e:
        print('error:', e)
        return []


def main():
    start_page = 1
    end_page = 500
    while start_page < end_page:
        print('page is:', start_page)
        urls = get_urls(start_page)
        for url in urls:
            fetch(url)
        start_page += 1


if __name__ == "__main__":
    main()
1. Save the code above to a file, e.g. spider.py
2. Install the requests library
3. Create a data folder in the current directory (or auto-create it, see the sketch below)
4. `python spider.py`
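
If you would rather not create the data folder by hand (step 3), a small tweak like the following could be added near the top of spider.py. This is just a sketch, not part of the original gist:

```python
import os

BASE = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(BASE, 'data')

# Create the data directory next to the script if it does not exist yet,
# so step 3 above becomes optional.
os.makedirs(DATA_PATH, exist_ok=True)
```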