Skip to content

Instantly share code, notes, and snippets.

@ipconfiger
Created August 16, 2019 16:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ipconfiger/378718c7220fb975bba1a992ad376f29 to your computer and use it in GitHub Desktop.
Save ipconfiger/378718c7220fb975bba1a992ad376f29 to your computer and use it in GitHub Desktop.
# coding=utf8
import os
import time
import request
from lxml import html
def get_page_url(pid):
    """Return the URL of listing page number ``pid``.

    Page 1 is served as ``index.html``; every other page number is
    embedded in the file name as ``index_<pid>.html``.
    """
    if pid == 1:
        return "http://www.imeitou.com/nvsheng/mnns/index.html"
    return "http://www.imeitou.com/nvsheng/mnns/index_{}.html".format(pid)
def main():
    """Download every image found on the paginated listing into ``./images``.

    Fetches the first listing page to read the page count, then walks
    pages ``1..page_number``, saving each ``<img>`` matched by the listing
    XPath under ``<cwd>/images/<last URL path segment>``. Files with the
    same name are overwritten.

    Raises:
        IndexError: if the page-count XPath matches nothing on the first
            page (``count_nodes[0]`` on an empty result).
    """
    # open(..., 'wb') does not create parent directories, so make sure the
    # download folder exists before the first write.
    images_dir = os.path.join(os.getcwd(), 'images')
    os.makedirs(images_dir, exist_ok=True)

    first_page = request.get_url('http://www.imeitou.com/nvsheng/mnns/index.html')
    root = html.document_fromstring(first_page)
    # The text of this node is parsed as the number of listing pages
    # (presumably the pagination total -- verify against the live markup).
    count_nodes = root.xpath('/html/body/div[5]/div[2]/ul/div/div/span[3]/strong[1]')
    page_number = int(count_nodes[0].text)

    for pid in range(1, page_number + 1):
        page_url = get_page_url(pid)
        page = request.get_url(page_url)
        page_root = html.document_fromstring(page)
        for img_node in page_root.xpath('/html/body/div[5]/div[2]/ul/li/a/img'):
            image_url = img_node.attrib['src']
            img = request.get_url(image_url)
            # Name the local file after the last path segment of the URL.
            local_path = os.path.join(images_dir, image_url.split('/')[-1])
            with open(local_path, 'wb') as f:
                f.write(img)
            time.sleep(0.1)  # short pause between image downloads
        time.sleep(0.2)  # slightly longer pause between listing pages
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
# coding=utf8
import requests
def get_url(url, timeout=30):
    """GET ``url`` with fixed browser-like headers and return the raw body.

    Args:
        url: Address to request.
        timeout: Seconds handed to ``requests`` as the request timeout so a
            stalled server cannot block the caller forever. Pass ``None``
            to wait indefinitely (the behaviour before this parameter
            existed).

    Returns:
        The response body as bytes (``Response.content``). The HTTP status
        is not checked, so error pages are returned like any other body.
    """
    # The context manager closes the session (and its pooled connections)
    # once the body has been read, instead of leaking one per call.
    with requests.Session() as session:
        session.headers.update({
            'Origin': 'http://www.imeitou.com',
            'Referer': 'http://www.imeitou.com/nvsheng/mnns/index.html',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
            'Cookie': 'Hm_lvt_1ef6d22326fe2c0f9411d7294ca6d902=1565681953,1565682000; Hm_lpvt_1ef6d22326fe2c0f9411d7294ca6d902=1565950344'
        })
        return session.get(url, timeout=timeout).content
# Manual smoke test: fetch the first listing page and dump its raw bytes.
if __name__ == "__main__":
    print(get_url('http://www.imeitou.com/nvsheng/mnns/index.html'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment