Skip to content

Instantly share code, notes, and snippets.

@iliakonnov
Created January 16, 2017 16:58
Show Gist options
  • Save iliakonnov/a972ace53aa8329b584a89603d7e1785 to your computer and use it in GitHub Desktop.
Save iliakonnov/a972ace53aa8329b584a89603d7e1785 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# -*- coding:utf8 -*-
import os
from os import path

import lxml.html
import requests
# Directory that receives the downloaded task images.
output = "razumovskaja_8_klass"
# Page URL template; formatting it with an empty task fetches the task-list page.
url = "https://megaresheba.ru/publ/reshebnik/russkomu_jazyku/razumovskaja_8_klass/35-1-0-1217/{task}"
# Browser-like User-Agent header sent with every request.
userAgent_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1'}
# XPath selecting the data-id of the last link in the task list; used as the task count.
taskCount_xpath = '//div[@id="taskCont"]/div[2]/a[last()]/@data-id'
# XPath selecting the href of the first "imgCont" link on a task page.
task_xpath = '//div[@id="task"]/div[1]/a[@class="imgCont"][1]/@href'
# Seconds to wait on the server before a request is abandoned.
request_timeout = 30

# open() does not create missing directories, so make sure the target exists.
os.makedirs(output, exist_ok=True)

# Fetch the task-list page and read how many tasks there are.
taskList_response = requests.get(url.format(task=''), headers=userAgent_header, timeout=request_timeout)
taskList_response.raise_for_status()
taskList_page = taskList_response.text
taskList = lxml.html.fromstring(taskList_page)
taskCount_matches = taskList.xpath(taskCount_xpath)
if not taskCount_matches:
    # Fail with a readable message instead of a bare IndexError.
    raise SystemExit('Could not find the task count on the task-list page')
taskCount = int(taskCount_matches[0])

for i in range(1, taskCount + 1):
    # A failure on one task is reported and skipped so the rest still download.
    try:
        task_response = requests.get(url.format(task=i), headers=userAgent_header, timeout=request_timeout)
        task_response.raise_for_status()
    except requests.RequestException as err:
        print('{i}: failed to fetch task page: {e}'.format(i=i, e=err))
        continue

    task = lxml.html.fromstring(task_response.text)
    img_links = task.xpath(task_xpath)
    if not img_links:
        print('{i}: no image link found, skipping'.format(i=i))
        continue
    img_url = img_links[0]
    # Keep the image's own extension for the local file name.
    img_ext = path.splitext(img_url)[1]
    img_path = '{o}/task{i}{ext}'.format(o=output, i=i, ext=img_ext)

    try:
        img = requests.get(img_url, headers=userAgent_header, stream=True, timeout=request_timeout)
        try:
            # Check the status first so an error page is never saved as an image.
            img.raise_for_status()
            with open(img_path, 'wb') as f:
                for chunk in img.iter_content(chunk_size=8192):
                    f.write(chunk)
        finally:
            # Streamed responses hold the connection until closed.
            img.close()
    except requests.RequestException as err:
        print('{i}: failed to download {u}: {e}'.format(i=i, u=img_url, e=err))
        continue

    print('{i}: {u} downloaded to {p}'.format(i=i, u=img_url, p=img_path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment