Skip to content

Instantly share code, notes, and snippets.

@CoderSherlock
Created February 4, 2019 01:57
Show Gist options
  • Save CoderSherlock/be3c552eaa904e46747a726581b8e402 to your computer and use it in GitHub Desktop.
Save CoderSherlock/be3c552eaa904e46747a726581b8e402 to your computer and use it in GitHub Desktop.
from lxml import html
import requests
import shutil
import sys
def download(url, timeout=30):
    """Download ``url`` into the current working directory.

    The local file is named after the last ``/``-separated segment of
    ``url`` and is overwritten if it already exists.

    Args:
        url: Address of the resource to fetch.
        timeout: Seconds to wait on the server before giving up; forwarded
            to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    name = url.split("/")[-1]
    # The context manager releases the connection even when writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before creating the file so that an error page
        # is never stored under the image's name.
        response.raise_for_status()
        # ``raw`` bypasses requests' content decoding; ask urllib3 to undo
        # gzip/deflate so the bytes written are the actual file content.
        response.raw.decode_content = True
        with open(name, "wb") as out_file:
            shutil.copyfileobj(response.raw, out_file)
def get_image_url(url, timeout=30):
    """Return the image address found on the photo page at ``url``.

    The page is fetched and the first ``<img>`` matched by a fixed XPath
    under the ``content`` element is inspected.

    Args:
        url: Address of a single photo page.
        timeout: Seconds to wait on the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        The value of the first attribute of the matched ``<img>`` element.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        IndexError: If the page contains no element matching the XPath.
    """
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    tree = html.fromstring(page.content)
    images = tree.xpath("//*[@id=\"content\"]/div/div[1]/div[2]/div/a[1]/img")
    if not images:
        # Same exception type as indexing an empty result, but it now says
        # which page had the unexpected layout.
        raise IndexError("no image element found on {}".format(url))
    # NOTE(review): relies on the first attribute of the <img> being the
    # image address (presumably ``src``) - confirm against the live markup.
    return images[0].values()[0]
def get_image_urls(id, number, timeout=30):
    """Collect photo-page links from a celebrity's photo listing.

    The listing is requested page by page, advancing the ``start`` offset
    by 30 each time, and a ``[offset/number]`` progress line is printed
    before every request.

    Args:
        id: Celebrity identifier inserted into the listing URL.
        number: Maximum number of links to return.
        timeout: Seconds to wait on the server for each request; forwarded
            to ``requests.get``.

    Returns:
        A list of at most ``number`` values, each the first attribute of a
        matched ``<a>`` element (presumably its ``href`` - confirm against
        the live markup).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    page_size = 30  # step applied to the ``start`` query parameter
    urls = []
    start = 0
    while start < number:
        print("[{}/{}]".format(start, number))
        listing_url = (
            "https://movie.douban.com/celebrity/{}/photos/"
            "?type=C&start={}&sortby=like&size=a&subtype=a"
        ).format(id, start)
        page = requests.get(listing_url, timeout=timeout)
        page.raise_for_status()
        tree = html.fromstring(page.content)
        links = tree.xpath("//*[@id=\"content\"]/div/div[1]/ul/li[*]/div[1]/a")
        if not links:
            # Nothing matched on this page, so later offsets are not
            # requested.
            break
        urls.extend(link.values()[0] for link in links)
        start += page_size
    # Whole pages are fetched, so trim the overshoot on the last page.
    return urls[:number]
if __name__ == "__main__":
    # Command line: <script> <celebrity-id> <photo-count>
    # Arguments beyond the first two are ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: {} <celebrity-id> <photo-count>".format(sys.argv[0]))
    try:
        wanted = int(sys.argv[2])
    except ValueError:
        sys.exit("photo count must be an integer, got {!r}".format(sys.argv[2]))

    urls = get_image_urls(sys.argv[1], wanted)
    print(urls)

    failures = 0
    for page_url in urls:
        try:
            download(get_image_url(page_url))
        except (requests.RequestException, IndexError) as err:
            # One unreachable or oddly structured photo page should not
            # abort the rest of the batch; report it and carry on.
            failures += 1
            sys.stderr.write("skipped {}: {}\n".format(page_url, err))
    if failures:
        # Signal partial failure to the calling shell.
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment