Download original size artwork from https://wikiart.org
#!/bin/env python | |
""" | |
Usage: | |
download-artwork-from-wikiart-url.py URL | |
Arguments: | |
URL URL of artwork in wikiart.org | |
(ex.: https://www.wikiart.org/en/rene-magritte/golconda-1953/)
""" | |
from bs4 import BeautifulSoup | |
from docopt import docopt | |
import requests | |
from json import loads | |
from re import sub | |
def retrieve(url, timeout=30):
    """Fetch *url* over HTTP and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as bytes.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status code.
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    # Without an explicit timeout, requests can wait forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail here instead of handing an error page to the HTML parser.
    response.raise_for_status()
    return response.content
def parse(html):
    """Extract the original-size image URL and a file name from an artwork page.

    Args:
        html: Markup of the artwork page.

    Returns:
        A ``(url, name)`` tuple. ``url`` is the "Url" of the thumbnail entry
        whose "Name" is "original" (case-insensitive); ``name`` has the form
        ``"<artist>, <title>"`` and is built from the page ``<title>`` text.

    Raises:
        AttributeError: If the ``<main ng-controller="ArtworkViewCtrl">``
            element or the ``<title>`` element is missing.
        StopIteration: If no thumbnail is named "original".
        ValueError: If the embedded JSON cannot be decoded, or the page title
            has fewer than three " - " separated fields.
    """
    soup = BeautifulSoup(html, features="html.parser")

    ng_init = soup.find(
        "main",
        {"ng-controller": "ArtworkViewCtrl"},
    ).get("ng-init")
    # Drop the final character and everything up to the "= " prefix so that
    # only the JSON payload is left.
    # NOTE(review): presumably the attribute looks like `name = {...};`. The
    # pattern is greedy, so a "= " inside the payload would also be consumed;
    # confirm against a real page before tightening it.
    source = sub(r"^.*= ", "", ng_init[:-1])

    thumbnails = loads(source)["ImageThumbnailsModel"][0]["Thumbnails"]
    url = next(
        thumbnail for thumbnail in thumbnails
        if thumbnail["Name"].lower() == "original"
    )["Url"]

    # Split from the right: the last field is discarded and the one before it
    # is the artist, so a title that itself contains " - " stays in one piece
    # instead of breaking the three-way unpacking.
    title, artist, _ = soup.find("title").text.rsplit(" - ", 2)
    name = "{a}, {t}".format(a=artist, t=title)
    return url, name
def download(url, name, timeout=30):
    """Download *url* and save it to a file named ``<name>.<extension>``.

    The extension is whatever follows the last "." in *url*.

    Args:
        url: Address of the image to download.
        name: File name to write, without extension.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status code.
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
        OSError: If the output file cannot be opened or written.
    """
    extension = url.split(".")[-1]
    # Fetch before opening the output file, so a failed request neither leaves
    # an empty file behind nor stores an error page as the image.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open(".".join([name, extension]), "wb") as artwork:
        artwork.write(response.content)
if __name__ == "__main__":
    # docopt validates the command line against the module docstring and
    # yields the URL argument; the page is then fetched, parsed and the
    # resulting image saved.
    PAGE_HTML = retrieve(docopt(__doc__)["URL"])
    download(*parse(PAGE_HTML))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment