Skip to content

Instantly share code, notes, and snippets.

@nosada
Last active March 7, 2020 13:25
Show Gist options
  • Save nosada/a4026b6920c417dc58522f8b493ece0f to your computer and use it in GitHub Desktop.
Save nosada/a4026b6920c417dc58522f8b493ece0f to your computer and use it in GitHub Desktop.
Download original-size artwork from https://wikiart.org
#!/bin/env python
"""
Usage:
download-artwork-from-wikiart-url.py URL
Arguments:
URL URL of artwork in wikiart.org
(ex.: https://www.wikiart.org/en/rene-magritte/gonconda-1953/)
"""
from bs4 import BeautifulSoup
from docopt import docopt
import requests
from json import loads
from re import sub
def retrieve(url, timeout=30):
    """Fetch the page at *url* and return its raw body.

    Arguments:
        url: address of the page to download.
        timeout: seconds to wait for the server before giving up; handed
            to ``requests.get`` unchanged.

    Returns:
        The response body (``Response.content``).

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.RequestException: the request timed out or could not be
            completed.
    """
    # Without a timeout ``requests`` may wait on a stalled server forever.
    response = requests.get(url, timeout=timeout)
    # Stop here on an error status instead of handing an error page on to
    # the caller as if it were the requested document.
    response.raise_for_status()
    return response.content
def parse(html):
    """Extract the original-size image URL and a file name from a page.

    Arguments:
        html: markup of an artwork page, as returned by ``retrieve``.

    Returns:
        A ``(url, name)`` tuple. ``url`` is the "Url" of the thumbnail
        entry whose "Name" is "original" (case-insensitive); ``name`` is
        "<artist>, <title>" built from the page's ``<title>`` text.

    Raises:
        ValueError: the page lacks the ``ArtworkViewCtrl`` ``<main>``
            element or its ``ng-init`` attribute, the embedded JSON is
            malformed, no "original" thumbnail is listed, the page has no
            ``<title>``, or the title does not have three " - " separated
            parts.
        KeyError, IndexError: the embedded JSON does not have the
            expected "ImageThumbnailsModel" / "Thumbnails" structure.
    """
    soup = BeautifulSoup(html, features="html.parser")

    main = soup.find("main", {"ng-controller": "ArtworkViewCtrl"})
    ng_init = main.get("ng-init") if main is not None else None
    if ng_init is None:
        raise ValueError("artwork data (ng-init) not found in page")

    # The attribute is expected to look like "<name> = <json>;": drop the
    # final character, then everything up to the FIRST "= ".  The match is
    # non-greedy so that a "= " inside a JSON string value is left intact.
    source = sub(r"^.*?= ", "", ng_init[:-1])
    thumbnails = loads(source)["ImageThumbnailsModel"][0]["Thumbnails"]

    # Use an explicit default so a missing entry surfaces as a clear error
    # rather than a bare StopIteration.
    original = next(
        (
            thumbnail for thumbnail in thumbnails
            if thumbnail["Name"].lower() == "original"
        ),
        None,
    )
    if original is None:
        raise ValueError("no 'original' thumbnail listed for this artwork")
    url = original["Url"]

    title_tag = soup.find("title")
    if title_tag is None:
        raise ValueError("page has no <title> element")
    # Split from the right: only the last two separators delimit artist and
    # site name, so " - " inside the artwork title stays part of the title.
    title, artist, _ = title_tag.text.rsplit(" - ", 2)
    name = "{a}, {t}".format(a=artist, t=title)
    return url, name
def download(url, name, timeout=30):
    """Download *url* and save it as "<name>.<extension>".

    The extension is whatever follows the last "." in *url*.

    Arguments:
        url: address of the image to fetch.
        name: output file name without extension.
        timeout: seconds to wait for the server before giving up; handed
            to ``requests.get`` unchanged.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.RequestException: the request timed out or could not be
            completed.
        OSError: the output file could not be written.
    """
    extension = url.split(".")[-1]
    # Fetch and validate BEFORE opening the output file, so a failed
    # request neither leaves an empty file behind nor stores an error page
    # as if it were the image.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open(".".join([name, extension]), "wb") as artwork:
        artwork.write(response.content)
if __name__ == "__main__":
    # The command line is parsed against the module docstring; the single
    # positional argument is the address of the artwork page.
    PAGE_URL = docopt(__doc__)["URL"]
    PAGE_HTML = retrieve(PAGE_URL)
    # parse() yields (image URL, file name), exactly what download() takes.
    download(*parse(PAGE_HTML))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment