Skip to content

Instantly share code, notes, and snippets.

@RobinDavid
Created June 12, 2017 16:06
Show Gist options
  • Save RobinDavid/8d4a7e201b8cc388fdc9b25a5a64e8b6 to your computer and use it in GitHub Desktop.
Save RobinDavid/8d4a7e201b8cc388fdc9b25a5a64e8b6 to your computer and use it in GitHub Desktop.
Download the first image of a Wikipedia article
import sys
import lxml.html
from path import Path
import wikipedia
import requests
def dl_image(search_str):
    """Download the first image of a Wikipedia article into the current directory.

    The article's rendered HTML is scanned for its first ``<img>`` tag that
    has a ``src`` attribute. That tag's file name is then compared against
    the URLs in ``page.images``; the first URL whose file name is contained
    in the tag's file name is downloaded and saved under its own file name.
    (The substring comparison presumably exists because rendered thumbnails
    embed the original file name, e.g. ``220px-Foo.jpg`` -- TODO confirm.)

    Args:
        search_str: Article title to look up. A sequence of words (such as
            ``sys.argv[1:]``) is also accepted and joined with spaces.

    Returns:
        None. Progress and failure messages are printed to stdout.

    Raises:
        requests.HTTPError: If the image download returns an error status.
        requests.RequestException: On connection problems or timeout.
        Any exception raised by ``wikipedia.page`` propagates unchanged.
    """
    # The script entry point hands over a list of CLI words; collapse it to
    # the single title string that wikipedia.page() expects.
    if not isinstance(search_str, str):
        search_str = " ".join(search_str)

    page = wikipedia.page(search_str)
    html = lxml.html.fromstring(page.html())

    # Restrict to tags that actually carry a src so the lookup below
    # cannot fail with KeyError.
    imgs = html.xpath("//img[@src]")
    if not imgs:
        print("No picture found")
        return

    im_name = Path(imgs[0].attrib['src']).name
    matching_img = [x for x in page.images if Path(x).name in im_name]
    if not matching_img:
        print("No matching picture found")
        return

    url = matching_img[0]
    # Bound the wait and refuse to save an HTTP error body as an image file.
    data = requests.get(url, timeout=30)
    data.raise_for_status()

    name = Path(url).name
    Path(name).write_bytes(data.content)
    print("Image written to %s" % name)
if __name__ == '__main__':
    # Script entry point: every command-line word is part of the article
    # title, so multi-word titles work without shell quoting.
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("Usage: %s <article title>" % sys.argv[0])
    # dl_image expects one search string, not the raw argument list.
    dl_image(" ".join(sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment