Skip to content

Instantly share code, notes, and snippets.

@Torvaney
Created November 8, 2018 11:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Torvaney/111ff53668c1e223984e48c2130a4c1e to your computer and use it in GitHub Desktop.
Save Torvaney/111ff53668c1e223984e48c2130a4c1e to your computer and use it in GitHub Desktop.
Download images of la angla zagreba metodo textbook
import os
import urllib.parse
import urllib.request

import bs4
import progressbar
import requests
# Page whose <img> tags are downloaded.
URL = 'http://esperantofre.com/zagreb/zagreba.htm'
# Images are stored in a 'zagreba-metodo' directory next to this script.
IMG_DIR = os.path.join(os.path.dirname(__file__), 'zagreba-metodo')
# Seconds to wait for the page request before giving up.
REQUEST_TIMEOUT = 30


def image_url(page_url, src):
    """Return the absolute URL for an image ``src`` found on ``page_url``.

    Uses URL-joining rules rather than filesystem path joining, so
    relative, root-relative and already-absolute ``src`` values all
    resolve correctly on every platform.
    """
    return urllib.parse.urljoin(page_url, src)


def image_path(img_dir, src):
    """Return the local file path under ``img_dir`` for an image ``src``.

    Only the path component of ``src`` is used (scheme, host, query and
    fragment are dropped). Empty, ``.`` and ``..`` segments are removed so
    that a ``src`` taken from remote HTML can never point outside
    ``img_dir``.

    Raises:
        ValueError: if ``src`` contains no usable file name.
    """
    # Treat backslashes as separators too, so they cannot smuggle a
    # parent-directory reference through on Windows.
    url_path = urllib.parse.urlsplit(src).path.replace('\\', '/')
    segments = [
        segment for segment in url_path.split('/')
        if segment not in ('', '.', '..')
    ]
    if not segments:
        raise ValueError('no file name in image source: {!r}'.format(src))
    return os.path.join(img_dir, *segments)


def main():
    """Download every image referenced by ``URL`` into ``IMG_DIR``."""
    response = requests.get(URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an HTTP error page.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, "html5lib")

    # <img> tags without a (non-empty) src have nothing to download.
    sources = [img['src'] for img in soup.find_all('img') if img.get('src')]

    # exist_ok lets the script be re-run without failing on the directory.
    os.makedirs(IMG_DIR, exist_ok=True)

    pbar = progressbar.ProgressBar()
    for src in pbar(sources):
        destination = image_path(IMG_DIR, src)
        # A src such as 'img/a.gif' needs its sub-directory created first.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        urllib.request.urlretrieve(image_url(URL, src), destination)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment