Skip to content

Instantly share code, notes, and snippets.

@simonjenny
Created December 26, 2020 21:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonjenny/5a132d63a41ac8c02b99c645f1a1fd54 to your computer and use it in GitHub Desktop.
Save simonjenny/5a132d63a41ac8c02b99c645f1a1fd54 to your computer and use it in GitHub Desktop.
Get Raspberry Pi Magazines
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests, os
def getFile(url, file, path):
    """Download ``url`` into ``path``/``file`` unless that file already exists.

    Args:
        url: Address of the resource to download.
        file: File name to store the download under.
        path: Existing directory the file is written into.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure, including a
            timeout.
        OSError: The target file could not be written.
    """
    target = os.path.join(path, file)
    if os.path.isfile(target):
        print('{} ist bereits vorhanden!'.format(file))
        return

    # Download into a sibling temp file and rename it at the very end, so an
    # interrupted run never leaves a truncated file that the isfile() check
    # above would mistake for a finished download next time.
    partial = target + '.part'
    try:
        # stream=True keeps the (potentially large) body out of memory; the
        # timeout stops a stalled connection from hanging the script forever.
        with requests.get(url, stream=True, timeout=30) as r:
            # Fail loudly instead of saving an HTML error page as the file.
            r.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
        os.replace(partial, target)
    finally:
        # After a successful replace the temp file is gone and this is a
        # no-op; after a failure it removes the incomplete download.
        if os.path.exists(partial):
            os.remove(partial)
def getHtml(url):
    """Fetch ``url`` and return its body parsed with BeautifulSoup.

    Args:
        url: Address of the HTML page to fetch.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure, including a
            timeout.
    """
    # requests has no default timeout; without one a stalled server would
    # block the script indefinitely.
    page = requests.get(url, timeout=30)
    # Surface HTTP errors here rather than parsing an error page and failing
    # later when the expected links are missing.
    page.raise_for_status()
    return BeautifulSoup(page.text, 'html.parser')
def getMag(url, path):
    """Download the PDF linked from a magazine site's front page.

    Looks on ``url`` for the first anchor whose text is exactly
    "Download Free PDF", opens that link's ``/download`` sub-page, and
    downloads the target of the first anchor whose text is exactly
    "click here to get your free PDF" into ``path``. The file keeps the
    name it has in the PDF URL.

    Args:
        url: Front page of the magazine site.
        path: Directory the PDF is stored in.

    Raises:
        IndexError: One of the two expected links is not on the page.
    """
    # Imported locally so this function does not depend on the module-level
    # import list being extended.
    from urllib.parse import urljoin

    link = getHtml(url).find_all("a", string="Download Free PDF")
    if not link:
        raise IndexError(
            'no "Download Free PDF" link found on {}'.format(url))

    # urljoin resolves the href against the page URL, so a leading slash, a
    # relative path, or an absolute URL all yield a well-formed address.
    # Plain concatenation would give "//" or a doubled host instead.
    download_page = urljoin(url, link[0]['href'].rstrip('/') + '/download')

    dl = getHtml(download_page).find_all(
        "a", string="click here to get your free PDF")
    if not dl:
        raise IndexError(
            'no "click here to get your free PDF" link found on {}'.format(
                download_page))

    # Resolve the PDF href too: an absolute href is returned unchanged, a
    # relative one becomes fetchable.
    pdf_url = urljoin(download_page, dl[0]['href'])
    getFile(pdf_url, os.path.basename(urlparse(pdf_url).path), path)
if __name__ == "__main__":
    # (site front page, destination directory) pairs, processed in order.
    magazines = (
        ('https://magpi.raspberrypi.org/',
         '/media/Magazine/ComputerMagazine/magpi'),
        ('https://hackspace.raspberrypi.org/',
         '/media/Magazine/ComputerMagazine/hackspace'),
        ('https://wireframe.raspberrypi.org/',
         '/media/Magazine/ComputerMagazine/Wireframe'),
    )
    for site, folder in magazines:
        getMag(site, folder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment