Skip to content

Instantly share code, notes, and snippets.

@michel-slm
Created March 8, 2015 06:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save michel-slm/e67c6839cdb49fbf2666 to your computer and use it in GitHub Desktop.
Save michel-slm/e67c6839cdb49fbf2666 to your computer and use it in GitHub Desktop.
Download a list of Google Drive documents sent in an HTML email
#!/usr/bin/env python2
from bs4 import BeautifulSoup
import re
from sh import wget
def get_gdoc_links(html_doc, prefix='IMG'):
    """Download every Google Drive file linked from an HTML document.

    Scans ``html_doc`` for anchors whose ``href`` starts with
    ``https://docs.google.com/file/`` and has the form
    ``https://docs.google.com/file/d/<id>/edit``. Each one is rewritten into
    a ``https://drive.google.com/uc?export=download&id=<id>`` URL and handed
    to ``wget``.

    Args:
        html_doc: HTML markup to scan for links.
        prefix: Marker expected inside each link's text. The local file name
            is the link text from the first occurrence of ``prefix`` onwards.
            If the marker is absent, the whole link text is used; if that is
            empty after stripping whitespace, the Drive file id is used.

    Returns:
        None. For each matching link the name and download URL are printed
        and ``wget(dl_url, c=True, N=True, O=name)`` is invoked.
    """
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and bs4 does not warn about guessing).
    soup = BeautifulSoup(html_doc, 'html.parser')

    # Dots are escaped and the id is limited to one path segment so the
    # capture cannot run past the real id the way a greedy ``.*`` could.
    file_id_re = re.compile(r'https://docs\.google\.com/file/d/([^/]+)/edit')

    # ``href=True`` skips anchors without an href (e.g. named anchors), which
    # would otherwise yield ``None`` and break the ``startswith`` check.
    for link in soup.find_all('a', href=True):
        href = link['href']
        if not href.startswith('https://docs.google.com/file/'):
            continue

        match = file_id_re.search(href)
        if match is None:
            # A docs.google.com/file/ URL that is not of the /d/<id>/edit
            # form: no id can be extracted, so skip it rather than crash.
            continue
        file_id = match.group(1)

        link_text = link.get_text()
        start = link_text.find(prefix)
        if start != -1:
            link_text = link_text[start:]
        # Never pass an empty or whitespace-padded name to ``wget -O``.
        img_name = link_text.strip() or file_id

        dl_url = ('https://drive.google.com/uc?export=download&id=%s'
                  % (file_id,))
        print (img_name, dl_url)
        wget(dl_url, c=True, N=True, O=img_name)
if __name__ == '__main__':
    # Script entry point: load the saved e-mail HTML, then download every
    # Google Drive file it links to.
    with open('prewed-photos.html', 'r') as html_file:
        email_html = html_file.read()
    get_gdoc_links(email_html)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment