Skip to content

Instantly share code, notes, and snippets.

@kiwiholmberg
Forked from melpomene/download.py
Last active August 29, 2015 14:20
Show Gist options
  • Save kiwiholmberg/667857e965cc2bf74380 to your computer and use it in GitHub Desktop.
Save kiwiholmberg/667857e965cc2bf74380 to your computer and use it in GitHub Desktop.
Python script to download all your dayviews.com (formerly bilddagboken.se) images.
#!/usr/bin/env python
# encoding: utf-8
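"""
Python script to download all your dayviews.com images.
Each file is named after the date it belongs to, plus a random string to
avoid name collisions, and is saved under ./images/.
Set the start url in download(), then run with 'python download.py'.
Depends on requests and BeautifulSoup (version 3, the `BeautifulSoup` package).
"""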
from requests import get
from BeautifulSoup import BeautifulSoup, SoupStrainer
from time import sleep
import re, datetime, hashlib, os
# Matches a page title of the form "<word> <day> <month name> <year> <rest>"
# (the leading word is presumably the weekday) and captures
# (day, month name, year), e.g. ('5', 'februari', '2006').
# Raw string: \S, \s and \d are regex escapes, not string escapes.
date_pattern = re.compile(r"^\S+\s(\d+)\s(\S+)\s(\d{4})\s.+$")

# Lower-cased month names, January first, so months.index(name) + 1 is the
# month number.
# NOTE: strftime('%B') returns the month names of the process's current
# LC_TIME locale.  If the site prints month names in another language, either
# set the locale accordingly or use a literal list such as the Swedish one
# kept below.
#months = ['januari','februari','mars','april','maj','juni','juli','augusti','september','oktober','november','december']
months = [datetime.date(2000, m, 1).strftime('%B').lower() for m in range(1, 13)]
def _page_date(soup):
    """Return the date in the page's 'showContentTitle' element as 'YYYY-MM-DD'.

    Raises ValueError when the element is missing or has no plain text, when
    the text does not match date_pattern, or when the month name is not in
    months.
    """
    title_tag = soup.find(id='showContentTitle')
    title = title_tag.string if title_tag is not None else None
    match = date_pattern.match(title) if title else None
    if match is None:
        raise ValueError('Could not read a date from page title: %r' % (title,))
    day, month_name, year = match.groups()  # e.g. ('5', 'februari', '2006')
    month = months.index(month_name.lower()) + 1
    # Zero-pad month and day so the file names sort chronologically.
    return '%s-%02d-%s' % (year, month, day.zfill(2))


def _save_image(src, date_string, image_dir):
    """Fetch the image at src, write it into image_dir and return its path."""
    # A random tag keeps several images from the same date from colliding.
    random_tag = hashlib.sha1(os.urandom(1024)).hexdigest()[0:10]
    fname = '%s/%s_(%s)_%s' % (image_dir, date_string, random_tag,
                               src.rsplit('/', 1)[-1])
    print("Downloading %s --> %s" % (src, fname))
    # Fetch before opening the file so a failed request leaves no empty file.
    data = get(src).content
    with open(fname, 'wb') as out:
        out.write(data)
    return fname


def download(url='', image_dir='./images'):
    """Download every image reachable from url by following the page links.

    Starting at url, each page is fetched and parsed, every <img id="picture">
    on it is saved into image_dir (named after the page's date), and the crawl
    continues with the href of the last <a rel="fancyImgGrp"> link on the page.
    It stops, and the function returns, when a page yields no new url.

    Parameters:
        url: page to start from, i.e. the url of the latest image, e.g.
            "http://dayviews.com/username/11111137/".  Put it here as the
            default or pass it in.
        image_dir: directory the images are written to; created if missing.

    Raises:
        ValueError: if url is empty, or if a page's date cannot be parsed.
    """
    if not url:
        raise ValueError('No start url given: pass the url of the latest '
                         'image page to download()')
    if not os.path.isdir(image_dir):
        os.makedirs(image_dir)

    while True:
        print('Get page %s' % url)
        soup = BeautifulSoup(get(url).content)
        date_string = _page_date(soup)

        for img in soup.findAll('img', id="picture"):
            _save_image(img['src'], date_string, image_dir)

        # Pick up the next page link; the last matching link wins.
        next_url = url
        for link in soup.findAll('a', rel=u"fancyImgGrp"):
            next_url = link["href"]

        # If no link led anywhere new there is nothing left to crawl.
        if next_url == url:
            print("Done \r")
            return
        url = next_url
        sleep(1)  # be nice to server
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment