# nickbalch/download_flickr_group.py

## download_flickr_group.py
#!/usr/bin/env python

# import libraries
#import sys
import datetime
import time
import random

import flickrapi
import requests
import os
# import re

# import constants from stat library
from stat import * # ST_SIZE ST_MTIME


# --- Flickr API settings -------------------------------------------------
# All values are placeholders and must be filled in before running.
FLICKR_KEY = ""       # API key passed to flickrapi.FlickrAPI
FLICKR_SECRET = ""    # API secret passed to flickrapi.FlickrAPI
USER_ID = ""          # not referenced by the code in this file
SET_ID = ""           # only referenced by a commented-out walk_set call
GROUP_ID = ""         # group whose photos are downloaded

# --- Local storage -------------------------------------------------------
# Root folder scanned when the RSS feed is generated.
imagesRootDir = ""
# Folder the Flickr photos are downloaded into (keeps its trailing slash,
# because file names are appended to it directly).
flickrDir = "".join((imagesRootDir, "/flickr/"))
# Path of the RSS file to write.
rssFilename = ""


# --- RSS channel metadata ------------------------------------------------
# Feed title.
rssTitle = "Image Stream"
# Feed description.
rssDescription = "Image Stream"
# Base URL of the site hosting the feed items.
rssSiteURL = ""
# URL of the folder the items are served from.
rssItemURL = "".join((rssSiteURL, "/frame"))
# URL of the feed's HTML page.
rssLink = "".join((rssSiteURL, "/index.html"))
# URL of the feed's logo image.
rssImageUrl = "".join((rssSiteURL, "/logo.jpg"))
# Time to live, in minutes.
rssTtl = "60"
# Contact address of the web master.
rssWebMaster = "me@me.com"


# format date method
def formatDate(dt):
    """Format a datetime as an RSS (RFC 822 style) date string marked "+0000".

    dt -- a datetime.datetime.  A naive value is formatted unchanged, so the
          caller is responsible for it already being UTC.  A timezone-aware
          value is shifted to UTC first, so the fixed "+0000" suffix is true.

    Returns a string such as "Sat, 02 Jan 2016 03:04:05 +0000".
    """
    offset = dt.utcoffset()
    if offset is not None:
        # Aware datetime: move the wall-clock time to UTC and drop the tzinfo.
        dt = (dt - offset).replace(tzinfo=None)
    return dt.strftime("%a, %d %b %Y %H:%M:%S +0000")

# get the item/@type based on file extension
# Known file extensions (lower case, no dot) and the MIME type to advertise.
# NOTE(review): "mp4" is kept as "video/mpeg" for backward compatibility;
# the registered type for .mp4 files is "video/mp4" - confirm before changing.
_MEDIA_TYPES = {
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
    "gif": "image/gif",
    "mp4": "video/mpeg",
}


def getItemType(fileExtension):
    """Return the MIME type for an RSS enclosure given a file extension.

    fileExtension -- extension without the leading dot, e.g. "jpg".
                     Matching is case-insensitive, so "JPG" is treated
                     like "jpg".

    Returns the mapped MIME type, or "audio/mpeg" for any extension that
    is not in the table (the original fallback).
    """
    return _MEDIA_TYPES.get(fileExtension.lower(), "audio/mpeg")

def make_url(photo):
    """Build the static download URL and local file name for one photo.

    photo -- mapping with 'id', 'secret', 'farm' and 'server' entries.
             A 'filename' entry is stored back into it as a side effect.

    Returns a (url, filename) tuple.  The "_b" suffix selects one of the
    size variants listed in the URL template:
    http://farm{farm-id}.staticflickr.com/{server-id}/{id}_{secret}_[mstzb].jpg
    """
    name = "%s_%s_b.jpg" % (photo['id'], photo['secret'])
    photo['filename'] = name
    address = "http://farm%s.staticflickr.com/%s/%s" % (
        photo['farm'], photo['server'], name)
    return address, name

def main():
    """Run one sync pass: download, prune, then rebuild the RSS feed.

    Takes no arguments; every input comes from the module-level settings
    (flickrDir, rssFilename, rssItemURL, imagesRootDir).  Progress is
    reported on standard output.
    """
    # Each print takes one pre-formatted string so the output is the same
    # on Python 2 and Python 3.
    print("Requesting photos...")
    photolist, dl_count = flickr_download(flickrDir)
    print("Total files in group  %d" % len(photolist))
    print("Files downloaded  %s" % (dl_count,))
    deleted = dir_sync(flickrDir, photolist)
    print("Files removed %s" % (deleted,))
    date = rss(rssFilename, rssItemURL, imagesRootDir)
    print("RSS generated with time of  %s" % (date,))


def flickr_download(dpath):
    """Download every photo of the configured Flickr group that is missing locally.

    Walks the group named by the module-level GROUP_ID, derives each photo's
    URL and file name with make_url, and fetches the files that do not yet
    exist in dpath.

    dpath -- directory the photos are stored in.

    Returns (photolist, dl_count): the file names of all photos reported
    for the group, and how many files were downloaded during this call.
    """
    dl_count = 0
    photolist = []
    flickr = flickrapi.FlickrAPI(FLICKR_KEY, FLICKR_SECRET)
    for photo in flickr.walk(tag_mode='all', group_id=GROUP_ID):
        url, filename = make_url(photo.attrib)
        photolist.append(filename)
        target = os.path.join(dpath, filename)
        if os.path.isfile(target):
            # Already downloaded on an earlier run.
            continue
        response = requests.get(url)
        if not response.ok:
            # Do not store an error page under a .jpg name; because the file
            # stays missing, the next run retries this photo.
            print("Skipping %s (HTTP %s)" % (url, response.status_code))
            continue
        # Binary mode: the payload is image bytes, not text.
        with open(target, 'wb') as image_file:
            image_file.write(response.content)
        dl_count += 1
    return photolist, dl_count

def dir_sync(path, photolist):
    """Delete files in path that are not named in photolist.

    path      -- directory holding the downloaded photos.
    photolist -- file names to keep; duplicates are allowed.

    Sub-directories are left alone.  Returns the number of files removed.
    """
    wanted = set(photolist)
    # Compare by name rather than by count: photolist may contain
    # duplicates, so its length says nothing about whether stale files exist.
    stale = [name for name in os.listdir(path)
             if name not in wanted
             and not os.path.isdir(os.path.join(path, name))]
    if stale:
        print(" Delete count =  %d" % len(stale))
    for name in stale:
        os.remove(os.path.join(path, name))
    return len(stale)

def _utc_datetime(timestamp):
    """Return a naive datetime holding the UTC time of a POSIX timestamp."""
    # time.gmtime works identically on Python 2 and 3.
    return datetime.datetime(*time.gmtime(timestamp)[:6])


def rss(outputFilename, weburl, rootdir):
    """Write an RSS 2.0 feed listing the files found under rootdir.

    Based on
    http://www.stuffaboutcode.com/2012/09/python-create-rss-podcast-of-mp3-files.html

    outputFilename -- path of the RSS file to (over)write.
    weburl         -- URL prefix; each item's URL is weburl followed by the
                      file's path relative to rootdir.
    rootdir        -- directory tree to walk.

    A directory holding more than 500 files contributes a random sample of
    400 of them; smaller directories contribute every file.  Channel
    metadata comes from the module-level rss* settings.

    Returns the local datetime at which generation started.
    """
    # Imported here so the function brings its own dependency with it.
    from xml.sax.saxutils import escape

    # Local start time, returned to the caller unchanged.
    now = datetime.datetime.now()
    # formatDate labels its output "+0000", so feed dates are computed in UTC.
    build_date = formatDate(_utc_datetime(time.time()))

    # "with" closes the file even if walking or stat-ing a file fails.
    with open(outputFilename, "w") as outputFile:
        # Channel header.  The settings are written verbatim: they are
        # author-controlled and may already contain XML entities.
        outputFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n")
        outputFile.write("<rss version=\"2.0\">\n")
        outputFile.write("<channel>\n")
        outputFile.write("<title>" + rssTitle + "</title>\n")
        outputFile.write("<description>" + rssDescription + "</description>\n")
        outputFile.write("<link>" + rssLink + "</link>\n")
        outputFile.write("<ttl>" + rssTtl + "</ttl>\n")
        outputFile.write("<image><url>" + rssImageUrl + "</url><title>" + rssTitle + "</title><link>" + rssLink + "</link></image>\n")
        outputFile.write("<copyright>2016</copyright>\n")
        outputFile.write("<lastBuildDate>" + build_date + "</lastBuildDate>\n")
        outputFile.write("<pubDate>" + build_date + "</pubDate>\n")
        outputFile.write("<webMaster>" + rssWebMaster + "</webMaster>\n")

        # Walk through all files and sub-folders.
        for path, dirs, files in os.walk(rootdir):
            if len(files) > 500:
                selected = random.sample(files, 400)
                print("Randomly selecting  %d" % len(selected))
            else:
                selected = files

            for name in selected:
                full_path = os.path.join(path, name)
                info = os.stat(full_path)
                # Path relative to the starting folder, e.g. /subFolder/file.
                relative_path = full_path[len(rootdir):]
                # File names come from disk and may contain &, < or quotes,
                # so everything derived from them is XML-escaped.
                item_url = escape(weburl + relative_path, {'"': "&quot;"})
                # Text before the first "." is the title, text after the
                # last "." selects the media type.
                title = escape(name.split(".")[0].replace("_", " "))
                media_type = getItemType(name.rsplit(".", 1)[-1])
                modified = formatDate(_utc_datetime(info.st_mtime))

                outputFile.write("<item>\n")
                outputFile.write("<title>" + title + "</title>\n")
                outputFile.write("<description>A description</description>\n")
                outputFile.write("<link>" + item_url + "</link>\n")
                outputFile.write("<guid>" + item_url + "</guid>\n")
                outputFile.write("<pubDate>" + modified + "</pubDate>\n")
                outputFile.write("<enclosure url=\"" + item_url + "\" length=\"" + str(info.st_size) + "\" type=\"" + media_type + "\" />\n")
                outputFile.write("</item>\n")

        # Channel footer.
        outputFile.write("</channel>\n")
        outputFile.write("</rss>")

    print("complete")
    return now


# Run the sync only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()