Skip to content

Instantly share code, notes, and snippets.

@vanne02135
Last active December 6, 2018 13:12
Show Gist options
  • Save vanne02135/fa364bc8d28d52cbe945220c1e5db2de to your computer and use it in GitHub Desktop.
Save vanne02135/fa364bc8d28d52cbe945220c1e5db2de to your computer and use it in GitHub Desktop.
Download flickr images and retain metadata in exif
import flickr_api
import os
import random
import sys
from fractions import Fraction
import piexif
import piexif.helper
from iptcinfo import IPTCInfo
import urllib2
import argparse
import time
import datetime
# TODO:
# * Might crash with anything other than jpg? Try videos? How about png?
# * debugging with a lot of content. At least tags and comments seem to work.
# * use https://github.com/jamesacampbell/iptcinfo3 to write caption, keywords and title
# File that holds the saved Flickr authentication handler.
AUTH_FILENAME = "flickr_dl_auth.txt"

# Layout of the text written to the EXIF UserComment field. It is filled in
# with a dict providing the keys title, description, tags and comments.
USER_COMMENT_TEMPLATE = (
    "flickr_dl.py converted metadata:\n"
    "Title: %(title)s\n"
    "Description: %(description)s\n"
    "Tags: %(tags)s\n"
    "Comments: %(comments)s"
)
def get_auth_cache(filename):
    """Run the interactive Flickr OAuth flow and save the handler to *filename*.

    Follows the procedure described in
    https://github.com/alexis-mignon/python-flickr-api/wiki/Flickr-API-Keys-and-Authentication

    The authorization URL (requesting "read" permission) is printed, the
    verifier code is read from standard input, the handler is installed for
    the current flickr_api session and finally saved to *filename*.
    """
    handler = flickr_api.auth.AuthHandler()
    # The user has to visit this URL to obtain the verifier code.
    print(handler.get_authorization_url("read"))
    verifier = raw_input("Please enter oauth verifier code from the url above: ")
    handler.set_verifier(verifier)
    flickr_api.set_auth_handler(handler)
    handler.save(filename)
def to_deg(value, loc):
    """Convert a decimal coordinate into a degrees, minutes and seconds tuple.

    Keyword arguments:
    value -- coordinate as a number; its sign selects the direction
    loc -- direction pair, ["S", "N"] for latitude or ["W", "E"] for longitude

    return: tuple like (25, 13, 48.343, 'N'). The direction is loc[0] for
    negative values, loc[1] for positive values and "" when value is 0.
    """
    if value < 0:
        loc_value = loc[0]
    elif value > 0:
        loc_value = loc[1]
    else:
        loc_value = ""
    abs_value = abs(value)
    deg = int(abs_value)
    total_minutes = (abs_value - deg) * 60
    minutes = int(total_minutes)
    sec = round((total_minutes - minutes) * 60, 5)
    # Rounding can push the seconds up to 60.0 (e.g. for 0.9999999999);
    # carry into minutes and degrees so that every field stays in range.
    if sec >= 60:
        sec -= 60
        minutes += 1
    if minutes >= 60:
        minutes -= 60
        deg += 1
    return (deg, minutes, sec, loc_value)
def change_to_rational(number):
    """Express a number as an exact rational pair.

    Keyword arguments: number
    return: tuple like (1, 2), (numerator, denominator)
    """
    # The Fraction is built from the number's string form, not the number itself.
    ratio = Fraction(str(number))
    return ratio.numerator, ratio.denominator
def set_gps_location(file_name, lat, lng, altitude):
    """Adds GPS position as EXIF metadata

    Keyword arguments:
    file_name -- image file
    lat -- latitude (as float)
    lng -- longitude (as float)
    altitude -- altitude (as float); negative values are stored as
                "below sea level"

    GPS data already present in the file is left untouched; the new position
    is only written when the file's GPS section is empty.
    """
    lat_deg = to_deg(lat, ["S", "N"])
    lng_deg = to_deg(lng, ["W", "E"])

    # Degrees, minutes and seconds are each stored as a rational pair.
    exiv_lat = tuple(change_to_rational(part) for part in lat_deg[:3])
    exiv_lng = tuple(change_to_rational(part) for part in lng_deg[:3])

    # EXIF GPSAltitudeRef: 0 = above sea level, 1 = below sea level. The
    # altitude itself is stored as a non-negative magnitude.
    altitude_ref = 1 if altitude < 0 else 0

    gps_ifd = {
        piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0),
        piexif.GPSIFD.GPSAltitudeRef: altitude_ref,
        piexif.GPSIFD.GPSAltitude: change_to_rational(round(abs(altitude))),
        piexif.GPSIFD.GPSLatitudeRef: lat_deg[3],
        piexif.GPSIFD.GPSLatitude: exiv_lat,
        piexif.GPSIFD.GPSLongitudeRef: lng_deg[3],
        piexif.GPSIFD.GPSLongitude: exiv_lng,
    }

    exif_dict = piexif.load(file_name)
    # Do not overwrite a GPS section the image already carries.
    if exif_dict["GPS"] == {}:
        exif_dict["GPS"] = gps_ifd
    exif_bytes = piexif.dump(exif_dict)
    piexif.insert(exif_bytes, file_name)
def unfoldComments(comments):
    """Return the comments as one string of "username: text" items joined by ", "."""
    rendered = []
    for comment in comments:
        rendered.append("%s: %s" % (comment.author.username, comment.text))
    return ", ".join(rendered)
def updateSavedExif(filename, title, description, comments, tags, longitude, latitude, altitude):
    """Copy Flickr metadata into the EXIF and IPTC fields of a saved image.

    Keyword arguments:
    filename -- path of the image file; its extension decides whether piexif
                is used
    title -- photo title
    description -- photo description
    comments -- sequence of comment objects (with .author.username and .text)
    tags -- sequence of tag objects (with .text)
    longitude, latitude -- position, convertible with float(); GPS data is
                           only written when both are truthy
    altitude -- altitude passed on to set_gps_location

    EXIF problems are reported as a warning and do not stop the IPTC update.
    """
    ext = filename.split(".")[-1].lower()
    piexif_supported_formats = ["jpg", "jpeg", "tif", "tiff"]
    if ext in piexif_supported_formats:
        try:
            # Save GPS data and others in EXIF
            if longitude and latitude:
                set_gps_location(filename, float(latitude), float(longitude), altitude)
            exif_dict = piexif.load(filename)
            commentString = USER_COMMENT_TEMPLATE % {
                "title": title,
                "description": description,
                "comments": unfoldComments(comments),
                "tags": ", ".join([t.text for t in tags]),
            }
            exif_dict["Exif"][piexif.ExifIFD.UserComment] = piexif.helper.UserComment.dump(commentString)
            piexif.insert(piexif.dump(exif_dict), filename)
        except Exception:
            # Best effort: keep going with IPTC, but let KeyboardInterrupt and
            # SystemExit propagate instead of swallowing them.
            print("Warning: EXIF data transfer failed despite supported image format")
    else:
        print("Warning: file type %s not supported by piexif" % ext)

    # Also save metadata to IPTC as follows:
    # Title -> object name
    # Description -> caption/abstract
    # tags -> keywords
    # Comments -> caption/abstract
    try:
        myiptc = IPTCInfo(filename)
    except Exception:
        # Retry with force=True when the file cannot be opened normally
        # (presumably because it has no IPTC data yet -- TODO confirm).
        myiptc = IPTCInfo(filename, force=True)

    # Non-ASCII characters are dropped from every IPTC value.
    caption = description.encode("ascii", "ignore")
    if len(comments) > 0:
        caption += "Flickr comments:\n"
        caption += unfoldComments(comments).encode("ascii", "ignore")

    myiptc.data["object name"] = title.encode("ascii", "ignore")
    myiptc.data["caption/abstract"] = caption
    myiptc.data["keywords"] = [t.text.encode("ascii", "ignore") for t in tags]
    myiptc.save()
def walkFlickr(flickrUser, year):
    """Return a list of the photos of *flickrUser* taken during *year*.

    Keyword arguments:
    flickrUser -- flickr_api user object; its id is used in the search
    year -- calendar year (int); the search covers January 1st of that year
            up to January 1st of the following year, in local time
    """
    # Better to use the Walker functionality in flickr api than browsing
    # through all pages by hand.
    # Whole-second integer timestamps, the same form the main script passes
    # to Photo.search.
    t1 = int(time.mktime(datetime.datetime(year, 1, 1).timetuple()))
    t2 = int(time.mktime(datetime.datetime(year + 1, 1, 1).timetuple()))
    w = flickr_api.Walker(flickr_api.Photo.search, user_id=flickrUser.id, min_taken_date=t1, max_taken_date=t2)
    return list(w)
def getPhotosPaged(user, args):
    """Old version: fetch photos with user.getPhotos() and paging instead of Walker().

    Keyword arguments:
    user -- flickr_api user object providing getPhotos()
    args -- parsed command line arguments; reads args.year and
            args.download_video

    Videos are downloaded when args.download_video is set; photos are saved
    and their metadata transferred when args.year is None or matches the
    year the photo was taken.
    """
    p = user.getPhotos()
    nPages = p.info.pages
    print(args.year)
    print("Fetcing %d pages of photos" % nPages)
    photoN = 0
    # NOTE(review): only one hard-coded page is visited and the inner loop
    # stops after 10 photos -- looks like leftover debugging limits.
    for page in [1]:  # range(nPages):
        for photo in user.getPhotos(page=page + 1):
            photoN += 1
            if photoN > 10:
                break
            # Character 13 is a carriage return: the progress line is rewritten in place.
            sys.stdout.write("Inspecting photo %d %c" % (photoN, 13))
            sys.stdout.flush()
            yearTaken = int(photo.taken.split('-')[0])
            if photo.media == "video" and args.download_video:
                try:
                    videourl = photo.getSizes()["Video Original"]["source"]
                except Exception:
                    print("Original video not available for some reason, getting Flickr Site MP4")
                    # Fall back to the "Site MP4" size; looking up
                    # "Video Original" again could never succeed here.
                    videourl = photo.getSizes()["Site MP4"]["source"]
                myurl = urllib2.urlopen(videourl)
                filename = myurl.headers.dict["content-disposition"].split("=")[1]
                try:
                    # Binary mode keeps the video bytes intact; the with-block
                    # closes the file even when the download fails.
                    with open(filename, "wb") as out:
                        out.write(urllib2.urlopen(myurl.url).read())  # actual data hidden in videourl's url
                except Exception:
                    print("Something went wrong while downloading %s from %s" % (filename, videourl))
                # TODO: how to handle video's metadata here?
            elif (args.year is None) or (yearTaken == args.year):
                photo.save(photo.id)
                pInfo = photo.getInfo()
                # The following does assume always jpg filetype
                try:
                    # Some photos do not have location info; their other
                    # metadata is still transferred, just without GPS data.
                    if "location" in pInfo:
                        longitude = pInfo["location"]["longitude"]
                        latitude = pInfo["location"]["latitude"]
                    else:
                        longitude = latitude = None
                    updateSavedExif(str(photo.id) + ".jpg", photo.title, pInfo["description"], photo.getComments(), pInfo["tags"], longitude, latitude, 0)
                except Exception:
                    print("Something went wrong while updating the exif data for %s" % photo.id)
# Script entry point: download the authenticated user's photos (optionally
# only those taken in one year, optionally also videos) and copy the Flickr
# metadata into each saved photo.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbosity", help="increase output verbosity", action="store_true")
    parser.add_argument("--download_video", help="download also video files", action="store_true")
    parser.add_argument("--year", help="download images taken at year", type=int)
    args = parser.parse_args()

    try:
        flickr_api.set_auth_handler(AUTH_FILENAME)
        user = flickr_api.test.login()
    except IOError:
        # The saved auth file could not be read: run the interactive
        # authorization once, then load the freshly written file.
        get_auth_cache(AUTH_FILENAME)
        flickr_api.set_auth_handler(AUTH_FILENAME)
        user = flickr_api.test.login()

    if args.year:
        # Restrict the search to photos taken within the requested year.
        t1 = int(time.mktime(datetime.datetime(args.year, 1, 1).timetuple()))
        t2 = int(time.mktime(datetime.datetime(args.year + 1, 1, 1).timetuple()))
        walked = flickr_api.Walker(flickr_api.Photo.search, user_id=user.id, min_taken_date=t1, max_taken_date=t2)
    else:
        walked = flickr_api.Walker(flickr_api.Photo.search, user_id=user.id)

    for i, photo in enumerate(walked, 1):
        print("%4d : %s" % (i, photo.title))
        if photo.media == "video" and args.download_video:
            # Still this does not work for some videos
            try:
                videourl = photo.getSizes()["Video Original"]["source"]
            except Exception:
                print("Original video not available for some reason, getting Flickr Site MP4")
                # Fall back to the "Site MP4" size; looking up
                # "Video Original" again could never succeed here.
                videourl = photo.getSizes()["Site MP4"]["source"]
            myurl = urllib2.urlopen(videourl)
            filename = myurl.headers.dict["content-disposition"].split("=")[1]
            try:
                # Binary mode keeps the video bytes intact; the with-block
                # closes the file even when the download fails.
                with open(filename, "wb") as out:
                    out.write(urllib2.urlopen(myurl.url).read())  # actual data hidden in videourl's url
            except Exception:
                print("Something went wrong while downloading %s from %s" % (filename, videourl))
            # TODO: how to handle video's metadata here?
        elif photo.media == "photo":
            photo.save(photo.id)
            pInfo = photo.getInfo()
            # Some photos do not have location info; those are updated
            # without GPS data.
            if "location" in pInfo:
                longitude = pInfo["location"]["longitude"]
                latitude = pInfo["location"]["latitude"]
            else:
                longitude = latitude = None
            updateSavedExif(str(photo.id) + "." + pInfo["originalformat"], photo.title, pInfo["description"], photo.getComments(), pInfo["tags"], longitude, latitude, 0)
        elif photo.media not in ("photo", "video"):
            # Videos that are not downloaded are skipped silently; anything
            # else is unexpected.
            raise Exception("Unknown media: %s" % photo.media)
@vanne02135
Copy link
Author

vanne02135 commented Dec 6, 2018

Open issues:

  1. piexif fails to handle 360 panorama images, so EXIF data for those is not transferred
  2. videos fail from time to time
  3. no error handling - sometimes Flickr response for photo data is 502 bad gateway

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment