Skip to content

Instantly share code, notes, and snippets.

@lucyb
Created December 30, 2018 15:50
Show Gist options
  • Save lucyb/0f3dc5473fc6c66308111fdd6b6fabf9 to your computer and use it in GitHub Desktop.
Save lucyb/0f3dc5473fc6c66308111fdd6b6fabf9 to your computer and use it in GitHub Desktop.
Sort photos from a Flickr bulk export into one folder per Flickr album
# Sort photos from a flickr bulk export into a better directory structure
#
# It creates directories based on album names and copies photos into the specified
# output location.
#
# Usage: flickr_import.py [flickr export dir] [flickr metadata dir] [output dir]
#
# Requires Python 3.6+ and the python-magic package (install it with `pip install python-magic`)
import os
import sys
import json
import time
from datetime import datetime
import shutil
import magic
def is_image_format(filename):
    """Return True when python-magic reports an ``image/*`` MIME type for *filename*."""
    return magic.from_file(filename, mime=True).startswith('image/')
def is_json_format(filename):
    """Return True when python-magic reports a ``text/*`` MIME type for *filename*.

    The check is deliberately loose: some metadata files are identified as
    text/html even though they appear to be valid JSON, so any text type is
    accepted here.
    """
    detected_mime = magic.from_file(filename, mime=True)
    return detected_mime.startswith('text/')
def size_not_zero(filename):
    """Return True when the file at *filename* contains at least one byte."""
    return os.path.getsize(filename) > 0
def is_valid(filename, format_type):
    """Check that *filename* is non-empty and accepted by *format_type*.

    *format_type* is a predicate called with the filename; it is only
    consulted when the file is not empty, and its result is returned as-is.
    """
    if not size_not_zero(filename):
        return False
    return format_type(filename)
def find_files(path, format_type):
    """Recursively collect the files under *path* that pass *format_type*.

    Args:
        path: Directory to walk.
        format_type: Predicate taking a file path; forwarded to ``is_valid``.

    Returns:
        A list of paths (each built by joining the walked directory with the
        file name) for which ``is_valid`` returned a truthy value.
    """
    found_files = []
    for root, _dirs, files in os.walk(path):
        candidates = (os.path.join(root, name) for name in files)
        # extend() with a generator avoids building a throwaway list just to
        # trigger append() as a side effect.
        found_files.extend(f for f in candidates if is_valid(f, format_type))
    return found_files
def parse_metadata(json_files, default_album):
    """Extract (photo id, date taken, album title) records from JSON files.

    Each file is expected to hold an object with ``id``, ``date_taken`` and
    ``albums`` keys, where every album is an object with a ``title``. A photo
    produces one record per album it belongs to; a photo with an empty album
    list produces a single record under *default_album*.

    Args:
        json_files: Iterable of paths to candidate metadata files.
        default_album: Album title used for photos that are in no album.

    Returns:
        A list of ``(photo_id, date_taken, album_title)`` tuples.

    Files that cannot be decoded or do not have the expected shape are
    reported on stdout (file name, then the error) and skipped.
    """
    metadata = []
    for filename in json_files:
        try:
            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding (which differs e.g. on Windows).
            with open(filename, encoding='utf-8') as json_file:
                data = json.load(json_file)
            photo_id = data['id']
            date_taken = data['date_taken']
            albums = data['albums']
            if not albums:
                metadata.append((photo_id, date_taken, default_album))
                continue
            for album in albums:
                metadata.append((photo_id, date_taken, album['title']))
        except (ValueError, KeyError, TypeError) as e:
            # Not all json files are for a photo, so skip over those.
            # ValueError covers malformed JSON and undecodable bytes;
            # KeyError/TypeError cover documents with a different structure.
            print(filename)
            print(e)
    return metadata
def get_photo(photo_id, photos):
    """Return the single path in *photos* whose file name contains ``_<photo_id>_``.

    Only the final path component is inspected, so a directory whose name
    happens to contain the same marker cannot produce false matches.

    Args:
        photo_id: Identifier of the photo (any value usable in an f-string).
        photos: Iterable of photo file paths to search.

    Returns:
        The one matching path.

    Raises:
        Exception: If no path matches, or if more than one path matches.
    """
    marker = f'_{photo_id}_'
    matches = [photo for photo in photos if marker in os.path.basename(photo)]
    if not matches:
        raise Exception(f'No files found for: {marker}')
    if len(matches) > 1:
        raise Exception(f'Multiple files found: {matches}')
    return matches[0]
def copy_photo(photo, date_taken, dir_name):
    """Copy *photo* into *dir_name* and set the copy's times to *date_taken*.

    Args:
        photo: Path of the source image file.
        date_taken: Timestamp string in ``YYYY-MM-DD HH:MM:SS`` form; it is
            converted with ``time.mktime`` and therefore read as local time.
        dir_name: Destination directory; created, including any missing
            parents, when it does not exist yet.

    Raises:
        ValueError: If *date_taken* does not match the expected format.
        OSError: If the directory cannot be created or the file copied.
    """
    # Parse first so a malformed date fails before anything is written.
    date = datetime.strptime(date_taken, '%Y-%m-%d %H:%M:%S')
    utime = time.mktime(date.timetuple())
    os.makedirs(dir_name, mode=0o755, exist_ok=True)
    # copy2 returns the path of the file it created. Joining dir_name with
    # the source path instead would point back at the source (absolute path)
    # or at a non-existent nested path (relative path).
    copied = shutil.copy2(photo, dir_name)
    # Update access and modified time with the date the photo was taken
    os.utime(copied, (utime, utime))
def copy_photos(metadata, photos, output_path):
    """Copy every photo listed in *metadata* into a per-album directory.

    Args:
        metadata: Iterable of ``(photo_id, date_taken, album)`` tuples.
        photos: Collection of photo file paths searched via ``get_photo``.
        output_path: Directory under which the album directories are placed.

    Path separators inside an album title are replaced with ``_`` so the
    title always maps to a single directory directly below *output_path*.
    Errors raised by ``get_photo`` or ``copy_photo`` propagate to the caller.
    """
    separators = [sep for sep in (os.sep, os.altsep) if sep]
    for photo_id, date_taken, album in metadata:
        photo = get_photo(photo_id, photos)
        # A title such as "Spain/Portugal" would otherwise become a nested
        # path, and a leading separator would make os.path.join drop
        # output_path entirely.
        folder = album
        for sep in separators:
            folder = folder.replace(sep, '_')
        dir_name = os.path.join(output_path, folder)
        copy_photo(photo, date_taken, dir_name)
        print(f'Copied {photo_id}: {album} (Taken: {date_taken})')
def run(photos_path, meta_path, output_path):
    """Sort an export into album directories under *output_path*.

    Image files are collected from *photos_path*, metadata files from
    *meta_path*; the parsed metadata then drives the copy. Photos without an
    album are filed under "Default Album".
    """
    image_files = find_files(photos_path, is_image_format)
    metadata = parse_metadata(
        find_files(meta_path, is_json_format),
        "Default Album",
    )
    copy_photos(metadata, image_files, output_path)
if __name__ == '__main__':
    # Exit with a usage hint rather than an IndexError when arguments are
    # missing; any extra arguments are ignored.
    if len(sys.argv) < 4:
        sys.exit('Usage: flickr_import.py [flickr export dir] [flickr metadata dir] [output dir]')
    photos_path, meta_path, output_path = sys.argv[1:4]
    run(photos_path, meta_path, output_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment