Created
December 30, 2018 15:50
-
-
Save lucyb/0f3dc5473fc6c66308111fdd6b6fabf9 to your computer and use it in GitHub Desktop.
Sort photos from a Flickr bulk export into one folder per Flickr album
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sort photos from a Flickr bulk export into a better directory structure.
#
# Creates one directory per album name under the specified output location
# and copies each photo into the directory of every album it belongs to.
#
# Usage: flickr_import.py <flickr export dir> <flickr metadata dir> <output dir>
#
# Requires Python 3.6+ and the python-magic package (`pip install python-magic`)
import os | |
import sys | |
import json | |
import time | |
from datetime import datetime | |
import shutil | |
import magic | |
def is_image_format(filename):
    """Return True when python-magic reports an ``image/*`` MIME type.

    The decision is based on the MIME type detected for the file at
    *filename*, not on its name or extension.
    """
    return magic.from_file(filename, mime=True).startswith('image/')
def is_json_format(filename):
    """Return True when *filename* plausibly holds JSON metadata.

    The check is deliberately loose: it only looks at the MIME type that
    python-magic detects and leaves real validation to the JSON parser.
    """
    file_type = magic.from_file(filename, mime=True)
    # Not being too strict here as some files are being id'd as text/html,
    # even though they appear to be valid json. Newer libmagic releases
    # identify JSON as application/json rather than text/plain, so that
    # type has to be accepted as well or no metadata would be found.
    return file_type.startswith(('text/', 'application/json'))
def size_not_zero(filename):
    """Return True when the file at *filename* contains at least one byte."""
    return os.path.getsize(filename) > 0
def is_valid(filename, format_type):
    """Check that *filename* is non-empty and passes the *format_type* test.

    *format_type* is a predicate taking a file name; it is only called
    for files that are not empty.
    """
    if not size_not_zero(filename):
        return False
    return format_type(filename)
def find_files(path, format_type):
    """Return the paths of all valid files found anywhere below *path*.

    The tree rooted at *path* is walked recursively; a file is kept when
    ``is_valid(file, format_type)`` is true for it. Each returned path is
    the walked directory joined with the file name.
    """
    found_files = []
    for root, _dirs, files in os.walk(path):
        full_paths = (os.path.join(root, name) for name in files)
        # extend() with a generator replaces the former list comprehension
        # that was evaluated only for its append() side effect.
        found_files.extend(f for f in full_paths if is_valid(f, format_type))
    return found_files
def parse_metadata(json_files, default_album):
    """Build ``(photo_id, date_taken, album_title)`` tuples from JSON files.

    Each file in *json_files* is parsed as JSON. A photo yields one tuple
    per album it belongs to, or a single tuple using *default_album* when
    its album list is empty. Files that cannot be parsed or that lack the
    expected keys are reported on stdout and skipped.
    """
    metadata = []
    for filename in json_files:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(filename, encoding='utf-8') as json_file:
            try:
                data = json.load(json_file)
                photo_id = data['id']
                date_taken = data['date_taken']
                albums = data['albums']
                if albums:
                    titles = [album['title'] for album in albums]
                else:
                    titles = [default_album]
            except (OSError, ValueError, KeyError, TypeError) as e:
                # Not all json files are for a photo, so skip over those
                print(filename)
                print(e)
            else:
                # Entries are added only once the whole file has been read
                # successfully, so a skipped file never leaves partial data.
                metadata.extend(
                    (photo_id, date_taken, title) for title in titles
                )
    return metadata
def get_photo(photo_id, photos):
    """Return the single path in *photos* whose file name contains the id.

    A photo matches when ``_<photo_id>_`` occurs in its file name. Only
    the file name is inspected, so directory names that happen to contain
    the same marker cannot cause false or duplicate matches.

    Raises:
        LookupError: if no photo, or more than one photo, matches.
    """
    # An f-string also accepts ids that were decoded as numbers.
    filename = f'_{photo_id}_'
    files = [
        photo for photo in photos if filename in os.path.basename(photo)
    ]
    if not files:
        raise LookupError(f'No files found for: {filename}')
    if len(files) > 1:
        raise LookupError(f'Multiple files found: {files}')
    return files[0]
def copy_photo(photo, date_taken, dir_name):
    """Copy *photo* into *dir_name* and stamp the copy with *date_taken*.

    *dir_name* is created, including missing parent directories, when it
    does not exist yet. *date_taken* must be formatted as
    ``YYYY-MM-DD HH:MM:SS`` and is interpreted as local time.

    Raises:
        ValueError: if *date_taken* does not match the expected format.
    """
    os.makedirs(dir_name, mode=0o755, exist_ok=True)
    # copy2() returns the path of the file it created. Joining dir_name
    # with the source path would instead point back at the original (or
    # at a path that does not exist), leaving the copy unstamped.
    copied = shutil.copy2(photo, dir_name)
    # Update last modified time with the date photo was taken
    date = datetime.strptime(date_taken, '%Y-%m-%d %H:%M:%S')
    utime = time.mktime(date.timetuple())
    os.utime(copied, (utime, utime))
def copy_photos(metadata, photos, output_path):
    """Copy every photo listed in *metadata* into its album directory.

    *metadata* holds ``(photo_id, date_taken, album)`` tuples. For each
    one the matching file is looked up in *photos* and copied to the
    directory ``output_path/album``; a progress line is printed per copy.
    """
    for photo_id, date_taken, album in metadata:
        source = get_photo(photo_id, photos)
        copy_photo(source, date_taken, os.path.join(output_path, album))
        print(f'Copied {photo_id}: {album} (Taken: {date_taken})')
def run(photos_path, meta_path, output_path, default_album="Default Album"):
    """Sort the photos of a Flickr export into per-album directories.

    Image files are collected from *photos_path* and metadata files from
    *meta_path*; each photo is then copied below *output_path* into a
    directory named after its album. Photos that belong to no album are
    placed in *default_album*.
    """
    photos = find_files(photos_path, is_image_format)
    json_files = find_files(meta_path, is_json_format)
    metadata = parse_metadata(json_files, default_album)
    copy_photos(metadata, photos, output_path)
if __name__ == '__main__':
    # Exit with a usage message instead of an IndexError traceback when
    # arguments are missing. Surplus arguments are ignored, as before.
    if len(sys.argv) < 4:
        sys.exit(
            'Usage: flickr_import.py '
            '<flickr export dir> <flickr metadata dir> <output dir>'
        )
    photos_path, meta_path, output_path = sys.argv[1:4]
    run(photos_path, meta_path, output_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment