# lucyb/flickr_import.py

## flickr_import.py
# Sort photos from a flickr bulk export into a better directory structure
#
# It creates directories based on album names and copies photos into the specified
# output location.
#
# Usage: flickr_import.py [flickr export dir] [flickr metadata dir] [output dir]
#
# Requires Python 3.6 and the python magic package (do `pip install python-magic`)

import os
import sys
import json
import time
from datetime import datetime
import shutil

import magic


def is_image_format(filename):
    """Return True when python-magic reports an ``image/*`` MIME type for *filename*."""
    return magic.from_file(filename, mime=True).startswith('image/')


def is_json_format(filename):
    """Return True when python-magic reports any ``text/*`` MIME type for *filename*.

    The check is deliberately loose: some files are identified as text/html
    even though they appear to be valid json.
    """
    return magic.from_file(filename, mime=True).startswith('text/')


def size_not_zero(filename):
    """Return True when the file at *filename* contains at least one byte."""
    return os.path.getsize(filename) > 0


def is_valid(filename, format_type):
    """Check that *filename* is non-empty and accepted by *format_type*.

    The format predicate is only called for non-empty files; its result is
    returned unchanged in that case.
    """
    if not size_not_zero(filename):
        return False
    return format_type(filename)


def find_files(path, format_type):
    """Recursively collect the files under *path* that pass ``is_valid``.

    Args:
        path: Directory to walk; sub-directories are included.
        format_type: Predicate taking a file path, e.g. ``is_image_format``.

    Returns:
        A list of paths (walk root joined with the file name), in the order
        ``os.walk`` yields them.
    """
    found_files = []
    for root, _dirs, files in os.walk(path):
        candidates = (os.path.join(root, name) for name in files)
        # extend() instead of a comprehension run only for its side effect.
        found_files.extend(f for f in candidates if is_valid(f, format_type))

    return found_files


def parse_metadata(json_files, default_album):
    """Build (photo id, date taken, album title) tuples from metadata files.

    Args:
        json_files: Iterable of paths to candidate JSON files.
        default_album: Album title used for photos whose ``albums`` list is
            empty.

    Returns:
        A list of ``(id, date_taken, album_title)`` tuples; a photo that is in
        several albums yields one tuple per album. Files that cannot be
        decoded, parsed, or that lack the expected keys are reported on
        stdout and skipped.
    """
    metadata = []
    for filename in json_files:
        # JSON is UTF-8 by specification; do not rely on the platform default
        # encoding, which would make non-ASCII titles fail on some systems.
        with open(filename, encoding='utf-8') as json_file:
            try:
                data = json.load(json_file)
                photo_id = data['id']
                date_taken = data['date_taken']

                if not data['albums']:
                    metadata.append((photo_id, date_taken, default_album))
                else:
                    for album in data['albums']:
                        metadata.append((photo_id, date_taken, album['title']))
            except (ValueError, KeyError, TypeError) as e:
                # Not all json files are for a photo, so skip over those.
                # ValueError covers both JSON and unicode decoding errors.
                print(filename)
                print(e)

    return metadata


def get_photo(photo_id, photos):
    """Return the single path in *photos* whose file name contains ``_<photo_id>_``.

    Args:
        photo_id: Photo identifier; formatted into the marker, so any value
            with a string representation works.
        photos: Iterable of file paths to search.

    Returns:
        The one matching path.

    Raises:
        Exception: If no path matches, or if more than one does.
    """
    filename = f'_{photo_id}_'
    # Compare against the file name only, so a directory that happens to
    # contain the marker cannot produce false or duplicate matches.
    files = [photo for photo in photos if filename in os.path.basename(photo)]

    if not files:
        raise Exception(f'No files found for: {filename}')
    if len(files) > 1:
        raise Exception(f'Multiple files found: {files}')

    return files[0]


def copy_photo(photo, date_taken, dir_name):
    """Copy *photo* into *dir_name* and stamp the copy with its taken date.

    Args:
        photo: Path of the source file.
        date_taken: Timestamp string in ``%Y-%m-%d %H:%M:%S`` format; it is
            interpreted as local time.
        dir_name: Destination directory; created (with parents) if missing.

    Raises:
        ValueError: If *date_taken* does not match the expected format.
        OSError: If the directory cannot be created or the copy fails.
    """
    # makedirs also creates missing parent directories and tolerates an
    # already existing album directory.
    os.makedirs(dir_name, mode=0o755, exist_ok=True)

    # copy2 returns the path of the file it created. Joining dir_name with the
    # source path would instead point at the source (absolute path) or at a
    # non-existent nested path (relative path).
    copied = shutil.copy2(photo, dir_name)

    # Update access/modified time of the copy with the date photo was taken
    date = datetime.strptime(date_taken, '%Y-%m-%d %H:%M:%S')
    utime = time.mktime(date.timetuple())
    os.utime(copied, (utime, utime))


def copy_photos(metadata, photos, output_path):
    """Copy every photo listed in *metadata* into its album directory.

    Each metadata entry is a (photo id, date taken, album) triple. The source
    file is looked up in *photos* via ``get_photo`` and copied into
    ``output_path/album`` via ``copy_photo``; one line is printed per copy.
    """
    for photo_id, date_taken, album in metadata:
        source_file = get_photo(photo_id, photos)
        album_dir = os.path.join(output_path, album)

        copy_photo(source_file, date_taken, album_dir)
        print(f'Copied {photo_id}: {album} (Taken: {date_taken})')


def run(photos_path, meta_path, output_path):
    """Find photos and metadata, then copy the photos into album directories.

    Photos without any album are placed in "Default Album".
    """
    image_files = find_files(photos_path, is_image_format)
    metadata_files = find_files(meta_path, is_json_format)
    records = parse_metadata(metadata_files, "Default Album")
    copy_photos(records, image_files, output_path)


if __name__ == '__main__':
    # Exit with a usage hint instead of a bare IndexError when arguments are
    # missing. Extra arguments are still ignored, as before.
    if len(sys.argv) < 4:
        sys.exit(
            f'Usage: {sys.argv[0]} '
            '[flickr export dir] [flickr metadata dir] [output dir]'
        )

    photos_path, meta_path, output_path = sys.argv[1:4]

    run(photos_path, meta_path, output_path)
	# Sort photos from a flickr bulk export into a better directory structure
	#
	# It creates directories based on album names and copies photos into the specified
	# output location.
	#
	# Usage: flickr_import.py [flickr export dir] [flickr metadata dir] [output dir]
	#
	# Requires Python 3.6 and the python magic package (do `pip install python-magic`)

	import os
	import sys
	import json
	import time
	from datetime import datetime
	import shutil

	import magic


	def is_image_format(filename):
	    """Return True when python-magic reports an ``image/*`` MIME type for *filename*."""
	    # Body indentation restored; the flattened original was not valid Python.
	    file_type = magic.from_file(filename, mime=True)
	    return file_type.startswith('image/')


	def is_json_format(filename):
	    """Return True when python-magic reports any ``text/*`` MIME type for *filename*."""
	    # Body indentation restored; the flattened original was not valid Python.
	    file_type = magic.from_file(filename, mime=True)
	    # Not being too strict here as some files are being id'd as text/html,
	    # even though they appear to be valid json
	    return file_type.startswith('text/')


	def size_not_zero(filename):
	    """Return True when the file at *filename* contains at least one byte."""
	    # Body indentation restored; the flattened original was not valid Python.
	    filesize = os.stat(filename).st_size
	    return filesize > 0


	def is_valid(filename, format_type):
	    """Check that *filename* is non-empty and accepted by *format_type*.

	    The format predicate is only evaluated for non-empty files.
	    """
	    # Body indentation restored; the flattened original was not valid Python.
	    return size_not_zero(filename) and format_type(filename)


	def find_files(path, format_type):
	    """Recursively collect the files under *path* that pass ``is_valid``.

	    Args:
	        path: Directory to walk; sub-directories are included.
	        format_type: Predicate taking a file path, e.g. ``is_image_format``.

	    Returns:
	        A list of paths (walk root joined with the file name), in the order
	        ``os.walk`` yields them.
	    """
	    # Nesting restored; the flattened original was not valid Python.
	    found_files = []
	    for root, _dirs, files in os.walk(path):
	        candidates = (os.path.join(root, name) for name in files)
	        # extend() instead of a comprehension run only for its side effect.
	        found_files.extend(f for f in candidates if is_valid(f, format_type))

	    return found_files


	def parse_metadata(json_files, default_album):
	    """Build (photo id, date taken, album title) tuples from metadata files.

	    Args:
	        json_files: Iterable of paths to candidate JSON files.
	        default_album: Album title used for photos whose ``albums`` list is
	            empty.

	    Returns:
	        A list of ``(id, date_taken, album_title)`` tuples; a photo that is
	        in several albums yields one tuple per album. Files that cannot be
	        decoded, parsed, or that lack the expected keys are reported on
	        stdout and skipped.
	    """
	    # Nesting restored; the flattened original was not valid Python.
	    metadata = []
	    for filename in json_files:
	        # JSON is UTF-8 by specification; do not rely on the platform
	        # default encoding, which breaks non-ASCII titles on some systems.
	        with open(filename, encoding='utf-8') as json_file:
	            try:
	                data = json.load(json_file)
	                photo_id = data['id']
	                date_taken = data['date_taken']

	                if not data['albums']:
	                    metadata.append((photo_id, date_taken, default_album))
	                else:
	                    for album in data['albums']:
	                        metadata.append((photo_id, date_taken, album['title']))
	            except (ValueError, KeyError, TypeError) as e:
	                # Not all json files are for a photo, so skip over those.
	                # ValueError covers both JSON and unicode decoding errors.
	                print(filename)
	                print(e)

	    return metadata


	def get_photo(photo_id, photos):
	    """Return the single path in *photos* whose file name contains ``_<photo_id>_``.

	    Args:
	        photo_id: Photo identifier; formatted into the marker, so any value
	            with a string representation works.
	        photos: Iterable of file paths to search.

	    Returns:
	        The one matching path.

	    Raises:
	        Exception: If no path matches, or if more than one does.
	    """
	    # Nesting restored; the flattened original was not valid Python.
	    filename = f'_{photo_id}_'
	    # Compare against the file name only, so a directory that happens to
	    # contain the marker cannot produce false or duplicate matches.
	    files = [photo for photo in photos if filename in os.path.basename(photo)]

	    if not files:
	        raise Exception(f'No files found for: {filename}')
	    if len(files) > 1:
	        raise Exception(f'Multiple files found: {files}')

	    return files[0]


	def copy_photo(photo, date_taken, dir_name):
	    """Copy *photo* into *dir_name* and stamp the copy with its taken date.

	    Args:
	        photo: Path of the source file.
	        date_taken: Timestamp string in ``%Y-%m-%d %H:%M:%S`` format; it is
	            interpreted as local time.
	        dir_name: Destination directory; created (with parents) if missing.

	    Raises:
	        ValueError: If *date_taken* does not match the expected format.
	        OSError: If the directory cannot be created or the copy fails.
	    """
	    # Nesting restored; the flattened original was not valid Python.
	    # makedirs also creates missing parents and tolerates an existing dir.
	    os.makedirs(dir_name, mode=0o755, exist_ok=True)

	    # copy2 returns the path of the file it created. Joining dir_name with
	    # the source path would instead point at the source (absolute path) or
	    # at a non-existent nested path (relative path).
	    copied = shutil.copy2(photo, dir_name)

	    # Update access/modified time of the copy with the date photo was taken
	    date = datetime.strptime(date_taken, '%Y-%m-%d %H:%M:%S')
	    utime = time.mktime(date.timetuple())
	    os.utime(copied, (utime, utime))


	def copy_photos(metadata, photos, output_path):
	    """Copy every photo listed in *metadata* into its album directory.

	    Each metadata entry is a (photo id, date taken, album) triple. The
	    source file is looked up in *photos* via ``get_photo`` and copied into
	    ``output_path/album`` via ``copy_photo``; one line is printed per copy.
	    """
	    # Nesting restored; the flattened original was not valid Python.
	    for photo_id, date_taken, album in metadata:
	        photo = get_photo(photo_id, photos)
	        dir_name = os.path.join(output_path, album)

	        copy_photo(photo, date_taken, dir_name)
	        print(f'Copied {photo_id}: {album} (Taken: {date_taken})')


	def run(photos_path, meta_path, output_path):
	    """Find photos and metadata, then copy the photos into album directories.

	    Photos without any album are placed in "Default Album".
	    """
	    # Body indentation restored; the flattened original was not valid Python.
	    default_album = "Default Album"
	    photos = find_files(photos_path, is_image_format)
	    json_files = find_files(meta_path, is_json_format)
	    metadata = parse_metadata(json_files, default_album)
	    copy_photos(metadata, photos, output_path)


	if __name__ == '__main__':
	    # Body indentation restored; the flattened original was not valid Python.
	    # Exit with a usage hint instead of a bare IndexError when arguments
	    # are missing. Extra arguments are still ignored.
	    if len(sys.argv) < 4:
	        sys.exit(
	            f'Usage: {sys.argv[0]} '
	            '[flickr export dir] [flickr metadata dir] [output dir]'
	        )

	    photos_path, meta_path, output_path = sys.argv[1:4]

	    run(photos_path, meta_path, output_path)