isaaclw/parse_and_date_photos.py

## parse_and_date_photos.py
#!/usr/bin/python
"""
Run this script twice:
1) Run it with the 'search' flag, and an output csv file.

- Then edit the csv file and fill in the final column. The found date
  (second column) is only there as a reference.
  To skip a file, remove its row, or leave the final column empty or '-'.
  To delete a file from disk, put 'd' in the final column.
  The date should be in the format:
  %Y:%m:%d %H:%M:%S
  Look at the second column for examples.
  Values omitted from the right default to zero for the time and to the
  first for the month/day, i.e. midnight on January 1st.
  e.g.: 2017 -> Jan 1, 2017 at 00:00:00
        2016:01:30 21 -> Jan 30, 2016 at 21:00:00
  See 'process_file' for more info.

2) Run the script again with the 'update' flag, *in the same folder*.
   The script will take the final column and update files with the date
   there.
"""
import datetime
import os
import csv
import shutil
import subprocess
import sys

def set_date_time(file, dateobj):
    """Write ``dateobj`` into the EXIF date tags of ``file`` via exiftool.

    Both ``datetimedigitized`` and ``datetimeoriginal`` are set to the same
    value, formatted as ``%Y:%m:%d %H:%M:%S``, and exiftool is run with
    ``-overwrite_original``.

    Args:
        file: Path of the image to update, passed to exiftool unchanged.
        dateobj: ``datetime.datetime`` holding the timestamp to write.

    Returns:
        None. exiftool's exit status is not inspected, so a failed write is
        only visible through exiftool's own output.
    """
    stamp = dateobj.strftime('%Y:%m:%d %H:%M:%S')
    # Both tags go into a single exiftool run so the file is rewritten once
    # instead of twice.
    subprocess.call([
        'exiftool', '-overwrite_original',
        '-datetimedigitized=' + stamp,
        '-datetimeoriginal=' + stamp,
        file,
    ])


def delete_file(file):
    """Remove ``file`` from disk.

    The path is joined onto the current working directory before removal,
    so a relative ``file`` is resolved from where the script is running and
    an absolute ``file`` is used as given.
    """
    target = os.path.join(os.getcwd(), file)
    os.remove(target)


def _parse_new_date(new_date):
    """Turn a (possibly partial) ``Y:m:d H:M:S`` string into a datetime."""
    # split() without an argument tolerates repeated spaces or tabs between
    # the date part and the time part.
    parts = new_date.split()
    date = parts[0]
    hrs = parts[1] if len(parts) > 1 else '00:00:00'

    # Missing month/day default to '01'; a literal '00' is treated the same
    # way, since neither month 0 nor day 0 exists.
    yr, mo, da = (date.split(':') + ['01'] * 2)[:3]
    if mo == '00':
        mo = '01'
    if da == '00':
        da = '01'

    # Missing hours, minutes or seconds default to '00'.
    h, m, s = (hrs.split(':') + ['00'] * 3)[:3]

    try:
        return datetime.datetime(int(yr), int(mo), int(da),
                                 int(h), int(m), int(s))
    except ValueError:
        # Show the offending components, then let the error propagate with
        # its original traceback.
        print(' '.join((yr, mo, da, h, m, s)))
        raise


def process_file(reader):
    """Apply the edits listed in a reviewed spreadsheet to the files on disk.

    Each row is expected to hold ``file, found date, new date``. Only the
    first and third columns are used:

    * ``d`` in the third column deletes the file;
    * an empty value, ``-``, or a missing third column leaves the file
      untouched;
    * anything else is parsed as a (possibly partial) ``%Y:%m:%d %H:%M:%S``
      date and written to the file with ``set_date_time``.

    Rows whose file does not exist are reported and skipped; completely
    empty rows (blank lines in the csv) are skipped silently.

    Args:
        reader: Iterable of rows, each a sequence of strings (for example a
            ``csv.reader``).

    Raises:
        ValueError: A date component is not an integer or is out of range;
            the components are printed before the error propagates.
    """
    for row in reader:
        cells = [cell.strip() for cell in row]
        if not cells:
            # Blank line in the csv: nothing to do.
            continue

        file = cells[0]
        # A row without a third column carries no new date.
        new_date = cells[2] if len(cells) > 2 else ''

        if not os.path.isfile(file):
            print("can't find: %s" % file)
            continue

        if new_date == 'd':
            delete_file(file)
            continue

        if new_date in ('', '-'):
            continue

        set_date_time(file, _parse_new_date(new_date))


def get_image_date(file):
    """Return the ``DateTimeDigitized`` value reported for ``file``, if any.

    Runs ``identify -verbose`` on the file (presumably ImageMagick's
    ``identify``) and scans its output for the first line that mentions
    ``DateTimeDigitized``.

    Args:
        file: Path of the image to inspect.

    Returns:
        The date text that follows the tag name (e.g.
        ``'2009:08:25 00:06:33'``), or None when no such line is present or
        when ``identify`` wrote anything to stderr (that output is printed).

    Raises:
        Exception: ``identify`` exited with a non-zero status while stderr
            was empty and the tag was not found.
    """
    process = subprocess.Popen(['identify', '-verbose', file],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               # text-mode pipes: output is ``str`` on
                               # Python 3 as well as Python 2
                               universal_newlines=True)
    # communicate() drains both pipes together and waits for the child, so
    # a large stderr cannot stall the read of stdout, and the process is
    # reaped even on the early returns below.
    stdout, stderr = process.communicate()

    if stderr:
        print(stderr)
        return None

    for line in stdout.split('\n'):
        if 'DateTimeDigitized' in line:
            # line looks like this:
            #     exif:DateTimeDigitized: 2009:08:25 00:06:33
            # Drop the two leading ':'-separated labels and keep the rest.
            return ':'.join(line.split(':')[2:]).strip()

    if process.returncode:
        raise Exception("process crashed")
    return None


def create_csv(writer, folder):
    """Write one spreadsheet row per file found under ``folder``.

    The directory tree is walked recursively. Each row is
    ``[path, found date, '']``: the found date comes from
    ``get_image_date`` (empty string when it returns nothing) and the last
    column is left empty for the user to fill in.

    Args:
        writer: Object with a ``writerow`` method (for example a
            ``csv.writer``).
        folder: Root directory to scan.
    """
    for root, _dirs, names in os.walk(folder):
        for name in names:
            full_path = os.path.join(root, name)
            csv_row = [full_path, get_image_date(full_path) or '', '']
            # Same text as the old ``print 'writing', csv_row`` statement,
            # in a form that parses on Python 2 and Python 3.
            print('writing %s' % (csv_row,))
            writer.writerow(csv_row)


if __name__ == '__main__':
    # Command-line entry point: --update applies an edited spreadsheet,
    # --search together with --output writes a fresh one.
    import optparse
    parser = optparse.OptionParser()

    parser.add_option(
            '-u', '--update',
            dest='update',
            help="Provide a spreadsheet to update files with"
        )
    parser.add_option(
            '-s', '--search',
            dest='search',
            help="Provide a directory to search."
        )
    parser.add_option(
            '-o', '--output',
            dest='output',
            help="Provide a csv file to write. Used with 'search'"
        )

    options, args = parser.parse_args()

    if options.update:
        # The with-block closes the spreadsheet even if an update fails.
        with open(options.update, 'r') as csv_in:
            process_file(csv.reader(csv_in))

    # Searching only happens when both a directory and an output file are
    # given.
    if options.search and options.output:
        # The with-block flushes and closes the csv even if scanning a file
        # raises part-way through.
        with open(options.output, 'w') as csv_out:
            create_csv(csv.writer(csv_out), options.search)
	#!/usr/bin/python
	"""
	Run this script twice:
	1) Run it with the 'search' flag, and an output csv file.

	- Then edit the csv file and fill in the final column. The found date
	(second column) is only there as a reference.
	To skip a file, remove its row, or leave the final column empty or '-'.
	To delete a file from disk, put 'd' in the final column.
	The date should be in the format:
	%Y:%m:%d %H:%M:%S
	Look at the second column for examples.
	Values omitted from the right default to zero for the time and to the
	first for the month/day, i.e. midnight on January 1st.
	e.g.: 2017 -> Jan 1, 2017 at 00:00:00
	2016:01:30 21 -> Jan 30, 2016 at 21:00:00
	See 'process_file' for more info.

	2) Run the script again with the 'update' flag, in the same folder.
	The script will take the final column and update files with the date
	there.
	"""
	import datetime
	import os
	import csv
	import shutil
	import subprocess
	import sys

	def set_date_time(file, dateobj):
		"""Write ``dateobj`` into the EXIF date tags of ``file`` via exiftool.

		Both ``datetimedigitized`` and ``datetimeoriginal`` are set to the
		same value, formatted as ``%Y:%m:%d %H:%M:%S``, and exiftool is run
		with ``-overwrite_original``.

		Args:
			file: Path of the image to update, passed to exiftool unchanged.
			dateobj: ``datetime.datetime`` holding the timestamp to write.

		Returns:
			None. exiftool's exit status is not inspected.
		"""
		stamp = dateobj.strftime('%Y:%m:%d %H:%M:%S')
		# Both tags go into a single exiftool run so the file is rewritten
		# once instead of twice.
		subprocess.call([
			'exiftool', '-overwrite_original',
			'-datetimedigitized=' + stamp,
			'-datetimeoriginal=' + stamp,
			file,
		])


	def delete_file(file):
		"""Remove ``file`` from disk.

		The path is joined onto the current working directory before
		removal, so a relative ``file`` is resolved from where the script is
		running and an absolute ``file`` is used as given.
		"""
		os.remove(os.path.join(os.getcwd(), file))


	def _parse_new_date(new_date):
		"""Turn a (possibly partial) ``Y:m:d H:M:S`` string into a datetime."""
		# split() without an argument tolerates repeated spaces or tabs
		# between the date part and the time part.
		parts = new_date.split()
		date = parts[0]
		hrs = parts[1] if len(parts) > 1 else '00:00:00'

		# Missing month/day default to '01'; a literal '00' is treated the
		# same way.
		yr, mo, da = (date.split(':') + ['01'] * 2)[:3]
		if mo == '00':
			mo = '01'
		if da == '00':
			da = '01'

		# Missing hours, minutes or seconds default to '00'.
		h, m, s = (hrs.split(':') + ['00'] * 3)[:3]

		try:
			return datetime.datetime(int(yr), int(mo), int(da),
				int(h), int(m), int(s))
		except ValueError:
			# Show the offending components, then let the error propagate
			# with its original traceback.
			print(' '.join((yr, mo, da, h, m, s)))
			raise


	def process_file(reader):
		"""Apply the edits listed in a reviewed spreadsheet to the files.

		Each row is expected to hold ``file, found date, new date``. Only
		the first and third columns are used:

		* ``d`` in the third column deletes the file;
		* an empty value, ``-``, or a missing third column leaves the file
		  untouched;
		* anything else is parsed as a (possibly partial)
		  ``%Y:%m:%d %H:%M:%S`` date and written with ``set_date_time``.

		Rows whose file does not exist are reported and skipped; completely
		empty rows are skipped silently.

		Args:
			reader: Iterable of rows, each a sequence of strings (for
				example a ``csv.reader``).

		Raises:
			ValueError: A date component is not an integer or is out of
				range; the components are printed first.
		"""
		for row in reader:
			cells = [cell.strip() for cell in row]
			if not cells:
				# Blank line in the csv: nothing to do.
				continue

			file = cells[0]
			# A row without a third column carries no new date.
			new_date = cells[2] if len(cells) > 2 else ''

			if not os.path.isfile(file):
				print("can't find: %s" % file)
				continue

			if new_date == 'd':
				delete_file(file)
				continue

			if new_date in ('', '-'):
				continue

			set_date_time(file, _parse_new_date(new_date))


	def get_image_date(file):
		"""Return the ``DateTimeDigitized`` value reported for ``file``.

		Runs ``identify -verbose`` on the file (presumably ImageMagick's
		``identify``) and scans its output for the first line that mentions
		``DateTimeDigitized``.

		Args:
			file: Path of the image to inspect.

		Returns:
			The date text that follows the tag name (e.g.
			``'2009:08:25 00:06:33'``), or None when no such line is present
			or when ``identify`` wrote anything to stderr (that output is
			printed).

		Raises:
			Exception: ``identify`` exited with a non-zero status while
				stderr was empty and the tag was not found.
		"""
		process = subprocess.Popen(['identify', '-verbose', file],
			stdout=subprocess.PIPE, stderr=subprocess.PIPE,
			universal_newlines=True)
		# communicate() drains both pipes together and waits for the child,
		# so a large stderr cannot stall the read of stdout, and the process
		# is reaped even on the early returns below.
		stdout, stderr = process.communicate()

		if stderr:
			print(stderr)
			return None

		for line in stdout.split('\n'):
			if 'DateTimeDigitized' in line:
				# line looks like this:
				#     exif:DateTimeDigitized: 2009:08:25 00:06:33
				# Drop the two leading ':'-separated labels, keep the rest.
				return ':'.join(line.split(':')[2:]).strip()

		if process.returncode:
			raise Exception("process crashed")
		return None


	def create_csv(writer, folder):
		"""Write one spreadsheet row per file found under ``folder``.

		The directory tree is walked recursively. Each row is
		``[path, found date, '']``: the found date comes from
		``get_image_date`` (empty string when it returns nothing) and the
		last column is left empty for the user to fill in.

		Args:
			writer: Object with a ``writerow`` method (for example a
				``csv.writer``).
			folder: Root directory to scan.
		"""
		for root, _dirs, names in os.walk(folder):
			for name in names:
				full_path = os.path.join(root, name)
				csv_row = [full_path, get_image_date(full_path) or '', '']
				# Same text as the old ``print 'writing', csv_row``
				# statement, in a form that parses on Python 2 and 3.
				print('writing %s' % (csv_row,))
				writer.writerow(csv_row)


	if __name__ == '__main__':
		# Command-line entry point: --update applies an edited spreadsheet,
		# --search together with --output writes a fresh one.
		import optparse
		parser = optparse.OptionParser()

		parser.add_option(
			'-u', '--update',
			dest='update',
			help="Provide a spreadsheet to update files with"
		)
		parser.add_option(
			'-s', '--search',
			dest='search',
			help="Provide a directory to search."
		)
		parser.add_option(
			'-o', '--output',
			dest='output',
			help="Provide a csv file to write. Used with 'search'"
		)

		options, args = parser.parse_args()

		if options.update:
			# The with-block closes the spreadsheet even if an update fails.
			with open(options.update, 'r') as csv_in:
				process_file(csv.reader(csv_in))

		# Searching only happens when both a directory and an output file
		# are given.
		if options.search and options.output:
			# The with-block flushes and closes the csv even if scanning a
			# file raises part-way through.
			with open(options.output, 'w') as csv_out:
				create_csv(csv.writer(csv_out), options.search)