Skip to content

Instantly share code, notes, and snippets.

@fengye
Created December 15, 2019 12:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fengye/92708a1ae2102fcbf722165d67418396 to your computer and use it in GitHub Desktop.
Save fengye/92708a1ae2102fcbf722165d67418396 to your computer and use it in GitHub Desktop.
Parse a Google Photos Takeout export and categorise the photos into date-named folders
#!/usr/bin/env python3
from glob import glob
import os.path
import os
import re
import json
import datetime
import shutil
class PhotoEntry:
    """A photo file paired with the JSON sidecar file that describes it.

    Both files live in ``dirname``. The sidecar is only read when
    :meth:`parse` is called; until then every ``getPhotoTaken*`` accessor
    returns ``None``.
    """

    def __init__(self, dirname, filename, json_filename):
        """Remember where the photo and its sidecar live; no I/O is done.

        Args:
            dirname: Directory containing both files.
            filename: Name of the photo file inside ``dirname``.
            json_filename: Name of the JSON sidecar inside ``dirname``.
        """
        self.dirname = dirname
        self.filename = filename
        self.json_filename = json_filename
        # Initialised up front so the attribute always exists, even when
        # parse() has not been called (or failed part-way through).
        self.data = None
        self.parsed = False

    def getPhotoFullname(self):
        """Return the path of the photo file (``dirname`` joined with ``filename``)."""
        return os.path.join(self.dirname, self.filename)

    def getJsonFullname(self):
        """Return the path of the JSON sidecar file."""
        return os.path.join(self.dirname, self.json_filename)

    def parse(self):
        """Load the JSON sidecar into ``self.data`` and mark the entry parsed.

        Raises:
            OSError: If the sidecar cannot be opened.
            ValueError: If the sidecar is not valid UTF-8 encoded JSON.
        """
        # Read explicitly as UTF-8 rather than the platform default encoding,
        # which would mis-decode non-ASCII text on non-UTF-8 locales.
        with open(self.getJsonFullname(), encoding="utf-8") as f:
            self.data = json.load(f)
        self.parsed = True

    def isParsed(self):
        """Return True once parse() has succeeded and produced non-null data."""
        return self.parsed and self.data is not None

    def getPhotoTakenTimeFormatted(self):
        """Return ``data["photoTakenTime"]["formatted"]``, or None if unparsed."""
        if not self.isParsed():
            return None
        return self.data["photoTakenTime"]["formatted"]

    def getPhotoTakenTimestamp(self):
        """Return ``data["photoTakenTime"]["timestamp"]`` as stored, or None if unparsed."""
        if not self.isParsed():
            return None
        return self.data["photoTakenTime"]["timestamp"]

    def getPhotoTakenTime(self):
        """Return the taken-time as a timezone-aware UTC datetime, or None if unparsed.

        The stored timestamp is converted with ``int()`` and interpreted as
        seconds since the Unix epoch.
        """
        if not self.isParsed():
            return None
        seconds = int(self.data["photoTakenTime"]["timestamp"])
        return datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc)
# Sidecar names look like "<photo name>.json" or "<photo name>(<n>).json".
# Group 1 is the photo name, group 2 the optional "(<n>)" suffix.
_SIDECAR_NAME_RE = re.compile(r'^([a-zA-Z0-9\s_\.\-]+)(\([0-9]+\))*(\.json)$')


def _photo_filename_for(short_json_filename):
    """Return the photo filename a sidecar name refers to, or None if unrecognised.

    ``"IMG.jpg.json"`` maps to ``"IMG.jpg"``; a numbered sidecar such as
    ``"IMG.jpg(2).json"`` maps to ``"IMG(2).jpg"`` (the number moves in front
    of the photo's extension).
    """
    m = _SIDECAR_NAME_RE.search(short_json_filename)
    if m is None:
        # re.search returns None on no match; subscripting it would raise.
        return None
    if m[2] is None:
        return m[1]
    # Strip the surrounding parentheses; int() normalises e.g. "01" to 1.
    version = int(m[2][1:-1])
    stem, extension = os.path.splitext(m[1])
    return stem + "(" + str(version) + ")" + extension


def main():
    """Copy photos and their JSON sidecars into folders named after the taken date.

    Scans ``./original_data`` for ``*.json`` sidecars, pairs each with its
    photo, reads the taken-time from the sidecar and copies both files to
    ``./organised_data/<YYYY-MM-DD>/`` (date taken in UTC). Sidecars that
    cannot be recognised, paired with a photo, or read are listed in
    ``error.txt`` in the current directory instead of aborting the run.
    """
    print("Google Takeout - Photos JSON parser and categoriser")

    # list all the .json files
    # NOTE: the pattern contains no "**", so recursive=True has no effect and
    # only the top level of ./original_data is scanned.
    filenames = glob("./original_data/*.json", recursive=True)
    error_filenames = []
    entries = []
    for json_filename in filenames:
        dirname = os.path.dirname(json_filename)
        short_json_filename = os.path.basename(json_filename)

        photo_filename = _photo_filename_for(short_json_filename)
        if photo_filename is None:
            print("Unrecognised file: " + json_filename)
            error_filenames.append(json_filename)
            continue

        if os.path.exists(os.path.join(dirname, photo_filename)):
            print("Found: " + photo_filename + " -> " + short_json_filename)
            entries.append(PhotoEntry(dirname, photo_filename, short_json_filename))
        else:
            print("[ERROR] Doesn't exists: " + photo_filename + " -> " + short_json_filename)
            error_filenames.append(json_filename)

    # parse each valid json
    target_dir = "./organised_data"
    processed = 0
    for photo_entry in entries:
        try:
            photo_entry.parse()
            photo_time = photo_entry.getPhotoTakenTime()
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # A single unreadable or malformed sidecar should not abort the
            # whole run; record it alongside the other failures and move on.
            print("[ERROR] Cannot read metadata: " + photo_entry.json_filename + " (" + str(exc) + ")")
            error_filenames.append(photo_entry.getJsonFullname())
            continue

        print("Processing " + photo_entry.filename + " @ " + str(photo_time))
        datefolder = photo_time.date().isoformat()
        dest_dir = os.path.join(target_dir, datefolder)
        # makedirs also creates target_dir itself on first use, and
        # exist_ok avoids a separate existence check.
        os.makedirs(dest_dir, exist_ok=True)

        print("Copy \"" + photo_entry.filename + "\" to \"" + datefolder + "\"...")
        shutil.copyfile(photo_entry.getPhotoFullname(), os.path.join(dest_dir, photo_entry.filename))
        print("Copy \"" + photo_entry.json_filename + "\" to \"" + datefolder + "\"...")
        shutil.copyfile(photo_entry.getJsonFullname(), os.path.join(dest_dir, photo_entry.json_filename))
        processed += 1

    print("Processed " + str(processed) + " files")

    if error_filenames:
        with open("error.txt", "w", encoding="utf-8") as f:
            for error_filename in error_filenames:
                f.write(error_filename + "\n")
        print("All errors has been written to error.txt")
# Run the categoriser only when this file is executed as a script,
# not when it is imported as a module.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment