Skip to content

Instantly share code, notes, and snippets.

@one-data-cookie
Last active December 31, 2024 09:43
Show Gist options
  • Save one-data-cookie/ed91cc06ad47b207ac50304cd7e7193e to your computer and use it in GitHub Desktop.
Save one-data-cookie/ed91cc06ad47b207ac50304cd7e7193e to your computer and use it in GitHub Desktop.
"""
Script to process a Takeout export of a Google Photos album based on HTML source code.
This is a very hacky workaround for the fact that Google Photos offers no way to export
albums in the order set in the UI.
The code takes the folder, parses the corresponding JSON files, extracts URL IDs, finds their order
in an HTML source code file, and copies & renames the photos with a prefixed counter based on the order.
If your export includes edited photos, named `{orig_name}-edited`, these will be used instead of the originals.
Usage:
1. Use Google Takeout to export a specific album from Google Photos.
2. Set `FOLDER_PATH` to the path of the folder containing your photos and JSON files.
3.1 Locate the first photo in your album, open its JSON file, and copy its ID from the `url` field:
`"url": "https://photos.google.com/photo/{ID}"`.
3.2 Set `CUT_ID` to the copied ID.
4.1 View the Page Source of the album in the Google Photos UI.
4.2 Save the HTML code to a file.
4.3 Set `SOURCE_PATH` to the path of your source file.
5. Run the script using Python.
N.B. The code is rather imperfect, created with ChatGPT, with not much time to clean up. But it works!
"""
import os
import json
import shutil
# User configuration: edit these three values before running the script.

# Folder that is scanned for `.json` files and from which the photos are copied;
# the output folder is created next to it with an `_ordered` suffix.
FOLDER_PATH = '[TEMP] Test' # Replace with path to folder with photos & JSONs
# Saved HTML page source that is searched for the photo IDs to determine their order.
SOURCE_PATH = 'source.html' # Replace with path to source file
# ID whose LAST occurrence in the page source marks where the ordered listing starts;
# everything before that occurrence is ignored.
CUT_ID = 'AF1QipOwzolUMGRvlbO9qPTwRniXbp4iFbUjgTZlIoG1' # Replace with ID of album's first photo
def extract_url_id(file_path):
    """
    Extract the URL ID and title from a JSON file.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        A ``(url_id, title)`` tuple. ``url_id`` is the last path segment of the
        file's "url" field and ``title`` is its "title" field ("" when absent).
        ``(None, None)`` is returned when the file has no usable "url", i.e. the
        top-level JSON value is not an object or "url" is missing, empty or not
        a string.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Any JSON file may end up in the folder; a top-level list or scalar has no
    # "url" field to read, so treat it like a file without a URL.
    if not isinstance(data, dict):
        return None, None

    url = data.get("url", "")
    title = data.get("title", "")
    if isinstance(url, str) and url:
        # Ignore trailing slashes so the ID is still the last real path segment.
        url_id = url.rstrip('/').split('/')[-1]
        return url_id, title
    return None, None
def process_folder(folder_path):
    """
    Build a URL-ID -> title mapping from the JSON files of a folder.

    Only directory entries whose name ends with ".json" are read. Each one is
    passed to `extract_url_id`; entries for which either the ID or the title
    comes back empty are left out. When several files yield the same ID, the
    one processed last wins.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A dict mapping each extracted URL ID to its title.
    """
    json_names = (name for name in os.listdir(folder_path) if name.endswith(".json"))

    id_to_title = {}
    for name in json_names:
        url_id, title = extract_url_id(os.path.join(folder_path, name))
        # Both parts are needed later: the ID for ordering, the title as file name.
        if not (url_id and title):
            continue
        id_to_title[url_id] = title
    return id_to_title
def find_string_order(file_path, target_strings, cut_target_id):
    """
    Find the order in which the target strings first appear in the given file.

    The file content is first cut so that it starts at the LAST occurrence of
    `cut_target_id`; when that ID is empty or not found, the whole content is
    searched. The remaining text is split into lines and each line into
    fragments at "]]". Fragments are visited in order, and a target is recorded
    the first time a fragment contains it. Targets found in the same fragment
    keep their relative order from `target_strings`.

    Args:
        file_path: Path to the saved page source.
        target_strings: Iterable of strings (photo IDs) to look for.
        cut_target_id: ID marking where the relevant part of the file starts.

    Returns:
        A list of the targets that were found, without duplicates, in order of
        first appearance. Targets that never occur are omitted.
    """
    # Decode explicitly instead of relying on the platform default encoding.
    # Undecodable bytes are replaced rather than raising, since only the plain
    # substring matches matter here.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        content = file.read()

    # rfind('') returns len(content), which would throw the whole file away,
    # so an empty cut ID means "do not cut".
    if cut_target_id:
        cut_index = content.rfind(cut_target_id)
        if cut_index != -1:
            content = content[cut_index:]

    ordered_occurrences = []
    seen = set()
    for line in content.splitlines():
        for subline in line.split(']]'):
            for target in target_strings:
                # Only the first appearance counts; skipping known targets also
                # avoids rescanning the fragment for them.
                if target not in seen and target in subline:
                    seen.add(target)
                    ordered_occurrences.append(target)
    return ordered_occurrences
def copy_and_rename_files(folder_path, order_list, url_id_title_map):
    """
    Copy and rename files to a new folder based on the determined order.

    The destination is `folder_path` with an "_ordered" suffix; it is created
    if needed. Each copied file is named "<counter>_<source name>", where the
    counter is the 1-based position of the ID in `order_list`, zero-padded to
    the number of digits of `len(order_list)`. If "<name>-edited<ext>" exists
    in `folder_path`, it is copied instead of "<name><ext>".

    IDs without a file name in `url_id_title_map`, and files that do not exist
    in `folder_path`, are reported and skipped. Their counter value is left
    unused so the remaining files keep their position numbers.

    Args:
        folder_path: Folder containing the source files.
        order_list: URL IDs in the desired output order.
        url_id_title_map: Mapping of URL ID to file name inside `folder_path`.
    """
    # Create a new folder for ordered files
    new_folder_path = f"{folder_path}_ordered"
    os.makedirs(new_folder_path, exist_ok=True)

    # Determine the number of digits needed for zero-padding
    num_digits = len(str(len(order_list)))

    for position, url_id in enumerate(order_list, start=1):
        file_name = url_id_title_map.get(url_id)
        # Check before splitting the name: os.path.splitext(None) would raise.
        if not file_name:
            print(f"Skipping {url_id}: no file name known for this ID")
            continue

        # Prefer the edited version if it exists
        base_name, file_extension = os.path.splitext(file_name)
        edited_file_name = f"{base_name}-edited{file_extension}"
        if os.path.exists(os.path.join(folder_path, edited_file_name)):
            source_name = edited_file_name
        else:
            source_name = file_name
        source_path = os.path.join(folder_path, source_name)

        # Create new file name with zero-padded counter
        new_file_name = f"{str(position).zfill(num_digits)}_{source_name}"
        new_file_path = os.path.join(new_folder_path, new_file_name)

        try:
            shutil.copy2(source_path, new_file_path)
        except FileNotFoundError:
            # One missing photo should not abort the rest of the album.
            print(f"Skipping {url_id}: {source_path} not found")
            continue
        print(f"Copied and renamed {source_path} to {new_file_path}")
def main():
    """
    Run the whole pipeline with the module-level configuration.

    Reads the ID -> title mapping from `FOLDER_PATH`, determines the order of
    those IDs in `SOURCE_PATH` (starting at `CUT_ID`), and copies the files
    into the ordered output folder.
    """
    # Process the folder to get the list of IDs and their corresponding titles
    url_id_title_map = process_folder(FOLDER_PATH)
    # Find the order of occurrences based on the source file
    target_strings = list(url_id_title_map)
    order_list = find_string_order(SOURCE_PATH, target_strings, CUT_ID)
    # Copy and rename the files based on the order
    copy_and_rename_files(FOLDER_PATH, order_list, url_id_title_map)


# Only run when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment