Last active
December 31, 2024 09:43
-
-
Save one-data-cookie/ed91cc06ad47b207ac50304cd7e7193e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Script to process a Takeout export of a Google Photos album based on HTML source code. | |
This is a very hacky solution to the non-existent possibility of exporting Google Photos | |
albums in the order set in the UI. | |
The code takes the folder, parses the corresponding JSON files, extracts URL IDs, finds their order | |
in an HTML source code file, and copies & renames the photos with a prefixed counter based on the order. | |
If your export includes edited photos, named `{orig_name}-edited`, these will be used instead of the originals. | |
Usage: | |
1. Use Google Takeout to export a specific album from Google Photos. | |
2. Set `FOLDER_PATH` to the path of the folder containing your photos and JSON files. | |
3.1 Locate the first photo in your album, open its JSON file, and copy its ID from the `url` field: | |
`"url": "https://photos.google.com/photo/{ID}"`. | |
3.2 Set `CUT_ID` to the copied ID | |
4.1 View the Page Source of the album in the Google Photos UI. | |
4.2 Save the HTML code to a file. | |
4.3 Set `SOURCE_PATH` to the path of your source file. | |
5. Run the script using Python. | |
N.B. The code is rather imperfect, created with ChatGPT, with not much time to clean up. But it works! | |
""" | |
import os | |
import json | |
import shutil | |
# --- User configuration (edit these three values before running) ---------
FOLDER_PATH = '[TEMP] Test' # Replace with path to folder with photos & JSONs
SOURCE_PATH = 'source.html' # Replace with path to source file
CUT_ID = 'AF1QipOwzolUMGRvlbO9qPTwRniXbp4iFbUjgTZlIoG1' # Replace with ID of album's first photo
def extract_url_id(file_path):
    """Read one Takeout JSON sidecar and return its ``(url_id, title)`` pair.

    The ID is the final path segment of the ``url`` field
    (``https://photos.google.com/photo/{ID}``). When the sidecar has no
    URL, ``(None, None)`` is returned.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        metadata = json.load(handle)

    photo_url = metadata.get("url", "")
    photo_title = metadata.get("title", "")

    # Guard clause: without a URL there is no ID to extract.
    if not photo_url:
        return None, None
    return photo_url.rsplit('/', 1)[-1], photo_title
def process_folder(folder_path):
    """Build a ``{url_id: title}`` map from every JSON sidecar in a folder.

    Sidecars missing either a URL or a title are skipped.
    """
    id_to_title = {}
    # Only the Takeout metadata files are of interest here.
    sidecar_names = (n for n in os.listdir(folder_path) if n.endswith(".json"))
    for sidecar in sidecar_names:
        url_id, title = extract_url_id(os.path.join(folder_path, sidecar))
        if url_id and title:
            id_to_title[url_id] = title
    return id_to_title
def find_string_order(file_path, target_strings, cut_target_id):
    """Determine the display order of photo IDs from the album's HTML source.

    Everything before the *last* occurrence of ``cut_target_id`` (the ID of
    the album's first photo) is discarded, then each target ID is recorded
    in order of appearance in the remaining text.

    Args:
        file_path: Path to the saved HTML source of the album page.
        target_strings: Iterable of URL IDs to search for.
        cut_target_id: URL ID of the album's first photo; used as cut point.

    Returns:
        list: Target IDs ordered by first occurrence after the cut point.
        IDs that never appear in the source are omitted.
    """
    # Explicit UTF-8: the saved page source is UTF-8 and must not be read
    # with the platform default encoding (e.g. cp1252 on Windows).
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Drop everything before the last occurrence of the first photo's ID so
    # only the album's own ordered listing remains.
    last_occurrence_index = content.rfind(cut_target_id)
    if last_occurrence_index != -1:
        content = content[last_occurrence_index:]

    order_list = []
    for line in content.splitlines():
        # "]]" separates per-photo records in the minified page source.
        for subline in line.split(']]'):
            for target in target_strings:
                if target in subline:
                    order_list.append(target)

    # Deduplicate while preserving first-seen order (dicts keep insertion
    # order), replacing the hacky `seen.add` side-effect comprehension.
    return list(dict.fromkeys(order_list))
def copy_and_rename_files(folder_path, order_list, url_id_title_map):
    """Copy photos into ``{folder_path}_ordered`` with order-prefixed names.

    For each URL ID in ``order_list`` the matching photo — preferring the
    ``{name}-edited{ext}`` variant when present — is copied to the new
    folder as ``{counter}_{name}``, with the counter zero-padded so that
    alphabetical order matches album order.

    Args:
        folder_path: Folder containing the exported photos.
        order_list: URL IDs in the desired album order.
        url_id_title_map: Mapping of URL ID -> original file name (title).
    """
    new_folder_path = f"{folder_path}_ordered"
    os.makedirs(new_folder_path, exist_ok=True)

    # Width of the zero-padded counter, e.g. 3 digits for 100+ photos.
    num_digits = len(str(len(order_list)))

    for index, url_id in enumerate(order_list):
        file_name = url_id_title_map.get(url_id)
        if not file_name:
            # BUG FIX: the ID appeared in the HTML source but has no JSON
            # sidecar; the old code passed None to os.path.splitext and
            # crashed. Skip it instead.
            print(f"Skipping unknown ID {url_id}: no matching JSON title")
            continue

        # Prefer the "-edited" variant exported by Google Photos, if any.
        base_name, file_extension = os.path.splitext(file_name)
        edited_file_name = f"{base_name}-edited{file_extension}"
        source_name = (
            edited_file_name
            if os.path.exists(os.path.join(folder_path, edited_file_name))
            else file_name
        )

        source_path = os.path.join(folder_path, source_name)
        if not os.path.exists(source_path):
            # Title listed in the JSON but the photo itself is missing from
            # the export; warn and continue with the remaining files rather
            # than letting shutil.copy2 raise FileNotFoundError.
            print(f"Skipping {source_name}: file not found in {folder_path}")
            continue

        new_file_name = f"{str(index + 1).zfill(num_digits)}_{source_name}"
        new_file_path = os.path.join(new_folder_path, new_file_name)
        shutil.copy2(source_path, new_file_path)
        print(f"Copied and renamed {source_path} to {new_file_path}")
def main():
    """Run the full pipeline: parse sidecars, order by HTML source, copy."""
    # Map each photo's URL ID to its original file name (title).
    url_id_title_map = process_folder(FOLDER_PATH)
    # Find the album order of those IDs in the saved page source.
    target_strings = list(url_id_title_map.keys())
    order_list = find_string_order(SOURCE_PATH, target_strings, CUT_ID)
    # Copy and rename the files based on the order.
    copy_and_rename_files(FOLDER_PATH, order_list, url_id_title_map)


if __name__ == "__main__":
    # Guard so importing this module does not immediately process files
    # with the hard-coded paths above.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment