Skip to content

Instantly share code, notes, and snippets.

@guillaumematheron
Last active April 4, 2024 21:08
Show Gist options
  • Save guillaumematheron/89f52ffd274ff3ac99f6dc0249bcc331 to your computer and use it in GitHub Desktop.
Save guillaumematheron/89f52ffd274ff3ac99f6dc0249bcc331 to your computer and use it in GitHub Desktop.
"""
Simulate re-watching all your videos from a given date onwards.
If part of your youtube history has been deleted but is still visible from 'my activity', then
you can export it using google takeout, and use this script to simulate watching all these videos
in order, in a relatively short time.
Note that the history will not be backdated.
License: CC0 / Public domain
"""
import hashlib
import json
import random
import os
import time
from typing import Any, Dict, List
import yt_dlp
# Watch-history events whose "time" string sorts before this value are skipped.
RESUME = "2022-08-17T11:50:00.000Z"
def main():
    """Replay the YouTube watch history stored in ``watch-history.json``.

    Events are filtered (YouTube only, no ads, not older than ``RESUME``,
    must carry a URL), de-duplicated by URL, sorted by time and then passed
    one by one to yt-dlp in simulate mode with ``mark_watched`` enabled.

    An empty file is created in the 'done' directory for every processed
    URL, so that this script can be interrupted and resumed.
    """
    # exist_ok keeps re-runs quiet but still fails loudly if 'done' exists
    # and is not a directory.
    os.makedirs("done", exist_ok=True)

    # This file should be generated using google takeout
    with open("watch-history.json", encoding="utf8") as f:
        data = json.load(f)

    kept: List[Dict[str, Any]] = []
    for event in data:
        # Disregard youtube music (and entries without a header at all)
        if event.get("header") != "YouTube":
            continue
        # "details" may be missing or empty; only its first entry is checked.
        details = event.get("details")
        if details and details[0].get("name") == "From Google Ads":
            continue
        if "titleUrl" not in event:
            continue
        # Timestamps are compared as plain strings.
        when = event.get("time")
        if when is None or when < RESUME:
            continue
        kept.append(event)
    print(f"Found {len(kept)} videos to watch")

    # Deduplicate: one event per URL, the last one seen wins
    kept = list({event["titleUrl"]: event for event in kept}.values())
    print(f"Found {len(kept)} videos to watch after de-duplication")

    # Sort
    kept.sort(key=lambda x: x["time"])

    opts = {
        "mark_watched": True,
        "simulate": True,
        "quiet": True,
        "cookiesfrombrowser": ("firefox",),
    }
    with yt_dlp.YoutubeDL(opts) as ydl:
        for i, event in enumerate(kept):
            t = event["time"]
            url = event["titleUrl"]
            # The title is only used for the progress line. The previous
            # fixed 8-character slice matches the length of "Watched ";
            # strip that prefix only when it is really there.
            title = event.get("title", "")
            if title.startswith("Watched "):
                title = title[len("Watched "):]

            marker = os.path.join(
                "done", hashlib.sha256(url.encode("utf-8")).hexdigest()
            )
            print(
                f"{i}/{len(kept)} \t {t} \t {url} \t {title} ... ", end="", flush=True
            )
            if os.path.exists(marker):
                print(" -> Already done")
                continue

            succeeded = True
            try:
                # download() is documented to take a list of URLs.
                ydl.download([url])
            except yt_dlp.utils.DownloadError:
                succeeded = False
                print(" -> DownloadError")

            # Write the marker before sleeping: an interruption during the
            # pause must not cause this video to be replayed on resume.
            # NOTE(review): failures are marked as done too, so deleted or
            # unlisted videos are not retried forever. A transient network
            # failure is therefore also skipped on the next run; delete the
            # marker file to retry it.
            with open(marker, "w"):
                pass

            if succeeded:
                print(" -> Sleeping ... ", end="", flush=True)
                time.sleep(3 + random.random() * 8)
                print(" -> Done")
# Run the replay only when this file is executed as a script.
if __name__ == "__main__":
    main()
@ZiClaud
Copy link

ZiClaud commented Mar 2, 2024

I found a small bug, if there's a network error, it'll write the files in the 'done' folder, without actually restoring the history
Apart from that, good code, easy to both understand and use

@guillaumematheron
Copy link
Author

I found a small bug, if there's a network error, it'll write the files in the 'done' folder, without actually restoring the history Apart from that, good code, easy to both understand and use

Ah yes, but the try/except is pretty necessary because many videos in my history had been deleted/unlisted. I suppose the except could be finer, and fail/retry on networkerror

@oSumAtrIX
Copy link

Hi, I'd like to fork this script into ReVanced to inform other users about it. Is this fine? If so, before doing so, I'd like to know if after restoring the history this way has any noticeable or beneficial effect. Just having the videos in the watch history may not have a real effect. When you watch them, in what order, how long and many more parameters are not considered and may have a negative effect.

@oSumAtrIX
Copy link

https://chat.openai.com/share/53aab2df-bc69-4fee-badf-9513761dcafb

import hashlib
import json
import os
import random
import time
from typing import Any, Dict, List, Optional

import yt_dlp

# Constants
# JSON file with the watch history, opened and parsed by main().
WATCH_HISTORY_FILE = "watch-history.json"
# Directory holding one empty marker file per processed video URL.
DONE_DIRECTORY = "done"
# Events whose "time" string sorts before this value are skipped.
RESUME_TIMESTAMP = "2022-08-17T11:50:00.000Z"
# Lower and upper bound, in seconds, of the random pause between two videos.
SLEEP_MIN = 3
SLEEP_MAX = 11

def main():
    """
    Replay the YouTube watch history stored in ``WATCH_HISTORY_FILE``.

    The events are filtered, de-duplicated by URL, sorted by their "time"
    value and then handed to ``download_videos``.

    Raises:
        FileExistsError: If ``DONE_DIRECTORY`` exists but is not a directory.
    """
    # exist_ok tolerates a directory left over from a previous run, but still
    # fails early when the path is occupied by a regular file.
    os.makedirs(DONE_DIRECTORY, exist_ok=True)

    # Load watch history data
    with open(WATCH_HISTORY_FILE, encoding="utf8") as f:
        data = json.load(f)

    # Filter and keep relevant video events
    kept: List[Dict[str, Any]] = filter_video_events(data)

    print(f"Found {len(kept)} videos to watch")

    # Deduplicate video events based on URL
    kept = deduplicate_videos(kept)

    print(f"Found {len(kept)} videos to watch after de-duplication")

    # Sort videos based on timestamp (compared as strings)
    kept.sort(key=lambda x: x["time"])

    # Replay the videos
    download_videos(kept)


def filter_video_events(
    data: List[Dict[str, Any]],
    resume_timestamp: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Filter video events from the provided data.

    An event is kept when all of the following hold:

    * its "header" is exactly "YouTube",
    * the first entry of its "details" list (if any) is not named
      "From Google Ads",
    * it has a "titleUrl",
    * it has a "time" that does not sort before the cutoff.

    Args:
        data: List of events.
        resume_timestamp: Cutoff compared against each event's "time" as a
            plain string. Defaults to ``RESUME_TIMESTAMP`` when omitted.

    Returns:
        List of filtered video events, in their original order.
    """
    if resume_timestamp is None:
        resume_timestamp = RESUME_TIMESTAMP

    filtered_events: List[Dict[str, Any]] = []

    for event in data:
        if event.get("header") != "YouTube":
            continue

        # "details" can be absent or an empty list; neither marks an ad.
        details = event.get("details")
        if details and details[0].get("name") == "From Google Ads":
            continue

        if "titleUrl" not in event:
            continue

        # Events without a timestamp cannot be ordered later on, so drop
        # them together with those before the cutoff.
        when = event.get("time")
        if when is None or when < resume_timestamp:
            continue

        filtered_events.append(event)

    return filtered_events


def deduplicate_videos(events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collapse events that share the same "titleUrl" into a single entry.

    When a URL occurs more than once, the last event carrying it is the one
    that survives; it takes the position at which the URL first appeared.

    Args:
        events: Video events, each with a "titleUrl" key.

    Returns:
        A new list with exactly one event per distinct URL.
    """
    by_url: Dict[str, Dict[str, Any]] = {}
    for candidate in events:
        # Later occurrences overwrite the value but keep the original slot.
        by_url[candidate["titleUrl"]] = candidate
    return [*by_url.values()]


def download_videos(events: List[Dict[str, Any]]):
    """
    Replay the provided events through yt-dlp so they are marked as watched.

    yt-dlp is run with ``simulate`` and ``mark_watched`` enabled and with
    cookies taken from Firefox. For every processed URL an empty marker file
    named after the SHA-256 of the URL is created in ``DONE_DIRECTORY``;
    URLs that already have a marker are skipped, which makes the run
    resumable.

    Args:
        events: Video events to replay. Each needs a "time" and a "titleUrl"
            entry; "title" is optional and only used for the progress line.
    """
    opts = {
        "mark_watched": True,
        "simulate": True,
        "quiet": True,
        "cookiesfrombrowser": ("firefox",),
    }
    watched_prefix = "Watched "

    with yt_dlp.YoutubeDL(opts) as ydl:
        for i, event in enumerate(events):
            timestamp = event["time"]
            url = event["titleUrl"]

            # The previous fixed 8-character slice matches the length of
            # "Watched "; strip that prefix only when it is really there so
            # other titles are not truncated.
            title = event.get("title", "")
            if title.startswith(watched_prefix):
                title = title[len(watched_prefix):]

            # Generate marker file path
            marker_path = os.path.join(
                DONE_DIRECTORY, hashlib.sha256(url.encode("utf-8")).hexdigest()
            )

            print(f"{i}/{len(events)} \t {timestamp} \t {url} \t {title} ... ", end="", flush=True)

            # Check if video was already processed
            if os.path.exists(marker_path):
                print(" -> Already done")
                continue

            succeeded = True
            try:
                # download() is documented to take a list of URLs.
                ydl.download([url])
            except yt_dlp.utils.DownloadError:
                succeeded = False
                print(" -> DownloadError")

            # Create the marker before sleeping: an interruption during the
            # pause must not cause this video to be replayed on resume.
            # NOTE(review): failures are marked as done too, so deleted or
            # unlisted videos are not retried forever. A transient network
            # failure is therefore also skipped on the next run; delete the
            # marker file to retry it.
            with open(marker_path, "w"):
                pass

            if succeeded:
                print(" -> Sleeping ... ", end="", flush=True)
                time.sleep(SLEEP_MIN + random.random() * (SLEEP_MAX - SLEEP_MIN))
                print(" -> Done")


# Run the replay only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

@guillaumematheron
Copy link
Author

Hi, I'd like to fork this script into ReVanced to inform other users about it. Is this fine? If so, before doing so, I'd like to know if after restoring the history this way has any noticeable or beneficial effect. Just having the videos in the watch history may not have a real effect. When you watch them, in what order, how long and many more parameters are not considered and may have a negative effect.

Sure, consider this CC0 or public domain.

The main intended effect was attained for me : watched videos are not suggested anymore. I did not notice it decrease the quality of my recommendations, but I mostly stick to channels I subscribe to so my recommendations are pretty easy to predict.

@oSumAtrIX
Copy link

Gotcha thanks

@seaque
Copy link

seaque commented Mar 7, 2024

Not ideal at all: I tried a couple dozen videos and all of them fall on the current date, not the date they were originally watched.

@guillaumematheron
Copy link
Author

Yes, as far as I know there is no way to backdate the watch date of videos unfortunately. I'll add a comment making that clear.

@quangvux2001
Copy link

quangvux2001 commented Mar 10, 2024

It seems that my history JSON only contains recent history; it does not contain anything older than 28 February 2024. Can anyone explain this, or does anyone have the same issue? Thanks
image

@seaque
Copy link

seaque commented Mar 15, 2024

an update, the history entries started coming back for a lot of people. It seems mine is mostly fixed.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment