-
-
Save guillaumematheron/89f52ffd274ff3ac99f6dc0249bcc331 to your computer and use it in GitHub Desktop.
""" | |
Simulate re-watching all your videos from a given date onwards. | |
If part of your youtube history has been deeleted but is still visible from 'my activity', then | |
you can export it using google takeout, and use this script to simulate watching all these videos | |
in order, in a relatively short time. | |
Note that the history will not be backdated. | |
License: CC0 / Public domain | |
""" | |
import hashlib | |
import json | |
import random | |
import os | |
import time | |
from typing import Any, Dict, List | |
import yt_dlp | |
RESUME = "2022-08-17T11:50:00.000Z" | |
def main(): | |
# An empty file will be created in the 'done' directory to keep track | |
# of videos that were already watched, so that this script can be | |
# interrupted and resumed. | |
try: | |
os.makedirs("done") | |
except FileExistsError: | |
pass | |
# This file should be generated using google takeout | |
with open("watch-history.json", encoding="utf8") as f: | |
data = json.load(f) | |
kept: List[Dict[str, Any]] = [] | |
for event in data: | |
# Disregard youtube music | |
if event["header"] != "YouTube": | |
continue | |
if "details" in event and event["details"][0]["name"] == "From Google Ads": | |
continue | |
if event["time"] < RESUME: | |
continue | |
if "titleUrl" not in event: | |
continue | |
kept.append(event) | |
print(f"Found {len(kept)} videos to watch") | |
# Deduplicate | |
kept = [event for event in {event["titleUrl"]: event for event in kept}.values()] | |
print(f"Found {len(kept)} videos to watch after de-duplication") | |
# Sort | |
kept.sort(key=lambda x: x["time"]) | |
opts = { | |
"mark_watched": True, | |
"simulate": True, | |
"quiet": True, | |
"cookiesfrombrowser": ("firefox",), | |
} | |
with yt_dlp.YoutubeDL(opts) as ydl: | |
for i, event in enumerate(kept): | |
t = event["time"] | |
url = event["titleUrl"] | |
title = event["title"][8:] | |
m = hashlib.sha256() | |
m.update(url.encode("utf-8")) | |
marker = "done/" + m.hexdigest() | |
print( | |
f"{i}/{len(kept)} \t {t} \t {url} \t {title} ... ", end="", flush=True | |
) | |
try: | |
with open(marker, "r"): | |
pass | |
print(" -> Already done") | |
continue | |
except FileNotFoundError: | |
pass | |
try: | |
ydl.download(url) | |
print(" -> Sleeping ... ", end="", flush=True) | |
time.sleep(3 + random.random() * 8) | |
print(" -> Done") | |
except yt_dlp.utils.DownloadError: | |
print(" -> DownloadError") | |
with open(marker, "w"): | |
pass | |
if __name__ == "__main__": | |
main() |
I found a small bug: if there's a network error, it'll write the files in the 'done' folder without actually restoring the history. Apart from that, good code — easy to both understand and use.
Ah yes, but the try/except is pretty necessary because many videos in my history had been deleted/unlisted. I suppose the except could be finer, and fail/retry on a network error.
Hi, I'd like to fork this script into ReVanced to inform other users about it. Is this fine? If so, before doing so, I'd like to know if after restoring the history this way has any noticeable or beneficial effect. Just having the videos in the watch history may not have a real effect. When you watch them, in what order, how long and many more parameters are not considered and may have a negative effect.
https://chat.openai.com/share/53aab2df-bc69-4fee-badf-9513761dcafb
import hashlib
import json
import random
import os
import time
from typing import Any, Dict, List
import yt_dlp
# Constants
# Google Takeout export of the YouTube watch history.
WATCH_HISTORY_FILE = "watch-history.json"
# Directory holding one empty marker file per already-processed URL.
DONE_DIRECTORY = "done"
# Only history entries at or after this ISO-8601 timestamp are replayed.
RESUME_TIMESTAMP = "2022-08-17T11:50:00.000Z"
# Bounds (in seconds) of the random pause between videos.
SLEEP_MIN = 3
SLEEP_MAX = 11
def main():
    """
    Entry point: load the watch history, narrow it down to the relevant
    video events, and replay them through yt-dlp in chronological order.
    """
    # Make sure the marker directory exists; ignore it if it already does.
    try:
        os.makedirs(DONE_DIRECTORY)
    except FileExistsError:
        pass

    # Read the Google Takeout history export.
    with open(WATCH_HISTORY_FILE, encoding="utf8") as history_file:
        events = json.load(history_file)

    # Keep only real YouTube watch events after the resume timestamp.
    videos: List[Dict[str, Any]] = filter_video_events(events)
    print(f"Found {len(videos)} videos to watch")

    # Collapse repeated views of the same URL into one entry.
    videos = deduplicate_videos(videos)
    print(f"Found {len(videos)} videos to watch after de-duplication")

    # Replay oldest first.
    videos.sort(key=lambda event: event["time"])

    download_videos(videos)
def filter_video_events(
    data: List[Dict[str, Any]], resume_timestamp: "str | None" = None
) -> List[Dict[str, Any]]:
    """
    Keep only the YouTube watch events worth replaying.

    An event is kept when it comes from YouTube proper (not YouTube Music),
    is not an ad impression, happened at or after the resume cutoff, and
    still carries a video URL.

    Args:
        data: List of raw events from the Takeout export.
        resume_timestamp: ISO-8601 cutoff; events strictly before it are
            dropped. Defaults to the module-level RESUME_TIMESTAMP.
    Returns:
        List of filtered video events, in their original order.
    """
    if resume_timestamp is None:
        resume_timestamp = RESUME_TIMESTAMP
    filtered_events: List[Dict[str, Any]] = []
    for event in data:
        if event.get("header") != "YouTube":
            continue
        # Ad impressions are recorded alongside real views; skip them.
        if "details" in event and event["details"][0]["name"] == "From Google Ads":
            continue
        # ISO-8601 timestamps compare chronologically as plain strings.
        if event["time"] < resume_timestamp:
            continue
        # Entries without a URL (e.g. deleted videos) cannot be replayed.
        if "titleUrl" not in event:
            continue
        filtered_events.append(event)
    return filtered_events
def deduplicate_videos(events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collapse events that share the same video URL.

    The first occurrence of each URL fixes the output position; when a URL
    appears several times, the last event encountered wins.

    Args:
        events: List of video events.
    Returns:
        List of events with one entry per distinct "titleUrl".
    """
    by_url: Dict[str, Dict[str, Any]] = {}
    for event in events:
        by_url[event["titleUrl"]] = event
    return list(by_url.values())
def download_videos(events: List[Dict[str, Any]]):
    """
    Mark every event's video as watched via yt-dlp, resumably.

    A sha256-of-URL marker file in DONE_DIRECTORY records each processed
    video, so interrupting and re-running the script skips completed work.
    A marker is written even when yt-dlp reports a DownloadError, so failed
    (e.g. deleted) videos are not retried on the next run.

    Args:
        events: List of video events to download.
    """
    options = {
        "mark_watched": True,  # flag the video as watched
        "simulate": True,  # no media is actually downloaded
        "quiet": True,
        "cookiesfrombrowser": ("firefox",),  # reuse the logged-in session
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        total = len(events)
        for index, event in enumerate(events):
            watched_at = event["time"]
            video_url = event["titleUrl"]
            video_title = event["title"][8:]  # drop the "Watched " prefix

            # Marker path derived from the URL hash.
            digest = hashlib.sha256(video_url.encode("utf-8")).hexdigest()
            marker_path = os.path.join(DONE_DIRECTORY, digest)

            print(
                f"{index}/{total} \t {watched_at} \t {video_url} \t {video_title} ... ",
                end="",
                flush=True,
            )

            # Skip anything a previous run already handled.
            if os.path.exists(marker_path):
                print(" -> Already done")
                continue

            try:
                downloader.download(video_url)
                print(" -> Sleeping ... ", end="", flush=True)
                time.sleep(SLEEP_MIN + random.random() * (SLEEP_MAX - SLEEP_MIN))
                print(" -> Done")
            except yt_dlp.utils.DownloadError:
                print(" -> DownloadError")

            # Record this URL as processed, success or not.
            with open(marker_path, "w"):
                pass
if __name__ == "__main__":
main()
Hi, I'd like to fork this script into ReVanced to inform other users about it. Is this fine? If so, before doing so, I'd like to know if after restoring the history this way has any noticeable or beneficial effect. Just having the videos in the watch history may not have a real effect. When you watch them, in what order, how long and many more parameters are not considered and may have a negative effect.
Sure, consider this CC0 or public domain.
The main intended effect was attained for me : watched videos are not suggested anymore. I did not notice it decrease the quality of my recommendations, but I mostly stick to channels I subscribe to so my recommendations are pretty easy to predict.
Gotcha thanks
Not ideal at all: I tried a couple dozen videos and all of them fall under the current date, not the date on which you actually watched them.
Yes, as far as I know there is no way to backdate the watch date of videos unfortunately. I'll add a comment making that clear.
an update, the history entries started coming back for a lot of people. It seems mine is mostly fixed.
I found a small bug: if there's a network error, it'll write the files in the 'done' folder without actually restoring the history.
Apart from that, good code, easy to both understand and use