Skip to content

Instantly share code, notes, and snippets.

@rebane2001
Last active December 25, 2023 16:06
Show Gist options
  • Save rebane2001/c77558b22de1b75a240ab522bd79f297 to your computer and use it in GitHub Desktop.
Save rebane2001/c77558b22de1b75a240ab522bd79f297 to your computer and use it in GitHub Desktop.
import json
import sys
# With no input files on the command line there is nothing to process:
# print the help text and exit with status 2.
if not sys.argv[1:]:
    usage_lines = (
        "infojsonredact - A simple script to redact private information from ytdl info.json files",
        "Output will be saved in info.json.redacted files",
        "Usage: infojsonredact.py file1.info.json [file2.info.json, file3.info.json...]",
    )
    for usage_line in usage_lines:
        print(usage_line)
    sys.exit(2)
# Keys whose values redactRecursively() overwrites with "[REDACTED]".
# A value under one of these keys is replaced as a whole and is not descended into.
redacted = ["url","manifest_url","fragment_base_url","fragments","http_headers","User-Agent","Accept-Charset","Accept","Accept-Encoding","Accept-Language","player_url","playlist","playlist_id","playlist_title","playlist_uploader","playlist_uploader_id","playlist_index","thumbnail","_filename","downloader_options","http_chunk_size","initialization_url","annotations", "playlist_count","version","_version","repository","release_git_head","filesize_approx","_format_sort_fields"]
# Keys that are left in place; their values are recursed into by redactRecursively().
# Any string key that appears in neither list makes redactRecursively() raise KeyError,
# so new keys have to be added to one of the two lists explicitly.
# NOTE(review): the many short entries (e.g. "en", "pt-BR", "zh-Hans") look like
# language codes used as dict keys, presumably under subtitles/automatic_captions -
# confirm. Several entries are listed more than once; duplicates do not affect
# membership tests.
allowed = ["id","uploader","uploader_id","uploader_url","channel_id","channel_url","upload_date","license","creator","title","alt_title","thumbnails","width","height","resolution","description","categories","tags","subtitles","automatic_captions","duration","age_limit","chapters","webpage_url","view_count","like_count","dislike_count","average_rating","formats","ext","format_note","acodec","abr","container","format_id","tbr","asr","fps","language","filesize","vcodec","path","protocol","format","is_live","start_time","end_time","series","season_number","episode_number","track","artist","album","release_date","release_year","extractor","webpage_url_basename","extractor_key","n_entries","display_id","vbr","stretched_ratio","fulltitle","quality","ar","bs","bg","ca","zh","zh-TW","hr","cs","da","nl","en","fi","fr","de","el","iw","hi","hu","id","it","ja","ko","no","pl","pt","pt-BR","pt-PT","ro","ru","sr-Cyrl","sr-Latn","sk","es","sv","th","tr","vi","subscriber_count","live_chat","video_id","en-US","en-UK","en-GB","fr-FR","de-DE","hi-Latn","es-MX","es-419","es-US","zh-CN","preference","segment_urls","af","sq","am","ar","hy","az","bn","eu","be","bs","bg","my","ca","ceb","zh-Hans","zh-Hant","co","hr","cs","da","nl","en","eo","et","fil","fi","fr","gl","ka","de","el","gu","ht","ha","haw","iw","hi","hmn","hu","is","ig","id","ga","it","ja","jv","kn","kk","km","rw","ko","ku","ky","lo","la","lv","lt","lb","mk","mg","ms","ml","mt","mi","mr","mn","ne","no","ny","or","ps","fa","pl","pt","pa","ro","ru","sm","gd","sr","sn","sd","si","sk","sl","so","st","es","su","sw","sv","tg","ta","tt","te","th","tr","tk","uk","ur","ug","uz","vi","cy","fy","xh","yi","yo","zu","rows","columns","audio_ext","video_ext","source_preference","audio_channels","playable_in_embed","om","qu","ts","_type","was_live","webpage_url_domain","ti","sa","nso","name","ln","lg","kri","gn","en-orig","dynamic_range","dv","channel","channel_follower_count","comment_count","duration_string","bho","ay","as","aspect_ratio","ak"
,"epoch","ee","availability","live_status","has_drm","language_preference","und","channel_is_verified"]
def redactRecursively(data, allowed, redacted):
    """Redact private values from a parsed info.json structure, in place.

    Dicts and lists are walked depth-first. A value stored under a key found
    in ``redacted`` is replaced with the string ``"[REDACTED]"`` and is not
    descended into. A value stored under a key found in ``allowed`` (or at an
    integer list index) is kept and descended into.

    Args:
        data: A parsed JSON value. Dicts and lists are mutated in place; any
            other value is left untouched.
        allowed: Iterable of hashable keys whose values are kept.
        redacted: Iterable of hashable keys whose values are replaced.

    Returns:
        None. ``data`` is modified in place.

    Raises:
        KeyError: If a non-integer key is in neither ``redacted`` nor
            ``allowed``, so unknown fields are never passed through silently.
    """
    # Build the lookup sets once per top-level call: every key in the document
    # is tested against both collections, and the key lists are long, so a
    # linear scan per key adds up quickly.
    allowed_keys = frozenset(allowed)
    redacted_keys = frozenset(redacted)

    def _walk(node):
        # Walk one container, redacting or descending per entry.
        if isinstance(node, dict):
            entries = node.items()
        elif isinstance(node, list):
            entries = enumerate(node)
        else:
            # Scalars (str, int, float, bool, None) carry no keys to inspect.
            return
        for key, value in entries:
            if key in redacted_keys:
                # Replacing the value of an existing key does not resize the
                # dict, so this is safe while iterating over items().
                node[key] = "[REDACTED]"
                continue
            # Integer keys are list indices and never need to be whitelisted.
            if key not in allowed_keys and not isinstance(key, int):
                raise KeyError("Key", key, "not found in both the redacted and allowed keysets")
            _walk(value)

    _walk(data)
# Process every file named on the command line. The redacted copy is written
# next to the input with ".redacted" appended to its name.
for source_path in sys.argv[1:]:
    print(source_path)

    # Parse the original info.json.
    with open(source_path, "r", encoding="utf-8") as source_file:
        infojson = json.load(source_file)

    # Redact in place; raises KeyError on any key missing from both lists.
    redactRecursively(infojson, allowed, redacted)

    target_path = source_path + ".redacted"
    with open(target_path, "w", encoding="utf-8") as target_file:
        json.dump(infojson, target_file)
@rebane2001
Copy link
Author

I plan on adding that, as well as something to deal with language keys so they don't all need to be in the list one-by-one.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment