rebane2001/infojsonredact.py

## infojsonredact.py
import json
import sys

# No input files were named on the command line: describe the tool and stop.
if not sys.argv[1:]:
    for usage_line in (
        "infojsonredact - A simple script to redact private information from ytdl info.json files",
        "Output will be saved in info.json.redacted files",
        "Usage: infojsonredact.py file1.info.json [file2.info.json, file3.info.json...]",
    ):
        print(usage_line)
    # Nothing can be processed, so leave with exit status 2.
    sys.exit(2)

# Keys whose values redactRecursively() overwrites with "[REDACTED]". The value of such
# a key is replaced as a whole and is not inspected any further.
redacted = ["url","manifest_url","fragment_base_url","fragments","http_headers","User-Agent","Accept-Charset","Accept","Accept-Encoding","Accept-Language","player_url","playlist","playlist_id","playlist_title","playlist_uploader","playlist_uploader_id","playlist_index","thumbnail","_filename","downloader_options","http_chunk_size","initialization_url","annotations", "playlist_count","version","_version","repository","release_git_head","filesize_approx","_format_sort_fields"]
# Keys that are left in place; their values are walked recursively. A non-integer key
# that appears in neither list makes redactRecursively() raise KeyError, so every new
# info.json field has to be classified in one of the two lists.
# The many short entries are presumably subtitle/caption language codes - TODO confirm.
# Some entries (e.g. "ar", "bs") are listed twice; that is harmless here because the
# list is only used for membership tests in this file.
allowed = ["id","uploader","uploader_id","uploader_url","channel_id","channel_url","upload_date","license","creator","title","alt_title","thumbnails","width","height","resolution","description","categories","tags","subtitles","automatic_captions","duration","age_limit","chapters","webpage_url","view_count","like_count","dislike_count","average_rating","formats","ext","format_note","acodec","abr","container","format_id","tbr","asr","fps","language","filesize","vcodec","path","protocol","format","is_live","start_time","end_time","series","season_number","episode_number","track","artist","album","release_date","release_year","extractor","webpage_url_basename","extractor_key","n_entries","display_id","vbr","stretched_ratio","fulltitle","quality","ar","bs","bg","ca","zh","zh-TW","hr","cs","da","nl","en","fi","fr","de","el","iw","hi","hu","id","it","ja","ko","no","pl","pt","pt-BR","pt-PT","ro","ru","sr-Cyrl","sr-Latn","sk","es","sv","th","tr","vi","subscriber_count","live_chat","video_id","en-US","en-UK","en-GB","fr-FR","de-DE","hi-Latn","es-MX","es-419","es-US","zh-CN","preference","segment_urls","af","sq","am","ar","hy","az","bn","eu","be","bs","bg","my","ca","ceb","zh-Hans","zh-Hant","co","hr","cs","da","nl","en","eo","et","fil","fi","fr","gl","ka","de","el","gu","ht","ha","haw","iw","hi","hmn","hu","is","ig","id","ga","it","ja","jv","kn","kk","km","rw","ko","ku","ky","lo","la","lv","lt","lb","mk","mg","ms","ml","mt","mi","mr","mn","ne","no","ny","or","ps","fa","pl","pt","pa","ro","ru","sm","gd","sr","sn","sd","si","sk","sl","so","st","es","su","sw","sv","tg","ta","tt","te","th","tr","tk","uk","ur","ug","uz","vi","cy","fy","xh","yi","yo","zu","rows","columns","audio_ext","video_ext","source_preference","audio_channels","playable_in_embed","om","qu","ts","_type","was_live","webpage_url_domain","ti","sa","nso","name","ln","lg","kri","gn","en-orig","dynamic_range","dv","channel","channel_follower_count","comment_count","duration_string","bho","ay","as","aspect_ratio","ak"
,"epoch","ee","availability","live_status","has_drm","language_preference","und","channel_is_verified"]


def redactRecursively(data, allowed, redacted):
    """Redact values in ``data`` in place, descending into nested dicts and lists.

    Anything that is neither a dict nor a list is left untouched. For a dict the
    keys are checked; for a list the integer positions play the role of keys.

    Args:
        data: The value to scan; dicts and lists are modified in place.
        allowed: Container of keys whose values are kept and scanned recursively.
        redacted: Container of keys whose values are replaced by "[REDACTED]".

    Raises:
        KeyError: A non-integer key is in neither ``redacted`` nor ``allowed``.
    """
    if not isinstance(data, (dict, list)):
        return

    if isinstance(data, dict):
        entries = data.items()
    else:
        entries = enumerate(data)

    for key, value in entries:
        if key in redacted:
            # The whole value is replaced; whatever it contained is not inspected.
            data[key] = "[REDACTED]"
        elif key in allowed or isinstance(key, int):
            # List positions (ints) need no allow-list entry of their own.
            redactRecursively(value, allowed, redacted)
        else:
            raise KeyError("Key", key, "not found in both the redacted and allowed keysets")

# Handle each file named on the command line in turn.
for filename in sys.argv[1:]:
    print(filename)
    # Read the whole document, then close the input before anything is written.
    with open(filename, encoding="utf-8") as f:
        infojson = json.loads(f.read())
    # Redaction happens in place and raises on an unclassified key, in which case
    # the ".redacted" output below is never opened for this file.
    redactRecursively(infojson, allowed, redacted)
    with open(filename + ".redacted", "w", encoding="utf-8") as f:
        json.dump(infojson, f)
	import json
	import sys

	# No input files were named on the command line: describe the tool and stop.
	if len(sys.argv) < 2:
	    print("infojsonredact - A simple script to redact private information from ytdl info.json files")
	    print("Output will be saved in info.json.redacted files")
	    print("Usage: infojsonredact.py file1.info.json [file2.info.json, file3.info.json...]")
	    # Nothing can be processed, so leave with exit status 2.
	    sys.exit(2)

	# Keys whose values redactRecursively() overwrites with "[REDACTED]".
	redacted = ["url","manifest_url","fragment_base_url","fragments","http_headers","User-Agent","Accept-Charset","Accept","Accept-Encoding","Accept-Language","player_url","playlist","playlist_id","playlist_title","playlist_uploader","playlist_uploader_id","playlist_index","thumbnail","_filename","downloader_options","http_chunk_size","initialization_url","annotations", "playlist_count","version","_version","repository","release_git_head","filesize_approx","_format_sort_fields"]
	# Keys that are left in place; their values are walked recursively. A non-integer
	# key found in neither list makes redactRecursively() raise KeyError.
	# The "ak" entry was previously split across two lines inside its quotes, which
	# left an unterminated string literal; it is a single complete literal again.
	allowed = ["id","uploader","uploader_id","uploader_url","channel_id","channel_url","upload_date","license","creator","title","alt_title","thumbnails","width","height","resolution","description","categories","tags","subtitles","automatic_captions","duration","age_limit","chapters","webpage_url","view_count","like_count","dislike_count","average_rating","formats","ext","format_note","acodec","abr","container","format_id","tbr","asr","fps","language","filesize","vcodec","path","protocol","format","is_live","start_time","end_time","series","season_number","episode_number","track","artist","album","release_date","release_year","extractor","webpage_url_basename","extractor_key","n_entries","display_id","vbr","stretched_ratio","fulltitle","quality","ar","bs","bg","ca","zh","zh-TW","hr","cs","da","nl","en","fi","fr","de","el","iw","hi","hu","id","it","ja","ko","no","pl","pt","pt-BR","pt-PT","ro","ru","sr-Cyrl","sr-Latn","sk","es","sv","th","tr","vi","subscriber_count","live_chat","video_id","en-US","en-UK","en-GB","fr-FR","de-DE","hi-Latn","es-MX","es-419","es-US","zh-CN","preference","segment_urls","af","sq","am","ar","hy","az","bn","eu","be","bs","bg","my","ca","ceb","zh-Hans","zh-Hant","co","hr","cs","da","nl","en","eo","et","fil","fi","fr","gl","ka","de","el","gu","ht","ha","haw","iw","hi","hmn","hu","is","ig","id","ga","it","ja","jv","kn","kk","km","rw","ko","ku","ky","lo","la","lv","lt","lb","mk","mg","ms","ml","mt","mi","mr","mn","ne","no","ny","or","ps","fa","pl","pt","pa","ro","ru","sm","gd","sr","sn","sd","si","sk","sl","so","st","es","su","sw","sv","tg","ta","tt","te","th","tr","tk","uk","ur","ug","uz","vi","cy","fy","xh","yi","yo","zu","rows","columns","audio_ext","video_ext","source_preference","audio_channels","playable_in_embed","om","qu","ts","_type","was_live","webpage_url_domain","ti","sa","nso","name","ln","lg","kri","gn","en-orig","dynamic_range","dv","channel","channel_follower_count","comment_count","duration_string","bho","ay","as","aspect_ratio","ak"
	,"epoch","ee","availability","live_status","has_drm","language_preference","und","channel_is_verified"]



	def redactRecursively(data, allowed, redacted):
	    """Redact values in ``data`` in place, descending into nested dicts and lists.

	    Values that are neither dict nor list are ignored. Dict keys are checked
	    against the two key sets; list positions (ints) are always descended into
	    unless the position itself is listed in ``redacted``.

	    Args:
	        data: The value to scan; dicts and lists are modified in place.
	        allowed: Container of keys whose values are kept and scanned recursively.
	        redacted: Container of keys whose values are replaced by "[REDACTED]".

	    Raises:
	        KeyError: A non-integer key is in neither ``redacted`` nor ``allowed``.
	    """
	    if isinstance(data, (dict, list)):
	        for k, v in (data.items() if isinstance(data, dict) else enumerate(data)):
	            if k in redacted:
	                # Replace the whole value; its contents are not inspected.
	                data[k] = "[REDACTED]"
	                continue
	            if k not in allowed and not isinstance(k, int):
	                raise KeyError("Key", k, "not found in both the redacted and allowed keysets")
	            redactRecursively(v, allowed, redacted)

	# Handle each file named on the command line in turn.
	for filename in sys.argv[1:]:
	    print(filename)
	    with open(filename,"r",encoding="utf-8") as f:
	        infojson = json.load(f)
	    # Redaction happens in place and raises on an unclassified key, in which
	    # case the ".redacted" output below is never opened for this file.
	    redactRecursively(infojson, allowed, redacted)
	    with open(filename + ".redacted","w",encoding="utf-8") as f:
	        json.dump(infojson,f)