Last active
October 6, 2023 15:59
-
-
Save lamusmaser/837fb58f73ea0cad784a33497932e0dd to your computer and use it in GitHub Desktop.
Function to generate `.info.json` from `ffmpeg` details.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import json
import os
import sys
import time

import ffmpeg
def _read_channel_id(file_dest):
    """Return the last non-blank line of ``<file_dest>/channel.id``, or ``""``."""
    ch_id = ""
    try:
        with open(f"{file_dest}/channel.id") as file:
            for line in file:
                stripped = line.strip()
                # Skip blank lines so a trailing newline cannot wipe the ID.
                if stripped:
                    ch_id = stripped
    except OSError as E:
        # Best effort: a missing or unreadable file just means no channel ID.
        print(
            f"Unable to open channel.id file at the destination: {file_dest}",
            "\nNot including a channel ID.",
        )
        print(E)
    return ch_id


def create_info_json(video, file_dest, reason="private"):
    """Write a yt-dlp style ``.info.json`` built from a video's probe data.

    The video is inspected with ``ffmpeg.probe``; fields that cannot be
    recovered from the file itself (counts, thumbnails, bitrates, ...) are
    filled with fixed placeholder values. The result is written to
    ``<file_dest>/<video name without extension>.info.json``.

    Args:
        video: Path of the video file to probe.
        file_dest: Directory that receives the ``.info.json`` file. If it
            contains a ``channel.id`` file, its last non-blank line is used
            as the channel/uploader ID.
        reason: Stored as ``availability`` and added to ``categories`` and
            ``tags``.

    Returns:
        ``0`` once the file has been written.

    Raises:
        SystemExit: With code 1 when ``ffmpeg.probe`` fails.
        KeyError: When an expected format tag or an audio/video stream is
            missing from the probe result.
        IndexError: When the ``comment`` tag does not contain ``?v=``.
    """
    print("Attempting to generate .info.json details from video metadata.")
    info_ext = "info.json"
    info_file = f"{os.path.splitext(os.path.basename(video))[0]}.{info_ext}"
    try:
        vid_ff = ffmpeg.probe(f"{video}")
    except ffmpeg.Error as e:
        print(e.stderr.decode(), file=sys.stderr)
        sys.exit(1)

    # For each codec type keep the stream with the lowest probe index.
    # The stream dict itself is stored, so later lookups do not depend on
    # the index value matching the stream's position in the list.
    primary_stream = {}
    for stream in vid_ff["streams"]:
        codec_type = stream["codec_type"]
        current = primary_stream.get(codec_type)
        if current is None or int(stream["index"]) < int(current["index"]):
            primary_stream[codec_type] = stream

    tags = vid_ff["format"]["tags"]

    info_json = {}
    info_json["id"] = tags["comment"].split("?v=")[1]
    info_json["title"] = tags["title"]
    info_json["formats"] = []

    audio_stream = primary_stream["audio"]
    video_stream = primary_stream["video"]

    vid_format = {}
    vid_format["format_id"] = tags["major_brand"]
    vid_format["format_note"] = f"{tags['major_brand']}.{tags['minor_version']}"
    vid_format["ext"] = os.path.splitext(video)[-1]
    vid_format["protocol"] = tags["compatible_brands"]
    vid_format["acodec"] = audio_stream["codec_name"]
    vid_format["vcodec"] = video_stream["codec_name"]
    vid_format["url"] = tags["comment"]
    vid_format["width"] = video_stream["coded_width"]
    vid_format["height"] = video_stream["coded_height"]
    vid_format["fps"] = 0
    # Fragments are not needed for private or non-existent videos
    vid_format["fragments"] = []
    vid_format["audio_ext"] = audio_stream["codec_long_name"]
    vid_format["video_ext"] = video_stream["codec_long_name"]
    vid_format["format"] = tags["compatible_brands"]
    vid_format["resolution"] = f"{vid_format['width']}x{vid_format['height']}"
    # Read the aspect ratio from the video stream (stream 0 may be audio)
    # and fall back to width/height when the field is absent or malformed.
    try:
        ratio1, ratio2 = video_stream.get("display_aspect_ratio", "").split(
            ":", 1
        )
        aspect_ratio = round(float(ratio1) / float(ratio2), 2)
    except (ValueError, ZeroDivisionError):
        width = float(vid_format["width"])
        height = float(vid_format["height"])
        aspect_ratio = round(width / height, 2) if height else 0
    vid_format["aspect_ratio"] = aspect_ratio
    vid_format["http_headers"] = {
        "User-Agent": "Chrome/0.0.0.1",
        "Accept": "text/html,application/xhtml+xml,application/xml",
        "Accept-Language": "en-us,en;q=0.5",
        "Sec-Fetch-Mode": "navigate",
    }
    info_json["formats"].append(vid_format)

    info_json["thumbnails"] = []
    info_json["thumbnail"] = ""
    info_json["description"] = tags["description"]
    info_json["uploader"] = os.getenv("SHOW_NAME")
    ch_id = _read_channel_id(file_dest)
    info_json["uploader_id"] = ch_id
    info_json["uploader_url"] = f"http://www.youtube.com/channel/{ch_id}"
    info_json["channel_id"] = ch_id
    info_json["channel_url"] = f"http://www.youtube.com/channel/{ch_id}"
    info_json["duration"] = int(round(float(vid_ff["format"]["duration"]), 0))
    info_json["view_count"] = 0
    info_json["age_limit"] = 0
    info_json["webpage_url"] = tags["comment"]
    info_json["categories"] = [reason]
    # The original appended `reason` to categories a second time right after
    # creating the empty tags list; the append is applied to tags instead.
    info_json["tags"] = [reason]
    info_json["playable_in_embed"] = True
    info_json["live_status"] = "not_live"
    info_json["automatic_captions"] = {}
    info_json["subtitles"] = {}
    info_json["comment_count"] = 0
    info_json["like_count"] = 0
    info_json["channel"] = os.getenv("SHOW_NAME")
    info_json["channel_follow_count"] = 0
    info_json["upload_date"] = tags["date"]
    info_json["availability"] = reason
    info_json["webpage_url_basename"] = "watch"
    info_json["webpage_url_domain"] = "youtube.com"
    info_json["extractor"] = "youtube"
    info_json["extractor_key"] = "Youtube"
    info_json["display_id"] = info_json["id"]
    info_json["fulltitle"] = tags["title"]
    info_json["duration_string"] = str(
        datetime.timedelta(seconds=info_json["duration"])
    )
    info_json["is_live"] = False
    info_json["was_live"] = False
    info_json["format_id"] = "0"
    info_json["ext"] = "mp4"
    info_json["protocol"] = "https"
    info_json["format_note"] = vid_format["format_note"]
    info_json["filesize_approx"] = vid_ff["format"]["size"]
    info_json["tbr"] = 0  # TBR Values are not necessary
    info_json["width"] = vid_format["width"]
    info_json["height"] = vid_format["height"]
    info_json["resolution"] = f"{info_json['width']}x{info_json['height']}"
    info_json["format"] = (
        f"{info_json['format_id']} -"
        f" {info_json['format_note']}"
        f" ({info_json['resolution']})"
    )
    info_json["fps"] = 24  # Assume default
    info_json["dynamic_range"] = "SDR"  # Assume default
    info_json["vcodec"] = vid_format["vcodec"]
    info_json["vbr"] = 0  # VBR Values are not necessary
    info_json["aspect_ratio"] = vid_format["aspect_ratio"]
    info_json["acodec"] = vid_format["acodec"]
    info_json["abr"] = 0  # ABR Values are not necessary
    info_json["asr"] = 0  # ASR Values are not necessary
    info_json["audio_channels"] = audio_stream["channels"]
    info_json["epoch"] = int(time.time())
    info_json["_type"] = "video"
    info_json["_version"] = {
        "version": "2023.01.01-dummy",
        "release_git_head": "0000000000000000000000000000000000000000",
        "repository": "yt-dlp/yt-dlp",
    }

    print("Dictionary for `.info.json` file is filled. Writing to file.")
    with open(f"{file_dest}/{info_file}", "w") as file:
        json.dump(info_json, file)
    print(f"File {file_dest}/{info_file} successfully written.")
    return 0
# Run the function.
# NOTE(review): `filename` and `dir` are not defined at this point in the
# snippet; bind them to the video path and the destination directory first.
create_info_json(filename, dir)
To also report the sizes of the files inside the directory, combine the following with the function above:
import re
def conv_size(size):
    """Format a byte count as a human-readable string.

    Values below 1024 are reported verbatim in bytes. Larger values are
    divided by the matching power of 1024 and rounded to two decimals,
    using KB, MB, GB and finally TB for everything from 1024**4 upwards.

    Args:
        size: Number of bytes.

    Returns:
        A string such as ``"512 bytes"``, ``"1.5 KB"`` or ``"2.0 TB"``.
    """
    if size < 1024:
        return f"{size} bytes"
    for power, unit in enumerate(("KB", "MB", "GB"), start=1):
        # Pick the first unit whose next step up is still larger than size.
        if size < pow(1024, power + 1):
            return f"{round(size / pow(1024, power), 2)} {unit}"
    return f"{round(size / pow(1024, 4), 2)} TB"
# Batch driver: walk `dir`, generate a `.info.json` for every file whose name
# matches `rx`, print each matched file's size, and total the sizes per folder.
rx = '[0-9]{8}_[a-zA-Z0-9_-]{11}_*.*'
dir = '/youtube'  # NOTE: shadows the builtin `dir`; name kept from the original.
f_sizes = {}
all_files = []  # every file seen during the walk, as a full path
current_count = 0
pattern = re.compile(rx, re.IGNORECASE)  # compiled once, outside the loops
for root, folders, files in os.walk(dir):
    for filename in files:
        current_count += 1
        full_path = os.path.join(root, filename)
        all_files.append(full_path)
        if not pattern.search(filename):
            continue
        # Pass the full path so the probe works regardless of the current
        # working directory; the output file name is derived from the basename.
        create_info_json(full_path, root)
        size = os.path.getsize(full_path)
        print(f"{filename}: {conv_size(size)}")
        f_sizes[root] = f_sizes.get(root, 0) + size
print("Folder sizes for moving items:")
for key in f_sizes.keys():
    print(f"{key}: {conv_size(f_sizes[key])}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is an example of running it for each item in a chosen directory. This can be done at any level.