Skip to content

Instantly share code, notes, and snippets.

@lamusmaser
Last active October 6, 2023 15:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lamusmaser/837fb58f73ea0cad784a33497932e0dd to your computer and use it in GitHub Desktop.
Save lamusmaser/837fb58f73ea0cad784a33497932e0dd to your computer and use it in GitHub Desktop.
Function to generate `.info.json` from `ffmpeg` details.
import os
import ffmpeg
import datetime
import time
import json
def create_info_json(video, file_dest, reason="private"):
    """Write a yt-dlp style ``.info.json`` built from a video's ffprobe data.

    The video is probed with ``ffmpeg.probe`` and the container tags and
    stream details are copied into a dictionary that is dumped as JSON to
    ``<file_dest>/<video basename without extension>.info.json``.

    Args:
        video: Path of the video file to probe.
        file_dest: Directory that receives the ``.info.json`` file. A
            ``channel.id`` file in this directory, if readable, supplies the
            channel/uploader ID.
        reason: Value stored as the ``availability`` field and added to the
            ``categories`` and ``tags`` lists.

    Returns:
        0 once the file has been written.

    Raises:
        SystemExit: With status 1 when ``ffmpeg.probe`` fails.
        KeyError: When a required format tag (``comment``, ``title``,
            ``description``, ``date``, ...) or an audio/video stream is
            missing from the probe result.
        IndexError: When the ``comment`` tag does not contain ``?v=``.
    """
    # `sys` is used in the probe error path but is not among the file's
    # top-level imports, so it is imported locally.
    import sys

    print("Attempting to generate .info.json details from video metadata.")
    info_ext = "info.json"
    info_file = f"{os.path.splitext(os.path.basename(video))[0]}.{info_ext}"
    try:
        vid_ff = ffmpeg.probe(f"{video}")
    except ffmpeg.Error as e:
        print(e.stderr.decode(), file=sys.stderr)
        sys.exit(1)

    # Keep, per codec type, the stream with the lowest "index" value. The
    # stream dictionary itself is stored so later lookups do not rely on the
    # index matching the position in the "streams" list.
    primary_stream = {}
    for stream in vid_ff["streams"]:
        codec_type = stream["codec_type"]
        current = primary_stream.get(codec_type)
        if current is None or int(stream["index"]) < int(current["index"]):
            primary_stream[codec_type] = stream
    video_stream = primary_stream["video"]
    audio_stream = primary_stream["audio"]
    tags = vid_ff["format"]["tags"]

    info_json = {}
    # The "comment" tag is expected to hold the watch URL (...?v=<id>).
    info_json["id"] = tags["comment"].split("?v=")[1]
    info_json["title"] = tags["title"]
    info_json["formats"] = []

    vid_format = {}
    vid_format["format_id"] = tags["major_brand"]
    vid_format["format_note"] = f"{tags['major_brand']}."
    vid_format["format_note"] += f"{tags['minor_version']}"
    # Drop the leading dot so the value has the same shape as the top-level
    # "ext" field ("mp4") written further down.
    vid_format["ext"] = os.path.splitext(video)[-1].lstrip(".")
    vid_format["protocol"] = tags["compatible_brands"]
    vid_format["acodec"] = audio_stream["codec_name"]
    vid_format["vcodec"] = video_stream["codec_name"]
    vid_format["url"] = tags["comment"]
    vid_format["width"] = video_stream["coded_width"]
    vid_format["height"] = video_stream["coded_height"]
    vid_format["fps"] = 0
    # Fragments are not needed for private or non-existent videos
    vid_format["fragments"] = []
    vid_format["audio_ext"] = audio_stream["codec_long_name"]
    vid_format["video_ext"] = video_stream["codec_long_name"]
    vid_format["format"] = tags["compatible_brands"]
    vid_format["resolution"] = f"{vid_format['width']}x{vid_format['height']}"

    # Read the aspect ratio from the selected video stream (stream 0 is not
    # necessarily video). When the field is absent or unusable, fall back to
    # width / height instead of failing.
    try:
        ratio1, ratio2 = video_stream.get("display_aspect_ratio", "").split(
            ":", 1
        )
        aspect = float(ratio1) / float(ratio2)
    except (ValueError, ZeroDivisionError):
        if vid_format["height"]:
            aspect = vid_format["width"] / vid_format["height"]
        else:
            aspect = 0
    vid_format["aspect_ratio"] = round(aspect, 2)

    vid_format["http_headers"] = {
        "User-Agent": "Chrome/0.0.0.1",
        "Accept": "text/html,application/xhtml+xml,application/xml",
        "Accept-Language": "en-us,en;q=0.5",
        "Sec-Fetch-Mode": "navigate",
    }
    info_json["formats"].append(vid_format)
    info_json["thumbnails"] = []
    info_json["thumbnail"] = ""
    info_json["description"] = tags["description"]
    info_json["uploader"] = os.getenv("SHOW_NAME")

    # Best effort: the last non-blank line of channel.id is the channel ID.
    # Blank lines are skipped so a trailing empty line cannot clear the ID.
    ch_id = ""
    try:
        with open(f"{file_dest}/channel.id") as file:
            for line in file:
                stripped = line.strip()
                if stripped:
                    ch_id = stripped
    except (OSError, UnicodeDecodeError) as E:
        print(
            f"Unable to open channel.id file at the destination: {file_dest}",
            "\nNot including a channel ID.",
        )
        print(E)

    info_json["uploader_id"] = ch_id
    info_json[
        "uploader_url"
    ] = f"http://www.youtube.com/channel/{info_json['uploader_id']}"
    info_json["channel_id"] = ch_id
    info_json[
        "channel_url"
    ] = f"http://www.youtube.com/channel/{info_json['uploader_id']}"
    info_json["duration"] = int(round(float(vid_ff["format"]["duration"]), 0))
    info_json["view_count"] = 0
    info_json["age_limit"] = 0
    info_json["webpage_url"] = tags["comment"]
    info_json["categories"] = []
    info_json["categories"].append(reason)
    # The reason goes into "tags" here; appending it to "categories" a second
    # time would duplicate it there and leave "tags" empty.
    info_json["tags"] = []
    info_json["tags"].append(reason)
    info_json["playable_in_embed"] = True
    info_json["live_status"] = "not_live"
    info_json["automatic_captions"] = {}
    info_json["subtitles"] = {}
    info_json["comment_count"] = 0
    info_json["like_count"] = 0
    info_json["channel"] = os.getenv("SHOW_NAME")
    info_json["channel_follow_count"] = 0
    info_json["upload_date"] = tags["date"]
    info_json["availability"] = reason
    info_json["webpage_url_basename"] = "watch"
    info_json["webpage_url_domain"] = "youtube.com"
    info_json["extractor"] = "youtube"
    info_json["extractor_key"] = "Youtube"
    info_json["display_id"] = info_json["id"]
    info_json["fulltitle"] = tags["title"]
    info_json["duration_string"] = str(
        datetime.timedelta(seconds=info_json["duration"])
    )
    info_json["is_live"] = False
    info_json["was_live"] = False
    info_json["format_id"] = "0"
    info_json["ext"] = "mp4"
    info_json["protocol"] = "https"
    info_json["format_note"] = vid_format["format_note"]
    # ffprobe reports the size as a string; store it as a number.
    info_json["filesize_approx"] = int(vid_ff["format"]["size"])
    info_json["tbr"] = 0  # TBR Values are not necessary
    info_json["width"] = vid_format["width"]
    info_json["height"] = vid_format["height"]
    info_json["resolution"] = f"{info_json['width']}x{info_json['height']}"
    info_json["format"] = f"{info_json['format_id']} -"
    info_json["format"] += f" {info_json['format_note']}"
    info_json["format"] += f" ({info_json['resolution']})"
    info_json["fps"] = 24  # Assume default
    info_json["dynamic_range"] = "SDR"  # Assume default
    info_json["vcodec"] = vid_format["vcodec"]
    info_json["vbr"] = 0  # VBR Values are not necessary
    info_json["aspect_ratio"] = vid_format["aspect_ratio"]
    info_json["acodec"] = vid_format["acodec"]
    info_json["abr"] = 0  # ABR Values are not necessary
    info_json["asr"] = 0  # ASR Values are not necessary
    info_json["audio_channels"] = audio_stream["channels"]
    info_json["epoch"] = int(time.time())
    info_json["_type"] = "video"
    info_json["_version"] = {}
    info_json["_version"]["version"] = "2023.01.01-dummy"
    info_json["_version"]["release_git_head"] = "0000000000000000000000000000000000000000"
    info_json["_version"]["repository"] = "yt-dlp/yt-dlp"

    print("Dictionary for `.info.json` file is filled. Writing to file.")
    with open(f"{file_dest}/{info_file}", "w") as file:
        json.dump(info_json, file)
    print(f"File {file_dest}/{info_file} successfully written.")
    return 0
# Run the function.
# NOTE: `filename` and `dir` are not assigned anywhere before this line in the
# file; set them to the video path and the destination directory first.
create_info_json(video=filename, file_dest=dir)
@lamusmaser
Copy link
Author

Here is an example of running it for each item in a chosen directory. This can be done at any level.

import re

# File names of interest: 8 digits, an underscore, then 11 characters from
# [A-Za-z0-9_-], optionally followed by anything else.
rx = r'[0-9]{8}_[a-zA-Z0-9_-]{11}_*.*'
dir = '/youtube'
# Both accumulators must exist before the loop updates them.
current_count = 0
all_files = []
for root, _, files in os.walk(dir):
    for filename in files:
        current_count += 1
        full_path = os.path.join(root, filename)
        all_files.append(full_path)
        # Files written by create_info_json also match `rx`; skip them so a
        # second run does not try to probe JSON files as videos.
        if filename.endswith(".info.json"):
            continue
        if re.search(rx, filename, re.IGNORECASE):
            # Pass the full path so the probe does not depend on the current
            # working directory; the .info.json is written next to the video.
            create_info_json(full_path, root)

@lamusmaser
Copy link
Author

To also report the file sizes, combine the following with the example above:

import re

def conv_size(size):
    """Return *size* (a byte count) as a human-readable string.

    Values below 1024 are reported as ``"<size> bytes"``. Larger values are
    divided by the matching power of 1024, rounded to two decimals and
    suffixed with KB, MB, GB or TB. Anything of 1024**4 or more is reported
    in TB.
    """
    if size < 1024:
        return f"{size} bytes"
    # Try each unit in turn; the first one whose upper bound exceeds the
    # size is the one to report in.
    for exponent, unit in enumerate(("KB", "MB", "GB"), start=1):
        if size < 1024 ** (exponent + 1):
            return f"{round(size / 1024 ** exponent, 2)} {unit}"
    # The rounding precision belongs inside round(); outside it the f-string
    # would format a (value, 2) tuple.
    return f"{round(size / 1024 ** 4, 2)} TB"

# File names of interest: 8 digits, an underscore, then 11 characters from
# [A-Za-z0-9_-], optionally followed by anything else.
rx = r'[0-9]{8}_[a-zA-Z0-9_-]{11}_*.*'
dir = '/youtube'
f_sizes = {}  # total bytes of matched files, keyed by containing folder
current_count = 0
all_files = []  # must exist before the loop appends to it
for root, folders, files in os.walk(dir):
    for filename in files:
        current_count += 1
        full_path = os.path.join(root, filename)
        all_files.append(full_path)
        # Files written by create_info_json also match `rx`; skip them so a
        # second run does not try to probe JSON files as videos.
        if filename.endswith(".info.json"):
            continue
        if re.search(rx, filename, re.IGNORECASE):
            # Pass the full path so the probe does not depend on the current
            # working directory; the .info.json is written next to the video.
            create_info_json(full_path, root)
            file_size = os.path.getsize(full_path)
            print(f"{filename}: {conv_size(file_size)}")
            f_sizes[root] = f_sizes.get(root, 0) + file_size
print("Folder sizes for moving items:")
for key, total in f_sizes.items():
    print(f"{key}: {conv_size(total)}")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment