Skip to content

Instantly share code, notes, and snippets.

@sveetch
Created May 5, 2024 19:54
Show Gist options
  • Save sveetch/0973050c97b2df4dc34d416cfe3b96ef to your computer and use it in GitHub Desktop.
Save sveetch/0973050c97b2df4dc34d416cfe3b96ef to your computer and use it in GitHub Desktop.
Collecting video meta informations with MediaInfo
"""
Proof-of-concept script that uses MediaInfo to get metadata from a video file.
Although this script was written only for videos, MediaInfo can also read information from audio and image files.
It was developed with Python 3.10 but should probably work with Python 3.8.
MediaInfo library is required to be installed on your system, see:
https://github.com/MediaArea/MediaInfo
On Ubuntu you would get it with::
sudo apt-get install mediainfo
Then the Python wrapper library:
https://github.com/sbraz/pymediainfo
You would install it with::
pip install pymediainfo
Be aware that recent pymediainfo versions may be incompatible with very old MediaInfo releases.
You may install pymediainfo directly on your system with a package manager like apt, but
it may then be difficult to use from a Python virtual environment, which is the recommended setup.
"""
from pymediainfo import MediaInfo
class VideoMetaParser:
    """
    Collect a selection of metadata from a media file through MediaInfo.

    For each track kind (general, video, audio, subtitle) only the attribute
    names listed in the matching ``*_FIELDS`` list are read. A value is
    normalized when a method named ``format_<kind>_<fieldname>`` exists on the
    parser; otherwise it is returned as found on the track.
    """
    # Selected attribute names to get information from track types
    GENERAL_FIELDS = [
        "format",
        "duration",
        "file_last_modification_date",
    ]
    VIDEO_FIELDS = [
        "format",
        "codec_id",
        "width",
        "height",
        "bit_rate",
        "frame_rate",
        "pixel_aspect_ratio",
        "display_aspect_ratio",
    ]
    AUDIO_FIELDS = [
        "title",
        "language",
        "format",
        "codec_id",
        "bit_rate",
        "sampling_rate",
    ]
    SUBTITLE_FIELDS = [
        "title",
        "language",
        "format",
        "codec_id",
    ]

    @staticmethod
    def _to_int(value):
        """
        Convert a non-empty string or a float to an integer (truncating).

        Falsy values (``None``, ``""``, ``0``) and values of any other type are
        returned unchanged. A string which is neither an integer nor a decimal
        number raises ``ValueError``.
        """
        if not value:
            return value
        if isinstance(value, str):
            try:
                return int(value)
            except ValueError:
                # Decimal strings such as "1234.567" are rejected by int()
                return int(float(value))
        if isinstance(value, float):
            return int(value)
        return value

    @staticmethod
    def _to_float(value):
        """
        Convert a non-empty string to a float, any other value is returned
        unchanged.
        """
        if value and isinstance(value, str):
            return float(value)
        return value

    def format_general_duration(self, value):
        """
        Ensure duration is always an integer
        """
        return self._to_int(value)

    def format_general_file_last_modification_date(self, value):
        """
        Always return an UTC datetime with timezone.

        A leading ``"UTC "`` prefix is removed, ``"+00:00"`` is appended when
        the value contains no ``"+"`` and spaces are replaced with ``"T"``.
        Falsy values are returned unchanged.
        """
        if not value:
            return value

        # Remove possible UTC prefix
        prefix = "UTC "
        if value.startswith(prefix):
            value = value[len(prefix):]
        # Add UTC timezone if there is not any
        if "+" not in value:
            value += "+00:00"
        # Finish the ISO format
        return value.replace(" ", "T")

    def format_video_frame_rate(self, value):
        """
        Ensure frame rate is always an integer (decimals are truncated)
        """
        return self._to_int(value)

    def format_video_bit_rate(self, value):
        """
        Ensure bit rate is always an integer
        """
        return self._to_int(value)

    def format_video_pixel_aspect_ratio(self, value):
        """
        Ensure a pixel ratio given as a string is turned into a float
        """
        return self._to_float(value)

    def format_video_display_aspect_ratio(self, value):
        """
        Ensure a display ratio given as a string is turned into a float
        """
        return self._to_float(value)

    def format_audio_bit_rate(self, value):
        """
        Ensure bit rate is always an integer
        """
        return self._to_int(value)

    def format_audio_sampling_rate(self, value):
        """
        Ensure sampling rate is always an integer
        """
        return self._to_int(value)

    def formatted_attr_value(self, kind, data, name):
        """
        Return attribute ``name`` of ``data``, normalized by the method
        ``format_<kind>_<name>`` when the parser defines one.

        Example:

            formatted_attr_value("video", DATA, "frame_rate")
        """
        value = getattr(data, name)
        formatter = getattr(self, "format_{}_{}".format(kind, name), None)
        if formatter is None:
            return value
        return formatter(value)

    def _track_fields(self, kind, track, fieldnames):
        """
        Build the dictionary of formatted values for a single track.
        """
        return {
            fieldname: self.formatted_attr_value(kind, track, fieldname)
            for fieldname in fieldnames
        }

    def scan(self, filepath):
        """
        Parse a media file and return its selected metadata.

        Arguments:
            filepath: Path of the file, given as is to ``MediaInfo.parse``.

        Returns:
            dict: Item ``general`` is a dictionary of the general track fields
            (left empty if the file has no general track). Items ``video``,
            ``audio`` and ``subtitle`` are lists with one dictionary per track.
        """
        media_info = MediaInfo.parse(filepath)

        data = {
            "general": {},
            "video": [],
            "audio": [],
            "subtitle": [],
        }

        # We only care about a single one general track, there should not be more
        general_tracks = media_info.general_tracks
        if general_tracks:
            data["general"] = self._track_fields(
                "general",
                general_tracks[0],
                self.GENERAL_FIELDS,
            )

        # Every track kind goes through the formatters so the audio ones are
        # applied as well
        track_groups = (
            ("video", media_info.video_tracks, self.VIDEO_FIELDS),
            ("audio", media_info.audio_tracks, self.AUDIO_FIELDS),
            ("subtitle", media_info.text_tracks, self.SUBTITLE_FIELDS),
        )
        for kind, tracks, fieldnames in track_groups:
            data[kind] = [
                self._track_fields(kind, track, fieldnames)
                for track in tracks
            ]

        return data
if __name__ == "__main__":
    import json
    from pathlib import Path

    # Directory where the JSON reports are written. The name was used but
    # never defined, which made the script fail with a NameError; the current
    # working directory is used here.
    OUTPUT_DIR = Path.cwd()

    SAMPLES = [
        Path("some_videos.mkv"),
        Path("another_videos.mp4"),
    ]

    parser = VideoMetaParser()

    for path in SAMPLES:
        output_filedst = "{}.json".format(path.name)
        print(path, output_filedst, path.exists())

        # Skip missing samples instead of handing a non-existent path to the
        # parser
        if not path.exists():
            continue

        data = parser.scan(path)
        (OUTPUT_DIR / output_filedst).write_text(json.dumps(data, indent=4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment