Skip to content

Instantly share code, notes, and snippets.

@milothiesen
Created January 5, 2018 14:00
Show Gist options
  • Save milothiesen/8147ed1e339aaef6b75cb857e8957fc0 to your computer and use it in GitHub Desktop.
Save milothiesen/8147ed1e339aaef6b75cb857e8957fc0 to your computer and use it in GitHub Desktop.
A Python script that recursively searches a given directory for video files and uses MediaInfo to pull technical metadata about each file's wrapper (container) and codec.
import os
import sys
import subprocess
import lxml.etree as ET
import pprint
# Root directory to scan, taken from the first command-line argument.
rootdir = sys.argv[1]
# File extensions (without the leading dot) that are treated as video files.
accepted_file_types = ['mov', 'mp4', 'm4v', 'wmv']
# Filled by dirwalker() with the full path of every matching file.
files_list = []
# Filled by generate_mediainfo() with one metadata dict per inspected file.
formats_list = []
def dirwalker(rootdir, accepted_file_types, files_list):
    """Collect video files under *rootdir*, then run MediaInfo on them.

    The directory tree is walked recursively with ``os.walk``.  Every file
    whose extension is listed in *accepted_file_types* has its full path
    appended to *files_list*; once the walk is finished the accumulated list
    is handed to ``generate_mediainfo``.

    Args:
        rootdir: Directory at which the recursive search starts.
        accepted_file_types: Iterable of file extensions to accept, e.g.
            ``['mov', 'mp4']``.  Matching is case-insensitive and a leading
            dot on an entry is ignored.
        files_list: List that is extended in place with the matching paths.

    Returns:
        None.  Results are delivered through *files_list*.
    """
    # Normalise the accepted extensions once so that "CLIP.MOV" matches
    # "mov" and so that callers may write either "mov" or ".mov".
    wanted = {ext.lower().lstrip('.') for ext in accepted_file_types}

    for root, _dirs, files in os.walk(rootdir):
        for file_ in files:
            # splitext() yields an empty extension for names without a dot;
            # rpartition('.') would return the whole name instead, so a file
            # literally called "mov" used to be accepted as a video.
            extension = os.path.splitext(file_)[1][1:].lower()
            if extension in wanted:
                files_list.append(os.path.join(root, file_))

    generate_mediainfo(files_list)
def generate_mediainfo(files_list):
    """Run MediaInfo on each path and record its ``General`` track metadata.

    For every path in *files_list* the ``mediainfo`` command-line tool is
    invoked with ``--Output=XML``.  Selected child elements of the track
    whose ``type`` attribute is ``General`` are copied into a dict keyed by
    element name, and that dict is appended to the module-level
    ``formats_list``.  Elements that are absent from the XML are simply left
    out of the dict.

    A file whose MediaInfo call fails, or whose output cannot be parsed as
    XML, is reported with ``print`` and skipped so that one bad file does not
    abort the whole run.

    Args:
        files_list: Iterable of file paths to inspect.

    Returns:
        None.  Results are accumulated in the global ``formats_list``.
    """
    # Child elements of the General track to record.  Each name is used both
    # in the XPath query and as the dict key (the key for Commercial_name was
    # previously misspelled "Commerical_name").
    general_fields = (
        'Complete_name',
        'Format',
        'Commercial_name',
        'Format_profile',
        'Codec_ID',
        'Overall_bit_rate_mode',
        'Writing_library',
    )

    for filepath in files_list:
        print(filepath)

        try:
            mediainfo_output = subprocess.check_output(
                ['mediainfo', '--Output=XML', filepath])
        except Exception as e:
            # Deliberately broad: covers a missing mediainfo executable
            # (FileNotFoundError) as well as a non-zero exit status
            # (CalledProcessError).  Report and move on to the next file.
            print(e)
            continue

        try:
            root = ET.fromstring(mediainfo_output)
        except ET.XMLSyntaxError as e:
            # Empty or malformed output should only cost us this one file.
            print(e)
            continue

        asset_dict = {}
        for field in general_fields:
            values = root.xpath(
                './/track[@type="General"]/{}/text()'.format(field))
            if values:
                # If the query matches more than once, keep the last match.
                asset_dict[field] = values[-1]
        formats_list.append(asset_dict)
# Script entry point: scan the tree given on the command line, then dump the
# collected metadata dicts in a readable, indented form.
dirwalker(rootdir, accepted_file_types, files_list)
pprint.pprint(formats_list, indent=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment