Skip to content

Instantly share code, notes, and snippets.

@scottchiefbaker
Last active December 4, 2023 02:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scottchiefbaker/0747e6b6a6c66abe0d27e972d3ddac8e to your computer and use it in GitHub Desktop.
Save scottchiefbaker/0747e6b6a6c66abe0d27e972d3ddac8e to your computer and use it in GitHub Desktop.
FFMPEG reencode helper script
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dump::Color;
use File::Basename;
use Getopt::Long;
use JSON::PP;
use Cwd 3.75 qw(abs_path);
use Time::HiRes qw(time);
use Encode;
binmode(STDOUT, ":utf8");
###############################################################################
###############################################################################
# ---------------------------------------------------------------------------
# Command line option defaults. Every one of these can be overridden by the
# GetOptions() call below.
# ---------------------------------------------------------------------------
my $crf;                                # Passed to ffmpeg as -crf when set
my ($outdir, $preset)       = ("", "");
my $height                  = 0;        # Target output height (0 = no scaling)
my ($hevc, $h264)           = (1, 0);   # HEVC is the default video codec
my ($sample, $quiet)        = (0, 0);
my ($debug, $verbose)       = (0, 0);
my ($keep, $keep_surround)  = (0, 0);
my ($info, $dry)            = (0, 0);
my ($eight_bit, $yify)      = (0, 0);
my ($sanitize, $aac)        = (0, 0);
my ($res_480, $res_720, $res_1080);

my @params;
my $script_start = time();

my $ok = GetOptions(
    'outdir=s'   => \$outdir,
    'crf=i'      => \$crf,
    'height|h=i' => \$height,
    'hevc'       => \$hevc,
    'h264'       => \$h264,
    'sample'     => \$sample,
    'preset=s'   => \$preset,
    'quiet'      => \$quiet,
    'debug'      => \$debug,
    'keep'       => \$keep,
    'surround'   => \$keep_surround,
    'info'       => \$info,
    'verbose'    => \$verbose,
    '480p'       => \$res_480,
    '720p'       => \$res_720,
    '1080p'      => \$res_1080,
    '8bit'       => \$eight_bit,
    'dry'        => \$dry,
    'yify'       => \$yify,
    'sanitize'   => \$sanitize,
    'aac'        => \$aac,
);

# Unknown or malformed options: show the usage string and stop
die(usage()) if !$ok;

# --h264 turns the default HEVC encoding off
$hevc = 0 if $h264;

# The resolution shortcuts override --height; the first one set wins
foreach my $shortcut ([$res_480, 480], [$res_720, 720], [$res_1080, 1080]) {
    my ($enabled, $pixels) = @$shortcut;
    if ($enabled) {
        $height = $pixels;
        last;
    }
}

# If we're outputting to another directory, keep is implied
$keep = 1 if $outdir;

# --yify is a bundle of settings; it is applied last so it wins over the
# individual flags above
if ($yify) {
    ($keep, $height, $hevc, $crf) = (1, 720, 1, 26);
    $keep_surround = 0; # Convert to stereo
}
# Build an array of readable files to encode.
#
# Each command line argument is decoded from UTF-8 (in place, so @ARGV holds
# the decoded names afterwards) and is either added to @files or skipped with
# a warning. Arguments that are skipped are NOT probed with ffprobe: there is
# nothing to learn from them, and probing a missing path only produces noise.
#
# When a target height was requested, it is clamped to the height of the
# source video so that we never upscale. Note that $height is shared by all
# files, so it ends up as the smallest source height that is below the
# requested height.
my @files;
foreach my $file (@ARGV) {
    $file = Encode::decode('utf8', $file);

    if (!-f $file) {
        printf("%sWarning:%s Skipping '%s' as it's not a file\n", color("yellow"), color("reset"), $file);
        next;
    }

    if (!-r $file) {
        printf("%sWarning:%s Skipping '%s' as it's not readable\n", color("yellow"), color("reset"), $file);
        next;
    }

    push(@files, $file);

    # No target height requested means there is nothing to clamp, so skip
    # the (comparatively slow) ffprobe call
    if (!$height) {
        next;
    }

    my $orig_height = get_video_height($file);
    if ($orig_height && $orig_height < $height) {
        my $orange = color('orange');
        my $reset  = color();
        print $orange . "Warning:$reset disabling upscaling ($orig_height < $height)\n";
        $height = $orig_height;
    }
}
# Nothing usable was given on the command line
if (!@files) {
    print "Usage: reencode [file1.mp4] [file2.mkv] ...\n";

    # Fall back to every .mp4/.mkv in the current directory (sorted, then
    # decoded from UTF-8) so we can show the info for all of them
    @files = map { Encode::decode('utf8', $_) } sort(glob("./*.mp4"), glob("./*.mkv"));

    # Arguments were given but none were usable, or the directory has no
    # videos either: give up
    if (@ARGV || !@files) {
        exit(9);
    }

    # No arguments at all and videos were found: info-only mode
    print "\n";
    $info = 1;
}
# Assemble the ffmpeg parameters that are shared by every file.
# $vid_codec is only used for the "== OUTPUT ==" display later on.
my $vid_codec = $hevc ? "hevc" : "h264";
push(@params, "-c:v", "hevc") if $hevc;

push(@params, "-crf", "$crf") if $crf;

# Scale the video if applicable
push(@params, "-vf", "scale=-2:$height") if $height > 100;

push(@params, "-preset", $preset) if $preset;

# Sample only does the first X seconds
if ($sample) {
    push(@params, "-t", "90");

    # We definitely keep the original file if we're only making a sample
    $keep = 1;
}

# Silence ffmpeg's own logging unless we are debugging
push(@params, "-loglevel", "quiet") unless $debug;

# Show the ffmpeg encoding status line unless --quiet was given
push(@params, "-stats") unless $quiet;

# Force an 8 bit pixel format
push(@params, "-pix_fmt", "yuv420p") if $eight_bit;
# Main loop: for every input file print a summary, build the ffmpeg command,
# run it, and verify the result by comparing the input and output durations.
# $total_size/$total_compressed accumulate byte counts of successfully encoded
# files for the final summary. $tmp_outdir is non-empty once $outdir has been
# derived from an input file's directory (as opposed to given via --outdir).
my $total_size = 0;
my $total_compressed = 0;
my $tmp_outdir = '';
foreach my $infile (@files) {
my $base = basename($infile);
# Prints the "== INPUT ==" summary and returns the streams grouped by type
my $sum = show_video_summary($infile);
# Only the first audio stream is inspected; 0 when there is none
my $aud_channels = $sum->{audio}->[0]->{channels} || 0;
my $is_surround = ($aud_channels > 2);
my @aud_params;
my $aud_codec;
# Re-encode audio to stereo AAC when --aac was given, or when the source is
# surround and --surround (keep surround) was NOT given. Otherwise copy it.
if ($aac || ($is_surround && !$keep_surround)) {
# Default to aac @ 128Kb/s
push(@aud_params, "-c:a");
push(@aud_params, "aac");
push(@aud_params, "-b:a");
push(@aud_params, "128k");
# Two audio channels
push(@aud_params, "-ac");
push(@aud_params, "2");
$aud_codec = "2.0 [aac @ 128Kb/s]";
} else {
push(@aud_params, "-c:a");
push(@aud_params, "copy");
$aud_codec = "*COPY*";
}
# If we're doing info, don't process anything else
if ($info) {
next;
}
# Strip the container extension.
# NOTE(review): the pattern is unanchored and global, so ".mp4"/".avi"/".mkv"
# is removed anywhere in the name, not just at the end - confirm intended.
$base =~ s/\.(mp4|avi|mkv)//ig;
# Remove some common strings from the file name
$base =~ s/(1080p|\d\d\dp|HEVC|x265|x264|h.264|-TBS|-strife|-MeGusta|-mSD|-NhaNc3)//gi;
# NOTE(review): $orig is assigned here but not read anywhere in this loop
my $orig = $base;
if ($sanitize) {
$base = sanitize($base,'.');
} else {
# Remove and double/triple "." from the filename
$base =~ s/\.+/./ig;
# Remove ".-"
$base =~ s/\.-/./ig;
# Remove "()"
$base =~ s/\(\)//ig;
}
# First part of the ffmpeg command: overwrite output (-y) and the input file
my @prefix;
push(@prefix, "ffmpeg");
push(@prefix, "-y");
push(@prefix, "-i");
push(@prefix, $infile);
# If we don't have a specified outdir, we use the same dir the input file is in
# ($tmp_outdir being set means $outdir was derived from a previous input file,
# so it is re-derived for the current one)
if (!$outdir || $tmp_outdir) {
$tmp_outdir = dirname($infile) . "/";
$outdir = $tmp_outdir;
}
# If we have an output directory append a / for safety
if ($outdir) {
$outdir .= "/";
# Make sure there is only one trailing /
$outdir =~ s|\/+$|\/|g;
}
# If there is an output dir make sure we can write to it
# (stacked file tests: must be a directory AND writable)
if ($outdir && !(-w -d $outdir)) {
my $str = sprintf("\n%sError:%s $outdir is not writable\n", color('red'), color('reset'));
die($str);
}
# If there is no output dir assume ./
if (!$outdir && (!-w -d "./")) {
my $path = abs_path("./");
my $str = sprintf("\n%sError:%s $path is not writable\n", color('red'), color('reset'));
die($str);
}
$base = trim($base);
# Build the output file name: always .mkv, with ".hevc" and/or ".tablet"
# inserted before the extension for HEVC and --yify output respectively.
# NOTE(review): these three patterns are unanchored and match against the
# full path, so a ".mkv" inside $outdir would also match - confirm/anchor.
my $outfile = $outdir . $base;
if ($outfile !~ /\.mkv/) {
$outfile .= ".mkv";
}
if ($hevc) {
$outfile =~ s/\.mkv/.hevc.mkv/g;
}
if ($yify) {
$outfile =~ s/\.mkv/.tablet.mkv/g;
}
# Final command: input, audio options, shared options, then the output file
my @cmd_full = (@prefix, @aud_params, @params, $outfile);
# Are the input/output filenames/paths the same
my $same = abs_path($infile) eq abs_path($outfile);
if ($same) {
printf("%sWarning:%s Skipping '%s' because the input/output file names are the same\n", color("yellow"), color("reset"), $base);
next;
}
# @escaped is a display-only copy (used for the --dry output); the real
# command is run from @cmd_full in list form, which needs no quoting
my @escaped = @cmd_full;
# Put quotes around anything with a *space* in it
foreach my $i (@escaped) {
if ($i =~ / /) {
$i = "'$i'";
}
}
# Dry run: print the command instead of running it.
# NOTE(review): this exits (status 7) after the first file, so with several
# input files only the first command is shown - confirm intended.
if ($dry) {
my $cmd = join(" ", @escaped);
print "CMD: $cmd\n\n";
exit(7);
}
# Print out the full command params, and then run it
my $start = time();
print "\n";
if ($verbose) {
dd(\@cmd_full);
}
printf("== OUTPUT ==\n");
printf(" File : '%s'\n", $outfile);
printf(" Video : [%s]\n", $vid_codec);
printf(" Audio : %s\n", $aud_codec);
if ($crf) {
printf(" CRF : %s\n", $crf);
}
printf(" Subtitle : %s\n", "*COPY*");
print "\n";
system(@cmd_full);
# ffmpeg's exit status; on failure dump the command for troubleshooting
my $exit = $? >> 8;
print "\n";
if ($exit != 0) {
dd(\@cmd_full);
}
# See if the original/encoded file length is the same
# This is to check if the encode was completed successfully
# NOTE(review): the differing defaults (330 vs 0) presumably make sure that
# two unknown lengths are never treated as "equal" - confirm.
my $orig_length = get_video_length($infile) // 330;
my $encoded_length = get_video_length($outfile) // 0;
# A little difference is OK. We say within 5 seconds is OK
my $diff = abs($orig_length - $encoded_length);
if ($diff < 5) {
my $diff_size = int((filesize($infile) - filesize($outfile)) / 1024 / 1024);
$total_size += filesize($infile);
$total_compressed += filesize($outfile);
# The original is deleted unless we were told (or it was implied) to keep it
if (!$keep) {
unlink($infile);
#rename($infile, "$infile.orig");
}
my $final_size = (filesize($outfile) / 1024 / 1024);
printf("%sReencode successful:%s %s, %0.1fMB (saved %s%0.1fMB%s)\n", color("green"), color("reset"), human_time(time() - $start), $final_size, color(43), $diff_size, color('reset'));
} elsif ($sample) {
# A sample is shorter than the source by design, so a mismatch is expected.
# NOTE(review): the value printed after "generated in" is the length
# difference ($diff), not the elapsed encode time - confirm intended.
printf("%sInfo:%s Sample file generated in %s\n", color("green"), color("reset"), human_time($diff));
} else {
printf("%sError:%s Something went wrong with the encode. Output length difference: %s\n", color("red"), color("reset"), human_time($diff));
}
}
# Grand total, only worth showing when more than one file was handled.
# The two running byte totals are replaced by their colored, human readable
# representation before being printed.
my $count = @files;
if ($count > 1) {
    my $total_time    = human_time(time() - $script_start);
    $total_size       = color(43, human_size($total_size));
    $total_compressed = color(229, human_size($total_compressed));

    # Info mode does not encode anything, so there is nothing to report
    unless ($info) {
        print "\n";
        print "Compressed $count files in $total_time. $total_size compressed down to $total_compressed\n";
    }
}
###############################################################################
###############################################################################
# Return the size of a file in bytes as reported by the -s file test
# (undef when the file does not exist)
sub filesize {
    my ($path) = @_;

    return scalar(-s $path);
}
# Minimal command line parser working directly on @ARGV.
#
# Returns a hashref of option name => value. An option followed by something
# that does not look like another option takes that as its value, otherwise
# it is treated as a flag and counted. When a name is passed in, only the
# value for that one option is returned.
sub argv {
    my $opts = {};

    foreach my $idx (0 .. $#ARGV) {
        my $arg = $ARGV[$idx];

        # If the item starts with "-" it's a key
        my ($name) = $arg =~ /^--?([a-zA-Z_]\w*)/;
        next unless defined($name);

        # A single dash followed by more than one word character is ignored
        next if $arg =~ /^-\w\w/;

        my $following = $ARGV[$idx + 1];
        if (defined($following) && ($following !~ /^--?\D/)) {
            # If the next item does not start with "--" it's the value for this item
            $opts->{$name} = $following;
        } else {
            # Bareword like --verbose with no options
            $opts->{$name}++;
        }
    }

    # We're looking for a certain item
    return $opts->{$_[0]} if $_[0];

    return $opts;
}
# Remove leading and trailing whitespace.
#
# In list context every argument is trimmed and the list of results is
# returned. In scalar context only the first argument is trimmed. An
# undefined or empty value yields an empty string (without triggering an
# "uninitialized value" warning).
sub trim {
    if (wantarray) {
        return map { scalar(trim($_)) } @_;
    }

    my $s = shift();
    if (!defined($s) || $s eq "") {
        return "";
    }

    $s =~ s/^\s+//;
    $s =~ s/\s+$//;

    return $s;
}
# Debug helpers: k() dumps its arguments, kd() dumps them and then exits.
# Data::Dump::Color is used for the dump when it can be loaded, otherwise we
# fall back to Data::Dumper.
BEGIN {
    my $have_color_dump = eval { require Data::Dump::Color };

    if ($have_color_dump) {
        *k = sub { Data::Dump::Color::dd(@_) };
    } else {
        require Data::Dumper;
        *k = sub { print Data::Dumper::Dumper(\@_) };
    }

    # Dump the arguments, report the caller's file and line, and exit(15)
    sub kd {
        k(@_);

        printf("Died at %2\$s line #%3\$s\n",caller());
        exit(15);
    }
}
# Build an ANSI (256 color) escape sequence.
#
# String format: '115', '165_bold', '10_on_140', 'reset', 'on_173', 'red', 'white_on_blue'
#
# Params:
#   $str - color specification (see above). An empty/undefined value or
#          'reset' returns the reset sequence.
#   $txt - optional text. When given (and not empty) the text is appended
#          followed by a reset sequence, so the result is self contained.
#
# Returns the escape sequence(s), optionally wrapped around $txt.
sub color {
    my ($str, $txt) = @_;

    # If we're NOT connected to a an interactive terminal don't do color
    #if (-t STDOUT == 0) { return $txt // ''; }

    # No string sent in, so we just reset
    if (!defined($str) || $str eq '' || $str eq 'reset') { return "\e[0m"; }

    # Some predefined colors
    my %color_map = qw(red 160 blue 27 green 34 yellow 226 orange 214 purple 93 white 15 black 0);
    # Replace every known color name with its number; other words (commands
    # like "bold", or "on") are left alone
    $str =~ s|([A-Za-z]+)|$color_map{$1} // $1|eg;

    # Get foreground/background and any commands
    my ($fc, $cmd) = $str =~ /^(\d{1,3})?_?(\w+)?$/;
    my ($bc)       = $str =~ /on_(\d{1,3})$/;

    # Some predefined commands
    my %cmd_map = qw(bold 1 italic 3 underline 4 blink 5 inverse 7);
    my $cmd_num = $cmd_map{$cmd // 0};

    my $ret = '';
    if ($cmd_num)     { $ret .= "\e[${cmd_num}m"; }
    if (defined($fc)) { $ret .= "\e[38;5;${fc}m"; }
    if (defined($bc)) { $ret .= "\e[48;5;${bc}m"; }

    # Test for "has content" instead of truthiness so that the text "0"
    # (e.g. a size of zero) is still printed and followed by a reset
    if (defined($txt) && length($txt)) { $ret .= $txt . "\e[0m"; }

    return $ret;
}
# Return the container level duration of a file as reported by ffprobe
# (the "duration" entry of the "format" section), or undef if it is missing
sub get_video_length {
    my ($file) = @_;

    my $probe = get_video_info($file);

    return $probe->{format}->{duration};
}
# Return the height of the first video stream in a file, or undef if the file
# has no video stream
sub get_video_height {
    my ($file) = @_;

    my ($first_video) = find_stream(get_video_info($file), 'video');

    return $first_video->{height};
}
# Run ffprobe on a file and return its decoded JSON output.
#
# Params:
#   $file - path of the file to inspect
#
# Returns the data structure decoded from ffprobe's JSON output (the format
# and streams sections were requested). Dies if ffprobe cannot be started or
# if its output is not valid JSON.
sub get_video_info {
    my $file = shift();

    # The command is run in list form so the file name never passes through a
    # shell: quotes, "$", backticks etc. in a file name are passed verbatim
    # instead of breaking (or injecting into) the command line.
    my @cmd = ("ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", $file);

    open(my $fh, "-|", @cmd) or die("Unable to run ffprobe: $!\n");
    my $out = do { local $/; <$fh> };
    # ffprobe's exit status is deliberately not checked here; only its output
    # is used
    close($fh);

    my $x = decode_json($out // '');

    return $x;
}
# Print the "== INPUT ==" summary (size, length, and one line per video,
# audio and subtitle stream) for a file.
#
# Returns a hashref with the keys video/audio/subtitle, each an arrayref of
# the matching ffprobe stream entries. When the file has no video stream a
# warning is printed instead and an empty hashref is returned.
sub show_video_summary {
    my ($file) = @_;

    my $probe      = get_video_info($file);
    my $length_str = human_time($probe->{format}->{duration});

    my @video     = find_stream($probe, 'video');
    my @audio     = find_stream($probe, 'audio');
    my @subtitles = find_stream($probe, 'subtitle');

    # Without a video stream there is nothing to summarize
    if (!@video) {
        print color("red", "WARNING:");
        print " Skipping " . color('yellow', $file) . " because it's not video\n";

        return {};
    }

    my $summary = {
        video    => \@video,
        audio    => \@audio,
        subtitle => \@subtitles,
    };

    my $size_str   = human_size(filesize($file) || 0);
    my $overall_br = color('yellow', human_size($probe->{format}->{bit_rate} || 0) . "b/s");

    print color('orange', "== INPUT ==") . "\n";
    printf(" File : '%s'\n", color('white', basename($file)));
    printf(" Size : %s (%s)\n", $size_str, $overall_br);
    printf(" Length : %s\n", $length_str);

    foreach my $stream (@video) {
        # A missing bit rate falls back to 1024, which ends up below the
        # "> 2" threshold and therefore hides the bit rate from the output
        my $kbps  = ($stream->{bit_rate} // 1024) / 1000;
        my $depth = color('87', is_10bit($stream->{pix_fmt}) ? "10bit" : "8bit");
        my $res   = get_resolution_string($stream->{width}, $stream->{height});
        my $rate  = color('yellow', $kbps . "Kb/s");
        my $codec = color('green', $stream->{codec_name});

        if ($kbps > 2) {
            printf(" Video : %s @ %s [%s] (%s)\n", $res, $rate, $codec, $depth);
        } else {
            printf(" Video : %s [%s] (%s)\n", $res, $codec, $depth);
        }
    }

    foreach my $stream (@audio) {
        my $kbps     = ($stream->{bit_rate} // 0) / 1000;
        my $channels = $stream->{channels};

        # Friendly names for the common channel counts
        my $layout = ($channels == 2) ? "2.0"
                   : ($channels == 6) ? "5.1"
                   : ($channels == 1) ? "Mono"
                   :                    "??";

        my $rate  = color('yellow', $kbps . "Kb/s");
        my $codec = color('green', $stream->{codec_name});

        if ($kbps > 2) {
            printf(" Audio : %s @ %s [%s]\n", $layout, $rate, $codec);
        } else {
            printf(" Audio : %s [%s]\n", $layout, $codec);
        }
    }

    foreach my $stream (@subtitles) {
        # The stream title (if any) is shown after the codec
        my $title = $stream->{tags}->{title} || "";

        if ($title) {
            printf(" Subtitle : [%s] (%s)\n", $stream->{codec_name}, $title);
        } else {
            printf(" Subtitle : [%s]\n", $stream->{codec_name});
        }
    }

    if (!@subtitles) {
        printf(" Subtitle : " . color("white", "NONE") . "\n");
    }

    return $summary;
}
# Return every stream entry of an ffprobe result whose codec_type equals the
# requested type (e.g. 'video', 'audio', 'subtitle'), in their original order
sub find_stream {
    my ($probe, $wanted) = @_;

    my @matches;
    for my $stream (@{$probe->{streams}}) {
        push(@matches, $stream) if $wanted eq $stream->{codec_type};
    }

    return @matches;
}
# Convert a number of seconds into a human readable string such as
# "1 hours 2 minutes 3 seconds".
#
# Params:
#   $secs - number of seconds (undef is treated as 0; fractions are dropped)
#
# Returns the non-zero units from years down to seconds, separated by spaces.
# When every unit is zero (0 or a sub-second value) "0 seconds" is returned
# rather than an empty string, so callers never print a blank duration.
sub human_time {
    my $secs = shift() // 0;

    # Each entry: label, size of the next larger unit (the value is wrapped
    # at that size first), and the size of the unit itself, all in seconds.
    # A year is 365 days and a month is 1/12th of that.
    my @units = (
        [ "years",   undef,    31536000 ],
        [ "months",  31536000, 2628000  ],
        [ "days",    2628000,  86400    ],
        [ "hours",   86400,    3600     ],
        [ "minutes", 3600,     60       ],
        [ "seconds", 60,       1        ],
    );

    my @parts;
    foreach my $unit (@units) {
        my ($label, $wrap, $size) = @$unit;

        my $remainder = defined($wrap) ? ($secs % $wrap) : $secs;
        my $amount    = int($remainder / $size);

        if ($amount > 0) {
            push(@parts, "$amount $label");
        }
    }

    if (!@parts) {
        return "0 seconds";
    }

    return join(" ", @parts);
}
# Convert a byte count into a short human readable string using 1024 based
# units with one decimal (e.g. "1.5G"). Values up to and including 1024 are
# shown as whole bytes ("512B"), and anything that is not above zero gives 0.
sub human_size {
    my $size = shift();

    # Largest unit first; a unit is only used when the size is above it
    foreach my $unit ([4, "T"], [3, "G"], [2, "M"], [1, "K"]) {
        my ($power, $suffix) = @$unit;
        my $threshold = 1024**$power;

        if ($size > $threshold) {
            return sprintf("%.1f%s", $size / $threshold, $suffix);
        }
    }

    if ($size > 0) {
        return sprintf("%dB",$size);
    }

    #k("HS: $size/0");

    return 0;
}
# Check whether an ffmpeg pixel format name describes a 10 bit format.
#
# Params:
#   $pix_format - pixel format name as reported by ffprobe (pix_fmt)
#
# Returns 1 for names that end in "10le" or "10be" (yuv420p10le, yuv422p10le,
# yuv444p10be, p010le, ...) and 0 for everything else, including a missing
# (undef) pixel format.
sub is_10bit {
    my $pix_format = shift();

    # Streams without a pixel format are simply not 10 bit
    if (!defined($pix_format)) {
        return 0;
    }

    if ($pix_format =~ /10(?:le|be)\z/) {
        return 1;
    } else {
        return 0;
    }
}
# Turn a width/height pair into a display string: the well known sizes get
# their common name (1080p, 720p, 480p), anything else is shown as WxH
sub get_resolution_string {
    my ($width, $height) = @_;

    # 1920x800 is also labelled 1080p
    return "1080p" if $width == 1920 && ($height == 1080 || $height == 800);
    return "720p"  if $width == 1280 && $height == 720;
    return "480p"  if $width == 720 && $height == 480;

    #k($width, $height);

    return $width . "x" . $height;
}
# Return the one line usage string (script name followed by an example)
sub usage {
    return sprintf("%s filename.mp4 [--keep] [--720p] [--sample]", $0);
}
# Make a string "safe" by collapsing every run of non-word characters (and
# underscores) into a single separator, and stripping separators from both
# ends. The separator defaults to "_". Returns undef for an undefined string.
sub sanitize {
    my ($str, $sep) = @_;
    $sep //= "_";

    return undef unless defined($str);

    # Convert multiple non-word sequences to the separator
    $str =~ s/[\W_]+/$sep/g;

    # The separator is a literal character so we quotemeta it
    my $literal = quotemeta($sep);

    # Remove any separators at the beginning and end
    $str =~ s/\A$literal+//;
    $str =~ s/$literal+\z//;

    return $str;
}
# vim: filetype=perl tabstop=4 shiftwidth=4 autoindent softtabstop=4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment