@BigRoy
Created May 20, 2022 10:21
Extracted from maya-capture-ui-cb's ffmpeg overlays to share the overall implementation details.
import logging
import subprocess
from datetime import datetime
import os
import re
import tempfile
log = logging.getLogger(__name__)
# Locate ffmpeg if a full path is provided, otherwise fall back to the
# plain ffmpeg executable name
FFMPEG = os.environ.get("FFMPEG_PATH", None)
if FFMPEG is None:
    # Use the one from OpenPype
    try:
        from openpype.lib import get_ffmpeg_tool_path
        FFMPEG = get_ffmpeg_tool_path("ffmpeg")
    except ImportError:
        FFMPEG = "ffmpeg"
# Locate the fixtures for the custom font and logo overlay
FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")
FONT_PATH = os.path.join(FIXTURES, "DejaVuSansMono.ttf").replace("\\", "/")
FONT_PATH = FONT_PATH.replace(":", "\\:")  # escape colon
LOGO_PATH = os.path.join(FIXTURES, "logo.png").replace("\\", "/")
# Text draw string
TEXT_DRAW = (
    'drawtext=fontsize=16:'
    'fontcolor=white:'
    "fontfile='{0}':".format(FONT_PATH) +
    'box=1:boxcolor=black@0.60:boxborderw=2:'
    "text='{text}':"
    'x={x}:'
    'y={y}'
)
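# For example (hypothetical values), TEXT_DRAW.format(text="hello", x="10",
# y="10") yields a single drawtext filter roughly like:
#
#   drawtext=fontsize=16:fontcolor=white:fontfile='<FONT_PATH>':
#       box=1:boxcolor=black@0.60:boxborderw=2:text='hello':x=10:y=10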
def draw_text(text, x, y, fps=25.0):
    """Return a complex drawtext filter string.

    When `text` is a list of values it will set the values per frame using
    the `sendcmd` functionality of ffmpeg. This writes the commands to a
    temporary file to allow for large commands to be created (500+ frames).

    Args:
        text (str, list): The value or values to draw.
        x (str): The x position.
        y (str): The y position.
        fps (float): The frames per second at which to interpret the list
            of values. This is required separately to accurately "burn in"
            the multiple values at the correct times.

    Returns:
        str: The complex filter string.

    """
    # TODO: Optimize this for large videos 10000+ frames (400 seconds)
    fps = float(fps)
    eps = 0.001
    if eps > 1.0 / fps:
        # This would only happen for 1000+ FPS.
        raise RuntimeError("The FPS is too high for the precision to "
                           "write multiple values over time.")

    cmd = ""
    is_dynamic = isinstance(text, list)
    if is_dynamic:
        # Write a sendcmd file that reinitializes the drawtext value just
        # before each frame's timestamp
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
            for i, value in enumerate(text):
                seconds = i / fps
                # Escape special character
                value = str(value).replace(":", "\\:")
                line = ("{start} drawtext reinit text='{value}';"
                        "\n".format(start=seconds - eps,
                                    value=value))
                f.write(line)
            f.flush()
            path = f.name

        path = path.replace("\\", "/")
        path = path.replace(":", "\\:")
        cmd += "sendcmd=f='{0}', ".format(path)

    value = text[0] if is_dynamic else text
    draw = TEXT_DRAW.format(text=value, x=x, y=y)
    cmd += draw
    return cmd
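# For illustration, a dynamic call like draw_text(["35.0 mm", "36.0 mm"],
# x="10", y="10") (hypothetical values) writes a sendcmd file with one line
# per frame, roughly:
#
#   -0.001 drawtext reinit text='35.0 mm';
#   0.039 drawtext reinit text='36.0 mm';
#
# and returns a filter string shaped like:
#
#   sendcmd=f='<tempfile>', drawtext=fontsize=16:...:text='35.0 mm':x=10:y=10
#
# Note the temporary sendcmd files are created with delete=False and are not
# cleaned up afterwards.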
def overlay_video(source,
                  output,
                  start,
                  end,
                  scene,
                  username,
                  focal_length,
                  fps=25.0,
                  logo=True,
                  include_alpha=False,
                  audio_track=None,
                  create_no_window=False,
                  verbose=False):
    """Overlay information on a video using ffmpeg"""

    # Ensure integers
    start = int(start)
    end = int(end)

    # Get timestamp
    date = datetime.now()
    timestamp = date.strftime("%Y-%m-%d %H:%M:%S")
    timestamp = timestamp.replace(":", "\\:")  # escape

    # region text draws
    text_draws = []

    # drawtext focal length at top center
    FOCAL_FORMAT = "{0:.1f} mm"
    if isinstance(focal_length, list):
        focal_length = [FOCAL_FORMAT.format(value) for value in focal_length]
    else:
        focal_length = FOCAL_FORMAT.format(focal_length)
    draw = draw_text(text=focal_length,
                     x="(w-text_w)/2",
                     y="10")
    text_draws.append(draw)

    # drawtext username at top center-right
    draw = TEXT_DRAW.format(text=username,
                            x="(w-text_w)/1.5",
                            y="10")
    text_draws.append(draw)

    # drawtext timestamp at top right
    draw = TEXT_DRAW.format(text=timestamp,
                            x="(w-text_w)-10",
                            y="10")
    text_draws.append(draw)

    # drawtext frame range at bottom right
    frame = "{current} [ {start} : {end} ]".format(
        current="%{eif:n+" + str(start) + ":d}",
        start=start,
        end=end
    )
    frame = frame.replace(":", "\\:")  # escape
    draw = TEXT_DRAW.format(text=frame,
                            x="(w-text_w)-10",
                            y="(h-text_h)-10")
    text_draws.append(draw)

    # drawtext scene name at bottom left
    draw = TEXT_DRAW.format(text=scene,
                            x="10",
                            y="(h-text_h)-10")
    text_draws.append(draw)
    # endregion

    render(source, output, text_draws,
           fps=fps,
           start=start,
           logo=logo,
           include_alpha=include_alpha,
           audio_track=audio_track,
           create_no_window=create_no_window,
           verbose=verbose)
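# The resulting overlay layout, for reference (the logo position comes from
# the overlay=10:10 in render() below):
#
#   logo (top left)   focal length (top center)   username   timestamp (top right)
#   scene name (bottom left)                         frame range (bottom right)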
def render(source,
           output,
           text_draws=tuple(),
           fps=25.0,
           start=0,
           logo=True,
           include_alpha=False,
           audio_track=None,
           create_no_window=False,
           verbose=False):
    """Render source to output with ffmpeg"""

    # Load source video
    cmd = r'{exe} -y '.format(exe=FFMPEG)

    # Test whether it's likely an image sequence so we can force the
    # correct framerate and start frame.
    is_sequence = bool(re.search("%[0-9]*d", source))
    if is_sequence:
        # todo: Allow non-integer framerates (e.g. NTSC at 30000/1001)
        # See: https://video.stackexchange.com/a/13074
        cmd += ' -framerate {framerate} '.format(framerate=int(fps))
        if start != 0:
            # Allow ffmpeg to read the image sequence from the start frame
            cmd += ' -start_number {start} '.format(start=int(start))
    cmd += ' -i "{source}" '.format(source=source)

    # Force forward slashes
    cmd = cmd.replace("\\", "/")

    # Load logo input
    cmd += ' -i "{source}" '.format(source=LOGO_PATH).replace("\\", "/")
    if audio_track:
        cmd += ' -i "{source}" '.format(source=audio_track)

    # Start a complex filter (also fixing the H.264 requirement that width
    # and height are divisible by 2)
    cmd += (
        ' -filter_complex "'
        '[0:v]scale=trunc(iw/2)*2:trunc(ih/2)*2'  # make divisible by 2
    )
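    # For reference, with the logo and text draws enabled the finished
    # filtergraph built below is shaped like this (drawtext parameters
    # abbreviated):
    #
    #   [0:v]scale=trunc(iw/2)*2:trunc(ih/2)*2[scaled];
    #   [scaled][1:v]overlay=10:10[ol];
    #   [ol]drawtext=..., drawtext=..., drawtext=...[out]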
    # Overlay the logo
    if logo:
        cmd += "[scaled];[scaled][1:v]overlay=10:10"  # overlay logo

    # Merge the text draws into the command
    if text_draws:
        cmd += "[ol];[ol]"  # keep video filter open
        draw_cmd = ", ".join(text_draws)  # merge text draws
        cmd += draw_cmd

    # End the video filter, map outputs and set compression
    cmd += '[out]" -map "[out]" '  # map filter output to video stream
    if audio_track:
        cmd += ' -map "2:a" '  # map explicit audio track
    else:
        # If no explicit audio track is provided try to map the audio
        # of the first input if it contains any
        cmd += ' -map "a:0?" '  # map first input's audio (if available)

    # Decide on the codec based on alpha plus whether the output is an
    # image sequence
    is_sequence_output = bool(re.search("%[0-9]*d", output))
    if not is_sequence_output:
        if not include_alpha:
            # Defaults to libx264.
            # Use the baseline profile to avoid Adobe players skipping frames
            cmd += ' -profile:v baseline '
            # Increase quality (default: 23)
            cmd += ' -crf 19 '
            # Ensure a pixel format compatible with the baseline profile
            cmd += '-pix_fmt yuv420p '
        else:
            # .mov with ProRes 4444 with alpha
            cmd += " -codec prores_ks " \
                   " -pix_fmt yuva444p10le " \
                   " -alpha_bits 16 " \
                   " -profile:v 4444 " \
                   " -f mov "
    else:
        print("Sequence output codecs are not implemented, "
              "results could be unexpected..")

    if audio_track:
        # Copy the audio stream as-is
        cmd += " -c:a copy "

    # Define the output path
    cmd += ' "{output}"'.format(output=output)
    if verbose:
        log.info(cmd)

    kwargs = {}
    if create_no_window:
        CREATE_NO_WINDOW = 0x08000000
        kwargs["creationflags"] = CREATE_NO_WINDOW

    try:
        subprocess.check_output(cmd, stderr=subprocess.STDOUT, **kwargs)
    except subprocess.CalledProcessError as exc:
        log.error(exc)
        log.error("STDOUT:\n{0}".format(exc.output))
        raise RuntimeError("Failed ffmpeg render with overlay.")
@BigRoy (Author) commented May 20, 2022:
For animated focal lengths in the overlay you'll need to pass a list of focal length values to the command (one value per frame!).
This is how we query those values for the capture duration:

from maya import cmds


def _get_focal_length(cam, start, end):
    plug = "{0}.focalLength".format(cam)
    if not cmds.listConnections(plug, destination=False, source=True):
        # Static: no incoming animation, a single value suffices
        return cmds.getAttr(plug)
    else:
        # Dynamic: sample the attribute once per frame over the range,
        # inclusive of the end frame
        return [cmds.getAttr(plug, time=t) for t in range(int(start),
                                                          int(end) + 1)]
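Note that `draw_text` maps the i-th value in this list to timestamp `i / fps`, so the list is expected to hold exactly one value per frame of the capture, starting at the first frame.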

And then you can pass that result into the function:

source = "D:/video.mov"
output = "D:/video_overlay.mov"
start = 1
end = 100
scene = "source_filename.ma"
username = "royn"
cam = "camera1"  # the captured camera (example name)

# Get the animated focal length from the camera in the current Maya scene
focal_length = _get_focal_length(cam, start, end)

overlay_video(source, output, start, end, scene, username, focal_length,
              verbose=True)
