# mildsunrise/sona_gif.py

## sona_gif.py
# 1. download video:
#    youtube-dl -f 399 https://www.youtube.com/watch?v=hawThTG5No8 -o youtube_video.mkv
# 2. extract important segment:
#    ffmpeg -ss 1:08 -to 1:27 -i youtube_video.mkv -c copy -map 0:0 out.mkv
# 3. run this to generate out.gif

from math import tau
import numpy as np
import cv2
import av

import logging
logging.basicConfig()
logging.getLogger('libav').setLevel(logging.DEBUG)

# rgb24 -> rgb8 quantizer with random spatial-only dither
def rgb8_random_dither(shape):
  """Build an rgb24 -> rgb8 (3-3-2 bit) quantizer with a fixed random dither.

  The dither pattern is drawn once, when this factory is called, and the
  returned function adds that same pattern to every image it is given, so
  the noise varies across pixels but not between calls.

  Args:
    shape: sequence whose first two entries give the first two dimensions
      of the images to quantize; further entries are ignored.  Tuples,
      lists and array-like shapes are all accepted.

  Returns:
    A function ``quantize(x)``.  ``x`` is indexed as ``x[row, col, channel]``
    with three channels whose values are divided by 255 before scaling; the
    result is an ``av.VideoFrame`` created with ``format='rgb8'``.
  """
  # tuple() so that a list (list + tuple raises TypeError) or an ndarray
  # (where `+ (3,)` would add element-wise) also works as `shape`
  dither_shape = tuple(shape[:2]) + (3,)
  dither = (np.random.random_sample(dither_shape) - 0.5).astype('float32')
  # highest code per channel: 3 bits, 3 bits, 2 bits
  levels = np.array([0b111, 0b111, 0b11], dtype='float32')

  def quantize(x):
    # scale each channel from 0..255 to 0..max code, then dither and round
    scaled = x.astype('float32') / 255 * levels
    codes = np.round(scaled + dither).astype('int')
    # dither can push values one step outside the valid range, hence the clip
    c0 = np.clip(codes[:,:,0], 0, 0b111)
    c1 = np.clip(codes[:,:,1], 0, 0b111)
    c2 = np.clip(codes[:,:,2], 0, 0b11)
    # pack as (msb) channel 0 | channel 1 | channel 2 (lsb)
    packed = (c0 << 5) + (c1 << 2) + c2
    return av.VideoFrame.from_ndarray(packed.astype('uint8'), format='rgb8')
  return quantize


FRAME_START = 61   # animation scene starts (but not exactly at beginning)
FRAME_END = 467    # animation seeks to beginning
           #468      first frame that shows transition to black
           #470      actual frame that matches FRAME_START
# Scale factor used at FRAME_START; the main loop blends it towards 1.0 as
# the eased progress approaches 1.
RATIO = 0.906335
# Maps linear progress in [0, 1] to eased progress; with these constants
# EASE_CURVE(0) == 0 and EASE_CURVE(1) is ~1.
EASE_CURVE = lambda x: np.sin(tau/4 * x*.86) * 1.0247
# Threshold on the blurred squared difference for the optional hysteresis
# mask.  None (default) copies the whole bounding box on every frame and
# skips the blur entirely; the disabled experiment in this script used 9.
HYSTERESIS_THRESHOLD = None

# grab FRAME_END (FIXME: do a seek, which is the correct & efficient thing)
im_end = None
with av.open('out.mkv') as vin:
  for iframe in vin.decode():
    if iframe.index == FRAME_END:
      im_end = iframe.to_ndarray(format='rgb24')
      break
if im_end is None:
  # without this check a short input only failed later with a NameError
  raise RuntimeError('out.mkv has no frame with index {}'.format(FRAME_END))

with av.open('out.mkv') as vin, av.open('out.gif', 'w') as vout:
  sin = vin.streams[0]

  #sout = vout.add_stream('libx264', rate=sin.rate, bit_rate=1986000)
  sout = vout.add_stream('gif', rate=sin.rate, pix_fmt='rgb8')
  sout.width = sin.width
  sout.height = sin.height

  def encode_frame(frame=None, pts=None):
    """Encode `frame` on the output stream and mux the resulting packets.

    `frame.pts` is overwritten with `pts` (None at every call site in this
    script).  Calling with no frame passes None to the encoder, which the
    script does once at the very end.
    NOTE(review): presumably None flushes the encoder and a None pts lets
    it assign timestamps -- confirm against the PyAV documentation.
    """
    if frame is not None: frame.pts = pts
    for packet in sout.encode(frame): vout.mux(packet)

  h, w = im_end.shape[:2]
  im_orig = im_end.copy()   # image that accumulates the output
  im = im_orig.copy()       # destination buffer for each warped frame
  # only pixels inside this rectangle are ever updated in im_orig
  bbox = np.zeros(im.shape[:2], dtype=bool)
  bbox[460:900, 585:900] = True
  #bbox[:, :] = True
  quant = rgb8_random_dither(im.shape)

  oframe = None
  for iframe in vin.decode(sin):
    idx = iframe.index
    if idx < FRAME_START: continue
    if idx > FRAME_END: break
    print('Processing: {}'.format(idx))
    frame = iframe.to_ndarray(format='rgb24')

    # uniform scale by `ratio` that keeps the image centre (w/2, h/2) fixed
    fx = EASE_CURVE( (idx - FRAME_START) / (FRAME_END - FRAME_START) )
    ratio = RATIO * (1-fx) + fx
    tf = np.array([[ ratio, 0, w * (1-ratio)/2 ], [ 0, ratio, h * (1-ratio)/2 ]])
    cv2.warpAffine(frame, tf, (w, h), im, cv2.INTER_AREA, cv2.BORDER_TRANSPARENT)

    # (somewhat aggressive) hysteresis -- only evaluated when enabled, so the
    # default path no longer pays for a blur whose result was discarded
    if HYSTERESIS_THRESHOLD is None:
      mask = bbox
    else:
      diff = im_orig.astype('int16') - im.astype('int16')
      diff = cv2.GaussianBlur(diff.astype('float32'), (0,0), 2)
      diff_mag = (diff ** 2).sum(2)
      mask = bbox & (diff_mag > HYSTERESIS_THRESHOLD)

    im_orig[mask] = im[mask]
    #oframe = av.VideoFrame.from_ndarray(im_orig, format='rgb24')
    oframe = quant(im_orig)
    encode_frame(oframe)

  if oframe is None:
    # without this check an empty frame range failed with a NameError below
    raise RuntimeError('out.mkv has no frames in [{}, {}]'.format(FRAME_START, FRAME_END))

  # append 2 copies of last frame
  for _ in range(2): encode_frame(oframe)

  encode_frame()
	# 1. download video:
	#    youtube-dl -f 399 https://www.youtube.com/watch?v=hawThTG5No8 -o youtube_video.mkv
	# 2. extract important segment:
	#    ffmpeg -ss 1:08 -to 1:27 -i youtube_video.mkv -c copy -map 0:0 out.mkv
	# 3. run this to generate out.gif

from math import tau
import numpy as np
import cv2
import av

import logging
logging.basicConfig()
logging.getLogger('libav').setLevel(logging.DEBUG)

	# rgb24 -> rgb8 quantizer with random spatial-only dither
def rgb8_random_dither(shape):
  """Build an rgb24 -> rgb8 (3-3-2 bit) quantizer with a fixed random dither.

  The dither pattern is drawn once, when this factory is called, and the
  returned function adds that same pattern to every image it is given, so
  the noise varies across pixels but not between calls.

  Args:
    shape: sequence whose first two entries give the first two dimensions
      of the images to quantize; further entries are ignored.  Tuples,
      lists and array-like shapes are all accepted.

  Returns:
    A function ``quantize(x)``.  ``x`` is indexed as ``x[row, col, channel]``
    with three channels whose values are divided by 255 before scaling; the
    result is an ``av.VideoFrame`` created with ``format='rgb8'``.
  """
  # tuple() so that a list or an ndarray also works as `shape`
  dither_shape = tuple(shape[:2]) + (3,)
  dither = (np.random.random_sample(dither_shape) - 0.5).astype('float32')
  # highest code per channel: 3 bits, 3 bits, 2 bits
  levels = np.array([0b111, 0b111, 0b11], dtype='float32')

  def quantize(x):
    # scale each channel from 0..255 to 0..max code, then dither and round
    scaled = x.astype('float32') / 255 * levels
    codes = np.round(scaled + dither).astype('int')
    # dither can push values one step outside the valid range, hence the clip
    c0 = np.clip(codes[:,:,0], 0, 0b111)
    c1 = np.clip(codes[:,:,1], 0, 0b111)
    c2 = np.clip(codes[:,:,2], 0, 0b11)
    # pack as (msb) channel 0 | channel 1 | channel 2 (lsb)
    packed = (c0 << 5) + (c1 << 2) + c2
    return av.VideoFrame.from_ndarray(packed.astype('uint8'), format='rgb8')
  return quantize


	# NOTE(review): this section appears to repeat the script found earlier in
	# this file (same constants and steps); consider removing one of the copies.
FRAME_START = 61   # animation scene starts (but not exactly at beginning)
FRAME_END = 467    # animation seeks to beginning
           #468      first frame that shows transition to black
           #470      actual frame that matches FRAME_START
# Scale factor used at FRAME_START; the main loop blends it towards 1.0 as
# the eased progress approaches 1.
RATIO = 0.906335
# Maps linear progress in [0, 1] to eased progress; with these constants
# EASE_CURVE(0) == 0 and EASE_CURVE(1) is ~1.
EASE_CURVE = lambda x: np.sin(tau/4 * x*.86) * 1.0247
# Threshold on the blurred squared difference for the optional hysteresis
# mask.  None (default) copies the whole bounding box on every frame and
# skips the blur entirely; the disabled experiment in this script used 9.
HYSTERESIS_THRESHOLD = None

# grab FRAME_END (FIXME: do a seek, which is the correct & efficient thing)
im_end = None
with av.open('out.mkv') as vin:
  for iframe in vin.decode():
    if iframe.index == FRAME_END:
      im_end = iframe.to_ndarray(format='rgb24')
      break
if im_end is None:
  # make a too-short input fail with a clear message instead of a NameError
  raise RuntimeError('out.mkv has no frame with index {}'.format(FRAME_END))

with av.open('out.mkv') as vin, av.open('out.gif', 'w') as vout:
  sin = vin.streams[0]

  #sout = vout.add_stream('libx264', rate=sin.rate, bit_rate=1986000)
  sout = vout.add_stream('gif', rate=sin.rate, pix_fmt='rgb8')
  sout.width = sin.width
  sout.height = sin.height

  def encode_frame(frame=None, pts=None):
    """Encode `frame` on the output stream and mux the resulting packets.

    `frame.pts` is overwritten with `pts` (None at every call site in this
    script).  Calling with no frame passes None to the encoder, which the
    script does once at the very end.
    NOTE(review): presumably None flushes the encoder and a None pts lets
    it assign timestamps -- confirm against the PyAV documentation.
    """
    if frame is not None: frame.pts = pts
    for packet in sout.encode(frame): vout.mux(packet)

  h, w = im_end.shape[:2]
  im_orig = im_end.copy()   # image that accumulates the output
  im = im_orig.copy()       # destination buffer for each warped frame
  # only pixels inside this rectangle are ever updated in im_orig
  bbox = np.zeros(im.shape[:2], dtype=bool)
  bbox[460:900, 585:900] = True
  #bbox[:, :] = True
  quant = rgb8_random_dither(im.shape)

  oframe = None
  for iframe in vin.decode(sin):
    idx = iframe.index
    if idx < FRAME_START: continue
    if idx > FRAME_END: break
    print('Processing: {}'.format(idx))
    frame = iframe.to_ndarray(format='rgb24')

    # uniform scale by `ratio` that keeps the image centre (w/2, h/2) fixed
    fx = EASE_CURVE( (idx - FRAME_START) / (FRAME_END - FRAME_START) )
    ratio = RATIO * (1-fx) + fx
    tf = np.array([[ ratio, 0, w * (1-ratio)/2 ], [ 0, ratio, h * (1-ratio)/2 ]])
    cv2.warpAffine(frame, tf, (w, h), im, cv2.INTER_AREA, cv2.BORDER_TRANSPARENT)

    # (somewhat aggressive) hysteresis -- only evaluated when enabled
    if HYSTERESIS_THRESHOLD is None:
      mask = bbox
    else:
      diff = im_orig.astype('int16') - im.astype('int16')
      diff = cv2.GaussianBlur(diff.astype('float32'), (0,0), 2)
      diff_mag = (diff ** 2).sum(2)
      mask = bbox & (diff_mag > HYSTERESIS_THRESHOLD)

    im_orig[mask] = im[mask]
    #oframe = av.VideoFrame.from_ndarray(im_orig, format='rgb24')
    oframe = quant(im_orig)
    encode_frame(oframe)

  if oframe is None:
    # make an empty frame range fail with a clear message instead of a NameError
    raise RuntimeError('out.mkv has no frames in [{}, {}]'.format(FRAME_START, FRAME_END))

  # append 2 copies of last frame
  for _ in range(2): encode_frame(oframe)

  encode_frame()