@carylee
Created November 8, 2020 18:47
Face-aware video cropping
#!/usr/bin/env python3
import click
import ffmpeg
import face_recognition
from PIL import Image
import numpy as np


def face_location(filename):
    """Return the bounding box of the first face found near the 50-second mark."""
    probe = ffmpeg.probe(filename)
    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
    width = int(video_stream['width'])
    height = int(video_stream['height'])

    # Decode one second of raw RGB frames starting at the 50-second mark.
    out, _ = (
        ffmpeg
        .input(filename, ss=50, t=1)
        .output('pipe:', format='rawvideo', pix_fmt='rgb24')
        .run(capture_stdout=True)
    )
    # Raw rgb24 frames are row-major: (frame, height, width, channel).
    video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3])
    image = video[0, :, :, :]

    face_locations = face_recognition.face_locations(image)
    return face_locations[0]


# print("I found {} faces in this photograph.".format(len(face_locations)))
# for face_location in face_locations:
#     # Print the location of each face in this image
#     top, right, bottom, left = face_location
#     print("A face is located at pixel location Top: {}, Left: {}, Bottom: {}, Right: {}".format(top, left, bottom, right))
#     # You can access the actual face itself like this:
#     face_image = image[top:bottom, left:right]
#     pil_image = Image.fromarray(face_image)
#     pil_image.show()


@click.command()
@click.argument('filename', type=click.Path(exists=True))
@click.argument('output')
def crop(filename, output):
    probe = ffmpeg.probe(filename)
    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
    width = int(video_stream['width'])
    height = int(video_stream['height'])
    print(video_stream)

    # Rotated videos report their dimensions pre-rotation, so swap them.
    rotation = video_stream.get('tags', {}).get('rotate')
    if rotation in ('90', '270'):
        height, width = width, height

    # face_recognition returns boxes in (top, right, bottom, left) order.
    top, right, bottom, left = face_location(filename)

    # Target output size and aspect ratio.
    out_w = 320
    out_h = 360
    r = out_w / out_h

    # Crop the full width; pick the crop height to match the output aspect ratio.
    crop_to_h = width / r
    face_height = bottom - top
    # Leave a third of the leftover vertical space above the face.
    above_face = (crop_to_h - face_height) / 3
    print("Above face: {}".format(above_face))

    (
        ffmpeg
        .input(filename)
        .filter('tpad', start_duration=20)
        .filter('crop', width, width / r, 0, top - above_face)
        .filter('scale', out_w, out_h)
        .filter('fps', fps=30)
        .output('{}.mp4'.format(output))
        .overwrite_output()
        .run()
    )


if __name__ == '__main__':
    crop()
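A minimal usage sketch, assuming the script above is saved as crop.py and that click, ffmpeg-python, face_recognition, numpy, and Pillow are installed (the file names here are illustrative):

    python3 crop.py input.mp4 cropped

This probes input.mp4, finds a face near the 50-second mark, crops a face-centered region matching the 320x360 aspect ratio, and writes cropped.mp4 at 30 fps.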
@stefanpenner

Thanks for this, I appreciate it! I used this as the basis for my own script, which saved me a good amount of time yesterday. I did run into issues with your specific approach to cropping/scaling; specifically, it would fail when attempting to crop videos with certain aspect ratios.

The following was inspired by your code, but was able to handle the more ... unique ... input types I had to process.

https://gist.github.com/stefanpenner/ca0334f617acc02e67482947d1fcf7b0
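For reference, a minimal sketch of one way to keep the crop rectangle inside the frame for arbitrary input dimensions; the helper name and the clamping strategy are assumptions for illustration, not code from either gist:

def safe_crop_window(width, height, out_w, out_h, face_top, face_bottom):
    # Largest rectangle with the output aspect ratio that fits inside the frame.
    r = out_w / out_h
    crop_w = min(width, int(height * r))
    crop_h = int(crop_w / r)
    # Keep roughly a third of the spare vertical space above the face,
    # clamped so the rectangle never leaves the frame.
    above_face = max((crop_h - (face_bottom - face_top)) / 3, 0)
    y = min(max(face_top - above_face, 0), height - crop_h)
    x = (width - crop_w) / 2
    return crop_w, crop_h, int(x), int(y)

The returned values can be passed straight to ffmpeg's crop filter as width, height, x, y.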
