Skip to content

Instantly share code, notes, and snippets.

@Enchufa2
Last active December 25, 2023 11:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Enchufa2/9dce124762ba66d303ea490053a4b247 to your computer and use it in GitHub Desktop.
Save Enchufa2/9dce124762ba66d303ea490053a4b247 to your computer and use it in GitHub Desktop.
Extract a transcribed score from a video to PDF
#!/bin/python3
import argparse, textwrap
import cv2 as cv
import numpy as np
from pathlib import Path
from fpdf import FPDF
from tempfile import NamedTemporaryFile
class Selector:
    """Base class for interactive region selectors.

    Constructing a selector immediately calls the subclass's ``_window`` hook
    with the given image, which is expected to record the user's selection.
    ``crop`` then applies the subclass's ``_crop`` hook to each frame.
    """

    def __init__(self, img, name):
        self._window(name, img)

    def _window(self, name, img):
        """Show ``img`` in a window titled ``name`` and record the selection."""
        raise NotImplementedError()

    def _crop(self, img):
        """Return the selected region of ``img``."""
        raise NotImplementedError()

    def crop(self, img, bw=True, margin=None):
        """Crop ``img`` to the selection, optionally binarizing and re-padding it.

        Args:
            img: frame to crop. When ``bw`` is true it is converted with
                ``cv.COLOR_BGR2GRAY`` first, so it must be a BGR image.
            bw: if true, convert to grayscale and binarize with Otsu's
                threshold before cropping.
            margin: if ``None``, the cropped region is returned as is.
                Otherwise the rows above and below the content (pixels darker
                than 128) are trimmed and replaced by ``int(margin * h)``
                white rows on each side, ``h`` being the content height.
                The width is left untouched.

        Returns:
            The cropped (and possibly re-padded) image.
        """
        if bw:
            img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            _, img = cv.threshold(img, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
        img = self._crop(img)
        if margin is None:
            return img

        gray = img if bw else cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        # Mark dark pixels as non-zero so that their bounding box can be found
        ink = 255 * (gray < 128).astype(np.uint8)
        points = cv.findNonZero(ink)
        if points is None or len(points) == 0:
            # Blank frame: there is nothing to trim, keep the full height
            y, h = 0, img.shape[0]
        else:
            _, y, _, h = cv.boundingRect(points)

        pad = int(margin * h)
        out = np.full((h + 2 * pad,) + img.shape[1:], 255, dtype=img.dtype)
        out[pad:pad + h, :] = img[y:y + h, :]
        return out
class Rectangle(Selector):
    """Selector that crops every frame to a rectangle drawn by the user."""

    def __init__(self, img, name='Rectangle Selector'):
        super().__init__(img, name)

    def _window(self, name, img):
        """Open a fullscreen window and store the ROI returned by OpenCV."""
        cv.namedWindow(name, cv.WINDOW_NORMAL)
        cv.setWindowProperty(name, cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)
        self._roi = cv.selectROI(name, img)
        cv.destroyWindow(name)

    def _crop(self, img):
        """Slice ``img`` with the stored ROI.

        Elements 0 and 1 of the ROI are used as the column and row offsets,
        elements 2 and 3 as the column and row extents.
        """
        left, top, width, height = self._roi
        bottom = top + height
        right = left + width
        return img[top:bottom, left:right]
class MagicWand(Selector):
    """Selector that crops frames to the flood-filled area around a clicked pixel.

    The user clicks a pixel in the window and presses a key; every frame is
    later cropped to the bounding box of the region that ``cv.floodFill``
    reaches from that pixel.
    """
    # Based on https://github.com/alkasm/magicwand

    def __init__(self, img, name='Magic Wand Selector', connectivity=4, tolerance=32):
        # Must exist before the window is shown, so a missing click is detectable
        self.pos = None
        super().__init__(img, name)
        # floodFill requires a mask 2 pixels larger than the image in each dimension
        self._flood_mask = np.zeros((img.shape[0]+2, img.shape[1]+2), dtype=np.uint8)
        self._flood_fill_flags = (
            connectivity | cv.FLOODFILL_FIXED_RANGE | cv.FLOODFILL_MASK_ONLY | 255 << 8
        )  # 255 << 8 tells to fill with the value 255
        self.tolerance = (tolerance,) * 3

    def _window(self, name, img):
        """Show ``img`` fullscreen and wait for a key press after a click.

        Raises:
            RuntimeError: if the window is dismissed before any left click.
        """
        cv.namedWindow(name, cv.WINDOW_NORMAL)
        cv.setWindowProperty(name, cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)
        cv.setMouseCallback(name, self._mouse_callback)
        cv.imshow(name, img)
        cv.waitKey(0)
        cv.destroyWindow(name)
        if self.pos is None:
            # Fail here with a clear message instead of later inside _crop
            raise RuntimeError('no pixel was selected with the magic wand')

    def _mouse_callback(self, event, x, y, flags, *userdata):
        """Remember the position (and modifier keys) of the last left click."""
        if event != cv.EVENT_LBUTTONDOWN:
            return
        self.pos = (x, y)
        self._mod = flags & (cv.EVENT_FLAG_ALTKEY + cv.EVENT_FLAG_SHIFTKEY)

    def _crop(self, img):
        """Return the bounding box of the region flood-filled from ``self.pos``."""
        self._flood_mask[:] = 0
        cv.floodFill(
            img,
            self._flood_mask,
            self.pos,
            0,
            self.tolerance,
            self.tolerance,
            self._flood_fill_flags,
        )
        # Drop the 1-pixel border so that the mask lines up with the image
        mask = self._flood_mask[1:-1, 1:-1]
        rows, cols = np.nonzero(mask)
        # max() is inclusive, hence the +1 to keep the last filled row/column
        return img[rows.min():rows.max() + 1, cols.min():cols.max() + 1]
def mse(img1, img2):
    """Return the squared error per pixel between two frames over a sub-window.

    Only a window one third of each dimension long is compared. It spans
    rows ``[h/6, h/2)`` and columns ``[w/6, w/2)`` of ``img1``'s shape, so it
    is not centered on the frame. The summed squared difference is divided
    by the window's rows times columns (channels, if any, are not counted).

    Args:
        img1: first image; its shape defines the compared window.
        img2: second image, sliced with the same window.

    Returns:
        The error as a float, or ``float('inf')`` when the two windows do not
        have the same shape and therefore cannot be compared.
    """
    row = int(img1.shape[0] / 3 / 2)
    col = int(img1.shape[1] / 3 / 2)
    win1 = img1[row:3 * row, col:3 * col]
    win2 = img2[row:3 * row, col:3 * col]
    if win1.shape != win2.shape:
        return float('inf')
    # Cast to float first: uint8 subtraction would wrap around
    err = np.sum((win1.astype('float') - win2.astype('float')) ** 2)
    return err / float(win1.shape[0] * win1.shape[1])
def parse_video(filename, thr, skip=0.0, every=1.0, bw=True, margin=None, use_mw=True):
    """Sample frames from a video and keep those that differ enough.

    The first sampled frame is shown to the user to select the region of
    interest; every sampled frame is then cropped to that selection and
    compared (via ``mse``) with the last frame that was kept.

    Args:
        filename: path of the video file, passed to ``cv.VideoCapture``.
        thr: pair ``(low, high)``; a frame is kept when its MSE against the
            last kept frame lies strictly between both values.
        skip: seconds to skip at the beginning of the video.
        every: seconds to skip after each sampled frame.
        bw: forwarded to ``Selector.crop`` (grayscale + thresholding).
        margin: forwarded to ``Selector.crop`` (top-bottom margin ratio).
        use_mw: use the ``MagicWand`` selector if true, ``Rectangle`` otherwise.

    Returns:
        A tuple ``(stack, diffs)``: the list of kept frames (the first sampled
        frame is always kept) and the list of MSE values, one per sampled
        frame after the first.

    Raises:
        OSError: if the video cannot be opened or no frame can be read.
    """
    def skip_seconds(video, t):
        # Seconds are converted to a frame count with the reported FPS
        for _ in range(int(t * video.get(cv.CAP_PROP_FPS))):
            video.grab()

    video = cv.VideoCapture(filename)
    try:
        if not video.isOpened():
            raise OSError('video cannot be opened')
        skip_seconds(video, skip)
        success, frame = video.read()
        if not success:
            # Without a frame there is nothing to show in the selector
            raise OSError('no frame could be read from the video')

        sel = MagicWand(frame) if use_mw else Rectangle(frame)

        stack = []
        diffs = []
        while success:
            frame = sel.crop(frame, bw, margin)
            if stack:
                diffs.append(mse(stack[-1], frame))
            # The very first frame has nothing to be compared against
            if not diffs or thr[0] < diffs[-1] < thr[1]:
                stack.append(frame)
            skip_seconds(video, every)
            success, frame = video.read()
        return stack, diffs
    finally:
        video.release()
def write_pdf(filename, stack, thr=True):
    """Lay out a list of images on PDF pages and write the document.

    Images are stacked vertically while the height/width ratio of the stack
    stays below that of an A4 sheet (297/210); otherwise a new page is
    started. Each page image is written to a temporary PNG and placed with
    ``pdf.image(name, 10, 20, 190)``.

    Args:
        filename: path of the PDF file to write.
        stack: non-empty list of images (2-D grayscale or 3-D color arrays).
        thr: unused; kept for backward compatibility.

    Raises:
        ValueError: if ``stack`` is empty.
    """
    if not stack:
        raise ValueError('no frames to write')

    # Temporary PNGs are kept open (hence on disk) until the PDF is written
    tmp_files = []

    def add_image(pdf, page):
        tmp = NamedTemporaryFile(suffix='.png')
        tmp_files.append(tmp)
        cv.imwrite(tmp.name, page)
        pdf.image(tmp.name, 10, 20, 190)

    def pad_right(img, width):
        # Extend img with white columns on the right up to the given width
        if img.shape[1] == width:
            return img
        # shape[2:] is empty for grayscale images and (channels,) for color ones
        out = np.full((img.shape[0], width) + img.shape[2:], 255, dtype=img.dtype)
        out[:, :img.shape[1]] = img
        return out

    def concatenate(img1, img2):
        width = max(img1.shape[1], img2.shape[1])
        return np.concatenate([pad_right(img1, width), pad_right(img2, width)])

    pdf = FPDF()
    pdf.add_page()
    page = stack[0]
    try:
        for img in stack[1:]:
            rel = (page.shape[0] + img.shape[0]) / page.shape[1]
            if rel < 297 / 210:  # A4
                page = concatenate(page, img)
            else:
                add_image(pdf, page)
                pdf.add_page()
                page = img
        add_image(pdf, page)
        pdf.output(filename)
    finally:
        for tmp in tmp_files:
            tmp.close()
def _build_parser():
    """Build the command-line parser of the script."""
    usage_notes = (
        'With the magic wand (default), click a blank pixel in the score and press Enter.\n'
        'Without it, make a rectangular selection and press Enter.'
    )
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=usage_notes,
    )
    cli.add_argument('video', type=Path, help='path to video file')
    cli.add_argument('--skip', type=float, default=0.0, help='seconds to skip')
    cli.add_argument('--every', type=float, default=1.0, help='sample every x seconds')
    cli.add_argument(
        '--crop', metavar='MARGIN', type=float,
        help='crop and add top-bottom specified margin ratio')
    cli.add_argument(
        '--no-mw', dest='mw', action='store_false',
        help='no magic wand, use a rect selector instead')
    cli.add_argument(
        '--no-bw', dest='bw', action='store_false',
        help='no image thresholding')
    cli.add_argument('--mse', action='store_true', help='show MSE graph')
    cli.add_argument(
        '--thr', nargs=2, type=float, default=[5e3, 1e5],
        help='MSE thresholds')
    return cli


if __name__ == '__main__':
    parser = _build_parser()
    args = parser.parse_args()

    # Extract the frames and write them next to the video, as <video>.pdf
    stack, diffs = parse_video(
        str(args.video),
        args.thr,
        args.skip,
        args.every,
        args.bw,
        args.crop,
        args.mw,
    )
    pdf_path = args.video.with_suffix('.pdf')
    write_pdf(str(pdf_path), stack)

    if args.mse:
        # Plotting dependencies are only needed (and imported) on request
        import pandas as pd
        import matplotlib.pyplot as plt
        pd.DataFrame({'values': diffs}).plot()
        plt.show(block=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment