|
#! /usr/bin/env python |
|
|
|
import os |
|
import sys |
|
|
|
|
|
# If the vision/OCR dependencies are not importable locally, re-execute this
# script inside a Docker image that ships them, and exit with that run's
# status.  The container mounts the CWD at /mnt and runs as the invoking
# user so debug images in frames/ are owned correctly.
try:
    import cv2
    import numpy as np
    import pytesseract
except ImportError:
    # NOTE(review): os.system returns the raw wait status on Unix (exit code
    # << 8), so sys.exit here propagates a shifted value — confirm if the
    # exact exit code matters to callers.
    sys.exit(
        os.system(
            "docker run --rm "
            + "--volume {}:/mnt ".format(os.getcwd())
            + "--user {}:{} ".format(os.getuid(), os.getgid())
            + "--workdir /mnt "
            # + "fbcotter/docker-tensorflow-opencv "
            + "ricktorzynski/ocr-tesseract-docker "
            + "./watch {}".format(" ".join(sys.argv[1:]))
        )
    )
|
|
|
|
|
def main(filename):
    """Sample a video once per minute and OCR the on-screen answer cards.

    Opens ``filename`` with OpenCV, seeks ahead one minute of frames per
    iteration, crops the lower-central region where the cards appear,
    isolates white text on blue backgrounds, and OCRs each detected card
    with Tesseract (letters or digits depending on the card count).
    Debug crops are written under frames/ and each distinct game's result
    is printed once, compared against the expected DESIRED transcription.
    """
    # https://docs.opencv.org/4.5.2/dd/d43/tutorial_py_video_display.html
    cap = cv2.VideoCapture(filename)

    # count indexes the sampled frames (not raw frame numbers); it is both
    # the seek multiplier and the key into DESIRED.
    count = 0
    OFFSET = 30 * 60  # 30fps * 1min
    previous = u''  # last OCR'd game string, used to de-duplicate output

    # Specify structure shape and kernel size.
    # Kernel size increases or decreases the area
    # of the rectangle to be detected.
    # A smaller value like (10, 10) will detect
    # each word instead of a sentence.
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
    dilation_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    while cap.isOpened():
        ret, frame = cap.read()

        # if frame is read correctly ret is True
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            cap.release()
            break

        # Answers show up on the lower half of the frame
        # Aspect ratio is 16/9, this will record the lower-central 12/4 of the image
        height, width = frame.shape[:2]
        trimH = int(height * 5 / 9)
        trimW = int(width * 2 / 16)
        frame = frame[trimH:, trimW:-trimW, :]
        frame = cv2.resize(frame, (960, 320))  # full 12/4 of 720i
        # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # find the white letters and blue backgrounds (BGR channel order)
        mask = cv2.inRange(frame, (150, 150, 150), (255, 255, 255))
        back = cv2.inRange(frame, (40, 0, 0), (255, 200, 100))

        # Applying dilation on the threshold image so adjacent glyph
        # fragments merge into one contour per card
        dilation = cv2.dilate(mask, rect_kernel, iterations=1)

        # merge background in such a way we have indicators of what is NOT a character
        back = cv2.bitwise_not(back | dilation)

        # Find contours; findContours returns 2 or 3 values depending on
        # the OpenCV major version, so pick the contour list accordingly
        cnts = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        found = []
        for c in cnts:

            # Cards have a specified height
            x,y,w,h = cv2.boundingRect(c)
            if h < 70 or h > 120:  # character height
                continue

            # Borders of the math board sometimes get picked up
            if w < 30:
                continue

            # Cards are white and blue: reject regions containing pixels
            # that are neither white text nor blue background
            n = cv2.countNonZero(back[y:y+h, x:x+w])
            if n > 10:  # white characters or blue background
                continue

            # Tesseract likes black and white, so invert the threshold mask
            ROI = ~mask[y:y+h, x:x+w]

            # Shrink the bold font face down a little bit (not required with cleaned parameters)
            ROI = cv2.dilate(ROI, dilation_kernel, iterations=1)

            # Blur the dilated edge
            ROI = cv2.filter2D(ROI, -1, dilation_kernel)

            # Remember ROI (sort left to right followed by any numbers on top;
            # the y < 65 flag puts the upper row of cards first)
            found.append((y < 65, x, ROI))

            # Mark the found region of interest in resulting frame
            cv2.rectangle(frame, (x, y), (x+w, y+h), (36,255,12), 1)

        # Sort using the ROI position + remove un-needed image information
        found.sort()
        found = [x[2] for x in found]

        # Process the regions of interest based on the game we are playing!
        # 8 or 9 cards => letters game; 7 cards => numbers game.
        if len(found) in (8, 9):
            found = [(to_letter(roi), roi) for roi in found]
        elif len(found) == 7:
            found = [(to_num(roi), roi) for roi in found]
        elif len(found) > 0:
            # Unrecognized card count: skip ahead to the next sample point
            print('Unknown Game Type!', len(found))
            count += 1
            cap.set(cv2.CAP_PROP_POS_FRAMES, count * OFFSET)
            continue

        dedupe = u' '.join([entry[0] for entry in found])

        # Printing some debug images
        if len(found) > 0 and dedupe != previous:
            previous = dedupe  # only play each game once

            # Debug information!
            for idx, entry in enumerate(found):
                cv2.imwrite('frames/sample-{:04}-{:02}.png'.format(count, idx), entry[1])
            cv2.imwrite("frames/sample-{:04}.jpg".format(count), frame)
            # cv2.imwrite("frames/sample-{:04}-back.jpg".format(count), ~mask)
            print("Frame {:04}".format(count), frame.shape, dedupe, DESIRED.get(count) == dedupe)
            sys.stdout.flush()

        # increment frame counter and seek to the next one-minute sample
        count += 1
        cap.set(cv2.CAP_PROP_POS_FRAMES, count * OFFSET)
|
|
|
|
|
""" |
|
tesseract --help-extra |
|
Usage: |
|
tesseract --help | --help-extra | --help-psm | --help-oem | --version |
|
tesseract --list-langs [--tessdata-dir PATH] |
|
tesseract --print-parameters [options...] [configfile...] |
|
tesseract imagename|imagelist|stdin outputbase|stdout [options...] [configfile...] |
|
|
|
OCR options: |
|
--tessdata-dir PATH Specify the location of tessdata path. |
|
--user-words PATH Specify the location of user words file. |
|
--user-patterns PATH Specify the location of user patterns file. |
|
-l LANG[+LANG] Specify language(s) used for OCR. |
|
-c VAR=VALUE Set value for config variables. |
|
Multiple -c arguments are allowed. |
|
--psm NUM Specify page segmentation mode. |
|
--oem NUM Specify OCR Engine mode. |
|
NOTE: These options must occur before any configfile. |
|
|
|
Page segmentation modes: |
|
0 Orientation and script detection (OSD) only. |
|
1 Automatic page segmentation with OSD. |
|
2 Automatic page segmentation, but no OSD, or OCR. |
|
3 Fully automatic page segmentation, but no OSD. (Default) |
|
4 Assume a single column of text of variable sizes. |
|
5 Assume a single uniform block of vertically aligned text. |
|
6 Assume a single uniform block of text. |
|
7 Treat the image as a single text line. |
|
8 Treat the image as a single word. |
|
9 Treat the image as a single word in a circle. |
|
10 Treat the image as a single character. |
|
11 Sparse text. Find as much text as possible in no particular order. |
|
12 Sparse text with OSD. |
|
13 Raw line. Treat the image as a single text line, |
|
bypassing hacks that are Tesseract-specific. |
|
|
|
OCR Engine modes: (see https://github.com/tesseract-ocr/tesseract/wiki#linux) |
|
0 Legacy engine only. |
|
1 Neural nets LSTM engine only. |
|
2 Legacy + LSTM engines. |
|
3 Default, based on what is available. |
|
|
|
Single options: |
|
-h, --help Show minimal help message. |
|
--help-extra Show extra help for advanced users. |
|
--help-psm Show page segmentation modes. |
|
--help-oem Show OCR Engine modes. |
|
-v, --version Show version information. |
|
--list-langs List available languages for tesseract engine. |
|
--print-parameters Print tesseract parameters. |
|
""" |
|
|
|
# NOTE: some testdata in container at /usr/share/tesseract-ocr/4.00/tessdata |
|
|
|
def to_letter(roi):
    """OCR a single uppercase letter from one card image.

    Args:
        roi: black-text-on-white image (numpy array) of a letter card.

    Returns:
        The recognized text with surrounding whitespace removed, or
        u'<unknown>' when Tesseract produced no usable output.
    """
    value = pytesseract.image_to_string(roi, config='--psm 10 --tessdata-dir /mnt --oem 0 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    # pytesseract (>= 0.3) returns trailing whitespace/form-feed ("\n\x0c"),
    # so a bare len() == 0 check never fires and joined results in main()
    # would contain stray newlines.  Strip before testing for emptiness.
    value = value.strip()
    if not value:
        return u'<unknown>'
    return value
|
|
|
|
|
def to_num(roi):
    """OCR a number (one or more digits) from one card image.

    Args:
        roi: black-text-on-white image (numpy array) of a number card.

    Returns:
        The recognized digits with surrounding whitespace removed, or
        u'<unknown>' when Tesseract produced no usable output.
    """
    value = pytesseract.image_to_string(roi, config='--psm 8 --tessdata-dir /mnt --oem 0 -c tessedit_char_whitelist=0123456789')
    # pytesseract (>= 0.3) returns trailing whitespace/form-feed ("\n\x0c"),
    # so a bare len() == 0 check never fires and joined results in main()
    # would contain stray newlines.  Strip before testing for emptiness.
    value = value.strip()
    if not value:
        return u'<unknown>'
    return value
|
|
|
|
|
# sample.mkv with 1min offset : 30fps * 1min
# Expected OCR transcriptions for the reference video, keyed by the sampled
# frame counter (`count` in main()).  main() prints whether each game's OCR
# output matches the entry here, as a quick accuracy spot-check.
DESIRED = {
    28: u'B U C T E A S D N',
    33: u'100 50 8 10 4 1 271',
    37: u'E R O T I C M E',
    39: u'I S L E O T G N Z',
    42: u'25 8 6 9 7 2 682',
    64: u'O C L S E A R S T',
    71: u'R I T R O W E I N',
    75: u'E A T I N G F U R',
}
|
|
|
|
|
if __name__ == "__main__":
    # Validate argv explicitly: `assert` is stripped under `python -O`,
    # so it must not be used for input validation.
    if len(sys.argv) != 2:
        sys.exit("Must have 1 arg: the video file to scan")
    main(sys.argv[1])