Skip to content

Instantly share code, notes, and snippets.

@bign8
Last active July 28, 2021 01:50
Show Gist options
  • Save bign8/fe481f49162017314aa456d12d6e2eb6 to your computer and use it in GitHub Desktop.
Save bign8/fe481f49162017314aa456d12d6e2eb6 to your computer and use it in GitHub Desktop.
Countdown helper utilities

Countdown

A British TV series about letters and numbers problems.

Use ./nums to help with the numbers rounds, and ./words for everything else.

words.txt
*.swp
*.mkv
eng.traineddata
#! /bin/sh
# Download the word list and the Tesseract English data file.
# --no-clobber leaves files that already exist locally untouched.
for url in \
    https://raw.githubusercontent.com/dwyl/english-words/master/words.txt \
    https://github.com/tesseract-ocr/tessdata/raw/4.00/eng.traineddata
do
    wget --no-clobber "$url"
done
#! /usr/bin/env python3
import sys
from heapq import *
# Named example rounds. Each entry lists the available numbers followed by
# the target as its last element.
samples = {
    "t1": [1, 10, 5, 4, 75, 50, 980],
    "t2": [1, 10, 5, 9, 8, 100, 155],
    "t3": [9, 10, 1, 9, 50, 75, 410],
    "t4": [3, 7, 2, 5, 25, 100, 613],
    "t5": [9, 2, 3, 7, 75, 50, 434],
    "t6": [6, 2, 9, 8, 75, 50, 589],
    "t7": [2, 3, 2, 8, 75, 50, 436],
    "t8": [6, 9, 10, 2, 8, 50, 157],
    "t9": [7, 3, 4, 6, 75, 100, 731],
    "s6e5a": [10, 7, 9, 8, 50, 75, 186],
    "s6e5b": [7, 8, 7, 5, 75, 100, 651],
    "s6e6a": [9, 9, 5, 1, 100, 25, 527],  # possible!
    "s6e6b": [9, 2, 9, 10, 25, 100, 660],
    "s6e7a": [5, 9, 7, 1, 100, 25, 729],
    "s6e12b": [10, 9, 7, 3, 3, 75, 450],  # simple: (3+3)*75
    "s6e12c": [4, 8, 3, 1, 75, 100, 126],  # simple: 100+(75/3)+1
    "s6e14c": [10, 3, 3, 5, 8, 50, 881],  # impossible
}


def parse():
    """Build the puzzle from the command line.

    The first argument may be the name of an entry in ``samples``; otherwise
    every argument is converted with ``int``.

    Returns:
        A new list of integers (numbers followed by the target).

    Raises:
        AssertionError: when too few arguments were supplied.
        ValueError: when a non-sample argument is not an integer.
    """
    assert len(sys.argv) > 1, "Missing params"
    key = sys.argv[1]
    if key in samples:
        # Hand back a copy: callers append bookkeeping data to the list and
        # must not corrupt the shared sample table.
        return list(samples[key])
    assert len(sys.argv) > 2, "Missing params"
    return [int(arg) for arg in sys.argv[1:]]
def process(parts):
    """Search for a way to reach the target from the given numbers.

    ``parts`` holds the available numbers followed by the target as its last
    element. Pairs of numbers are combined with ``+``, ``*``, ``-`` (positive
    results only) and ``/`` (whole quotients only); a number may also simply
    be left unused. As soon as a value equal to the target shows up, the steps
    that produced it are printed and ``True`` is returned.

    Args:
        parts: list of integers, numbers first and target last. It is not
            modified.

    Returns:
        ``True`` when a solution was printed, ``False`` when every queued
        state was examined without reaching the target.
    """
    # Work on a copy so the caller's list stays intact. Every state has the
    # layout [number, ..., target, history]; the trailing string remembers
    # how we got there.
    start = list(parts)
    start.append("")
    queue = [(-1, -1, -1, start)]
    n = 0

    def add(o, v):
        # Heap order: fewest entries left, then distance of ``v`` from the
        # target, then the iteration counter at insertion time.
        item = (len(o), abs(v - o[-2]), n, o)
        heappush(queue, item)

    def add2(o, i, j, op, v):
        # Queue a copy of ``o`` where o[i] becomes ``v`` (the result of
        # "o[i] op o[j]") and o[j] is removed; the step is logged in history.
        o = o.copy()
        o[-1] += f"\n{o[i]} {op} {o[j]} = {v}"
        o[i] = v
        o.pop(j)
        add(o, v)

    while queue:
        n += 1
        if n % 10000 == 0:
            # Progress indicator for long searches.
            print(".", end='')
            sys.stdout.flush()
        o = heappop(queue)[-1]
        for i, a in enumerate(o[:-2]):
            if a == o[-2]:
                print(o[-1])
                return True
            if len(o) == 3:
                # Only one number left: nothing to combine it with.
                continue
            # Also try the state where this number is left unused.
            p = o.copy()
            p.pop(i)
            add(p, p[-2])
            for j, b in enumerate(o[:-2]):
                if i >= j:
                    # Visit each unordered pair once.
                    continue
                add2(o, i, j, "+", a + b)
                add2(o, i, j, "*", a * b)
                # sub (i've never seen them go negative)
                if a - b > 0:
                    add2(o, i, j, "-", a - b)
                if b - a > 0:
                    add2(o, j, i, "-", b - a)
                # dividing by 1 gets us nowhere
                if b == 1 or a == 1:
                    continue
                # div (floor division keeps the quotient exact for any size
                # of integer; float division would round large values)
                if b > 0 and a % b == 0:
                    add2(o, i, j, "/", a // b)
                # if they are the same, only insert once
                if a == b:
                    continue
                # div backwards
                if a > 0 and b % a == 0:
                    add2(o, j, i, "/", b // a)
    return False
if __name__ == "__main__":
    # Read the puzzle, echo it as "numbers => target", then try to solve it.
    parts = parse()
    numbers = parts[:-1]
    goal = parts[-1]
    print('Processing:', ' '.join(str(value) for value in numbers), '=>', goal)
    solved = process(parts)
    if not solved:
        print("Impossible!")
#! /usr/bin/env python
import os
import sys
try:
    import cv2
    import numpy as np
    import pytesseract
except ImportError:
    # The imaging/OCR libraries are not installed here: re-run this script
    # inside the Docker image named below, with the current directory mounted
    # at /mnt and the current user/group ids.
    import subprocess

    command = [
        "docker", "run", "--rm",
        "--volume", "{}:/mnt".format(os.getcwd()),
        "--user", "{}:{}".format(os.getuid(), os.getgid()),
        "--workdir", "/mnt",
        # "fbcotter/docker-tensorflow-opencv",
        "ricktorzynski/ocr-tesseract-docker",
        "./watch",
    ] + sys.argv[1:]
    try:
        # An argument list (no shell) keeps paths with spaces intact, and
        # call() returns the real exit code. os.system() returns an encoded
        # wait status, which sys.exit() would truncate (e.g. 256 -> 0).
        status = subprocess.call(command)
    except OSError as err:
        sys.exit("Unable to run docker: {}".format(err))
    sys.exit(status)
def main(filename):
    """Scan a video for game boards and OCR the cards shown on them.

    One frame is examined every ``OFFSET`` frames. Card-shaped regions are
    located in the lower part of the frame, read with ``to_letter`` (8 or 9
    regions) or ``to_num`` (7 regions), and each new reading is printed
    together with whether it matches ``DESIRED``. Debug images are written to
    the ``frames`` directory.

    Args:
        filename: path handed to ``cv2.VideoCapture``.
    """
    # https://docs.opencv.org/4.5.2/dd/d43/tutorial_py_video_display.html
    cap = cv2.VideoCapture(filename)
    count = 0
    OFFSET = 30 * 60  # 30fps * 1min
    previous = u''

    # cv2.imwrite does not create missing directories, so make sure the
    # debug output folder exists before any image is written.
    if not os.path.isdir('frames'):
        os.makedirs('frames')

    # Specify structure shape and kernel size.
    # Kernel size increases or decreases the area
    # of the rectangle to be detected.
    # A smaller value like (10, 10) will detect
    # each word instead of a sentence.
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 20))
    dilation_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    while cap.isOpened():
        ret, frame = cap.read()

        # if frame is read correctly ret is True
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            cap.release()
            break

        # Answers show up on the lower half of the frame
        # Aspect ratio is 16/9, this will record the lower-central 12/4 of the image
        height, width = frame.shape[:2]
        trimH = int(height * 5 / 9)
        trimW = int(width * 2 / 16)
        frame = frame[trimH:, trimW:-trimW, :]
        frame = cv2.resize(frame, (960, 320))  # full 12/4 of 720i
        # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # find the white letters and blue backgrounds
        mask = cv2.inRange(frame, (150, 150, 150), (255, 255, 255))
        back = cv2.inRange(frame, (40, 0, 0), (255, 200, 100))

        # Applying dilation on the threshold image
        dilation = cv2.dilate(mask, rect_kernel, iterations=1)

        # merge background in such a way we have indicators of what is NOT a character
        back = cv2.bitwise_not(back | dilation)

        # Find contours (the return tuple has 2 or 3 items depending on the
        # OpenCV version, hence the length check)
        cnts = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]

        found = []
        for c in cnts:
            # Cards have a specified height
            x, y, w, h = cv2.boundingRect(c)
            if h < 70 or h > 120:  # character height
                continue

            # Borders of the math board sometimes get picked up
            if w < 30:
                continue

            # Cards are white and blue
            n = cv2.countNonZero(back[y:y+h, x:x+w])
            if n > 10:  # white characters or blue background
                continue

            # Tesseract likes black and white, so invert the threshold mask
            ROI = ~mask[y:y+h, x:x+w]

            # Shrink the bold font face down a little bit (not required with cleaned parameters)
            ROI = cv2.dilate(ROI, dilation_kernel, iterations=1)

            # Blur the dilated edge
            ROI = cv2.filter2D(ROI, -1, dilation_kernel)

            # Remember ROI (sort left to right followed by any numbers on top)
            found.append((y < 65, x, ROI))

            # Mark the found region of interest in resulting frame
            cv2.rectangle(frame, (x, y), (x+w, y+h), (36, 255, 12), 1)

        # Sort using the ROI position + remove un-needed image information.
        # Only the position fields take part in the ordering: letting a tie
        # fall through to the image arrays would make the tuple comparison
        # evaluate an array's truth value and raise ValueError.
        found.sort(key=lambda item: item[:2])
        found = [item[2] for item in found]

        # Process the regions of interest based on the game we are playing!
        if len(found) in (8, 9):
            found = [(to_letter(roi), roi) for roi in found]
        elif len(found) == 7:
            found = [(to_num(roi), roi) for roi in found]
        elif len(found) > 0:
            print('Unknown Game Type!', len(found))
            count += 1
            cap.set(cv2.CAP_PROP_POS_FRAMES, count * OFFSET)
            continue

        dedupe = u' '.join([entry[0] for entry in found])

        # Printing some debug images
        if len(found) > 0 and dedupe != previous:
            previous = dedupe  # only play each game once

            # Debug information!
            for idx, entry in enumerate(found):
                cv2.imwrite('frames/sample-{:04}-{:02}.png'.format(count, idx), entry[1])
            cv2.imwrite("frames/sample-{:04}.jpg".format(count), frame)
            # cv2.imwrite("frames/sample-{:04}-back.jpg".format(count), ~mask)
            print("Frame {:04}".format(count), frame.shape, dedupe, DESIRED.get(count) == dedupe)
            sys.stdout.flush()

        # increment frame counter
        count += 1
        cap.set(cv2.CAP_PROP_POS_FRAMES, count * OFFSET)
"""
tesseract --help-extra
Usage:
tesseract --help | --help-extra | --help-psm | --help-oem | --version
tesseract --list-langs [--tessdata-dir PATH]
tesseract --print-parameters [options...] [configfile...]
tesseract imagename|imagelist|stdin outputbase|stdout [options...] [configfile...]
OCR options:
--tessdata-dir PATH Specify the location of tessdata path.
--user-words PATH Specify the location of user words file.
--user-patterns PATH Specify the location of user patterns file.
-l LANG[+LANG] Specify language(s) used for OCR.
-c VAR=VALUE Set value for config variables.
Multiple -c arguments are allowed.
--psm NUM Specify page segmentation mode.
--oem NUM Specify OCR Engine mode.
NOTE: These options must occur before any configfile.
Page segmentation modes:
0 Orientation and script detection (OSD) only.
1 Automatic page segmentation with OSD.
2 Automatic page segmentation, but no OSD, or OCR.
3 Fully automatic page segmentation, but no OSD. (Default)
4 Assume a single column of text of variable sizes.
5 Assume a single uniform block of vertically aligned text.
6 Assume a single uniform block of text.
7 Treat the image as a single text line.
8 Treat the image as a single word.
9 Treat the image as a single word in a circle.
10 Treat the image as a single character.
11 Sparse text. Find as much text as possible in no particular order.
12 Sparse text with OSD.
13 Raw line. Treat the image as a single text line,
bypassing hacks that are Tesseract-specific.
OCR Engine modes: (see https://github.com/tesseract-ocr/tesseract/wiki#linux)
0 Legacy engine only.
1 Neural nets LSTM engine only.
2 Legacy + LSTM engines.
3 Default, based on what is available.
Single options:
-h, --help Show minimal help message.
--help-extra Show extra help for advanced users.
--help-psm Show page segmentation modes.
--help-oem Show OCR Engine modes.
-v, --version Show version information.
--list-langs List available languages for tesseract engine.
--print-parameters Print tesseract parameters.
"""
# NOTE: some testdata in container at /usr/share/tesseract-ocr/4.00/tessdata
def to_letter(roi):
    """Read one upper-case letter from ``roi`` with Tesseract.

    Uses page segmentation mode 10 (single character) and a whitelist of
    ``A``-``Z``.

    Returns:
        The recognised text without surrounding whitespace, or
        ``u'<unknown>'`` when nothing was recognised.
    """
    value = pytesseract.image_to_string(roi, config='--psm 10 --tessdata-dir /mnt --oem 0 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    # The OCR output may end in newline / form-feed characters; left in place
    # they defeat both the empty check below and comparisons against the
    # expected strings.
    value = value.strip()
    if not value:
        return u'<unknown>'
    return value
def to_num(roi):
    """Read a number from ``roi`` with Tesseract.

    Uses page segmentation mode 8 (single word) and a whitelist of the
    digits ``0``-``9``.

    Returns:
        The recognised text without surrounding whitespace, or
        ``u'<unknown>'`` when nothing was recognised.
    """
    value = pytesseract.image_to_string(roi, config='--psm 8 --tessdata-dir /mnt --oem 0 -c tessedit_char_whitelist=0123456789')
    # The OCR output may end in newline / form-feed characters; left in place
    # they defeat both the empty check below and comparisons against the
    # expected strings.
    value = value.strip()
    if not value:
        return u'<unknown>'
    return value
# sample.mkv with 1min offset : 30fps * 1min
# Expected readings, keyed by the frame counter (`count`) that main() looks up
# with DESIRED.get(count). Each value is the space-joined OCR text that main()
# compares its own result against when printing a frame.
DESIRED = {
    28: u'B U C T E A S D N',
    33: u'100 50 8 10 4 1 271',
    37: u'E R O T I C M E',
    39: u'I S L E O T G N Z',
    42: u'25 8 6 9 7 2 682',
    64: u'O C L S E A R S T',
    71: u'R I T R O W E I N',
    75: u'E A T I N G F U R',
}
if __name__ == "__main__":
    # Exactly one command-line argument is expected; it is passed to main().
    argument_count = len(sys.argv)
    assert argument_count == 2, "Must have 1 arg"
    video_path = sys.argv[1]
    main(video_path)
#! /usr/bin/env python3
import sys
"""
s6e5z: titbanger => battering
s6e6a: etsifoapn
b: sourtits => tourists
c: eoeazrglr
d: puberugs => superbug
e: tuptleuto
f: analmist => talisman
g: rngieinut => reuniting
h: mingeburn => numbering
"""
assert len(sys.argv) == 2, "Must have 1 arg"
# The dictionary is lower-cased below, so normalise the query the same way;
# otherwise upper-case input could never match.
target = ''.join(sorted(sys.argv[-1].lower()))
print("Searching", target)

# Prepare Dictionary (the context manager closes the file once it is read)
with open('words.txt', 'rt') as handle:
    dataset = handle.read().strip().split('\n')
print("Starting size:", len(dataset))
# A set drops duplicates created when entries differ only by case.
dataset = {
    n.lower() for n in dataset
    if n.isalpha() and len(n) <= len(target)
}

# Index every word under its sorted letters so anagrams share one key.
memory = {}
for n in dataset:
    memory.setdefault(''.join(sorted(n)), []).append(n)
print("Filtered size:", len(memory))

# Each round looks up all current letter sets, then queues every set obtained
# by dropping one letter, so rounds go from longest words to shortest.
batch = [target]
found_any = False
while len(batch[0]) > 3:
    found = []
    future = []
    for target in batch:
        if target in memory:
            found.extend(memory[target])
        for i in range(len(target)):
            future.append(target[:i] + target[i+1:])
    if found:
        found_any = True
        print(len(batch[0]), sorted(found))
        # Plenty of answers at this length: no need to go shorter.
        if len(found) > 10:
            sys.exit(0)
    else:
        print(f"Didn't find any {len(batch[0])}s")
    batch = list(set(future))

# Only report failure when no round produced a word.
if not found_any:
    print("Didn't find in dictionary")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment