Last active
June 22, 2020 02:44
-
-
Save alexcu/8072c2b0e617becc905b50e56b7bb852 to your computer and use it in GitHub Desktop.
Small python script to extract timestamps from VHS tapes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import pytesseract | |
import re | |
# Prototype: sample one frame every NTH_SECONDS of video, crop the region that
# holds the burnt-in VHS date, OCR it and print any text that looks like a date.
VIDEO_PATH = "/Users/Alex/Desktop/untitled folder/Home Movie No 2.m4v"
NTH_SECONDS = 5
# (x, y, width, height) in pixels of the burnt-in timestamp within a frame.
TIMESTAMP_BOX = (80, 460, 250, 50)
# day.month.year with a year in 1980-1999. Raw string so that `\d` and `\.`
# reach the regex engine instead of being invalid string escapes.
TIMESTAMP_REGEX = r'[123]?\d{1}\.1?\d{1}\.19[89]\d{1}'

cap = cv2.VideoCapture(VIDEO_PATH)
fps = round(cap.get(cv2.CAP_PROP_FPS))
# A reported frame rate of 0 would make the modulo below raise
# ZeroDivisionError; fall back to inspecting every frame in that case.
frames_per_sample = max(1, fps * NTH_SECONDS)
frame_num = 0
try:
    while cap.isOpened():
        frame_exists, frame = cap.read()
        if not frame_exists:
            # End of stream (or a read error). isOpened() does not turn False
            # here, so without this break the loop would spin forever.
            break
        frame_num += 1
        if frame_num % frames_per_sample != 0:
            continue
        secs = round(cap.get(cv2.CAP_PROP_POS_MSEC) / 1000)
        print("Frame {} @ {}s".format(frame_num, secs))
        x, y, w, h = TIMESTAMP_BOX
        timestamp_crop = frame[y: y + h, x: x + w]
        cv2.imshow('timestamp', timestamp_crop)
        # Press 'q' in a preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        timestamp_grey = cv2.cvtColor(timestamp_crop, cv2.COLOR_BGR2GRAY)
        _, timestamp_thresh = cv2.threshold(
            timestamp_grey, 127, 255, cv2.THRESH_BINARY)
        cv2.imshow('thresholded timestamp', timestamp_thresh)
        # '--psm 7' treats the crop as a single text line; 'digits' restricts
        # recognition to the digits configuration.
        candidate_str = pytesseract.image_to_string(
            timestamp_thresh, config='--psm 7 outputbase digits')
        # OCR output can carry surrounding whitespace (newline / form feed);
        # strip it so the match and the printed value are clean.
        candidate_str = candidate_str.strip()
        if re.match(TIMESTAMP_REGEX, candidate_str):
            print("** Timestamp @ {}s = {}".format(secs, candidate_str))
finally:
    # Always free the capture and close preview windows, even on error.
    cap.release()
    cv2.destroyAllWindows()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pytesseract | |
import cv2 | |
import sys | |
import re | |
import datetime | |
#### | |
# To run, pipe output to a csv: | |
# | |
# $ python3 extract-timestamps.py file1.mov file2.mov > timestamps.csv | |
# | |
# Output will be: | |
# | |
# file,timestamp,hhmmss,sec | |
# file1.mov,15.10.1993,0:02:25,145 | |
# file1.mov,15.10.1993,0:02:30,150 | |
# file1.mov,15.10.1993,0:02:55,175 | |
# file1.mov,15.10.1993,0:03:00,180 | |
# | |
# hhmmss is the playback position within the video file (h:mm:ss) at which the
# VHS timestamp was detected; sec is that same position in whole seconds
#### | |
# Seconds of video between sampled frames; lower this to sample more often
CAPTURE_AT_N_SECONDS = 5
# Top-left corner (x, y) and size (width, height), in pixels, of the region of
# the frame that contains the burnt-in timestamp
TIMESTAMP_XY = (80, 460)
TIMESTAMP_SZ = (250, 50)
def extract_timestamp(frame):
    """Read the burnt-in VHS date from a video frame.

    The region given by TIMESTAMP_XY / TIMESTAMP_SZ is cropped out of
    ``frame``, converted to greyscale, binarised and passed to Tesseract.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``.

    Returns:
        A ``datetime.datetime`` for the recognised ``day.month.year`` text,
        or ``None`` when the region is empty, the text does not look like a
        date, or it is not a valid calendar date.
    """
    x, y = TIMESTAMP_XY
    w, h = TIMESTAMP_SZ
    # Crop first: greyscale conversion and thresholding are per-pixel, so the
    # result is the same as processing the whole frame, with far less work.
    region = frame[y:y + h, x:x + w]
    if region.size == 0:
        # Frame is smaller than the configured region; nothing to OCR.
        return None
    img_grey = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    _, img_crop = cv2.threshold(img_grey, 127, 255, cv2.THRESH_BINARY)
    # Uncomment to show cropped timestamp
    # cv2.imshow('crop', img_crop)
    # Uncomment to show full frame
    # cv2.imshow('full', frame)
    raw_text = pytesseract.image_to_string(
        img_crop, config='--psm 7 outputbase digits')
    # OCR output can end with a newline / form feed, which would make
    # strptime fail with "unconverted data remains" on an otherwise valid date.
    timestamp = raw_text.strip()
    # Movies are usually in 198x--200x. Raw string so `\d` and `\.` are regex
    # escapes rather than invalid string escapes.
    if not re.fullmatch(r'[123]?\d{1}\.1?\d{1}\.[12][90][890]\d{1}', timestamp):
        return None
    # Parse as a proper datetime; rejects impossible dates such as 31.02.1993
    try:
        return datetime.datetime.strptime(timestamp, "%d.%m.%Y")
    except ValueError:
        return None
if __name__ == "__main__":
    # Scan every video named on the command line and write one CSV row to
    # stdout for each sampled frame in which a VHS timestamp was recognised.
    # The header is emitted exactly once so the combined output is valid CSV.
    print('file,timestamp,hhmmss,sec')
    # Files are processed in the order given on the command line.
    for infile in sys.argv[1:]:
        cap = cv2.VideoCapture(infile)
        if not cap.isOpened():
            # Report on stderr so the CSV on stdout stays clean.
            print("Could not open {}".format(infile), file=sys.stderr)
            cap.release()
            continue
        fps = round(cap.get(cv2.CAP_PROP_FPS))
        # A reported frame rate of 0 would make the modulo below raise
        # ZeroDivisionError; fall back to inspecting every frame.
        frames_per_sample = max(1, fps * CAPTURE_AT_N_SECONDS)
        frame_num = 0
        try:
            while True:
                frame_exists, frame = cap.read()
                if not frame_exists:
                    # End of stream (or read error). isOpened() does not turn
                    # False here, so looping on it alone would never finish.
                    break
                if frame_num % frames_per_sample == 0:
                    timestamp = extract_timestamp(frame)
                    if timestamp is not None:
                        # Derive both position columns from the same rounded
                        # value so hhmmss and sec always agree.
                        secs = round(cap.get(cv2.CAP_PROP_POS_MSEC) / 1000)
                        hhmmss = str(datetime.timedelta(seconds=secs))
                        print("{},{},{},{}".format(
                            infile.split('/')[-1],
                            timestamp.strftime("%d.%m.%Y"),
                            hhmmss,
                            secs))
                # Only meaningful when the debug imshow() calls in
                # extract_timestamp are enabled: 'q' skips to the next file.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                frame_num += 1
        finally:
            # Release the capture even if OCR or decoding raised.
            cap.release()
    cv2.destroyAllWindows()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment