Skip to content

Instantly share code, notes, and snippets.

@rickwierenga
Created January 21, 2023 17:18
Show Gist options
  • Save rickwierenga/42395c4607b5da762c554f022f7159ac to your computer and use it in GitHub Desktop.
Save rickwierenga/42395c4607b5da762c554f022f7159ac to your computer and use it in GitHub Desktop.
# usage: python srtfinder.py filename.srt your quote
# requires https://pypi.org/project/srt/
# can be memory optimized by discarding failed 'read' chars and resetting 'widx'
import srt
import sys
# Require a subtitle file and at least one non-blank query word. Without this
# guard the script dies with a bare IndexError, either here (no filename) or
# later inside do() (empty query).
if len(sys.argv) < 3 or not " ".join(sys.argv[2:]).split():
    sys.exit("usage: python srtfinder.py filename.srt your quote")

print(sys.argv[1])
with open(sys.argv[1], "r") as f:
    data = f.read()
p = list(srt.parse(data))  # every parsed subtitle, in file order

q = " ".join(sys.argv[2:])  # query is all parameters after the filename
print(q)
q = q.lower()  # matching is done on the lower-cased query
qs = q.split()  # query as a list of words
print()
def do(subs=None, words=None):
    """Find the first run of consecutive subtitle words equal to the query.

    Words are obtained by whitespace-splitting each subtitle's ``content``;
    a match may span several subtitles, and subtitles with no words are
    skipped over. Comparison ignores case but is otherwise exact (punctuation
    attached to a word is part of the word).

    On a match, prints the query and the index range of the subtitles that
    contain it, followed by the start time of the first and the end time of
    the last of those subtitles. Prints nothing when there is no match.

    Args:
        subs: Sequence of objects with ``content``, ``start`` and ``end``
            attributes. Defaults to the module-level ``p``.
        words: Query as a list of words. Defaults to the module-level ``qs``.

    Returns:
        ``(first_index, last_index)`` into ``subs`` for the first match, or
        ``None`` when the query is empty or does not occur.
    """
    if subs is None:
        subs = p
    if words is None:
        words = qs

    words = [w.lower() for w in words]
    if not words:
        return None  # an empty query cannot match anything

    # Flatten all subtitles (starting with the very first one) into a single
    # word list, remembering which subtitle each word came from so the
    # reported range reflects the matched words rather than how far ahead
    # the scan has read.
    read = []    # every subtitle word, lower-cased
    owners = []  # owners[i] is the index in subs of the subtitle holding read[i]
    for idx, sub in enumerate(subs):
        for word in sub.content.split():
            read.append(word.lower())
            owners.append(idx)

    span = len(words)
    for widx in range(len(read) - span + 1):
        if read[widx:widx + span] == words:
            first = owners[widx]
            last = owners[widx + span - 1]
            print("found full match", words, ":", first, "-", last)
            print(subs[first].start, subs[last].end)
            return first, last
    return None
# Run the search; do() prints the match details itself when the query is found.
do()
# Final marker printed once do() has returned, whether or not a match was found.
print("term.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment