Skip to content

Instantly share code, notes, and snippets.

Created May 12, 2011 18:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/969113 to your computer and use it in GitHub Desktop.
Save anonymous/969113 to your computer and use it in GitHub Desktop.
ankify lyrics
#!/usr/bin/env python
# muflax <muflax@gmail.com>, GPL 3.0
"""
Reads the .txt files named on the command line and prints one tab-separated
cloze-deletion item per non-empty line, with up to 2 lines of context before
and after. It regards every line as one item.
"""
import re, sys
def main():
    """Print tab-separated cloze-deletion rows for each file in sys.argv[1:].

    Every line of a file becomes a numbered entry ("<lineno>: <text>", with
    surrounding whitespace stripped from the text). For each non-empty line,
    one row is written to stdout with four tab-separated fields:

    1. up to two preceding entries, joined by "<br/>",
    2. the entry itself, wrapped in a blue <span>,
    3. up to two following entries, joined by "<br/>",
    4. a tag: a space followed by the file name as given on the command
       line, with a trailing ".txt" extension removed.

    Rows are emitted from the last line of each file to the first.
    """
    linebreak = "<br/>"
    # Compiled once: matches a numbered entry whose text part is empty.
    blank_entry = re.compile(r"^[0-9]+:\s*$")

    for song in sys.argv[1:]:
        # read in lines
        with open(song, "r") as f:
            fl = ["%d: %s" % (i, line.strip()) for i, line in enumerate(f, 1)]

        # The dot is escaped so only a literal ".txt" suffix is removed;
        # an unescaped "." would also eat e.g. the "y" in "mytxt".
        tag = " " + re.sub(r"\.txt$", "", song)

        # process lines, output tsv
        # reversed because training from unknown -> known is easier
        for n in reversed(range(len(fl))):
            # skip empty lines
            if blank_entry.search(fl[n]):
                continue
            before = linebreak.join(fl[max(0, n - 2):n])
            answer = "<span style='color:#0000ff;'>%s</span>" % fl[n]
            # Slicing clamps at the end of the list, so no explicit min().
            after = linebreak.join(fl[n + 1:n + 3])
            # Single-argument call form works on both Python 2 and 3.
            print("\t".join((before, answer, after, tag)))
# Script entry point: process the files named on the command line only when
# this file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment