Skip to content

Instantly share code, notes, and snippets.

@dlip
Created June 8, 2014 10:07
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dlip/163d429553bfc899d94f to your computer and use it in GitHub Desktop.
Save dlip/163d429553bfc899d94f to your computer and use it in GitHub Desktop.
Converts this https://ankiweb.net/shared/info/702754122 Anki deck into a furigana cloze deck.
# Converts a tab-separated Anki deck export (core10k.txt) into core10kout.txt.
# Every "kanji[furigana]" word that is not yet listed in knownkanji.txt is
# wrapped in <ruby> cloze markup; words already listed are reduced to their
# bare kanji. The grown word list is written back to knownkanji.txt at the end.
#
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# its indentation -- confirm the placement of `used.append` and `ignored += 1`.
import io
import re

# Set to True to read test.txt instead, stop after about 100 rows and write
# extra trace lines into the output file.
debug = False

# io.open accepts an encoding argument on both Python 2 and Python 3.
ENCODING = "utf-8"  # NOTE(review): assumes the deck files are UTF-8 -- confirm

# Zero-based columns read from each tab-separated input row.
INDEX_COL = 1
MEANING_COL = 6
READING_COL = 8
AUDIO_COL = 11
# NOTE(review): if this is the row's last column the value still carries the
# line's trailing newline -- confirm against the export format.
RTK_COL = 22

# A run of non-whitespace characters followed by a bracketed reading.
WORD_RE = re.compile(r"[^\s][^\s]*?\[.*?\]")
# Text between the first "[" and the next "]".
FURIGANA_RE = re.compile(r"(?:\[)(.*?)(?:\])")
# Text before the first "[".
KANJI_RE = re.compile(r"(.*?)(?:\[)")

# Filled with (kanji, furigana, kanji cloze no., kanji, furigana cloze no.,
# furigana).
CLOZE_TEMPLATE = (
    '<ruby title="%s(%s)"><rb>'
    '<span class="clozewrapper">{{c%s::<span class="basemaru">%s'
    '<!-- end_basemaru_l --></span><!-- end_basemaru_r -->}}'
    '<!-- clozewr --></span><!-- apper --></rb>'
    '<rt><span class="clozewrapper">{{c%s::%s}}'
    '<!-- clozewr --></span><!-- apper --></rt></ruby>'
)


def convert_reading(reading, used):
    """Turn one reading field into cloze markup.

    Args:
        reading: Text containing "kanji[furigana]" words.
        used: List of words already seen. Newly encountered words are
            appended to it in place.

    Returns:
        A ``(result, new_words)`` tuple. ``result`` is ``reading`` with all
        spaces removed, known words replaced by their bare kanji and new
        words replaced by ruby/cloze markup. ``new_words`` lists the
        ``(kanji, furigana)`` pairs that were new, in order of appearance.
    """
    result = reading.replace(' ', '')
    new_words = []
    count = 1
    for word in WORD_RE.findall(reading):
        furigana = FURIGANA_RE.search(word).group(1)
        kanji = KANJI_RE.search(word).group(1)
        # NOTE(review): this is a substring test against every known entry,
        # not an exact match -- kept as in the original; confirm it is intended.
        if any(word in known for known in used):
            result = result.replace(word, kanji)
            continue
        # The furigana gets the odd cloze number and the kanji the next even
        # one, so each new word consumes two numbers.
        markup = CLOZE_TEMPLATE % (
            kanji, furigana, count + 1, kanji, count, furigana)
        result = result.replace(word, markup)
        count += 2
        used.append(word)
        new_words.append((kanji, furigana))
    return result, new_words


def load_known(path):
    """Return the lines of *path* with their newline characters removed."""
    with io.open(path, "r", encoding=ENCODING) as handle:
        return [line.replace('\n', '') for line in handle]


def save_known(path, used):
    """Write every entry of *used* to *path*, one per line."""
    with io.open(path, "w", encoding=ENCODING) as handle:
        for entry in used:
            handle.write(entry + "\n")


def main():
    """Run the conversion on the deck file and update the known-word list."""
    source_path = 'test.txt' if debug else 'core10k.txt'
    used = load_known("knownkanji.txt")
    ignored = 0
    with io.open(source_path, "r", encoding=ENCODING) as source:
        with io.open("core10kout.txt", "w", encoding=ENCODING) as out:
            for line_no, line in enumerate(source):
                if debug and line_no > 100:
                    # As in the original debug run: stop here without
                    # printing the count or rewriting knownkanji.txt.
                    return
                if not line.strip():
                    # A blank line has no columns to index into.
                    continue
                elements = line.split("\t")
                index = elements[INDEX_COL]
                meaning = elements[MEANING_COL]
                reading = elements[READING_COL]
                audio = elements[AUDIO_COL]
                rtk = elements[RTK_COL]
                if debug:
                    out.write(reading + "\n")
                result, new_words = convert_reading(reading, used)
                if debug:
                    for kanji, furigana in new_words:
                        out.write(furigana + "\n")
                        out.write(kanji + "\n")
                if new_words:
                    out.write(index + "\t" + result + "\t" + meaning + "\t"
                              + audio + "\t" + rtk + "\t" + reading + "\n")
                else:
                    # Rows with nothing new to learn produce no card.
                    if debug:
                        out.write("IGNORED" + result + "\t" + meaning + "\n")
                    ignored += 1
    print('ignored ' + str(ignored))
    save_known("knownkanji.txt", used)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment