Skip to content

Instantly share code, notes, and snippets.

@IllDepence
Created August 2, 2020 08:08
Show Gist options
  • Save IllDepence/568dffe61fc42cc73bc5f18919498a4c to your computer and use it in GitHub Desktop.
Save IllDepence/568dffe61fc42cc73bc5f18919498a4c to your computer and use it in GitHub Desktop.
Anki 2.1.29 DB change adjustment for anki_add_pitch_plugin
""" Utility functions.
"""
import json
import re
import time
from aqt import mw
from aqt.utils import chooseList
from anki.utils import stripHTML
def select_deck_id(msg):
    """Let the user pick a deck and return the id of the chosen deck.

    Every (id, name) row of the collection's ``decks`` table is read, the
    names are offered through ``chooseList`` with ``msg`` as the prompt,
    and the value ``chooseList`` returns is used as an index into the rows.
    """
    deck_rows = [
        (row[0], row[1])
        for row in mw.col.db.execute('SELECT id, name FROM decks')
    ]
    deck_names = [name for _, name in deck_rows]
    picked = chooseList(msg, deck_names)
    deck_id, _ = deck_rows[picked]
    return deck_id
def select_note_type(note_type_ids):
    """Let the user pick a note type and return the id of the chosen one.

    Only note types whose id is contained in ``note_type_ids`` are offered.
    If ``note_type_ids`` is empty, or none of its ids matches a row of the
    ``notetypes`` table, every note type is offered instead so the dialog
    is never shown with an empty list.

    The value returned by ``chooseList`` is used as an index into the
    offered note types.
    """
    all_types = [
        (row[0], row[1])
        for row in mw.col.db.execute('SELECT id, name FROM notetypes')
    ]
    wanted = set(note_type_ids or ())
    note_types = [nt for nt in all_types if nt[0] in wanted]
    if not note_types:
        # nothing matched: fall back to offering every note type
        note_types = all_types
    choices = [name for _, name in note_types]
    choice = chooseList(
        'Select a note type.',
        choices
    )
    return note_types[choice][0]
def get_accent_dict(path):
    """Load an accent dictionary file into a lookup table.

    Each non-blank line of the UTF-8 file at ``path`` must hold five fields
    separated by U+241E. The first field lists orthographic variants
    separated by U+241F, the second is the reading, and the fifth is a
    comma-separated list of patterns of which only the first is used. The
    third and fourth fields are not used here.

    If ``clean_orth`` changes the first variant, the cleaned form is added
    in front of the variants. If the (possibly cleaned) first variant is
    judged katakana by ``is_katakana``, the reading is converted with
    ``hira_to_kata``.

    Returns a dict mapping each variant to a list of unique
    ``(reading, pattern)`` tuples in order of first appearance.

    Raises ``ValueError`` (from tuple unpacking) if a non-blank line does
    not contain exactly five fields.
    """
    acc_dict = {}
    with open(path, encoding='utf8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # tolerate blank lines instead of failing on the unpack
                continue
            orths_txt, hira, _hz, _accs_txt, patts_txt = line.split('\u241e')
            orth_txts = orths_txt.split('\u241f')
            cleaned_first = clean_orth(orth_txts[0])
            if cleaned_first != orth_txts[0]:
                orth_txts = [cleaned_first] + orth_txts
            patts = patts_txt.split(',')
            patt_common = patts[0]  # TODO: extend to support variants?
            if is_katakana(orth_txts[0]):
                hira = hira_to_kata(hira)
            entry = (hira, patt_common)
            for orth in orth_txts:
                known = acc_dict.setdefault(orth, [])
                if entry not in known:
                    known.append(entry)
    return acc_dict
def get_note_type_ids(deck_id):
    """Return the distinct ``mid`` values of notes that have a card in a deck.

    The query selects from ``notes`` every row whose id appears as ``nid``
    of a card with ``did`` equal to ``deck_id``.
    """
    query = (
        'SELECT distinct mid FROM notes WHERE id IN (SELECT nid FROM'
        ' cards WHERE did = ?) ORDER BY id'
    )
    return [row[0] for row in mw.col.db.execute(query, deck_id)]
def get_note_ids(deck_id, note_type):
    """Return ids of notes with ``mid == note_type`` that have a card in a deck.

    Only notes whose id appears as ``nid`` of a card with ``did`` equal to
    ``deck_id`` are included; results are ordered by note id.
    """
    query = (
        'SELECT id FROM notes WHERE mid = ? AND id IN (SELECT nid FROM'
        ' cards WHERE did = ?) ORDER BY id'
    )
    return [row[0] for row in mw.col.db.execute(query, note_type, deck_id)]
def select_note_fields_all(note_id):
    """Ask which fields hold the expression, the reading and the output.

    The ``flds`` value of the note ``note_id`` is split on ``\\x1f`` and
    each part is shown as ``[index] first-20-characters``. The user is
    asked three questions in turn via ``chooseList``.

    Returns the three answers as ``(expr_idx, reading_idx, output_idx)``.
    """
    flds_row = mw.col.db.first(
        'SELECT flds FROM notes WHERE id = ?', note_id)
    labels = []
    for idx, content in enumerate(flds_row[0].split('\x1f')):
        labels.append('[{}] {}'.format(idx, content[:20]))
    prompts = (
        'Which field contains the Japanese expression?',
        'Which field contains the reading?',
        'Which field should the pitch accent be shown in?',
    )
    # the dialogs are shown in the order of `prompts`
    expr_idx, reading_idx, output_idx = [
        chooseList(prompt, labels) for prompt in prompts
    ]
    return expr_idx, reading_idx, output_idx
def select_note_fields_del(note_id):
    """Ask which field the pitch accent should be removed from.

    The ``flds`` value of the note ``note_id`` is split on ``\\x1f`` and
    each part is shown as ``[index] first-20-characters``.

    Returns whatever ``chooseList`` returns for the selection.
    """
    flds_row = mw.col.db.first(
        'SELECT flds FROM notes WHERE id = ?', note_id)
    labels = []
    for idx, content in enumerate(flds_row[0].split('\x1f')):
        labels.append('[{}] {}'.format(idx, content[:20]))
    return chooseList(
        'Which field should the pitch accent be removed from?', labels
    )
def clean(s):
    """Return ``s`` without HTML, without bracketed parts, and trimmed.

    HTML is removed with ``stripHTML``; afterwards every span that opens
    with ``[``, ``(`` or ``{`` and runs to the next ``]``, ``)`` or ``}``
    is dropped, and surrounding whitespace is stripped.
    """
    without_html = stripHTML(s)
    # remove everything in brackets
    without_brackets = re.sub(r'[\[\(\{][^\]\)\}]*[\]\)\}]', '', without_html)
    return without_brackets.strip()
def get_acc_patt(expr_field, reading_field, dicts):
    """Look up the pattern entry for an expression in a list of dicts.

    Both fields are passed through ``clean`` first. For each dict in
    ``dicts`` three keys are tried in order: the cleaned expression, its
    part before the first space, and its part before the first ``<``,
    ``&`` or space. The first key with a non-empty entry list wins.

    From that list, the entry whose first element occurs earliest in the
    cleaned reading is returned; if none occurs, the first entry is used.

    Returns ``False`` if the cleaned expression is empty or nothing is
    found.
    """
    def select_best_patt(reading_field, patts):
        best_pos = 9001
        best = patts[0]  # default
        for patt in patts:
            hira, _ = patt
            pos = reading_field.find(hira)
            if pos != -1 and pos < best_pos:
                best, best_pos = patt, pos
        return best

    expr = clean(expr_field)
    reading = clean(reading_field)
    if len(expr) == 0:
        return False
    candidates = (
        expr,
        expr.split(' ')[0],
        re.sub('[<&]', ' ', expr).split(' ')[0],
    )
    for dic in dicts:
        for key in candidates:
            patts = dic.get(key, False)
            if patts:
                return select_best_patt(reading, patts)
    return False
def add_pitch(acc_dict, plugin_dir_name, note_ids, expr_idx, reading_idx,
              output_idx):
    """Append a pitch accent SVG to the output field of each given note.

    For every id in ``note_ids`` the note's ``flds`` string is read and
    split on ``\\x1f``. Notes whose ``flds`` already contain the
    ``<!-- accent_start -->`` marker are skipped. Otherwise a pattern is
    looked up with ``get_acc_patt`` from the expression and reading
    fields, lowercase ``l``/``h`` are removed from it, and
    ``draw_pitch.pitch_svg`` (imported from the package named
    ``plugin_dir_name``) renders the SVG. The SVG is wrapped in
    start/end marker comments and appended to the output field, preceded
    by ``<br><hr><br>`` when that field is not empty. The row is then
    written back with ``usn = -1`` and ``mod`` set to the current time.

    Returns ``(not_found_list, num_updated, num_already_done,
    num_svg_fail)`` where ``not_found_list`` holds ``[nid, expr_field]``
    pairs for notes without a pattern.
    """
    draw_pitch = __import__(
        '{}.draw_pitch'.format(plugin_dir_name), fromlist=('foo')
    )
    missing = []
    updated = 0
    skipped = 0
    svg_failed = 0
    for nid in note_ids:
        flds_str = mw.col.db.first(
            'SELECT flds FROM notes WHERE id = ?', nid
        )[0]
        if '<!-- accent_start -->' in flds_str:
            # already has pitch accent image
            skipped += 1
            continue
        fields = flds_str.split('\x1f')
        expr_field = fields[expr_idx].strip()
        reading_field = fields[reading_idx].strip()
        patt = get_acc_patt(expr_field, reading_field, [acc_dict])
        if not patt:
            missing.append([nid, expr_field])
            continue
        hira, LlHh_patt = patt
        svg = draw_pitch.pitch_svg(hira, re.sub(r'[lh]', '', LlHh_patt))
        if not svg:
            svg_failed += 1
            continue
        current = fields[output_idx]
        separator = '<br><hr><br>' if len(current) > 0 else ''
        fields[output_idx] = (
            '{}<!-- accent_start -->{}{}<!-- accent_end -->'
        ).format(current, separator, svg)  # add svg
        mw.col.db.execute(
            'UPDATE notes SET usn = ?, mod = ?, flds = ? WHERE id = ?',
            -1, int(time.time()), '\x1f'.join(fields), nid
        )
        updated += 1
    return missing, updated, skipped, svg_failed
def remove_pitch(note_ids, del_idx):
    """Remove pitch accent blocks from one field of each given note.

    For every id in ``note_ids`` the note's ``flds`` string is read and
    split on ``\\x1f``. Every span from ``<!-- accent_start -->`` to the
    nearest following ``<!-- accent_end -->`` in field ``del_idx`` is
    deleted. The match is non-greedy so text between two separate blocks
    is kept. A note is only written back (with ``usn = -1`` and ``mod``
    set to the current time) when at least one block was removed;
    otherwise it is counted as already done and left untouched.

    Returns ``(num_already_done, num_updated)``.
    """
    acc_patt = re.compile(
        r'<!-- accent_start -->.*?<!-- accent_end -->', re.S
    )
    num_updated = 0
    num_already_done = 0
    for nid in note_ids:
        row = mw.col.db.first('SELECT flds FROM notes WHERE id = ?', nid)
        fields = row[0].split('\x1f')
        stripped, num_removed = acc_patt.subn('', fields[del_idx])
        if num_removed == 0:
            # has no (complete) pitch accent block in this field
            num_already_done += 1
            continue
        fields[del_idx] = stripped
        new_flds_str = '\x1f'.join(fields)
        mod_time = int(time.time())
        mw.col.db.execute(
            'UPDATE notes SET usn = ?, mod = ?, flds = ? WHERE id = ?',
            -1, mod_time, new_flds_str, nid)
        num_updated += 1
    return num_already_done, num_updated
def hira_to_kata(s):
    """Return ``s`` with hiragana replaced by the matching katakana.

    Characters in the range ``ぁ``..``ゔ`` have their code point raised by
    96; every other character is copied unchanged.
    """
    converted = []
    for ch in s:
        if 'ぁ' <= ch <= 'ゔ':
            converted.append(chr(ord(ch) + 96))
        else:
            converted.append(ch)
    return ''.join(converted)
def is_katakana(s):
    """Return True if more than half of the characters of ``s`` are katakana.

    A character counts as katakana when it is ``ー`` or lies in the range
    ``ァ``..``ヴ``. An empty string yields False.
    """
    katakana_count = sum(
        1 for ch in s if ch == 'ー' or ('ァ' <= ch <= 'ヴ')
    )
    total = max(1, len(s))  # avoid dividing by zero for empty input
    return katakana_count / total > .5
def clean_orth(orth):
    """Return ``orth`` with annotation symbols removed.

    Every character listed in the character class below is deleted, then
    each ``…`` is replaced by ``〜``.
    """
    without_marks = re.sub('[()△×・〈〉{}]', '', orth)
    # change depending on what you use
    return without_marks.replace('…', '〜')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment