Last active
January 12, 2020 09:17
-
-
Save garfieldnate/f05d00c3b6683c2349d38d1c08500776 to your computer and use it in GitHub Desktop.
Parse CSV vocab output from ThaiDict app
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse the CSV vocabulary export produced by the ThaiDict app.
import csv | |
import sys | |
# TODO: uncomment once the import into tango is wired up.
# from tango.tango import model
def row_to_vocab(row):
    """Convert one ThaiDict CSV row into a vocab entry dict.

    Args:
        row: mapping with the keys 'en', 'th', 'pron', 'usage', 'pos',
            'cat' and 'link'. Values may be None, which is what
            csv.DictReader supplies for columns missing from a short row.

    Returns:
        A dict with the keys 'headword', 'pronunciation', 'morphology',
        'source', 'definition' and 'notes'.
    """
    vocab = {
        'headword': row['th'],
        'pronunciation': row['pron'],
        'morphology': row['pos'],
        'source': row['link'],
    }

    # A missing English column (None) is treated as an empty definition
    # rather than crashing on None.split().
    definitions = (row['en'] or '').split(' | ')
    if len(definitions) == 1:
        vocab['definition'] = definitions[0]
    else:
        # Use bullet points when multiple definitions were provided.
        vocab['definition'] = '\n'.join('• ' + d for d in definitions)

    # Only non-empty optional columns contribute a line to the notes.
    notes = []
    for label, key in (('category', 'cat'), ('usage', 'usage')):
        if row[key]:
            notes.append(label + ': ' + row[key])
    vocab['notes'] = '\n'.join(notes)
    return vocab
# TODO: once the tango import is enabled, store each entry with:
# model.add_tango('th', v)
def read_vocab(file):
    """Read a ThaiDict CSV export and convert every row to a vocab dict.

    Field names are supplied explicitly, so the first line of the file is
    treated as data and columns are mapped positionally to:
    en, th, pron, usage, pos, cat, link.

    Args:
        file: path of the CSV file to read.

    Returns:
        A list with one dict per CSV row, as returned by row_to_vocab.
    """
    fieldnames = ['en', 'th', 'pron', 'usage', 'pos', 'cat', 'link']
    # Decode explicitly instead of relying on the locale's default encoding,
    # which is not UTF-8 on every platform; 'utf-8-sig' also drops a leading
    # BOM so it cannot leak into the first 'en' field.
    # NOTE(review): assumes the ThaiDict export is UTF-8 - confirm.
    # newline='' is what the csv module asks for so that line breaks inside
    # quoted fields are parsed correctly.
    with open(file, encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f, fieldnames=fieldnames, dialect='unix')
        return [row_to_vocab(row) for row in reader]
def main(argv):
    """Command-line entry point: parse the given CSV file and print the vocab.

    Args:
        argv: full argument vector; the program name followed by exactly
            one argument, the path of the CSV file.

    Raises:
        SystemExit: with status 2 when the argument count is wrong.
    """
    if len(argv) != 2:
        # A usage error must not look like success to the calling shell:
        # report on stderr and exit with a non-zero status.
        print('Usage: python3 import_thaidict.py <csv file>', file=sys.stderr)
        sys.exit(2)
    print(read_vocab(argv[1]))
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main(sys.argv)
Next step: uncomment the `tango` import statement and actually run the script. I need to finish exporting my favorites first.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Next step: import into tango. Get the model with `tango.tango.model.get_model()`, then call `model.add_tango('th', {headword, pronunciation, morphology, definition, example, image_url, image_base64, notes, source})`.