waf/parse-word-list.py

## parse-word-list.py
#!/usr/bin/env python

# Python 2 script that parses Stuart Jay Raj's "First Words to Learn - Enhanced Wordlist from Fluency Forever Multi Lang"
# Input file should be the JSON export here: https://spreadsheets.google.com/feeds/list/1GsNe8GVzgIuOeeEVBCcoIvxVMDO5xnEgGRr8EKtcSGI/1/public/basic?alt=json
# See original spreadsheet here: https://docs.google.com/spreadsheets/d/1GsNe8GVzgIuOeeEVBCcoIvxVMDO5xnEgGRr8EKtcSGI/edit#gid=884321509

import json
import codecs

QUESTION_LANGUAGE = 'thaith'
ANSWER_LANGUAGE = 'englishen'
INPUT_FILENAME = 'basic.json'


def _parse_translations(content):
    """Turn one row's content mega-string into a {column: value} dict.

    The content looks like "thaith: ..., englishen: ..." -- "key: value"
    pairs joined by ", ".

    - Only the first ": " of a fragment separates key from value, so a
      value that itself contains ": " is kept whole instead of dropped.
    - A fragment without any ": " cannot be a pair of its own; it is
      treated as the tail of the previous value, which was cut apart
      because it contained ", ".

    Limitation: a value containing ", something: " is still read as the
    start of a new pair; the format gives no way to tell the two apart.
    """
    translations = dict()
    last_key = None
    for fragment in content.split(", "):
        key, separator, value = fragment.partition(": ")
        if separator:
            translations[key] = value
            last_key = key
        elif last_key is not None:
            # Glue the comma-separated tail back onto the value it came from.
            translations[last_key] += ", " + fragment
    return translations


def parse_entries(entries, question_language=QUESTION_LANGUAGE,
                  answer_language=ANSWER_LANGUAGE):
    """Group feed entries into {category: [{'question', 'answer'}, ...]}.

    entries -- list of dicts, each with entry['title']['$t'] and
               entry['content']['$t'] strings.
    question_language / answer_language -- column names looked up in each
               row's parsed content.

    An entry whose title does not start with 'Row: ' opens a new category
    named after that title; that entry and the ones after it are added to
    the category until the next such title. Entries seen before the first
    category are collected in a list that is never part of the result.

    Returns the dict that is written to the output file.
    """
    parsed = dict()  # this will hold the json that we write to file
    words = list()   # this will be all the words in a single category
    for entry in entries:
        category = entry['title']['$t']
        if not category.startswith('Row: '):
            words = list()
            parsed[category] = words

        translations = _parse_translations(entry['content']['$t'])
        # A row lacking either language cannot form a question/answer
        # pair; skip it instead of aborting the whole export with KeyError.
        if (question_language not in translations
                or answer_language not in translations):
            continue

        jsonobj = dict()
        jsonobj['question'] = translations[question_language]
        jsonobj['answer'] = translations[answer_language]
        words.append(jsonobj)
    return parsed


def main():
    """Read INPUT_FILENAME, parse it, and write <QUESTION_LANGUAGE>.json."""
    with codecs.open(INPUT_FILENAME, 'r', encoding='utf8') as json_file:
        data = json.load(json_file)

    parsed = parse_entries(data['feed']['entry'])

    output_filename = QUESTION_LANGUAGE + ".json"
    with codecs.open(output_filename, 'w', encoding='utf8') as output_file:
        # ensure_ascii=False keeps non-ASCII text readable in the output
        # instead of \uXXXX escapes.
        json.dump(parsed, output_file, indent=4, ensure_ascii=False)

    print("Created " + output_filename)


if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	# Python 2 script that parses Stuart Jay Raj's "First Words to Learn - Enhanced Wordlist from Fluency Forever Multi Lang"
	# Input file should be the JSON export here: https://spreadsheets.google.com/feeds/list/1GsNe8GVzgIuOeeEVBCcoIvxVMDO5xnEgGRr8EKtcSGI/1/public/basic?alt=json
	# See original spreadsheet here: https://docs.google.com/spreadsheets/d/1GsNe8GVzgIuOeeEVBCcoIvxVMDO5xnEgGRr8EKtcSGI/edit#gid=884321509

	import json
	import codecs

	QUESTION_LANGUAGE = 'thaith'
	ANSWER_LANGUAGE = 'englishen'
	INPUT_FILENAME = 'basic.json'

	# read the whole export and decode it as JSON
	with codecs.open(INPUT_FILENAME, 'r', encoding='utf8') as json_file:
	    data = json.loads(json_file.read())

	entries = data['feed']['entry']
	parsed = dict() # this will hold the json that we write to file
	words = list() # this will be all the words in a single category
	for entry in entries:
	    category = entry['title']['$t']
	    content = entry['content']['$t']
	    # a title that does not start with 'Row: ' opens a new category;
	    # this entry and the following ones are collected under it
	    if not category.startswith('Row: '):
	        words = list()
	        parsed[category] = words

	    # each content row is one mega-string that needs to be split into useful data.
	    # transform it to a dictionary.
	    translations = dict()
	    for translation in content.split(", "):
	        language_pair = translation.split(": ")
	        # only fragments that split into exactly a key and a value are kept
	        if len(language_pair) == 2:
	            translations[language_pair[0]] = language_pair[1]

	    # one question/answer pair per entry, appended to the current category
	    jsonobj = dict()
	    jsonobj['question'] = translations[QUESTION_LANGUAGE]
	    jsonobj['answer'] = translations[ANSWER_LANGUAGE]
	    words.append(jsonobj)

	# the output file is named after the question language
	output_filename = QUESTION_LANGUAGE + ".json"
	with codecs.open(output_filename, 'w', encoding='utf8') as output_file:
	    json.dump(parsed, output_file, indent=4, ensure_ascii=False)

	print("Created " + output_filename)