Last active
May 6, 2018 06:35
-
-
Save nmz787/53852c926eac0c4d9b047aac5733f74a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
wiktionary audio video flasher (like automated flashcards).
An app that gets words from wiktionary,
and if there is an associated audio clip,
displays the word on-screen in a Tkinter GUI,
and plays the audio pronunciations available.
Change the seed_word_name variable as you desire.
target audience: babies, language learning
requires (from pip): pydub, wiktionaryparser
""" | |
from wiktionaryparser import WiktionaryParser | |
import os | |
import subprocess | |
import Tkinter as tk | |
import urllib2 | |
from pydub import AudioSegment | |
from pydub.playback import play | |
class wiktionaryPlayer(object):
    """Tkinter flash-card player that crawls Wiktionary for words with audio.

    Starting from a seed word, each word is looked up with WiktionaryParser.
    Every audio pronunciation found is downloaded and played while the word
    is shown in a large label, and the entry's related words are then
    visited depth-first.
    """

    def __init__(self, master, seed_word_name='science'):
        """Build the label and immediately start crawling from the seed word.

        Args:
            master: the Tk root (or other Tk container) the label is packed
                into; it is also used to force redraws via ``update()``.
            seed_word_name: first word to look up. Defaults to 'science',
                the value that used to be hard-coded here.

        Note: the crawl runs synchronously inside the constructor, so this
        call does not return until the seed word and everything reachable
        from it have been attempted.
        """
        self.master = master
        self.string_var = tk.StringVar()
        word_label = tk.Label(master, height=0, textvariable=self.string_var,
                              font=("Helvetica", 146))
        word_label.pack()
        self.string_var.set(seed_word_name)
        self.parser = WiktionaryParser()
        # Draw the window once before the blocking crawl starts.
        self.master.update()
        self.words_seen = set()
        self.attempt_a_word(seed_word_name, isSeedWord=True)

    def play_pydub(self, url):
        """Download the audio clip at ``url`` and play it with pydub.

        The clip is written to ``./test.<ext>`` in the current directory,
        where ``<ext>`` is the text after the last '.' in the URL; the same
        extension is passed to pydub as the format hint.

        Decoding/playback failures are reported on stdout and otherwise
        ignored so one bad clip does not stop the crawl. Download errors
        still propagate, as before.
        """
        ext = url.split('.')[-1]
        filename = './test.{}'.format(ext)
        audio_file = urllib2.urlopen(url)
        try:
            payload = audio_file.read()
        finally:
            # Always release the HTTP connection, even if the read fails.
            audio_file.close()
        with open(filename, 'wb') as output:
            output.write(payload)
        try:
            song = AudioSegment.from_file(filename, ext)
            play(song)
        except Exception as err:
            # Best-effort playback: catch ordinary errors only, so Ctrl-C
            # (KeyboardInterrupt) can still stop the program.
            print('audio failed to play ({}) with detected file-extension ({})'.format(url, ext))
            print('  reason: {!r}'.format(err))

    @staticmethod
    def _candidate_words(related_entry):
        """Split one related-words string into ASCII words worth looking up.

        Everything from the first ':' or '(' onwards is discarded, and the
        remainder is split on ','. Each piece is stripped of whitespace and
        parentheses; a multi-word piece yields the whole phrase followed by
        each of its sub-words. Non-ASCII characters are dropped, and
        candidates that end up empty are skipped.
        """
        head = related_entry.split(':')[0].split('(')[0]
        candidates = []
        for piece in head.split(','):
            phrase = piece.strip().strip('()').strip()
            if not phrase:
                continue
            candidates.append(phrase)
            if u' ' in phrase:
                candidates.extend(sub for sub in phrase.split(u' ') if sub)
        # Normalise here so the "already seen" check and the lookup use the
        # exact same text (encode+decode yields text on Python 2 and 3).
        normalised = (c.encode('ascii', 'ignore').decode('ascii')
                      for c in candidates)
        return [c for c in normalised if c]

    def attempt_a_word(self, word, isSeedWord=False):
        """Look up ``word``, play its audio clips, then recurse into related words.

        Args:
            word: the word to fetch from Wiktionary.
            isSeedWord: when true, a completion message is printed once the
                word and everything reachable from it have been processed.

        Words are recorded in ``self.words_seen`` so each is fetched at most
        once. NOTE(review): the traversal is recursive, so a very long chain
        of related words could hit Python's recursion limit.
        """
        self.words_seen.add(word)
        entries = self.parser.fetch(word)
        for entry in entries:
            for audio_path in entry['pronunciations']['audio']:
                self.string_var.set(word)
                self.master.update()
                # Audio links are used with an explicit https: prefix.
                audio_url = 'https:' + audio_path
                print('playing {}'.format(audio_url))
                self.play_pydub(audio_url)
            for definition in entry['definitions']:
                for related in definition['relatedWords']:
                    for related_entry in related['words']:
                        for candidate in self._candidate_words(related_entry):
                            if candidate in self.words_seen:
                                continue
                            print('attempting {}'.format(candidate))
                            self.attempt_a_word(candidate)
        if isSeedWord:
            print('done with seed-word and all it\'s related words')
if __name__ == "__main__":
    # Script entry point: create the root window, start the player (which
    # crawls and plays from inside its constructor), then hand control to
    # the Tk event loop.
    master = tk.Tk()
    b = wiktionaryPlayer(master)
    master.mainloop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment