nmz787/wiktionary_audio_video_flashcards.py

## wiktionary_audio_video_flashcards.py
"""
wiktionary audio video flasher (like automated flashcards).
An app that gets words from wiktionary,
and if there is an associated audio clip,
displays the word on-screen in a Tkinter GUI,
and plays the audio pronunciations available.
Change the seed_word_name variable as you desire.

target audience: babies, language learning

requires (from pip): pydub, wiktionaryparser
"""

from wiktionaryparser import WiktionaryParser
import os
import subprocess
import Tkinter as tk
import urllib2
from pydub import AudioSegment
from pydub.playback import play

class wiktionaryPlayer(object):
    """Tkinter flash-card that crawls Wiktionary and plays pronunciation audio.

    Constructing the player packs one large label into ``master`` and then
    immediately starts crawling from the hard-coded seed word: each word that
    has audio clips is shown in the label while its clips are played, and the
    related words listed on every entry are visited in turn.  The constructor
    therefore only returns once the crawl has finished.
    """

    def __init__(self, master):
        """Build the label widget and start the crawl from the seed word.

        Args:
            master: Tk container the label is packed into; it is also
                refreshed via ``update()`` whenever the shown word changes.
        """
        self.master = master
        # Text variable backing the label; setting it changes the shown word.
        self.string_var = tk.StringVar()
        label2 = tk.Label(master, height=0, textvariable=self.string_var, font=("Helvetica", 146))
        label2.pack()

        seed_word_name = 'science'
        self.string_var.set(seed_word_name)

        self.parser = WiktionaryParser()
        self.master.update()
        # Every word already looked up (or queued for look-up) in this run.
        self.words_seen = set()
        self.attempt_a_word(seed_word_name, isSeedWord=True)

    def play_pydub(self, url):
        """Download the audio clip at ``url`` and play it with pydub.

        The clip is written to a private temporary file (pydub is given a
        file name plus the format taken from the URL's extension) and that
        file is removed again afterwards.  A clip that cannot be decoded or
        played is reported on stdout and skipped.

        Args:
            url: absolute URL of the clip; the text after the last ``.`` is
                used as the audio format.
        """
        # Local import: tempfile is not among the file-level imports.
        import tempfile

        ext = url.split('.')[-1]

        audio_file = urllib2.urlopen(url)
        try:
            data = audio_file.read()
        finally:
            # urllib2 responses are not context managers; close explicitly.
            audio_file.close()

        fd, filename = tempfile.mkstemp(suffix='.{}'.format(ext))
        try:
            with os.fdopen(fd, 'wb') as output:
                output.write(data)

            try:
                song = AudioSegment.from_file(filename, ext)
                play(song)
            except Exception:
                # Not a bare ``except``: Ctrl-C / SystemExit must still stop the app.
                print('audio failed to play ({}) with detected file-extension ({})'.format(url, ext))
        finally:
            try:
                os.remove(filename)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass

    @staticmethod
    def _candidate_words(raw):
        """Yield the look-up candidates contained in one related-words string.

        Everything from the first ``:`` or ``(`` onwards is dropped, the rest
        is split on commas, and each piece is trimmed of whitespace and
        parentheses.  A multi-word piece yields the whole phrase first and
        then each of its sub-words.  Empty pieces are never yielded.
        """
        for piece in raw.split(':')[0].split('(')[0].split(','):
            cleaned = piece.strip().strip('()').strip()
            if not cleaned:
                continue
            yield cleaned
            if u' ' in cleaned:
                # split() without arguments also copes with repeated spaces.
                for subword in cleaned.split():
                    yield subword

    def _visit_word(self, word):
        """Fetch ``word``, play its audio clips and yield its related words.

        Written as a generator so that the crawl in ``attempt_a_word`` can
        suspend a word while a related word is being processed, exactly as
        the nested call would, but without consuming interpreter stack.
        """
        for entry in self.parser.fetch(word):
            for clip in entry['pronunciations']['audio']:
                self.string_var.set(word)
                self.master.update()
                # The parser's clip links carry no scheme; 'https:' is prepended.
                clip_url = 'https:' + clip
                print('playing {}'.format(clip_url))
                self.play_pydub(clip_url)

            for definition in entry['definitions']:
                for related in definition['relatedWords']:
                    for raw in related['words']:
                        for candidate in self._candidate_words(raw):
                            yield candidate

    def attempt_a_word(self, word, isSeedWord=False):
        """Look ``word`` up on Wiktionary, play its audio, then follow related words.

        The crawl is depth-first: a related word is processed completely
        (including its own related words) before the next one is considered.
        It runs on an explicit stack of generators rather than by recursion,
        so a long chain of related words cannot exceed the recursion limit.

        Args:
            word: the word to look up.
            isSeedWord: when true, a completion message is printed once the
                word and everything reachable from it has been processed.
        """
        self.words_seen.add(word)
        pending = [self._visit_word(word)]
        while pending:
            try:
                candidate = next(pending[-1])
            except StopIteration:
                pending.pop()
                continue

            if candidate in self.words_seen:
                continue
            # Remember the candidate as written, so that a non-ASCII word is
            # not re-attempted every time it shows up again.
            self.words_seen.add(candidate)

            # Look-ups use the ASCII-folded spelling (non-ASCII characters
            # dropped), kept as text so it prints and compares cleanly.
            ascii_word = candidate.encode('ascii', 'ignore').decode('ascii')
            if ascii_word != candidate:
                if not ascii_word or ascii_word in self.words_seen:
                    continue
                self.words_seen.add(ascii_word)

            print('attempting {}'.format(ascii_word))
            pending.append(self._visit_word(ascii_word))

        if isSeedWord:
            print('done with seed-word and all it\'s related words')

if __name__ == "__main__":
    # Script entry point: create the Tk root window, hand it to the player
    # (whose constructor starts the word crawl), then run the Tk event loop.
    root = tk.Tk()
    player = wiktionaryPlayer(root)
    tk.mainloop()
	"""
	wiktionary audio video flasher (like automated flashcards).
	An app that gets words from wiktionary,
	and if there is an associated audio clip,
	displays the word on-screen in a Tkinter GUI,
	and plays the audio pronunciations available.
	Change the seed_word_name variable as you desire.

	target audience: babies, language learning

	requires (from pip): pydub, wiktionaryparser
	"""

	from wiktionaryparser import WiktionaryParser
	import os
	import subprocess
	import Tkinter as tk
	import urllib2
	from pydub import AudioSegment
	from pydub.playback import play

	class wiktionaryPlayer(object):
		"""Tkinter flash-card that crawls Wiktionary and plays pronunciation audio.

		Constructing the player packs one large label into ``master`` and then
		immediately starts crawling from the hard-coded seed word: each word that
		has audio clips is shown in the label while its clips are played, and the
		related words listed on every entry are visited in turn.  The constructor
		therefore only returns once the crawl has finished.
		"""

		def __init__(self, master):
			"""Build the label widget and start the crawl from the seed word.

			Args:
				master: Tk container the label is packed into; it is also
					refreshed via ``update()`` whenever the shown word changes.
			"""
			self.master = master
			# Text variable backing the label; setting it changes the shown word.
			self.string_var = tk.StringVar()
			label2 = tk.Label(master, height=0, textvariable=self.string_var, font=("Helvetica", 146))
			label2.pack()

			seed_word_name = 'science'
			self.string_var.set(seed_word_name)

			self.parser = WiktionaryParser()
			self.master.update()
			# Every word already looked up (or queued for look-up) in this run.
			self.words_seen = set()
			self.attempt_a_word(seed_word_name, isSeedWord=True)

		def play_pydub(self, url):
			"""Download the audio clip at ``url`` and play it with pydub.

			The clip is written to a private temporary file (pydub is given a
			file name plus the format taken from the URL's extension) and that
			file is removed again afterwards.  A clip that cannot be decoded or
			played is reported on stdout and skipped.

			Args:
				url: absolute URL of the clip; the text after the last ``.`` is
					used as the audio format.
			"""
			# Local import: tempfile is not among the file-level imports.
			import tempfile

			ext = url.split('.')[-1]

			audio_file = urllib2.urlopen(url)
			try:
				data = audio_file.read()
			finally:
				# urllib2 responses are not context managers; close explicitly.
				audio_file.close()

			fd, filename = tempfile.mkstemp(suffix='.{}'.format(ext))
			try:
				with os.fdopen(fd, 'wb') as output:
					output.write(data)

				try:
					song = AudioSegment.from_file(filename, ext)
					play(song)
				except Exception:
					# Not a bare ``except``: Ctrl-C / SystemExit must still stop the app.
					print('audio failed to play ({}) with detected file-extension ({})'.format(url, ext))
			finally:
				try:
					os.remove(filename)
				except OSError:
					# Best-effort cleanup; a leftover temp file is harmless.
					pass

		@staticmethod
		def _candidate_words(raw):
			"""Yield the look-up candidates contained in one related-words string.

			Everything from the first ``:`` or ``(`` onwards is dropped, the rest
			is split on commas, and each piece is trimmed of whitespace and
			parentheses.  A multi-word piece yields the whole phrase first and
			then each of its sub-words.  Empty pieces are never yielded.
			"""
			for piece in raw.split(':')[0].split('(')[0].split(','):
				cleaned = piece.strip().strip('()').strip()
				if not cleaned:
					continue
				yield cleaned
				if u' ' in cleaned:
					# split() without arguments also copes with repeated spaces.
					for subword in cleaned.split():
						yield subword

		def _visit_word(self, word):
			"""Fetch ``word``, play its audio clips and yield its related words.

			Written as a generator so that the crawl in ``attempt_a_word`` can
			suspend a word while a related word is being processed, exactly as
			the nested call would, but without consuming interpreter stack.
			"""
			for entry in self.parser.fetch(word):
				for clip in entry['pronunciations']['audio']:
					self.string_var.set(word)
					self.master.update()
					# The parser's clip links carry no scheme; 'https:' is prepended.
					clip_url = 'https:' + clip
					print('playing {}'.format(clip_url))
					self.play_pydub(clip_url)

				for definition in entry['definitions']:
					for related in definition['relatedWords']:
						for raw in related['words']:
							for candidate in self._candidate_words(raw):
								yield candidate

		def attempt_a_word(self, word, isSeedWord=False):
			"""Look ``word`` up on Wiktionary, play its audio, then follow related words.

			The crawl is depth-first: a related word is processed completely
			(including its own related words) before the next one is considered.
			It runs on an explicit stack of generators rather than by recursion,
			so a long chain of related words cannot exceed the recursion limit.

			Args:
				word: the word to look up.
				isSeedWord: when true, a completion message is printed once the
					word and everything reachable from it has been processed.
			"""
			self.words_seen.add(word)
			pending = [self._visit_word(word)]
			while pending:
				try:
					candidate = next(pending[-1])
				except StopIteration:
					pending.pop()
					continue

				if candidate in self.words_seen:
					continue
				# Remember the candidate as written, so that a non-ASCII word is
				# not re-attempted every time it shows up again.
				self.words_seen.add(candidate)

				# Look-ups use the ASCII-folded spelling (non-ASCII characters
				# dropped), kept as text so it prints and compares cleanly.
				ascii_word = candidate.encode('ascii', 'ignore').decode('ascii')
				if ascii_word != candidate:
					if not ascii_word or ascii_word in self.words_seen:
						continue
					self.words_seen.add(ascii_word)

				print('attempting {}'.format(ascii_word))
				pending.append(self._visit_word(ascii_word))

			if isSeedWord:
				print('done with seed-word and all it\'s related words')

	if __name__ == "__main__":
		# Script entry point: create the Tk root window, hand it to the player,
		# then run the Tk event loop.
		master = tk.Tk()
		b = wiktionaryPlayer(master)
		tk.mainloop()