Skip to content

Instantly share code, notes, and snippets.

@NSBum
Created November 3, 2017 11:02
Show Gist options
  • Save NSBum/bed77ec635d20c1be48ae79a42b5bcfc to your computer and use it in GitHub Desktop.
Save NSBum/bed77ec635d20c1be48ae79a42b5bcfc to your computer and use it in GitHub Desktop.
Download Russian pronunciation from Wiktionary
#!/usr/bin/python
# encoding=utf8
import re
import requests
import urllib2
import xerox
from os.path import expanduser, normpath, basename, join
import Foundation
import objc
import AppKit
# Debug switch: when truthy, the script prints the Wiktionary page URL and the
# resolved audio URL before downloading.
DEBUG = 0
# Deal with encoding for Cyrillic (Python 2 only idiom).
# `sys.setdefaultencoding` is removed from the `sys` namespace at interpreter
# startup, so the module has to be reloaded before it can be called. Setting
# the default to UTF-8 lets implicit str/unicode conversions (such as joining
# the unicode base URL with the UTF-8 encoded word) handle non-ASCII text.
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
# Resolve the Objective-C notification classes by name through the PyObjC
# bridge; `notify` uses them to build and schedule notifications.
NSUserNotification = objc.lookUpClass('NSUserNotification')
NSUserNotificationCenter = objc.lookUpClass('NSUserNotificationCenter')
def notify(title, subtitle, info_text, delay=0, sound=False, userInfo=None):
    """Schedule a user notification through NSUserNotificationCenter.

    Args:
        title: Notification title.
        subtitle: Notification subtitle.
        info_text: Informative (body) text of the notification.
        delay: Time interval added to the current date to obtain the
            delivery date; 0 schedules the notification for right now.
        sound: When truthy, the default notification sound is requested.
        userInfo: Optional dictionary attached to the notification. Defaults
            to a fresh empty dictionary for every call.
    """
    # A `{}` default would be one dictionary shared by every call (and handed
    # to the notification object), so a new one is created per call instead.
    if userInfo is None:
        userInfo = {}

    notification = NSUserNotification.alloc().init()
    notification.setTitle_(title)
    notification.setSubtitle_(subtitle)
    notification.setInformativeText_(info_text)
    notification.setUserInfo_(userInfo)
    if sound:
        notification.setSoundName_("NSUserNotificationDefaultSoundName")

    # The delivery date is "now + delay".
    delivery_date = Foundation.NSDate.dateWithTimeInterval_sinceDate_(
        delay, Foundation.NSDate.date())
    notification.setDeliveryDate_(delivery_date)
    NSUserNotificationCenter.defaultUserNotificationCenter().scheduleNotification_(notification)
class WikiPage(object):
    """Wiktionary page - source for the extraction.

    Wraps the English Wiktionary entry of a Russian word and locates and
    downloads the pronunciation recording (an ``Ru-*.ogg`` file) linked from
    that page.
    """

    # Path of the pronunciation file below ".../wikipedia/commons". Every path
    # segment stops at "/", quotes, angle brackets and whitespace, and the file
    # name is matched lazily, so the match ends at the first ".ogg" of a single
    # URL even when several audio URLs share one line of HTML.
    _AUDIO_PATH_RE = re.compile(
        r"""commons(/[^/\s"'<>]+/[^/\s"'<>]+/Ru-[^/\s"'<>]+?\.ogg)"""
    )

    def __init__(self, ruWord):
        """Create a page object for the word ``ruWord``; no request is made yet."""
        super(WikiPage, self).__init__()
        self.word = ruWord
        self.baseURL = u'http://en.wiktionary.org/wiki/'
        self.anchor = u'#Russian'
        # Cached HTTP response for the page; filled in by the first page() call.
        self._response = None

    def url(self):
        """Return the page URL: base URL + word + the '#Russian' anchor."""
        return self.baseURL + self.word + self.anchor

    def page(self):
        """Return the HTTP response for the page.

        The page is requested on the first call only; later calls return the
        same response object so the page is not downloaded repeatedly.
        """
        if self._response is None:
            self._response = requests.get(self.url())
        return self._response

    def audioLink(self):
        """Return the audio file path relative to the Wikimedia Commons root.

        Raises:
            AttributeError: if the page text contains no matching audio path.
                The exception type is kept for callers that already catch it.
        """
        match = self._AUDIO_PATH_RE.search(self.page().text)
        if match is None:
            raise AttributeError(
                "no Russian pronunciation audio found on " + self.url())
        return match.group(1)

    def fullAudioLink(self):
        """Return the absolute download URL of the audio file.

        Raises:
            AttributeError: propagated from audioLink() when there is no audio.
        """
        return 'https://upload.wikimedia.org/wikipedia/commons' + self.audioLink()

    def downloadAudio(self):
        """Download the audio file to ``~/Downloads/<word>.ogg``.

        When the page has no audio, a message is printed and a notification is
        posted via ``notify`` instead, and nothing is written.
        """
        path = join(expanduser("~"), 'Downloads', self.word + '.ogg')
        # Only the link lookup is guarded, so an unrelated AttributeError from
        # the download itself is not misreported as "no audio".
        try:
            link = self.fullAudioLink()
        except AttributeError:
            print("There appears to be no audio.")
            notify("No audio", "Wiktionary has no pronunciation", "Pronunciation is not available for download.", sound=True)
            return

        response = urllib2.urlopen(link)
        try:
            # Read everything before opening the output file so a failed
            # transfer does not leave an empty file behind.
            data = response.read()
        finally:
            response.close()
        with open(path, 'wb') as output:
            output.write(data)
# Take the word from the clipboard. Surrounding whitespace (for example a
# trailing newline picked up when copying) is stripped so it ends up in neither
# the Wiktionary URL nor the name of the downloaded file.
word = xerox.paste().strip().encode('utf-8')
wikipage = WikiPage(word)
if DEBUG:
    # Show which page and which audio file are about to be used.
    print(wikipage.url())
    print(wikipage.fullAudioLink())
wikipage.downloadAudio()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment