Created
May 10, 2016 22:35
-
-
Save zopieux/f1ca1ca1a58c387cbca8f546ff554d6c to your computer and use it in GitHub Desktop.
WordReference pseudo-TTS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
python wordreferencespeak.py \
never gonna give you up \
never gonna let you down \
never gonna run around and desert you
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
import lxml.html | |
import random | |
import requests | |
import subprocess | |
import tempfile | |
import contextlib | |
@functools.lru_cache(128)
def get_mp3(word):
    """Return the audio ``src`` values WordReference lists for *word*.

    Fetches the English-French dictionary page for *word* and, for every
    ``<audio>`` element below the element whose id is ``listen_widget``,
    collects the ``src`` attribute of its first ``<source>`` child.

    Results are memoised for the 128 most recently used words, so repeated
    words in a sentence trigger a single HTTP request. The cached list is
    shared between calls: callers must not mutate it.

    Args:
        word: The word to look up. It is percent-encoded before being put
            into the URL path.

    Returns:
        A list of ``src`` strings, one per usable ``<audio>`` element. The
        list is empty when the page offers no audio for the word.

    Raises:
        requests.RequestException: If the page cannot be fetched in time.
    """
    from urllib.parse import quote

    # Encode the word so characters such as '?', '#' or '/' cannot change
    # which page is requested.
    url = 'http://www.wordreference.com/enfr/{}'.format(quote(word, safe=''))
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=10)
    page = lxml.html.fromstring(response.content)

    sources = []
    for audio in page.xpath('//*[@id="listen_widget"]//audio'):
        source = audio.find('source')
        # Skip <audio> tags lacking a <source> child or a src attribute
        # instead of crashing or returning None entries.
        if source is None:
            continue
        src = source.get('src')
        if src:
            sources.append(src)
    return sources
def get_urls(sentence):
    """Yield one absolute audio URL for each word of *sentence*.

    For every word, the available recordings are looked up with
    ``get_mp3`` and one of them is picked at random, so repeated runs may
    use different recordings for the same word.

    Args:
        sentence: An iterable of words (strings).

    Yields:
        ``'http://www.wordreference.com/'`` followed by the chosen ``src``
        value, in the same order as the input words.

    Raises:
        ValueError: When a word has no recording. Because this is a
            generator, the error surfaces while iterating, at the position
            of the offending word.
    """
    for word in sentence:
        accents = get_mp3(word)
        # Test for "no recording" explicitly rather than catching the
        # IndexError from random.choice: a try block around the yield would
        # also swallow unrelated IndexErrors thrown in at the yield point.
        if not accents:
            raise ValueError("Unspeakable word: " + word)
        yield 'http://www.wordreference.com/' + random.choice(accents)
if __name__ == '__main__':
    # Command-line entry point: speak the words given as arguments by
    # downloading one recording per word and playing them back to back.
    import sys

    # Set to True to run every clip through sox before playback.
    postprocess = False

    # All arguments form one sentence; it is lower-cased and split on
    # whitespace so quoted and unquoted invocations behave the same.
    sentence = ' '.join(sys.argv[1:]).lower().split()
    if not sentence:
        # Nothing to say: do not start the player with an empty playlist.
        sys.exit('usage: {} WORD [WORD ...]'.format(sys.argv[0]))

    # Resolve every word before downloading anything, so an unspeakable
    # word is reported immediately and as a message, not a traceback.
    try:
        urls = list(get_urls(sentence))
    except ValueError as err:
        sys.exit(str(err))

    # The ExitStack keeps every temporary file alive (and on disk) until
    # playback has finished, then removes them all.
    with contextlib.ExitStack() as stack:
        fout_names = []
        for url in urls:
            response = requests.get(url, timeout=10)
            # Fail loudly rather than saving an HTTP error body as an MP3.
            response.raise_for_status()
            fin = stack.enter_context(tempfile.NamedTemporaryFile(suffix='.mp3'))
            fin.write(response.content)
            # Flush so the external tools see the full file content.
            fin.flush()
            if postprocess:
                fout = stack.enter_context(tempfile.NamedTemporaryFile(suffix='.mp3'))
                # sox effect chain: silence filter, reverse, silence filter,
                # reverse - i.e. the filter is applied from both ends of
                # the clip.
                cmd = ['sox', fin.name, fout.name, 'silence', '1', '0.05', '0.03%', 'reverse', 'silence', '1', '0.1', '5%', 'reverse']
                subprocess.check_call(cmd)
                fout_names.append(fout.name)
            else:
                fout_names.append(fin.name)
        # NOTE(review): cvlc appears to keep running after the playlist ends
        # unless told otherwise (e.g. --play-and-exit) - confirm whether
        # that is intended before changing the command line.
        subprocess.check_call(['cvlc', '--quiet'] + fout_names)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment