Skip to content

Instantly share code, notes, and snippets.

@zevaverbach
Created January 27, 2016 22:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zevaverbach/a38a35464f9e707f9682 to your computer and use it in GitHub Desktop.
Save zevaverbach/a38a35464f9e707f9682 to your computer and use it in GitHub Desktop.
Downloading .json transcripts from Speechmatics with Selenium
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from config import username, password
"""This downloads *all* the .json transcripts from your Speechmatics account. Why didn't I use the API? The docs are down, so voila."""
# --- Browser setup ---------------------------------------------------------
# Configure a Firefox profile so JSON downloads are saved silently to the
# user's Downloads directory instead of raising a "Save as..." dialog.
fp = webdriver.FirefoxProfile()
# folderList=2 tells Firefox to use the custom directory set in
# browser.download.dir below.
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting", False)
fp.set_preference(
    "browser.helperApps.neverAsk.saveToDisk", "application/json")
# Firefox does not expand "~" in this preference, so pass an absolute path.
fp.set_preference(
    "browser.download.dir",
    os.path.join(os.path.expanduser("~"), "Downloads"))
driver = webdriver.Firefox(firefox_profile=fp)

# --- Log in ----------------------------------------------------------------
driver.get("https://speechmatics.com/login")
# Explicit raise rather than `assert`, which is stripped under `python -O`.
if "Speechmatics" not in driver.title:
    raise RuntimeError(
        "Unexpected login page title: {!r}".format(driver.title))
driver.find_element(By.ID, "email").send_keys(username)
driver.find_element(By.ID, "password").send_keys(password)
driver.find_element(By.ID, "submit").click()

# Wait for the dashboard and its download links instead of sleeping for a
# fixed second; a TimeoutException is raised if they never show up.
wait = WebDriverWait(driver, 10)
wait.until(EC.title_contains("Dashboard"))
links = wait.until(
    EC.presence_of_all_elements_located((By.CLASS_NAME, "linkable")))

# --- Download every job's JSON transcript ----------------------------------
jobs_rows = driver.find_element(By.ID, "jobs_rows")
rows = jobs_rows.find_elements(By.TAG_NAME, "tr")
for index, row in enumerate(rows):
    # Every fifth row, scroll that row into view before clicking.
    # NOTE(review): presumably rows outside the viewport are not
    # clickable -- confirm against the live page.
    if index % 5 == 0:
        driver.execute_script("arguments[0].scrollIntoView(true);", row)
        time.sleep(.5)
    download_link = row.find_element(
        By.CSS_SELECTOR, ".linkable:nth-child(6)")
    download_link.click()
    # For every row after the first the link is clicked a second time.
    # NOTE(review): presumably the first click only dismisses the previous
    # row's download popup -- confirm.
    if index != 0:
        time.sleep(.5)
        download_link.click()
    # Short pause before looking up the JSON link in the opened popup.
    time.sleep(.5)
    json_download_link = driver.find_element(By.ID, "json_dl_link")
    json_download_link.click()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment