Last active
July 13, 2019 00:23
-
-
Save shspage/b0ffc3481226be3eaa227334638051ba to your computer and use it in GitHub Desktop.
NHKゴガク ストリーミング 一括保存用 (レベルアップ中国語 )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding:utf-8 | |
from __future__ import print_function | |
import sys | |
import os | |
import traceback | |
from selenium import webdriver | |
from selenium.webdriver.chrome.options import Options | |
from time import sleep, strftime | |
# Bulk saver for NHK Gogaku streaming audio ("Level Up Chinese" course).

# Browser binary and matching WebDriver executable used by Selenium.
CHROME_BIN = '/usr/bin/chromium-browser'
CHROME_DRIVER = '/usr/bin/chromedriver'

# SoX executable used for both recording and compression.
SOX = "sox"

DIR_OUTPUT = "/home/<user_name>/<folder>/"  # output folder (edit before use)
CHANNELS = "1"  # 1: mono, 2: stereo
RATE = "44100"  # sample rate passed to sox via -r
# "<sox audio driver> <device name>"; the device name is environment dependent.
DEVICE_TYPE = "pulseaudio alsa_output.pci-0000_00_1f.3.analog-stereo.monitor"
EXTENSION = ".ogg"  # extension of the compressed output file
SECONDS = 60 * 15 - 10  # recording length in seconds
def recordWithSox():
    """Record the configured audio device with SoX, then compress the result.

    The recording is written to a timestamped ``.wav`` file under
    ``DIR_OUTPUT`` for ``SECONDS`` seconds, normalised and converted to a
    file with ``EXTENSION``, and the intermediate ``.wav`` is removed once
    the conversion succeeded.

    Returns:
        bool: ``True`` if any step failed, ``False`` on success.
    """
    # Imported locally so this function does not rely on changes to the
    # module-level import block.
    import shlex
    import subprocess

    # The commands used to pass through a shell, which expanded "~" and
    # "$VAR"; keep that working now that no shell is involved.
    out_dir = os.path.expandvars(os.path.expanduser(DIR_OUTPUT))
    outfile_base = os.path.join(out_dir, strftime("rec_%Y_%m_%d-%H_%M_%S"))
    wav_path = outfile_base + ".wav"
    compressed_path = outfile_base + EXTENSION

    # Argument lists (no shell) so spaces or shell metacharacters in the
    # output path cannot split or redirect the command.
    # DEVICE_TYPE holds two words ("<driver> <device>"), hence the split.
    cmd_rec = (
        [SOX, "-t"]
        + shlex.split(DEVICE_TYPE)
        + ["-c", CHANNELS, "-r", RATE, wav_path, "trim", "0", str(SECONDS)]
    )
    cmd_compress = [SOX, wav_path, compressed_path, "norm"]

    try:
        print(" recording...")
        status = subprocess.call(cmd_rec)
        if status != 0:
            print(" recording failed (exit status %d)" % status)
            return True
        sleep(3)
        print(" compress...")
        status = subprocess.call(cmd_compress)
        if status != 0:
            # Keep the .wav so the recording is not lost.
            print(" compress failed (exit status %d)" % status)
            return True
        print(" saved")
        os.unlink(wav_path)
    except OSError:
        # Raised when sox cannot be started or the .wav cannot be removed.
        print(traceback.format_exc())
        return True
    return False
def main():
    """Open the streaming page and record every listed episode in turn.

    Starts Chrome through Selenium, loads the course page, then for each
    ``li`` entry of the ``broadcast_list`` element clicks its link and calls
    ``recordWithSox()``. Stops at the first recording error. The WebDriver
    session is always shut down, even when an exception interrupts the run.
    """
    url = "https://www2.nhk.or.jp/gogaku/mygogaku/streaming/?spid=00000915&tcd=F0"
    opts = Options()
    # opts.set_headless(True)  # left disabled in the original script
    opts.binary_location = CHROME_BIN
    driver = webdriver.Chrome(CHROME_DRIVER, chrome_options=opts)
    try:
        driver.get(url)
        sleep(5)  # fixed wait before querying the page
        print("woke up")

        elem = driver.find_element_by_class_name('broadcast_list')
        contents = elem.find_elements_by_tag_name('li')
        for c in contents:
            atag = c.find_element_by_tag_name('a')
            print(atag.text)  # link label, e.g. the broadcast date
            atag.click()
            sleep(2)  # short pause between the click and the recording
            err = recordWithSox()
            if err:
                print("break by err")
                break
        print("end")
        driver.close()
    finally:
        # Always end the session so the browser and chromedriver processes
        # are not left running after a failure.
        driver.quit()
# Run the recorder only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment