Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
#!/usr/bin/env python
#
import argparse
import ast
import json
import os
import re
import shlex
import subprocess
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.request import urlopen

from lxml import html
def _fetch_text(url):
    """Fetch *url* and return the response body decoded as UTF-8."""
    # The context manager closes the connection even if read/decode fails.
    with urlopen(url) as response:
        return response.read().decode('utf-8')


def _parse_detail_prop(script):
    """Turn the inline ``DetailProp`` JavaScript object into a Python object.

    The metadata is embedded as a JavaScript object literal that is hard to
    treat as JSON, so the text is massaged into a Python literal and parsed
    with ``ast.literal_eval``.

    Args:
        script: Text of the inline ``<script>`` element mentioning
            ``DetailProp``.

    Returns:
        The parsed literal; the caller reads its ``'video'`` and ``'title'``
        entries.

    Raises:
        ValueError: If the rewritten text is not a plain Python literal.
        SyntaxError: If the rewritten text cannot be parsed at all.
    """
    # Drop everything in front of the first "{". The pattern is anchored at
    # the start of the string, so a single replacement is all that can happen;
    # re.MULTILINE must NOT be used here or every line would be mangled.
    literal = re.sub(r'^[^\{]+', '', script, count=1)
    # Quote bare object keys at the start of a line:  key:  ->  "key":
    literal = re.sub(r'^(\s+)(\w+):', r'\g<1>"\g<2>":', literal, flags=re.M)
    # Blank out any line that has "//" with text on both sides of it.
    literal = re.sub(r'^.+//.+$', '', literal, flags=re.M)
    # Remove a ";" that ends a line (e.g. the statement terminator).
    literal = re.sub(r';\s*$', '', literal, flags=re.M)
    # Single quotes are deliberately left alone: Python literals accept both
    # quote styles, and blindly rewriting ' to " corrupts any string that
    # contains the other quote character.
    #
    # SECURITY: this text comes from a remote page. It used to be passed to
    # eval(), which would execute arbitrary code; literal_eval only accepts
    # literals (dict/list/str/number/...).
    return ast.literal_eval(literal)


def download_nhk_video(url):
    """Download the video attached to a news page by handing it to ffmpeg.

    Steps: fetch the page, read the ``video`` and ``title`` entries from the
    inline ``DetailProp`` script, fetch the video's JSON description (the
    video URL with ``html`` replaced by ``json``), take
    ``mediaResource.url`` from it as the playlist URL, and run ffmpeg to copy
    the streams into ``<title>-<name>.mp4`` in the current directory.

    Args:
        url: URL of the news article page.

    Raises:
        IndexError: If the page has no inline script mentioning
            ``DetailProp``.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    tree = html.fromstring(_fetch_text(url))

    # Parse DetailProp into a dict.
    scripts = tree.xpath('//script[not(@src) and contains(./text() , "DetailProp")]')
    if not scripts:
        raise IndexError(f'no inline script containing "DetailProp" found in {url}')
    detail = _parse_detail_prop(scripts[0].text)

    # Pull out what we need. Backslashes (JavaScript "\/" escapes) are removed
    # from the video path.
    video_url = detail['video'].replace('\\', '')
    video_json_url = re.sub(r'html$', 'json', video_url)
    title = detail['title']
    name = os.path.splitext(os.path.basename(video_url))[0]

    # Get the m3u8 playlist URL.
    video_json_url = urljoin(url, video_json_url)
    media = json.loads(_fetch_text(video_json_url))
    playlist_url = media['mediaResource']['url']

    # Let ffmpeg fetch the whole thing. The command is built as an argv list
    # so that quotes or spaces in the title or URL cannot break or alter it.
    argv = [
        'ffmpeg', '-y',
        '-i', playlist_url,
        '-codec', 'copy',
        '-f', 'mp4',
        f'{title}-{name}.mp4',
    ]
    # Show a copy-pasteable, shell-quoted form of what is about to run.
    print(' '.join(shlex.quote(arg) for arg in argv))
    subprocess.check_call(argv)
def main():
    """Read the news URL from the command line and download its video."""
    cli = argparse.ArgumentParser(description='NHKニュースの取得')
    cli.add_argument('url', help='ニュースのURL')
    namespace = cli.parse_args()
    # The only argument is the positional page URL.
    download_nhk_video(namespace.url)
#
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment