Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/env python
#
from lxml import html
from urllib.request import urlopen
from urllib.parse import urlparse
from urllib.parse import urljoin
import re,json,os,shlex,argparse,subprocess
def _fetch_text(url):
    """Return the body of *url* decoded as UTF-8, closing the response afterwards."""
    with urlopen(url) as response:
        return response.read().decode('utf-8')


def download_nhk_video(url):
    """Download the video embedded in an NHK news page by handing it to ffmpeg.

    The news page is fetched, its ``video-player`` iframe is followed, the
    name of a ``.json`` file is read from the inline script mentioning
    ``nPlayer``, and the playlist URL found in that JSON is passed to
    ``ffmpeg``, which writes ``<title>-<json name>.mp4`` in the current
    directory.

    Args:
        url: URL of the news page.

    Raises:
        ValueError: If the player iframe, the ``nPlayer`` script, or the JSON
            file name cannot be found.
        KeyError: If the JSON lacks the expected playlist or title fields.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    page = html.fromstring(_fetch_text(url))

    # Follow the iframe that hosts the video player.
    frames = page.xpath('//iframe[@class="video-player"]')
    if not frames:
        raise ValueError(f"no video-player iframe found at {url}")
    player_src = urljoin(url, frames[0].get('src'))
    player = html.fromstring(_fetch_text(player_src))

    # The JSON file name is quoted inside the inline script that mentions nPlayer.
    scripts = player.xpath('//script[not(@src) and contains(./text() , "nPlayer")]')
    if not scripts:
        raise ValueError(f"no inline nPlayer script found at {player_src}")
    # Raw string so that \w and \. reach the regex engine untouched.
    match = re.search(r"'(\w+\.json)'", scripts[0].text)
    if match is None:
        raise ValueError(f"no .json file name found in the nPlayer script at {player_src}")
    json_f_name = match[1]
    json_url = urljoin(player_src, json_f_name)
    print(json_url)

    # Pull out the fields we need from the video description.
    video_info = json.loads(_fetch_text(json_url))
    name = os.path.splitext(json_f_name)[0]
    # URL of the m3u8 playlist.
    playlist_url = video_info["mediaResource"]["url"]
    title = video_info["va"]["adobe"]['vodContentsID']['VInfo1']
    # The title comes from the remote JSON; keep it from acting as a path.
    safe_title = str(title).replace('/', '_').replace(os.sep, '_')

    # Let ffmpeg fetch the whole stream. The arguments are passed as a list so
    # that quotes or whitespace in the title or URL cannot break the command.
    cmd = [
        "ffmpeg", "-y",
        "-i", playlist_url,
        "-codec", "copy",
        "-f", "mp4",
        f"{safe_title}-{name}.mp4",
    ]
    print(" ".join(shlex.quote(arg) for arg in cmd))
    subprocess.check_call(cmd)
def main():
    """Read the news-page URL from the command line and download its video."""
    cli = argparse.ArgumentParser(description='NHKニュースの取得')
    cli.add_argument('url', help='ニュースのURL')
    options = cli.parse_args()
    download_nhk_video(options.url)
#
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment