Instantly share code, notes, and snippets.

Embed
What would you like to do?
nhk がiframe をnoscriptに入れちゃったので対応
#!/usr/bin/env python
#
from lxml import html
from urllib.request import urlopen
from urllib.parse import urlparse
from urllib.parse import urljoin
import re, json, os, shlex, argparse, subprocess
def _fetch_text(url):
    """Return the body of *url* decoded as UTF-8, closing the connection."""
    with urlopen(url) as response:
        return response.read().decode('utf-8')


def download_nhk_video(url):
    """Download the video embedded in an NHK news article using ffmpeg.

    The function follows three hops before invoking ffmpeg:

    1. Fetch the article page and read the player page path from an inline
       ``<script>`` located under ``<article>``.
    2. Fetch the player page and read the name of the ``.json`` file
       referenced by its inline ``nPlayer`` script.
    3. Fetch that JSON document and read the playlist URL and the title.

    The stream is then copied (no re-encoding) into
    ``<title>-<json base name>.mp4`` in the current working directory.

    Args:
        url: URL of the NHK news article page.

    Raises:
        ValueError: If the player page path or the JSON file name cannot be
            located in the fetched pages.
        KeyError: If the JSON document lacks the expected keys.
        urllib.error.URLError: If one of the pages cannot be fetched.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Step 1: locate the player page referenced by the article's inline script.
    article = html.fromstring(_fetch_text(url))
    player_match = None
    for script in article.xpath('//article/script'):
        # ``script.text`` is None for an empty element; search "" instead.
        player_match = re.search(
            r"video:\s*'(.+\.html)'\s*,\s*", script.text or "")
        if player_match:
            break
    if player_match is None:
        raise ValueError(f"player page path not found in article: {url}")

    # The path is embedded in JavaScript with backslash-escaped slashes,
    # so the backslashes are stripped before building the URL.
    iframe_path = ("https://www3.nhk.or.jp"
                   + player_match.group(1).replace("\\", ""))
    player_src = urljoin(url, iframe_path)

    # Step 2: the player page names the JSON file in an inline nPlayer script.
    player = html.fromstring(_fetch_text(player_src))
    player_scripts = player.xpath(
        '//script[not(@src) and contains(./text() , "nPlayer")]')
    json_match = None
    if player_scripts:
        json_match = re.search(r"'(\w+\.json)'", player_scripts[0].text or "")
    if json_match is None:
        raise ValueError(
            f"video JSON file name not found in player page: {player_src}")
    json_f_name = json_match.group(1)
    json_url = urljoin(player_src, json_f_name)
    print(json_url)

    # Step 3: read the playlist URL and the title from the JSON document.
    video_info = json.loads(_fetch_text(json_url))
    name = os.path.splitext(json_f_name)[0]
    playlist_url = video_info["mediaResource"]["url"]
    title = video_info["va"]["adobe"]['vodContentsID']['VInfo1']

    # Pass ffmpeg an argument list instead of a shell-style string so that
    # quotes or spaces in the title or URL cannot break or alter the command.
    cmd = [
        "ffmpeg", "-y",
        "-i", playlist_url,
        "-codec", "copy",
        "-f", "mp4",
        f"{title}-{name}.mp4",
    ]
    # Echo a copy-pasteable form of the command for the user.
    print(" ".join(shlex.quote(arg) for arg in cmd))
    subprocess.check_call(cmd)
def main():
    """Read the article URL from the command line and download its video."""
    cli = argparse.ArgumentParser(description='NHKニュースの取得')
    cli.add_argument('url', help='ニュースのURL')
    parsed = cli.parse_args()
    # The single positional argument is exposed as the ``url`` attribute.
    download_nhk_video(parsed.url)
#
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
@takuya

This comment has been minimized.

Owner

takuya commented Nov 14, 2018

2018-11-15 更新

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment