Last active
January 25, 2019 12:58
-
-
Save yuyosy/0103e96b930ebd0d0404f8bcab3d0b4c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires: pip install beautifulsoup4
# Requires: pip install ffmpeg-python
import os
import re
import urllib.error
import urllib.parse
import urllib.request

import ffmpeg
from bs4 import BeautifulSoup
save_path = 'C:/Users/'  # Directory the downloaded and converted videos are written to
url = 'http://example.com/'  # URL of the page to scrape for video links

# makedirs(exist_ok=True) also creates missing parent directories and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(save_path, exist_ok=True)

# Close the HTTP response as soon as the page body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read()
soup = BeautifulSoup(html, "html.parser")

# Collect every <a> tag whose href ends with the '.flv' file extension.
video_list = soup.find_all('a', href=re.compile(r'\.flv$'))
# Probe every linked video and keep the <a> tags whose target can be opened.
exists_video_list = []
for item in video_list:
    # Resolve relative hrefs against the scraped page URL; absolute hrefs
    # pass through urljoin unchanged. A bare relative href would otherwise
    # make urlopen raise ValueError, which is not handled below.
    item_url = urllib.parse.urljoin(url, item.attrs["href"])
    filename = os.path.basename(item_url)
    print(filename, '(', item_url, ')', end=' ')
    try:
        # Successfully opening the URL is the whole check; the body is not
        # read, and the context manager closes the connection right away.
        with urllib.request.urlopen(item_url):
            pass
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print('\033[31mHTTP Error', e.code, '\033[0m')
    except urllib.error.URLError as e:
        print('\033[31mURL Error', e.reason, '\033[0m')
    else:
        exists_video_list.append(item)
        print('\033[32mExists\033[0m')

# Summary: total links found, reachable files, unreachable files.
print('-----------')
print(len(video_list), 'links')
print('\033[32mExsist\033[0m :', len(exists_video_list), 'files')
print('\033[31mNot Exsist\033[0m :', len(video_list) - len(exists_video_list), 'files')
print('-----------')
# Wait for the user to press Enter before starting the downloads.
input('Download and Convert')
for item in exists_video_list:
    # Resolve relative hrefs against the scraped page URL so ffmpeg always
    # receives an absolute URL; absolute hrefs pass through unchanged.
    item_url = urllib.parse.urljoin(url, item.attrs["href"])
    filename = os.path.basename(item_url)
    print(filename)

    # os.path.join keeps the path valid whether or not save_path ends with a
    # separator.
    save = os.path.join(save_path, filename)
    if os.path.isfile(save):
        print(' \033[32mAlready Downloaded\033[0m')
    else:
        print(' \033[36mDownload to', save, '\033[0m')
        ffmpeg.input(item_url).output(save).run()  # Download the video with ffmpeg

    # Target file: same base name with the extension of the converted format.
    name = os.path.splitext(filename)[0]
    cnv_file_name = os.path.join(save_path, name + '.mp4')
    if os.path.exists(cnv_file_name):
        print(' \033[32mAlready Converted\033[0m')
    else:
        print(' \033[36mConvert', save, '-->', cnv_file_name, '\033[0m')
        ffmpeg.input(save).output(cnv_file_name).run()  # Convert the video with ffmpeg

    # NOTE(review): the original indentation was lost; this prompt is assumed
    # to run once per video, at loop level. Typing 'y' stops processing.
    x = input('break ? [y/else]')
    if x == 'y':
        break
print('Fnished')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment