Skip to content

Instantly share code, notes, and snippets.

@yuyosy
Last active January 25, 2019 12:58
Show Gist options
  • Save yuyosy/0103e96b930ebd0d0404f8bcab3d0b4c to your computer and use it in GitHub Desktop.
Save yuyosy/0103e96b930ebd0d0404f8bcab3d0b4c to your computer and use it in GitHub Desktop.
# install beautifulsoup4
# install ffmpeg-python
import os
import re
import urllib.error
import urllib.parse
import urllib.request

import ffmpeg
from bs4 import BeautifulSoup
# Scrape a page for links to .flv videos, check which links can be opened,
# then download each reachable video with ffmpeg and convert it to .mp4.

save_path = 'C:/Users/'  # Directory the videos are saved to
url = 'http://example.com/'  # Page to scrape for video links

# Create the destination directory, including any missing parent directories;
# do nothing when it already exists.
os.makedirs(save_path, exist_ok=True)

# Fetch the page. The context manager closes the HTTP response after reading.
with urllib.request.urlopen(url) as page:
    html = page.read()
soup = BeautifulSoup(html, "html.parser")
# Collect the <a> tags whose href ends with '.flv'.
video_list = soup.find_all('a', href=re.compile(r'\.flv$'))

# Pass 1: probe every link and remember the tags whose target can be opened.
exists_video_list = []
for item in video_list:
    # Resolve the href against the page URL so that relative links work;
    # an href that is already absolute is returned as-is by urljoin.
    item_url = urllib.parse.urljoin(url, item.attrs["href"])
    filename = os.path.basename(item_url)
    print(filename, '(', item_url, ')', end=' ')
    try:
        # Only reachability matters here: open the URL and close it right away.
        with urllib.request.urlopen(item_url):
            pass
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        print('\033[31mHTTP Error', e.code, '\033[0m')
    except urllib.error.URLError as e:
        print('\033[31mURL Error', e.reason, '\033[0m')
    else:
        exists_video_list.append(item)
        print('\033[32mExists\033[0m')

# Summary of the probe pass.
print('-----------')
print(len(video_list), 'links')
print('\033[32mExsist\033[0m :', len(exists_video_list), 'files')
print('\033[31mNot Exsist\033[0m :', len(video_list) - len(exists_video_list), 'files')
print('-----------')

# Wait for the user to press Enter before starting the downloads.
input('Download and Convert')

# Pass 2: download each reachable video, then convert it to .mp4.
for item in exists_video_list:
    # Same resolution as in pass 1 so the download uses an absolute URL.
    item_url = urllib.parse.urljoin(url, item.attrs["href"])
    filename = os.path.basename(item_url)
    print(filename)
    save = os.path.join(save_path, filename)
    if os.path.isfile(save):
        print(' \033[32mAlready Downloaded\033[0m')
    else:
        print(' \033[36mDownload to', save, '\033[0m')
        # Download the video by letting ffmpeg read directly from the URL.
        # NOTE(review): if this run fails part-way, a partial file may remain
        # and be reported as "Already Downloaded" on the next run - confirm.
        ffmpeg.input(item_url).output(save).run()

    # Name of the converted file: same base name with the target extension.
    name = os.path.splitext(filename)[0]
    cnv_file_name = os.path.join(save_path, name + '.mp4')
    if os.path.exists(cnv_file_name):
        print(' \033[32mAlready Converted\033[0m')
    else:
        print(' \033[36mConvert', save, '-->', cnv_file_name, '\033[0m')
        # Convert the downloaded video with ffmpeg; the output format follows
        # from the '.mp4' extension.
        ffmpeg.input(save).output(cnv_file_name).run()

    # Let the user stop after the current video; any answer but 'y' continues.
    x = input('break ? [y/else]')
    if x == 'y':
        break
print('Fnished')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment