Ishibasystems/youtube-dl.py

## youtube-dl.py
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-

import datetime
import youtube_dl
import subprocess
from sys import exit, argv
from os.path import exists
from modules import text
from traceback import format_exc
from urllib.request import urlopen

# Whether ffmpeg is installed (needed for picking the video quality on YouTube)
ffmpeg = True

list_videos = '/mnt/download/youtube-dl.log'	# Record of already-downloaded videos
download_def = "/mnt/download/video/"	# Save directory
download_tw = "/mnt/download/video/vine/"	# Save directory for Twitter videos (main() uses it for vine.co URLs)

# Enter the niconico account credentials here
user = ''
pswd = ''

# Holiday calendar, stored as (month, day) pairs
holiday = {(9, 19), (9, 22), (10, 10), (11, 3), (11, 23), (12, 23), }

def command(x):
	"""Run an external program and return its output as text.

	Args:
		x: Argument list (program followed by its arguments). It is executed
			directly, without a shell.

	Returns:
		Decoded stdout, a newline, then decoded stderr; bytes that are not
		valid UTF-8 are dropped. If the program cannot be started, the
		traceback is printed and '' is returned.
	"""
	try:
		p = subprocess.Popen(x, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		out, err = p.communicate()

	except Exception:
		# Best effort: report and fall back to ''. Catching Exception (not a bare
		# except) lets KeyboardInterrupt/SystemExit through, and map(str, ...)
		# keeps the report itself from raising on non-string arguments.
		print(format_exc() + 'Command Error: ' + ' '.join(map(str, x)) + '\n')
		return ''

	return out.decode('utf-8', 'ignore') + '\n' + err.decode('utf-8', 'ignore')

class catchmsg(object):
	"""Logger stand-in that collects every message into a single string.

	debug(), warning() and error() all forward to input(); getmsg() returns
	the collected text and empties the buffer.
	"""

	def __init__(self):
		self.logmsg = ''

	def input(self, msg):
		"""Append msg as one line, stripped and with two ANSI colour codes removed."""
		cleaned = msg.strip()
		for escape in ('\033[0;31m', '\033[0m'):
			cleaned = cleaned.replace(escape, '')
		self.logmsg = self.logmsg + cleaned + '\n'

	def debug(self, msg):
		self.input(msg)

	def warning(self, msg):
		self.input(msg)

	def error(self, msg):
		self.input(msg)

	def getmsg(self):
		"""Return the text collected so far and start again with an empty buffer."""
		collected = self.logmsg
		self.__init__()
		return collected

class sharpmsg(object):
	"""Keeps a pending header line and forwards messages to text.add.

	A header stored with set() is emitted once, directly above the next
	message given to add(), and is cleared afterwards.
	"""

	def __init__(self):
		self.logmsg = ''
		self.countr = 0
		self.tim = datetime.datetime.today()

	def set(self, msg):
		"""Reset all state, then remember msg as the pending header."""
		self.__init__()
		self.logmsg = msg

	def add(self, list, msg):
		"""Hand msg to text.add for the target `list`, preceded by any pending header."""
		header = self.logmsg
		if header:
			msg = header + '\n' + msg
			# Re-initialising clears the header and also zeroes countr / refreshes tim.
			self.__init__()

		self.countr = self.countr + 1
		text.add(list, msg)

	def dummy(ex):
		# NOTE(review): uses `sharp` and `url`, which are not defined in this
		# class; nothing in this file appears to call this method - confirm
		# whether it can be removed.
		sharp.set('# ' + str(datetime.datetime.today()) + '\t' + url[2:].split('\t')[-1])

def deny(title):
	"""Return True when title contains a blocked word, i.e. it must not be downloaded."""
	# A single occurrence of any of these substrings rejects the title.
	blocked_words = (
		'4コマ',
		'４コマ',
		'四コマ',
		'BGM',
		'MMDドラマ',
		'エロゲ',
		'ギャルゲ',
		'コマ劇場',
		'ニコ生',
		'プレイ動画',
		'画像集',
		'紙芝居',
		'逆再生',
		'作業用',
		'実況',
		'手書',
		'手描',
	)
	for word in blocked_words:
		if word in title:
			return True

	# Otherwise the title is rejected only when both of these characters occur in it.
	return '第' in title and '話' in title

def main(list_urls):
	"""Work through the URL list list_urls, downloading each video with youtube_dl.

	The list is read with text.read and then rewritten from scratch. Lines
	starting with '# ' are kept as a pending header, lines starting with
	'http' are processed, and any other line is ignored. Every URL that is
	skipped or fails is written back to list_urls (reason after ' ! ' or
	' | ') so that a later run can retry it. Diagnostics go to
	list_urls + '.log'. When a download finishes without a logged error, the
	last path segment of its URL is recorded in list_videos via text.add.

	Args:
		list_urls: Name of the list file, as passed to text.read / text.write.

	Calls exit(0) when the list is empty.
	"""
	download = download_def

	urls_list = list(text.read(list_urls))
	sets_list = set()

	if not urls_list:
		exit(0)

	list_log = list_urls + '.log'

	# Re-initialise the work list. The header embeds Japanese text and an LF so
	# that other editors keep handling the file as UTF-8 (no BOM) with LF endings.
	text.write(list_urls, '# UTF-8N LF 厳守\n')
	text.write(list_log, '')
	sharp = sharpmsg()

	for url in urls_list:
		if url.startswith('# '):
			# Header line: keep its last tab-separated field, stamped with the current time.
			sharp.set('# ' + str(datetime.datetime.today()) + '\t' + url[2:].split('\t')[-1])

		elif url.startswith('http'):
			# Only the part before the first space is the URL.
			url = url.split(' ')[0]

			try:
				if url[-1] == '/':
					url = url[:-1]

				# Short / mobile links: follow redirects to obtain the final URL.
				if 'youtu.be' in url or 'm.youtube.com' in url or 'nico.ms' in url:
					try:
						# The with-block closes the HTTP response once the final URL is read.
						with urlopen(url) as response:
							url = response.geturl()
					except Exception:
						# Best effort: keep the URL as written when it cannot be resolved.
						pass

				nicovid_flag = 'nico.ms' in url or 'www.nicovideo.jp' in url
				youtube_flag = 'youtu.be' in url or 'm.youtube.com' in url or 'www.youtube.com' in url

				if youtube_flag:
					# Strip playlist URLs down to the list id ...
					if 'playlist?' in url and 'list=' in url:
						url = url.split('playlist?')[0] + 'playlist?list=' + url.split('list=')[1].split('&')[0]

					# ... and watch URLs down to the video id.
					if '?' in url and 'v=' in url:
						url = url.split('?')[0] + '?v=' + url.split('v=')[1].split('&')[0]

				if nicovid_flag:
					url = url.split('?')[0].split('/videoExplorer')[0]

				# The last path segment identifies the video in the duplicate checks
				# and in the downloaded-videos record.
				video_key = url.split('/')[-1]

				if video_key in text.read(list_videos):
					text.add(list_log, 'SKIP: duplicate video in log: ' + url)
					continue

				if video_key in sets_list:
					text.add(list_log, 'SKIP: duplicate video in list: ' + url)
					continue

				sets_list.add(video_key)

				ydl_opts = {'quiet': True, 'ignoreerrors': True, 'logger': catchmsg(), 'format': 'best', 'outtmpl': download + '%(title)s.%(ext)s'}

				# Filesystem free space check: 4th column of the 2nd line of `df` output.
				# NOTE(review): presumably 1K blocks, so 2**20 is the "1GB" of the message - confirm.
				stdout_data = command(['df', download])
				try:
					if float(stdout_data.split('\n')[1].split()[3]) < 2**20:
						sharp.add(list_urls, url + ' ! SKIP: Filesystem free space is <1GB')
						continue

				except (IndexError, ValueError):
					# The df output did not have the expected shape.
					text.add(list_log, format_exc() + 'disk check is failed\n' + stdout_data + '\n')
					sharp.add(list_urls, url + ' ! SKIP: disk check is failed')
					continue

				# video info check (youtube or nicovideo)
				if nicovid_flag or youtube_flag:
					# nicovideo: avoid low-quality ("economy mode") downloads
					if nicovid_flag:
						if '/sm' in url:
							d = datetime.datetime.today()

							# Always avoid 18:00-26:00 (i.e. until 02:00 the next day)
							if d.hour < 2 or 18 <= d.hour:
								sharp.add(list_urls, url + ' | SKIP: economy mode hour (基本時間外)')
								continue

							# From 12:00 on, also avoid the following days
							if 12 <= d.hour:
								# Saturday and Sunday
								if 4 < d.weekday():
									sharp.add(list_urls, url + ' | SKIP: economy mode hour (土曜・日曜)')
									continue

								# Holidays and other planned dates
								if (d.month, d.day) in holiday:
									sharp.add(list_urls, url + ' | SKIP: economy mode hour (計画内)')
									continue

								# Summer vacation (the day after Marine Day through the end of August)
								if d.month == 8 or (d.month == 7 and d.day + (7 - datetime.date(d.year, 7, 1).weekday()) % 7 > 15):
									sharp.add(list_urls, url + ' | SKIP: economy mode hour (夏休み)')
									continue

						# "ERROR: Unable to extract thumbPlayKey": video info cannot be
						# fetched without logging in.
						ydl_opts['username'] = user
						ydl_opts['password'] = pswd

					try:
						info = youtube_dl.YoutubeDL(ydl_opts).extract_info(url, download = False)

					except youtube_dl.utils.DownloadError as e:
						print('YoutubeDL().extract_info ' + url + '\n' + str(e.args))
						sharp.add(list_urls, url + ' ! ' + e.args[0])
						continue

					stdout_data = ydl_opts['logger'].getmsg()

					# YouTube playlist: only expand it into the work list. Done before the
					# error check because the playlist may contain deleted videos.
					if info is not None and info['extractor'] == 'youtube:playlist':
						for x in info['entries']:
							if x is not None:
								sharp.add(list_urls, x['webpage_url'] + ' | Extract from ' + url)
						continue

					# extract_info reported an error
					elif 'ERROR:' in stdout_data:
						text.add(list_log, stdout_data + 'YoutubeDL().extract_info ' + url + '\n')
						sharp.add(list_urls, url + ' ! ' + stdout_data.strip().split('\n')[-1].split(';')[0])
						continue

					# Videos that slipped past the economy-hour checks (e.g. channel
					# videos watched with a free niconico account)
					elif nicovid_flag and info['format_id'] == 'economy':
						sharp.add(list_urls, url + ' ! SKIP: economy mode video')
						continue

					# The title is just this one character: add the video id to the file name
					elif info['title'] == '膳':
						ydl_opts['outtmpl'] = download + '膳%(id)s.%(ext)s'

					# Title contains a blocked word
					elif deny(info['title']):
						sharp.add(list_urls, url + ' ! SKIP: [title] 禁止ワードを確認してください')
						continue

					# youtube select more High-Resolution (e.g. FullHD - 8K) best video if enable ffmpeg
					if ffmpeg and '/channel/' not in url and youtube_flag:
						# mode = [format_id, height, filesize] of the best candidate so far
						mode = [info['format_id'], info['height'], 0]

						try:
							for fmt in info['formats']:
								if 'height' in fmt and fmt['height'] is not None and mode[1] <= fmt['height']:
									# Same or greater height than the current candidate
									if fmt['acodec'] == 'none':
										# Video-only stream: take it when its file size is larger
										if 'filesize' in fmt and fmt['filesize'] is not None and mode[2] < fmt['filesize']:
											mode = [fmt['format_id'], fmt['height'], fmt['filesize']]
									else:
										# A stream with audio is accepted too; sizes are not
										# comparable here, so the size is reset to 0
										mode = [fmt['format_id'], fmt['height'], 0]

							if info['format_id'] != mode[0]:
								# Something other than 'best' was selected as the video
								ydl_opts['format'] = mode[0]
								mode = [info['format_id'], 0]

								# Pair it with the largest format that carries audio. Formats
								# whose filesize is missing or None are skipped, so a None
								# size no longer raises TypeError and aborts the download.
								for fmt in info['formats']:
									if fmt['acodec'] != 'none' and fmt.get('filesize') is not None and mode[1] < fmt['filesize']:
										mode = [fmt['format_id'], fmt['filesize']]

								ydl_opts['format'] += '+' + mode[0]

						except Exception:
							text.add(list_log, format_exc() + 'FFmpeg codec-selector Parser Error: ' + url + '\n')
							sharp.add(list_urls, url + ' ! FFmpeg codec-selector Parser Error')
							continue

				if 'vine.co' in url:
					ydl_opts['outtmpl'] = download_tw + video_key + '.%(ext)s'

				try:
					youtube_dl.YoutubeDL(ydl_opts).download([url])

				except youtube_dl.utils.DownloadError as e:
					print('YoutubeDL().download ' + url + '\n' + str(e.args))
					sharp.add(list_urls, url + ' ! ' + e.args[0])
					continue

				stdout_data = ydl_opts['logger'].getmsg()

				# download reported an error
				if 'ERROR:' in stdout_data:
					text.add(list_log, stdout_data + 'YoutubeDL().download ' + url + '\n')
					sharp.add(list_urls, url + ' ! ' + stdout_data.strip().split('\n')[-1].split(';')[0])

				elif 'has already been downloaded' in stdout_data:
					text.add(list_log, 'SKIP: duplicate video in file: ' + url)

				else:
					text.add(list_videos, video_key)

			except:
				# Deliberate catch-all (also covers what DownloadError does not): the
				# work list was emptied above, so whatever goes wrong the URL has to
				# be written back for a later run.
				text.add(list_log, format_exc() + 'Critical Error: ' + url + '\n')
				sharp.add(list_urls, url + ' ! Critical Error')

# Script entry point: the first command-line argument names the URL list file.
if __name__ == '__main__':
	# With no argument there is nothing to do; leave with status 0.
	if not argv[1:]:
		exit(0)

	main(argv[1])
	#!/usr/local/bin/python3
	# -- coding: utf-8 --

	import datetime
	import youtube_dl
	import subprocess
	from sys import exit, argv
	from os.path import exists
	from modules import text
	from traceback import format_exc
	from urllib.request import urlopen

	# Whether ffmpeg is installed (needed for picking the video quality on YouTube)
	ffmpeg = True

	list_videos = '/mnt/download/youtube-dl.log' # Record of already-downloaded videos
	download_def = "/mnt/download/video/" # Save directory
	download_tw = "/mnt/download/video/vine/" # Save directory for Twitter videos

	# Enter the niconico account credentials here
	user = ''
	pswd = ''

	# Holiday calendar, stored as (month, day) pairs
	holiday = {(9, 19), (9, 22), (10, 10), (11, 3), (11, 23), (12, 23), }

	def command(x):
		"""Run an external program and return its output as text.

		Args:
			x: Argument list (program followed by its arguments). It is
				executed directly, without a shell.

		Returns:
			Decoded stdout, a newline, then decoded stderr; bytes that are not
			valid UTF-8 are dropped. If the program cannot be started, the
			traceback is printed and '' is returned.
		"""
		try:
			p = subprocess.Popen(x, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			out, err = p.communicate()

		except Exception:
			# Best effort: report and fall back to ''. KeyboardInterrupt/SystemExit
			# pass through, and map(str, ...) keeps the report itself from raising.
			print(format_exc() + 'Command Error: ' + ' '.join(map(str, x)) + '\n')
			return ''

		return out.decode('utf-8', 'ignore') + '\n' + err.decode('utf-8', 'ignore')

	class catchmsg(object):
		"""Logger stand-in that collects every message into a single string.

		debug(), warning() and error() all forward to input(); getmsg() returns
		the collected text and empties the buffer.
		"""

		def __init__(self):
			self.logmsg = ''

		def input(self, msg):
			"""Append msg as one line, stripped and with two ANSI colour codes removed."""
			cleaned = msg.strip()
			for escape in ('\033[0;31m', '\033[0m'):
				cleaned = cleaned.replace(escape, '')
			self.logmsg = self.logmsg + cleaned + '\n'

		def debug(self, msg):
			self.input(msg)

		def warning(self, msg):
			self.input(msg)

		def error(self, msg):
			self.input(msg)

		def getmsg(self):
			"""Return the text collected so far and start again with an empty buffer."""
			collected = self.logmsg
			self.__init__()
			return collected

	class sharpmsg(object):
		"""Keeps a pending header line and forwards messages to text.add.

		A header stored with set() is emitted once, directly above the next
		message given to add(), and is cleared afterwards.
		"""

		def __init__(self):
			self.logmsg = ''
			self.countr = 0
			self.tim = datetime.datetime.today()

		def set(self, msg):
			"""Reset all state, then remember msg as the pending header."""
			self.__init__()
			self.logmsg = msg

		def add(self, list, msg):
			"""Hand msg to text.add for the target `list`, preceded by any pending header."""
			if self.logmsg:
				msg = self.logmsg + '\n' + msg
				# Re-initialising clears the header and also zeroes countr / refreshes tim.
				self.__init__()

			self.countr += 1
			text.add(list, msg)

		def dummy(ex):
			# NOTE(review): uses `sharp` and `url`, which are not defined in this
			# class; nothing visible appears to call this method - confirm whether
			# it can be removed.
			sharp.set('# ' + str(datetime.datetime.today()) + '\t' + url[2:].split('\t')[-1])

	def deny(title):
		"""Return True when title contains a blocked word, i.e. it must not be downloaded."""
		# A single occurrence of any of these substrings rejects the title.
		blocked_words = (
			'4コマ',
			'４コマ',
			'四コマ',
			'BGM',
			'MMDドラマ',
			'エロゲ',
			'ギャルゲ',
			'コマ劇場',
			'ニコ生',
			'プレイ動画',
			'画像集',
			'紙芝居',
			'逆再生',
			'作業用',
			'実況',
			'手書',
			'手描',
		)
		for word in blocked_words:
			if word in title:
				return True

		# Otherwise the title is rejected only when both of these characters occur in it.
		return '第' in title and '話' in title

	def main(list_urls):
		"""Work through the URL list list_urls, downloading each video with youtube_dl.

		The list is read with text.read and then rewritten from scratch. Lines
		starting with '# ' are kept as a pending header, lines starting with
		'http' are processed, and any other line is ignored. Every URL that is
		skipped or fails is written back to list_urls (reason after ' ! ' or
		' | ') so that a later run can retry it. Diagnostics go to
		list_urls + '.log'. When a download finishes without a logged error,
		the last path segment of its URL is recorded in list_videos via text.add.

		Args:
			list_urls: Name of the list file, as passed to text.read / text.write.

		Calls exit(0) when the list is empty.
		"""
		download = download_def

		urls_list = list(text.read(list_urls))
		sets_list = set()

		if not urls_list:
			exit(0)

		list_log = list_urls + '.log'

		# Re-initialise the work list. The header embeds Japanese text and an LF so
		# that other editors keep handling the file as UTF-8 (no BOM) with LF endings.
		text.write(list_urls, '# UTF-8N LF 厳守\n')
		text.write(list_log, '')
		sharp = sharpmsg()

		for url in urls_list:
			if url.startswith('# '):
				# Header line: keep its last tab-separated field, stamped with the current time.
				sharp.set('# ' + str(datetime.datetime.today()) + '\t' + url[2:].split('\t')[-1])

			elif url.startswith('http'):
				# Only the part before the first space is the URL.
				url = url.split(' ')[0]

				try:
					if url[-1] == '/':
						url = url[:-1]

					# Short / mobile links: follow redirects to obtain the final URL.
					if 'youtu.be' in url or 'm.youtube.com' in url or 'nico.ms' in url:
						try:
							# The with-block closes the HTTP response once the final URL is read.
							with urlopen(url) as response:
								url = response.geturl()
						except Exception:
							# Best effort: keep the URL as written when it cannot be resolved.
							pass

					nicovid_flag = 'nico.ms' in url or 'www.nicovideo.jp' in url
					youtube_flag = 'youtu.be' in url or 'm.youtube.com' in url or 'www.youtube.com' in url

					if youtube_flag:
						# Strip playlist URLs down to the list id ...
						if 'playlist?' in url and 'list=' in url:
							url = url.split('playlist?')[0] + 'playlist?list=' + url.split('list=')[1].split('&')[0]

						# ... and watch URLs down to the video id.
						if '?' in url and 'v=' in url:
							url = url.split('?')[0] + '?v=' + url.split('v=')[1].split('&')[0]

					if nicovid_flag:
						url = url.split('?')[0].split('/videoExplorer')[0]

					# The last path segment identifies the video in the duplicate checks
					# and in the downloaded-videos record.
					video_key = url.split('/')[-1]

					if video_key in text.read(list_videos):
						text.add(list_log, 'SKIP: duplicate video in log: ' + url)
						continue

					if video_key in sets_list:
						text.add(list_log, 'SKIP: duplicate video in list: ' + url)
						continue

					sets_list.add(video_key)

					ydl_opts = {'quiet': True, 'ignoreerrors': True, 'logger': catchmsg(), 'format': 'best', 'outtmpl': download + '%(title)s.%(ext)s'}

					# Filesystem free space check: 4th column of the 2nd line of `df` output.
					# NOTE(review): presumably 1K blocks, so 2**20 is the "1GB" of the message - confirm.
					stdout_data = command(['df', download])
					try:
						if float(stdout_data.split('\n')[1].split()[3]) < 2**20:
							sharp.add(list_urls, url + ' ! SKIP: Filesystem free space is <1GB')
							continue

					except (IndexError, ValueError):
						# The df output did not have the expected shape.
						text.add(list_log, format_exc() + 'disk check is failed\n' + stdout_data + '\n')
						sharp.add(list_urls, url + ' ! SKIP: disk check is failed')
						continue

					# video info check (youtube or nicovideo)
					if nicovid_flag or youtube_flag:
						# nicovideo: avoid low-quality ("economy mode") downloads
						if nicovid_flag:
							if '/sm' in url:
								d = datetime.datetime.today()

								# Always avoid 18:00-26:00 (i.e. until 02:00 the next day)
								if d.hour < 2 or 18 <= d.hour:
									sharp.add(list_urls, url + ' | SKIP: economy mode hour (基本時間外)')
									continue

								# From 12:00 on, also avoid the following days
								if 12 <= d.hour:
									# Saturday and Sunday
									if 4 < d.weekday():
										sharp.add(list_urls, url + ' | SKIP: economy mode hour (土曜・日曜)')
										continue

									# Holidays and other planned dates
									if (d.month, d.day) in holiday:
										sharp.add(list_urls, url + ' | SKIP: economy mode hour (計画内)')
										continue

									# Summer vacation (the day after Marine Day through the end of August)
									if d.month == 8 or (d.month == 7 and d.day + (7 - datetime.date(d.year, 7, 1).weekday()) % 7 > 15):
										sharp.add(list_urls, url + ' | SKIP: economy mode hour (夏休み)')
										continue

							# "ERROR: Unable to extract thumbPlayKey": video info cannot be
							# fetched without logging in.
							ydl_opts['username'] = user
							ydl_opts['password'] = pswd

						try:
							info = youtube_dl.YoutubeDL(ydl_opts).extract_info(url, download = False)

						except youtube_dl.utils.DownloadError as e:
							print('YoutubeDL().extract_info ' + url + '\n' + str(e.args))
							sharp.add(list_urls, url + ' ! ' + e.args[0])
							continue

						stdout_data = ydl_opts['logger'].getmsg()

						# YouTube playlist: only expand it into the work list. Done before the
						# error check because the playlist may contain deleted videos.
						if info is not None and info['extractor'] == 'youtube:playlist':
							for x in info['entries']:
								if x is not None:
									sharp.add(list_urls, x['webpage_url'] + ' | Extract from ' + url)
							continue

						# extract_info reported an error
						elif 'ERROR:' in stdout_data:
							text.add(list_log, stdout_data + 'YoutubeDL().extract_info ' + url + '\n')
							sharp.add(list_urls, url + ' ! ' + stdout_data.strip().split('\n')[-1].split(';')[0])
							continue

						# Videos that slipped past the economy-hour checks (e.g. channel
						# videos watched with a free niconico account)
						elif nicovid_flag and info['format_id'] == 'economy':
							sharp.add(list_urls, url + ' ! SKIP: economy mode video')
							continue

						# The title is just this one character: add the video id to the file name
						elif info['title'] == '膳':
							ydl_opts['outtmpl'] = download + '膳%(id)s.%(ext)s'

						# Title contains a blocked word
						elif deny(info['title']):
							sharp.add(list_urls, url + ' ! SKIP: [title] 禁止ワードを確認してください')
							continue

						# youtube select more High-Resolution (e.g. FullHD - 8K) best video if enable ffmpeg
						if ffmpeg and '/channel/' not in url and youtube_flag:
							# mode = [format_id, height, filesize] of the best candidate so far
							mode = [info['format_id'], info['height'], 0]

							try:
								for fmt in info['formats']:
									if 'height' in fmt and fmt['height'] is not None and mode[1] <= fmt['height']:
										# Same or greater height than the current candidate
										if fmt['acodec'] == 'none':
											# Video-only stream: take it when its file size is larger
											if 'filesize' in fmt and fmt['filesize'] is not None and mode[2] < fmt['filesize']:
												mode = [fmt['format_id'], fmt['height'], fmt['filesize']]
										else:
											# A stream with audio is accepted too; sizes are not
											# comparable here, so the size is reset to 0
											mode = [fmt['format_id'], fmt['height'], 0]

								if info['format_id'] != mode[0]:
									# Something other than 'best' was selected as the video
									ydl_opts['format'] = mode[0]
									mode = [info['format_id'], 0]

									# Pair it with the largest format that carries audio. Formats
									# whose filesize is missing or None are skipped, so a None
									# size no longer raises TypeError and aborts the download.
									for fmt in info['formats']:
										if fmt['acodec'] != 'none' and fmt.get('filesize') is not None and mode[1] < fmt['filesize']:
											mode = [fmt['format_id'], fmt['filesize']]

									ydl_opts['format'] += '+' + mode[0]

							except Exception:
								text.add(list_log, format_exc() + 'FFmpeg codec-selector Parser Error: ' + url + '\n')
								sharp.add(list_urls, url + ' ! FFmpeg codec-selector Parser Error')
								continue

					if 'vine.co' in url:
						ydl_opts['outtmpl'] = download_tw + video_key + '.%(ext)s'

					try:
						youtube_dl.YoutubeDL(ydl_opts).download([url])

					except youtube_dl.utils.DownloadError as e:
						print('YoutubeDL().download ' + url + '\n' + str(e.args))
						sharp.add(list_urls, url + ' ! ' + e.args[0])
						continue

					stdout_data = ydl_opts['logger'].getmsg()

					# download reported an error
					if 'ERROR:' in stdout_data:
						text.add(list_log, stdout_data + 'YoutubeDL().download ' + url + '\n')
						sharp.add(list_urls, url + ' ! ' + stdout_data.strip().split('\n')[-1].split(';')[0])

					elif 'has already been downloaded' in stdout_data:
						text.add(list_log, 'SKIP: duplicate video in file: ' + url)

					else:
						text.add(list_videos, video_key)

				except:
					# Deliberate catch-all (also covers what DownloadError does not): the
					# work list was emptied above, so whatever goes wrong the URL has to
					# be written back for a later run.
					text.add(list_log, format_exc() + 'Critical Error: ' + url + '\n')
					sharp.add(list_urls, url + ' ! Critical Error')

	# Script entry point: the first command-line argument names the URL list file.
	if __name__ == '__main__':
		# With no argument there is nothing to do; leave with status 0.
		if len(argv) < 2:
			exit(0)

		main(argv[1])