Skip to content

Instantly share code, notes, and snippets.

@cashiwamochi
Created November 14, 2018 16:20
Show Gist options
  • Save cashiwamochi/c6892346adccd07bfcea0da89d4736d8 to your computer and use it in GitHub Desktop.
Save cashiwamochi/c6892346adccd07bfcea0da89d4736d8 to your computer and use it in GitHub Desktop.
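# Script that generates the RealEstate10K dataset. Because of bugs in pytube, some videos cannot be downloaded; information about the failures is written out to a text file.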
import os
import sys
import glob
import subprocess
from pytube import YouTube
def format_timestamp(microseconds):
    """Return *microseconds* formatted as ``HH:MM:SS.mmm`` for ffmpeg's ``-ss``.

    The value is first reduced to whole milliseconds (integer division by
    1000) and then split into hours, minutes, seconds and milliseconds.
    """
    total_ms = int(microseconds) // 1000
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, millis = divmod(remainder, 1000)
    return "{:02d}:{:02d}:{:02d}.{:03d}".format(hours, minutes, seconds, millis)


def read_sequence(txt_file):
    """Parse one sequence description file.

    The first line is taken as the YouTube URL.  On every following
    non-blank line the first whitespace-separated field is read as an
    integer timestamp.

    Returns:
        ``(youtube_url, frames)`` where ``frames`` is a list of
        ``(raw_timestamp, ffmpeg_timestamp)`` string pairs: the raw value
        names the output image, the formatted value is the seek position.

    Raises:
        ValueError: if a timestamp field is not an integer.
    """
    frames = []
    with open(txt_file, "r") as seq_file:
        # An empty file yields an empty URL, which later fails the download
        # and is logged, instead of reusing the URL of a previous sequence.
        youtube_url = seq_file.readline().strip()
        for line in seq_file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank / trailing lines
            raw = int(fields[0])
            frames.append((str(raw), format_timestamp(raw)))
    return youtube_url, frames


def find_downloaded_video(directory=".", stem="current"):
    """Return the path of the file named ``<stem>`` or ``<stem>.<ext>``.

    Only regular files directly inside *directory* are considered.
    Returns ``None`` when there is no such file.
    """
    for candidate in sorted(glob.glob(os.path.join(directory, "*"))):
        name = os.path.basename(candidate)
        if os.path.isfile(candidate) and os.path.splitext(name)[0] == stem:
            return candidate
    return None


def log_failure(txt_file, log_path='falied_videos.txt'):
    """Append *txt_file* to the failure log.

    The log file name (including its spelling) is kept as-is so that logs
    from earlier runs keep being extended.
    """
    with open(log_path, 'a') as failure_log:
        failure_log.write(txt_file + '\n')


def extract_frames(video_path, frames, output_root):
    """Write one PNG per entry of *frames* into *output_root* using ffmpeg.

    Equivalent to:
    ffmpeg -ss 00:01:28.800 -i tmp.mp4 -vframes 1 -f image2 out.png
    """
    for raw_timestamp, ffmpeg_timestamp in frames:
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters from being misinterpreted.  ffmpeg's exit status is
        # ignored, as before, so one bad frame does not stop the rest.
        subprocess.run([
            'ffmpeg',
            '-ss', ffmpeg_timestamp,
            '-i', video_path,
            '-vframes', '1',
            '-f', 'image2',
            output_root + '/' + raw_timestamp + '.png',
        ])


def main(argv=None):
    """Download every sequence's video and extract its timestamped frames.

    Args:
        argv: command line, ``[program, mode]`` with mode ``test`` or
            ``train``; defaults to ``sys.argv``.

    Exits with status 1 on a wrong argument count or an invalid mode.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print("usage: this.py [test or train]")
        sys.exit(1)
    mode = argv[1]
    if mode not in ("test", "train"):
        print("invalid mode")
        sys.exit(1)

    data_root = "./RealEstate10K/" + mode
    seqname_list = sorted(glob.glob(data_root + "/*.txt"))
    print("{} sequences are saved".format(len(seqname_list)))

    for txt_file in seqname_list:
        print("{} is the current target.".format(txt_file))
        dir_name = os.path.basename(txt_file).split('.')[0]
        output_root = './videos/' + mode + '/' + dir_name

        # An existing output directory marks the sequence as already handled.
        # NOTE: the directory is created before the download, so a sequence
        # whose download failed is also skipped on the next run.
        if os.path.exists(output_root):
            continue
        os.makedirs(output_root)

        youtube_url, frames = read_sequence(txt_file)

        try:
            stream = YouTube(youtube_url).streams.first()
            stream.download('./', 'current')
        except Exception as err:
            # Best effort: record the failure and move on.  Catching
            # Exception (not a bare except) lets Ctrl-C stop the script.
            print("download failed for {}: {}".format(txt_file, err))
            log_failure(txt_file)
            continue

        video_path = find_downloaded_video()
        if video_path is None:
            log_failure(txt_file)
            continue

        try:
            extract_frames(video_path, frames, output_root)
        finally:
            # Always drop the temporary video so it cannot be picked up
            # by mistake while processing the next sequence.
            os.remove(video_path)

    print("done!")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment