Find the YouTube video, download it, and convert it from MP4 to AVI, a format MATLAB can read
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
This code naively takes a line-separated list of movie names, supposedly scraped from IMDb,
and downloads the first YouTube search result for each of them. It also converts the
downloaded MP4 video to an AVI version so that it can be processed in Matlab.
erogol - erengolge@gmail.com
It needs:
BeautifulSoup : easy_install BeautifulSoup -- for scraping
ffmpeg : apt-get install ffmpeg -- for video conversion
youtube-dl : apt-get install youtube-dl -- for YouTube download
'''
import urllib2 | |
from BeautifulSoup import BeautifulSoup | |
import pdb | |
import time | |
import os | |
import shlex | |
import subprocess | |
def get_recent_item(PATH):
    """Return the name of the most recently modified entry in directory PATH.

    PATH may be given with or without a trailing path separator.  Only the
    bare entry name is returned, not the full path.

    Raises:
        IndexError: if the directory contains no entries.
        OSError: if PATH cannot be listed or an entry cannot be stat'ed.
    """
    entries = [(os.path.getmtime(os.path.join(PATH, name)), name)
               for name in os.listdir(PATH)]
    if not entries:
        raise IndexError('no entries found in %r' % (PATH,))
    # The largest modification time is the newest entry; the previous
    # ascending sort followed by [0] picked the oldest one instead.
    return max(entries)[1]
# Rank labels to process.  The titles for rank ``<rank>`` are read from
# ``<rank>.txt`` (one movie name per line) and the converted trailers are
# written to ``outputs/<rank>/``.  The full label set is
# ['g', 'nc_17', 'pg_13', 'pg', 'r']; 'g' and 'nc_17' are currently disabled.
RANK_LIST = ['pg_13', 'pg', 'r']

FILE_PATH = 'outputs/'  # root directory of the converted .avi files
TEMP_PATH = 'temp/'     # scratch directory for the downloaded .mp4 files


def _find_first_result_link(movie_name):
    """Return the href of the first YouTube search hit for the movie's trailer.

    Raises whatever urllib2 / BeautifulSoup raise on network or parse
    failures, IndexError when the page has no matching anchor and KeyError
    when that anchor carries no href.
    """
    from urllib import quote_plus  # Python 2 stdlib, sibling of urllib2

    # Percent-encode the whole query so titles containing '&', '#', ... do
    # not corrupt the URL; spaces still become '+'.
    query = quote_plus(movie_name + ' official movie trailer')
    url = 'http://www.youtube.com/results?search_query=' + query
    soup = BeautifulSoup(urllib2.urlopen(url).read())
    return soup('a', {'dir': 'ltr'})[0]['href']


def _download_and_convert(video_url, output_dir):
    """Download video_url into TEMP_PATH and convert it to .avi in output_dir.

    The downloaded file is always removed from TEMP_PATH afterwards, even if
    the conversion step raises.
    """
    download = ['youtube-dl', '--restrict-filenames']
    # Credentials are taken from the environment instead of being hard-coded
    # in the source; they are only passed to youtube-dl when both are set.
    user = os.environ.get('YOUTUBE_DL_USER')
    password = os.environ.get('YOUTUBE_DL_PASSWORD')
    if user and password:
        download += ['-u', user, '-p', password]
    download += ['--min-filesize', '3m', '--max-filesize', '50m',
                 '-o', TEMP_PATH + '%(title)s.%(ext)s',
                 '-f', '18', video_url]
    # Log only the URL so that credentials never reach the console.
    print('youtube-dl -> ' + video_url)
    # Argument lists (no shell) keep scraped text from being interpreted as
    # shell syntax.
    subprocess.call(download)

    video_name = get_recent_item(TEMP_PATH)
    source = TEMP_PATH + video_name
    try:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        # splitext keeps dots that are part of the title; only the final
        # extension is replaced by .avi.
        target = output_dir + os.path.splitext(video_name)[0] + '.avi'
        # Convert to .avi so the video can be processed in Matlab.
        subprocess.call(['ffmpeg', '-i', source, '-an', '-vb', '1000k',
                         '-r', '32', '-vf', 'scale=-1:360', target])
    finally:
        os.remove(source)


# Links that were already handed to the downloader, so that two titles
# resolving to the same video are fetched only once.
downloaded_links = set()

for rank in RANK_LIST:
    output_path = FILE_PATH + rank + '/'
    with open(rank + '.txt', 'r') as movie_file:
        for line in movie_file:
            movie_name = line.strip()
            if not movie_name:
                continue  # nothing to search for on a blank line
            print('***********************************')
            print(movie_name)
            print('***********************************\n')

            try:
                link = _find_first_result_link(movie_name)
            except Exception as err:
                # Best effort: a failed search must not abort the whole run.
                print('search failed for ' + movie_name + ': ' + str(err))
                continue

            if link in downloaded_links:
                print(movie_name + ' already downloaded !!!!!!!')
                continue
            downloaded_links.add(link)

            try:
                _download_and_convert('http://www.youtube.com' + link,
                                      output_path)
            except Exception as err:
                # Best effort: report the failure and move on to the next
                # title.
                print('download/convert failed for ' + movie_name + ': '
                      + str(err))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment