Skip to content

Instantly share code, notes, and snippets.

@erogol
Last active Feb 22, 2019
Embed
What would you like to do?
Find the YouTube video, download it, and convert it from mp4 to a Matlab-readable avi format
'''
This script naively takes a line-separated list of movie names, supposedly scraped from IMDb,
searches YouTube for each one, and downloads the first result. It then converts the downloaded
mp4 video to avi so that it can be processed in Matlab.
erogol - erengolge@gmail.com
It needs:
BeautifulSoup : easy_install BeautifulSoup -- for scraping
ffmpeg : apt-get install ffmpeg -- for video conversion
youtube-dl : apt-get install youtube-dl -- for YouTube download
'''
import os
import pdb
import shlex
import subprocess
import time
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup
def get_recent_item(PATH):
    '''Return the name of the most recently modified entry in a directory.

    PATH is the directory to inspect; it may be given with or without a
    trailing path separator. The returned value is the bare entry name
    (not joined with PATH). When several entries share the newest
    modification time, the lexicographically greatest name wins.

    Raises IndexError if the directory is empty and OSError if it cannot
    be listed.
    '''
    names = os.listdir(PATH)
    if not names:
        raise IndexError('no entries in directory: %s' % PATH)
    # Pick the NEWEST entry: sorting ascending and taking the first element
    # would return the oldest one instead.
    return max(
        names,
        key=lambda name: (os.path.getmtime(os.path.join(PATH, name)), name),
    )
# Rating categories to process. For each category NAME the movie titles are
# read from the list file 'NAME.txt' (one title per line) and the converted
# trailers are written to 'outputs/NAME/'. The full set of categories is
# ['g', 'nc_17', 'pg_13', 'pg', 'r']; only the last three are enabled.
RANK_LIST = ['pg_13', 'pg', 'r']

FILE_PATH = 'outputs/'
TEMP_PATH = 'temp/'  # scratch directory holding the raw mp4 download

# Optional YouTube account, read from the environment so that no credentials
# are stored in (or printed by) this script. Both must be set to be used.
YOUTUBE_USER = os.environ.get('YOUTUBE_USER')
YOUTUBE_PASSWORD = os.environ.get('YOUTUBE_PASSWORD')

# Result links already handed to youtube-dl during this run, so that two
# titles resolving to the same video are only fetched once.
already_installed_list = []

if not os.path.exists(TEMP_PATH):
    os.makedirs(TEMP_PATH)

for rank in RANK_LIST:
    OUTPUT_PATH = FILE_PATH + rank + '/'
    # Only the list files that are actually processed are opened, and each
    # one is closed as soon as its titles have been handled.
    with open(rank + '.txt', 'r') as file_obj:
        for movie_name in file_obj:
            # strip() copes with '\r\n' endings and a missing final newline;
            # blank lines carry no title and are skipped.
            movie_name = movie_name.strip()
            if not movie_name:
                continue
            print('***********************************')
            print(movie_name)
            print('***********************************\n')

            # quote_plus escapes characters such as '&' that would otherwise
            # cut the search query short.
            url = ('http://www.youtube.com/results?search_query='
                   + urllib.quote_plus(movie_name + ' official movie trailer'))
            try:
                soup = BeautifulSoup(urllib2.urlopen(url).read())
                # First search hit; raises if the page has no such anchor.
                link = soup('a', {'dir': 'ltr'})[0]['href']
            except Exception as err:
                # Best effort: report and move on to the next title, but let
                # KeyboardInterrupt/SystemExit stop the run.
                print('search failed for ' + movie_name + ': ' + str(err))
                continue

            if link in already_installed_list:
                print(movie_name + ' already downloaded !!!!!!!')
                continue
            already_installed_list.append(link)
            all_link = 'http://www.youtube.com' + link

            # Argument lists (no shell) keep scraped text from being
            # interpreted as shell syntax.
            command = ['youtube-dl', '--restrict-filenames']
            if YOUTUBE_USER and YOUTUBE_PASSWORD:
                command += ['-u', YOUTUBE_USER, '-p', YOUTUBE_PASSWORD]
            command += [
                # Keep the download time as mtime so the newest file in
                # TEMP_PATH really is the one just fetched.
                '--no-mtime',
                '--min-filesize', '3m',
                '--max-filesize', '50m',
                '-o', TEMP_PATH + '%(title)s.%(ext)s',
                '-f', '18',
                all_link,
            ]
            print('downloading ' + all_link)

            try:
                before = set(os.listdir(TEMP_PATH))
                if subprocess.call(command) != 0:
                    print('youtube-dl failed for ' + all_link)
                    continue
                video_name = get_recent_item(TEMP_PATH)
            except (OSError, IndexError) as err:
                print('download failed for ' + all_link + ': ' + str(err))
                continue
            if video_name in before:
                # Nothing new arrived (e.g. rejected by the file-size limits);
                # do not convert a stale file left by an earlier attempt.
                print('nothing downloaded for ' + all_link)
                continue

            # Convert the video to avi format to be processed in Matlab.
            source = TEMP_PATH + video_name
            # splitext keeps dots that are part of the title itself.
            target = OUTPUT_PATH + os.path.splitext(video_name)[0] + '.avi'
            try:
                if not os.path.exists(OUTPUT_PATH):
                    os.makedirs(OUTPUT_PATH)
                status = subprocess.call([
                    'ffmpeg', '-i', source,
                    '-an', '-vb', '1000k', '-r', '32',
                    '-vf', 'scale=-1:360',
                    target,
                ])
                if status != 0:
                    print('ffmpeg failed for ' + source)
            except OSError as err:
                print('conversion failed for ' + source + ': ' + str(err))
            finally:
                # Always drop the raw download so TEMP_PATH stays empty
                # between titles.
                if os.path.exists(source):
                    os.remove(source)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment