Skip to content

Instantly share code, notes, and snippets.

@erogol
Last active February 22, 2019 19:05
Show Gist options
  • Save erogol/6465353 to your computer and use it in GitHub Desktop.
Save erogol/6465353 to your computer and use it in GitHub Desktop.
Find a YouTube video, download it, and convert it from mp4 to a Matlab-readable avi format
'''
This code naively takes a line-separated list of movie names, supposedly scraped from IMDb,
and downloads the first YouTube search result for each of them. It also converts the downloaded
mp4 video to an avi version so that it can be processed in Matlab.
erogol - erengolge@gmail.com
It needs;
Beautifulsoup : easy_install BeautifulSoup -- for scraping
ffmpeg : apt-get install ffmpeg -- for video conversion
youtube-dl : apt-get install youtube-dl -- for youtube download
'''
import os
import pdb
import shlex
import subprocess
import time
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup
def get_recent_item(PATH):
    """Return the name of the most recently modified entry in a directory.

    PATH is the directory to inspect; it may be given with or without a
    trailing path separator. The returned value is the bare entry name
    (not joined with PATH).

    Raises IndexError when the directory is empty, and OSError when the
    directory cannot be listed.
    """
    names = os.listdir(PATH)
    if not names:
        raise IndexError('no files found in ' + PATH)
    # Pick the entry with the largest modification time; the name is the
    # tie-breaker so the result is deterministic when mtimes are equal.
    return max(
        names,
        key=lambda name: (os.path.getmtime(os.path.join(PATH, name)), name),
    )
# Rating buckets to process in this run. Each entry doubles as the name of
# the output sub-directory and as the stem of the input list file.
# Full set, for reference: ['g', 'nc_17', 'pg_13', 'pg', 'r']
RANK_LIST = ['pg_13', 'pg', 'r']

# One movie-name list per rating, read from '<rating>.txt' in the working
# directory. Deriving the list from RANK_LIST keeps the two in lockstep and
# avoids opening list files that are never read.
files = [open(rank + '.txt', 'r') for rank in RANK_LIST]

# Index into RANK_LIST of the list file currently being processed.
counter = 0
# Video links that have already been fetched, used to skip duplicates.
already_installed_list = []
# Converted clips are written to outputs/<rating>/; raw downloads live in
# temp/ only until they have been converted.
FILE_PATH = 'outputs/'
TEMP_PATH = 'temp/'

# NOTE(review): the YouTube account name and password used to be hard-coded
# in the download command (and echoed to stdout). They are now read from the
# environment; when either variable is unset the download runs anonymously.
YOUTUBE_USER = os.environ.get('YOUTUBE_DL_USER')
YOUTUBE_PASSWORD = os.environ.get('YOUTUBE_DL_PASSWORD')
auth_args = []
if YOUTUBE_USER and YOUTUBE_PASSWORD:
    auth_args = ['-u', YOUTUBE_USER, '-p', YOUTUBE_PASSWORD]

# RANK_LIST and files are parallel: the n-th file holds the titles for the
# n-th rating.
for rank, file_obj in zip(RANK_LIST, files):
    OUTPUT_PATH = FILE_PATH + rank + '/'
    for movie_name in file_obj:
        movie_name = movie_name.strip()
        if not movie_name:
            # Blank line in the list file: nothing to search for.
            continue
        print('***********************************')
        print(movie_name)
        print('***********************************\n')

        # Search YouTube for the trailer and take the first result link.
        # quote_plus keeps titles containing '&', '#', etc. from breaking
        # the query string.
        query = urllib.quote_plus(movie_name + ' official movie trailer')
        url = 'http://www.youtube.com/results?search_query=' + query
        try:
            soup = BeautifulSoup(urllib2.urlopen(url).read())
            link = soup('a', {'dir': 'ltr'})[0]['href']
        except Exception as err:
            # Best effort: a failed search only skips this title.
            print('search failed for ' + movie_name + ': ' + str(err))
            continue

        if link in already_installed_list:
            print(movie_name + ' already downloaded !!!!!!!')
            continue
        already_installed_list.append(link)
        all_link = 'http://www.youtube.com' + link

        download_args = [
            '--min-filesize', '3m',
            '--max-filesize', '50m',
            '-o', TEMP_PATH + '%(title)s.%(ext)s',
            '-f', '18',
            all_link,
        ]
        # Log the command without the credentials.
        print('youtube-dl --restrict-filenames ' + ' '.join(download_args))
        # Argument lists (no shell) so the scraped link cannot inject
        # shell syntax.
        download_cmd = (['youtube-dl', '--restrict-filenames']
                        + auth_args + download_args)
        try:
            if subprocess.call(download_cmd) != 0:
                raise RuntimeError('youtube-dl failed for ' + all_link)
            video_name = get_recent_item(TEMP_PATH)
            # splitext keeps titles that themselves contain dots intact.
            video_name_alone = os.path.splitext(video_name)[0]
            # Convert the video to avi format to be processed in Matlab.
            if not os.path.exists(OUTPUT_PATH):
                os.makedirs(OUTPUT_PATH)
            convert_cmd = [
                'ffmpeg', '-i', TEMP_PATH + video_name,
                '-an', '-vb', '1000k', '-r', '32',
                '-vf', 'scale=-1:360',
                OUTPUT_PATH + video_name_alone + '.avi',
            ]
            try:
                subprocess.call(convert_cmd)
            finally:
                # Always clear the raw download so it cannot be picked up
                # again for a later title.
                os.remove(TEMP_PATH + video_name)
        except Exception as err:
            # Best effort: report the problem and move on to the next title.
            print('skipping ' + movie_name + ': ' + str(err))
    file_obj.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment