Skip to content

Instantly share code, notes, and snippets.

@yssharma
Last active April 6, 2022 03:23
Show Gist options
  • Save yssharma/8269000 to your computer and use it in GitHub Desktop.
Save yssharma/8269000 to your computer and use it in GitHub Desktop.
Python script that fetches movie ratings from IMDb via the OMDb API and saves them to a file.
import subprocess
import re
import urllib2
import json
import os, sys
import csv, operator
def getMovieList(moviedir_path, movielist_path):
    """Dump a recursive listing of the movie directory to a text file.

    Runs ``ls -R`` on *moviedir_path* and writes whatever the command printed
    on stdout to *movielist_path*, replacing any previous content. Anything
    ``ls`` reports on stderr is captured and discarded, so an unreadable or
    missing directory simply yields an empty (or partial) listing.

    Args:
        moviedir_path: Directory to scan recursively.
        movielist_path: File that receives the raw ``ls -R`` output.
    """
    print('INFO: Creating Movie list.')
    # Scan the directory handed in by the caller rather than depending on a
    # similarly named module-level global being defined.
    listing, _ = subprocess.Popen(
        ["ls", "-R", moviedir_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE).communicate()
    # communicate() yields raw bytes; binary mode stores them unchanged and
    # the context manager closes the file even if the write fails.
    with open(movielist_path, "wb") as listing_file:
        listing_file.write(listing)
def cleanMovieList(movielist_path, cleanmovielist_path, scanonly_videofile_name='N'):
    """Reduce raw file names to probable movie titles, one per output line.

    Every non-blank line of *movielist_path* is lower-cased, has bracketed
    ``[...]`` / ``(...)`` groups and all non-word characters replaced by
    spaces, and is then filtered word by word:

    * the title ends at the first four-character token that contains a digit
      (a release year such as ``1999``, but also tags such as ``720p``);
    * known release tags, file extensions and tokens of 13 or more characters
      are dropped.

    The surviving words are appended to *cleanmovielist_path* as one line. A
    line is written even when no word survives (it is then empty).

    Args:
        movielist_path: Text file with one raw name per line.
        cleanmovielist_path: File the cleaned names are appended to.
        scanonly_videofile_name: ``'Y'`` to keep only lines whose last token
            is one of the recognised file extensions; any other value keeps
            every line.
    """
    print('INFO: Cleaning Movie names.')
    junkwords = frozenset([
        'dvdrip', 'x264', 'txt', 'srt',
        'brrip', '480p', 'aac', 'mgb', 'bluray',
        'ts', 'cam', 'camrip', 'dts', 's4a', 'subs',
        'hdtvrip', 'hdrip'])
    allowedfileformats = frozenset([
        "3g2", "3gp", "asf", "asx", "avi", "flv", "m4v",
        "mov", "mp4", "mpg", "rm", "srt", "swf", "vob",
        "wmv", "mkv", "xvid", "divx"])
    # Compiled once instead of on every line / every word.
    noise = re.compile(r'\[.*?\]|\(.*?\)|\W')
    has_number = re.compile(r'\d+(\.\d+)?')

    # Context managers close both files even if a line fails half-way.
    with open(movielist_path, 'r') as infile:
        with open(cleanmovielist_path, "a") as outfile:
            for line in infile:
                words = noise.sub(' ', line.lower()).split()
                if not words:
                    continue
                if scanonly_videofile_name == 'Y' and words[-1] not in allowedfileformats:
                    continue
                kept = []
                for word in words:
                    if len(word) == 4 and has_number.search(word):
                        # Everything from the year-like token on is release info.
                        break
                    if word in junkwords or word in allowedfileformats or len(word) >= 13:
                        continue
                    kept.append(word)
                outfile.write(' '.join(kept) + '\n')
def _csv_cell(value):
    """Return *value* in a form the csv module can write on this interpreter."""
    # On Python 2, json.loads yields ``unicode`` objects, which the csv module
    # can only write once encoded to bytes; a native ``str`` passes through
    # untouched on both major versions.
    return value if isinstance(value, str) else value.encode('utf-8')


def _append_rows(path, rows):
    """Append *rows* to the CSV file at *path* (creating it), then empty *rows*."""
    with open(path, "a") as handle:
        csv.writer(handle, lineterminator='\n').writerows(rows)
    del rows[:]


def getRatings(cleanmovielist_path, movierating_path, movierating_notfound_path):
    """Look up every cleaned movie name on OMDb and record the ratings.

    For each non-blank line of *cleanmovielist_path* the OMDb title search is
    queried. A reply that carries a ``Title`` is appended to
    *movierating_path* as ``Title,Year,imdbRating``; a title whose ``imdbID``
    was already recorded in this run is skipped. Names without a match are
    appended to *movierating_notfound_path* as ``name,N/A,Not Found``, and
    names whose request or JSON decoding failed as ``name,N/A,Lookup Failed``,
    so one bad request no longer aborts the whole run.

    Rows are buffered and appended in batches of 20. Both output files are
    always created, even when nothing was written to them. Fields are written
    through the csv module so titles containing commas stay in one column.

    Args:
        cleanmovielist_path: Text file with one movie name per line.
        movierating_path: CSV file that receives the rated titles.
        movierating_notfound_path: CSV file that receives unmatched names.
    """
    try:  # Python 2
        from urllib2 import quote, urlopen
    except ImportError:  # Python 3
        from urllib.parse import quote
        from urllib.request import urlopen

    print('INFO: Fetching ratings.')
    url = 'http://www.omdbapi.com/?t='
    batch_size = 20
    progress_every = 20
    seen_ids = set()
    found_rows = []
    missing_rows = []
    count = 0

    with open(cleanmovielist_path, 'r') as infile:
        for moviename in infile:
            if not moviename.strip():
                continue
            movie = moviename.rstrip()

            data = {}
            status = 'Not Found'
            try:
                response = urlopen(url + quote(movie))
                try:
                    data = json.loads(response.read().decode('utf-8'))
                finally:
                    response.close()
            except (IOError, ValueError):
                # IOError covers URL/HTTP/socket failures; ValueError covers
                # undecodable or malformed JSON bodies.
                data = {}
                status = 'Lookup Failed'

            title = data.get('Title') if isinstance(data, dict) else None
            if title:
                imdb_id = data.get('imdbID')
                if imdb_id:
                    if imdb_id in seen_ids:
                        # Several file names resolved to the same movie.
                        continue
                    seen_ids.add(imdb_id)
                found_rows.append([
                    _csv_cell(title),
                    _csv_cell(data.get('Year', 'N/A')),
                    _csv_cell(data.get('imdbRating', 'N/A')),
                ])
            else:
                missing_rows.append([_csv_cell(movie), 'N/A', status])

            # Flush only full batches; an empty buffer must not trigger a write.
            if len(found_rows) >= batch_size:
                _append_rows(movierating_path, found_rows)
            if len(missing_rows) >= batch_size:
                _append_rows(movierating_notfound_path, missing_rows)

            count += 1
            sys.stdout.write('.')
            if count % progress_every == 0:
                sys.stdout.write(' %d Names Scanned\n' % count)
            sys.stdout.flush()

    # Write whatever is left and make sure both files exist for later stages.
    _append_rows(movierating_path, found_rows)
    _append_rows(movierating_notfound_path, missing_rows)
    print('\n')
def _rating_sort_key(row):
    """Return a sort key that ranks *row* by its last column read as a number."""
    try:
        return (1, float(row[-1]))
    except (IndexError, ValueError):
        # Blank rows and non-numeric ratings such as "N/A" rank below every
        # real rating once the sort is reversed.
        return (0, 0.0)


def sortlist(movierating_path, movierating_path2):
    """Write the rating rows sorted from highest to lowest rating.

    Reads the comma-separated rows of *movierating_path*, orders them by the
    numeric value of their last column (the rating in the
    ``Title,Year,Rating`` rows this script produces) in descending order, and
    appends them to *movierating_path2*. Ratings are compared as numbers, so
    ``10.0`` ranks above ``9.1``; rows whose rating is missing or not a number
    are placed after all rated rows, keeping their original relative order.

    Args:
        movierating_path: CSV file to read.
        movierating_path2: CSV file the sorted rows are appended to.
    """
    print('INFO: Sorting Data.')
    with open(movierating_path) as source:
        ranked = sorted(csv.reader(source, delimiter=","),
                        key=_rating_sort_key, reverse=True)
    with open(movierating_path2, "a") as outpath:
        csv.writer(outpath).writerows(ranked)
def deletefiles(files):
    """Remove every path in *files*, ignoring those that do not exist.

    Args:
        files: Iterable of file paths to delete.

    Raises:
        OSError: If a path exists but cannot be removed (for example it is a
            directory or permission is denied).
    """
    import errno

    print('INFO: Removing temp files.')
    for filename in files:
        # Attempt the removal directly instead of checking first: there is no
        # gap in which the file can disappear between the check and the delete.
        try:
            os.remove(filename)
        except OSError as exc:
            if exc.errno not in (errno.ENOENT, errno.ENOTDIR):
                raise
if __name__ == '__main__':
    # Directory that is scanned for movies.
    moviesdir_path = '/media/yash/fb32bb89-17c2-4d8a-8c91-2f7910434834/English'

    # Files produced by the individual pipeline stages.
    movielist_path = '/home/yash/Desktop/Movie_List.txt'
    cleanmovielist_path = '/home/yash/Desktop/CleanMovie_List.txt'
    movierating_path = '/home/yash/Desktop/MovieRatings_temp.csv'
    movierating_path_sorted = '/home/yash/Desktop/MovieRatings.csv'
    movierating_notfound_path = '/home/yash/Desktop/MovieRatings_NotFound.csv'

    # "Y" restricts the scan to video files; "N" considers every file found.
    scanonly_videofile_name = "N"

    # Intermediate files are removed again once the final report is written.
    temp_files = [movielist_path, cleanmovielist_path, movierating_path]

    # Start from a clean slate: every stage appends to its output file.
    deletefiles(temp_files + [movierating_notfound_path, movierating_path_sorted])

    getMovieList(moviesdir_path, movielist_path)
    cleanMovieList(movielist_path, cleanmovielist_path, scanonly_videofile_name)
    getRatings(cleanmovielist_path, movierating_path, movierating_notfound_path)
    sortlist(movierating_path, movierating_path_sorted)

    deletefiles(temp_files)
    print('Over N Out !!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment