# yssharma/MovieRatingScript.py

## MovieRatingScript.py

import subprocess
import re
import urllib2
import json
import os, sys
import csv, operator

def getMovieList(moviedir_path, movielist_path):
    """Write a recursive listing of a directory to a text file.

    Runs ``ls -R`` on ``moviedir_path`` and stores its standard output,
    unmodified, in ``movielist_path``, replacing any existing file.
    Anything ``ls`` prints on standard error is captured and discarded.

    Args:
        moviedir_path: Directory whose contents are listed recursively.
        movielist_path: File that receives the raw ``ls -R`` output.
    """
    print('INFO: Creating Movie list.')
    # List the directory the caller passed in rather than a similarly named
    # module-level variable that only exists when run as a script.
    listing, _ = subprocess.Popen(
        ["ls", "-R", moviedir_path], stdout=subprocess.PIPE,
        stderr=subprocess.PIPE).communicate()
    # communicate() hands back the raw bytes; binary mode stores them as-is.
    with open(movielist_path, "wb") as listfile:
        listfile.write(listing)


def cleanMovieList(movielist_path, cleanmovielist_path, scanonly_videofile_name='N'):
    """Reduce raw directory-listing lines to probable movie titles.

    Each input line is lower-cased; text inside ``[...]`` or ``(...)`` and
    every non-word character is replaced by a space; the remaining words
    are filtered and the survivors are appended, space-separated, as one
    line of ``cleanmovielist_path``.

    Word filtering, in order:
      * a 4-character word containing a digit (e.g. a release year) ends
        the title: it and everything after it are dropped;
      * release-tag words, known file extensions and words of 13 or more
        characters are skipped.

    Input lines with no words are ignored. A line whose words are all
    discarded still produces an empty output line.

    Args:
        movielist_path: Text file with one raw name per line.
        cleanmovielist_path: Output file, opened in append mode.
        scanonly_videofile_name: When ``'Y'``, lines whose last word is not
            a known video/subtitle extension are skipped entirely.
    """
    print('INFO: Cleaning Movie names.')

    junkwords = frozenset([
        'dvdrip', 'x264', 'txt', 'srt',
        'brrip', '480p', 'aac', 'mgb', 'bluray',
        'ts', 'cam', 'camrip', 'dts', 's4a', 'subs',
        'hdtvrip', 'hdrip'])

    allowedfileformats = frozenset([
        "3g2", "3gp", "asf", "asx", "avi", "flv", "m4v",
        "mov", "mp4", "mpg", "rm", "srt", "swf", "vob",
        "wmv", "mkv", "xvid", "divx"])

    # Compiled once instead of on every line / word.
    noise = re.compile(r'\[.*?\]|\(.*?\)|\W')
    # Same truth value as searching for '\d+(\.\d+)?': true iff a digit exists.
    has_digit = re.compile(r'\d')

    # 'with' guarantees both files are closed even if processing fails.
    with open(movielist_path, 'r') as infile:
        with open(cleanmovielist_path, "a") as outfile:
            for line in infile:
                words = noise.sub(' ', line.lower()).split()
                if not words:
                    continue

                if (scanonly_videofile_name == 'Y'
                        and words[-1] not in allowedfileformats):
                    continue

                kept = []
                for word in words:
                    if len(word) == 4 and has_digit.search(word):
                        break
                    if (word in junkwords or word in allowedfileformats
                            or len(word) >= 13):
                        continue
                    kept.append(word)

                outfile.write(' '.join(kept) + '\n')


def _appendCsvRows(path, rows):
    """Append ``rows`` to the CSV file at ``path``, creating it if absent."""
    with open(path, "a") as csvfile:
        csv.writer(csvfile, lineterminator='\n').writerows(rows)


def _fetchMovieData(url):
    """Return the decoded JSON document served at ``url``."""
    response = urllib2.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        response.close()


def getRatings(cleanmovielist_path, movierating_path, movierating_notfound_path):
    """Look up every cleaned title online and record the results as CSV.

    For each non-blank line of ``cleanmovielist_path`` a request is sent to
    ``http://www.omdbapi.com/?t=<quoted title>``. When the JSON reply has a
    non-empty ``Title``, a ``Title,Year,imdbRating`` row is appended to
    ``movierating_path`` (at most once per ``imdbID``); otherwise a
    ``<title>,N/A,Not Found`` row is appended to
    ``movierating_notfound_path``.

    Rows are written through the ``csv`` module, so a field containing a
    comma or quote is quoted instead of corrupting the column layout.
    Rows are buffered and appended in batches of 20. Whatever is buffered
    is still written if a request raises, and both output files exist
    afterwards even when they receive no rows.

    Args:
        cleanmovielist_path: Text file with one movie title per line.
        movierating_path: CSV file (append mode) for titles that were found.
        movierating_notfound_path: CSV file (append mode) for titles that
            were not found.

    Raises:
        urllib2.URLError: If a request fails.
        ValueError: If a reply is not valid JSON.
    """
    print('INFO: Fetching ratings.')
    url = 'http://www.omdbapi.com/?t='
    batch_size = 20  # rows buffered per file before appending; also progress step

    seen_ids = set()
    found_rows = []
    missing_rows = []
    count = 0

    try:
        with open(cleanmovielist_path, 'r') as infile:
            for moviename in infile:
                title = moviename.strip()
                if not title:
                    continue

                data = _fetchMovieData(url + urllib2.quote(title))

                if data.get('Title'):
                    imdb_id = data.get('imdbID')
                    # Different search strings can resolve to the same
                    # movie; keep only the first hit per IMDb id.
                    if imdb_id is None or imdb_id not in seen_ids:
                        seen_ids.add(imdb_id)
                        # NOTE(review): assumes the reply's values are text
                        # strings; a missing key is recorded as 'N/A'.
                        found_rows.append([
                            data.get(key, 'N/A').encode('utf-8')
                            for key in ('Title', 'Year', 'imdbRating')])
                else:
                    missing_rows.append([title, 'N/A', 'Not Found'])

                # Touch the files only when a full batch is ready.
                if len(found_rows) >= batch_size:
                    _appendCsvRows(movierating_path, found_rows)
                    del found_rows[:]

                if len(missing_rows) >= batch_size:
                    _appendCsvRows(movierating_notfound_path, missing_rows)
                    del missing_rows[:]

                count += 1
                sys.stdout.write('. ')
                if count % batch_size == 0:
                    # %d formatting: concatenating the int counter with a
                    # string raises TypeError.
                    sys.stdout.write('%d Names Scanned\n' % count)
    finally:
        # Persist whatever is still buffered, also when a lookup failed.
        _appendCsvRows(movierating_path, found_rows)
        _appendCsvRows(movierating_notfound_path, missing_rows)

    print('\n')


def sortlist(movierating_path, movierating_path2):
    """Write the ratings CSV sorted from highest to lowest rating.

    The rating is taken from the last column of each row and compared as a
    number, so ``10.0`` ranks above ``9.1``. Rows whose rating is not a
    number (for example ``N/A``) are placed after all rated rows, in their
    original order. Blank rows are dropped.

    Args:
        movierating_path: CSV file to read; the rating is its last column.
        movierating_path2: CSV file the sorted rows are appended to.
    """
    print('INFO: Sorting Data.')

    def rating_key(row):
        # Numeric ratings sort above everything else; ties keep input order.
        try:
            return (1, float(row[-1]))
        except ValueError:
            return (0, 0.0)

    with open(movierating_path) as infile:
        rows = [row for row in csv.reader(infile, delimiter=",") if row]

    rows.sort(key=rating_key, reverse=True)

    with open(movierating_path2, "a") as outfile:
        csv.writer(outfile).writerows(rows)


def deletefiles(files):
    """Delete every path in ``files``, ignoring paths that do not exist.

    Args:
        files: Iterable of file paths to remove.

    Raises:
        OSError: If a path exists but cannot be removed (for example it is
            a directory or permission is denied).
    """
    import errno  # imported here so the block needs no new file-level import

    print('INFO: Removing temp files.')
    for filename in files:
        # Attempt the removal and tolerate "no such file" instead of
        # checking first, which can race with another process.
        try:
            os.remove(filename)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise


if __name__=='__main__':
    # Optional positional arguments override the built-in defaults:
    #   1. directory to scan for movies
    #   2. directory that receives the generated files
    #   3. 'Y' to consider only video files, 'N' for every file
    cli_args = sys.argv[1:]

    moviesdir_path = (cli_args[0] if len(cli_args) > 0
                      else '/media/yash/fb32bb89-17c2-4d8a-8c91-2f7910434834/English')
    output_dir = cli_args[1] if len(cli_args) > 1 else '/home/yash/Desktop'

    movielist_path = os.path.join(output_dir, 'Movie_List.txt')
    cleanmovielist_path = os.path.join(output_dir, 'CleanMovie_List.txt')
    movierating_path = os.path.join(output_dir, 'MovieRatings_temp.csv')
    movierating_path_sorted = os.path.join(output_dir, 'MovieRatings.csv')
    movierating_notfound_path = os.path.join(output_dir, 'MovieRatings_NotFound.csv')

    # Flag to scan only video files or all files in dir
    scanonly_videofile_name = cli_args[2] if len(cli_args) > 2 else "N"

    # Start from a clean slate: every stage appends to its output file.
    deletefiles([movielist_path, cleanmovielist_path, movierating_path,
                 movierating_notfound_path, movierating_path_sorted])
    getMovieList(moviesdir_path, movielist_path)
    cleanMovieList(movielist_path, cleanmovielist_path, scanonly_videofile_name)
    getRatings(cleanmovielist_path, movierating_path, movierating_notfound_path)
    sortlist(movierating_path, movierating_path_sorted)
    # Keep only the sorted ratings and the not-found report.
    deletefiles([movielist_path, cleanmovielist_path, movierating_path])
    print('Over N Out !!')

	import subprocess
	import re
	import urllib2
	import json
	import os, sys
	import csv, operator

	def getMovieList(moviedir_path, movielist_path):
		"""Write a recursive listing of a directory to a text file.

		Runs ``ls -R`` on ``moviedir_path`` and stores its standard output,
		unmodified, in ``movielist_path``, replacing any existing file.
		Anything ``ls`` prints on standard error is captured and discarded.

		Args:
			moviedir_path: Directory whose contents are listed recursively.
			movielist_path: File that receives the raw ``ls -R`` output.
		"""
		print('INFO: Creating Movie list.')
		# List the directory the caller passed in rather than a similarly
		# named module-level variable that only exists when run as a script.
		listing, _ = subprocess.Popen(
			["ls", "-R", moviedir_path], stdout=subprocess.PIPE,
			stderr=subprocess.PIPE).communicate()
		# communicate() hands back the raw bytes; binary mode stores them as-is.
		with open(movielist_path, "wb") as listfile:
			listfile.write(listing)


	def cleanMovieList(movielist_path, cleanmovielist_path, scanonly_videofile_name='N'):
		"""Reduce raw directory-listing lines to probable movie titles.

		Each input line is lower-cased; text inside ``[...]`` or ``(...)`` and
		every non-word character is replaced by a space; the remaining words
		are filtered and the survivors are appended, space-separated, as one
		line of ``cleanmovielist_path``.

		Word filtering, in order:
		  * a 4-character word containing a digit (e.g. a release year) ends
		    the title: it and everything after it are dropped;
		  * release-tag words, known file extensions and words of 13 or more
		    characters are skipped.

		Input lines with no words are ignored. A line whose words are all
		discarded still produces an empty output line.

		Args:
			movielist_path: Text file with one raw name per line.
			cleanmovielist_path: Output file, opened in append mode.
			scanonly_videofile_name: When ``'Y'``, lines whose last word is
				not a known video/subtitle extension are skipped entirely.
		"""
		print('INFO: Cleaning Movie names.')

		junkwords = frozenset([
			'dvdrip', 'x264', 'txt', 'srt',
			'brrip', '480p', 'aac', 'mgb', 'bluray',
			'ts', 'cam', 'camrip', 'dts', 's4a', 'subs',
			'hdtvrip', 'hdrip'])

		allowedfileformats = frozenset([
			"3g2", "3gp", "asf", "asx", "avi", "flv", "m4v",
			"mov", "mp4", "mpg", "rm", "srt", "swf", "vob",
			"wmv", "mkv", "xvid", "divx"])

		# Bracketed/parenthesised text and non-word characters become spaces.
		# Compiled once instead of on every line.
		noise = re.compile(r'\[.*?\]|\(.*?\)|\W')
		# Same truth value as searching for '\d+(\.\d+)?': true iff a digit exists.
		has_digit = re.compile(r'\d')

		# 'with' guarantees both files are closed even if processing fails.
		with open(movielist_path, 'r') as infile:
			with open(cleanmovielist_path, "a") as outfile:
				for line in infile:
					words = noise.sub(' ', line.lower()).split()
					if not words:
						continue

					if (scanonly_videofile_name == 'Y'
							and words[-1] not in allowedfileformats):
						continue

					kept = []
					for word in words:
						if len(word) == 4 and has_digit.search(word):
							break
						if (word in junkwords or word in allowedfileformats
								or len(word) >= 13):
							continue
						kept.append(word)

					outfile.write(' '.join(kept) + '\n')


	def _appendCsvRows(path, rows):
		"""Append ``rows`` to the CSV file at ``path``, creating it if absent."""
		with open(path, "a") as csvfile:
			csv.writer(csvfile, lineterminator='\n').writerows(rows)


	def _fetchMovieData(url):
		"""Return the decoded JSON document served at ``url``."""
		response = urllib2.urlopen(url)
		try:
			return json.loads(response.read())
		finally:
			response.close()


	def getRatings(cleanmovielist_path, movierating_path, movierating_notfound_path):
		"""Look up every cleaned title online and record the results as CSV.

		For each non-blank line of ``cleanmovielist_path`` a request is sent
		to ``http://www.omdbapi.com/?t=<quoted title>``. When the JSON reply
		has a non-empty ``Title``, a ``Title,Year,imdbRating`` row is appended
		to ``movierating_path`` (at most once per ``imdbID``); otherwise a
		``<title>,N/A,Not Found`` row is appended to
		``movierating_notfound_path``.

		Rows are written through the ``csv`` module, so a field containing a
		comma or quote is quoted instead of corrupting the column layout.
		Rows are buffered and appended in batches of 20. Whatever is buffered
		is still written if a request raises, and both output files exist
		afterwards even when they receive no rows.

		Args:
			cleanmovielist_path: Text file with one movie title per line.
			movierating_path: CSV file (append mode) for titles that were found.
			movierating_notfound_path: CSV file (append mode) for titles that
				were not found.

		Raises:
			urllib2.URLError: If a request fails.
			ValueError: If a reply is not valid JSON.
		"""
		print('INFO: Fetching ratings.')
		url = 'http://www.omdbapi.com/?t='
		batch_size = 20  # rows buffered per file before appending; also progress step

		seen_ids = set()
		found_rows = []
		missing_rows = []
		count = 0

		try:
			with open(cleanmovielist_path, 'r') as infile:
				for moviename in infile:
					title = moviename.strip()
					if not title:
						continue

					data = _fetchMovieData(url + urllib2.quote(title))

					if data.get('Title'):
						imdb_id = data.get('imdbID')
						# Different search strings can resolve to the same
						# movie; keep only the first hit per IMDb id.
						if imdb_id is None or imdb_id not in seen_ids:
							seen_ids.add(imdb_id)
							# NOTE(review): assumes the reply's values are text
							# strings; a missing key is recorded as 'N/A'.
							found_rows.append([
								data.get(key, 'N/A').encode('utf-8')
								for key in ('Title', 'Year', 'imdbRating')])
					else:
						missing_rows.append([title, 'N/A', 'Not Found'])

					# Touch the files only when a full batch is ready.
					if len(found_rows) >= batch_size:
						_appendCsvRows(movierating_path, found_rows)
						del found_rows[:]

					if len(missing_rows) >= batch_size:
						_appendCsvRows(movierating_notfound_path, missing_rows)
						del missing_rows[:]

					count += 1
					sys.stdout.write('. ')
					if count % batch_size == 0:
						# %d formatting: concatenating the int counter with a
						# string raises TypeError.
						sys.stdout.write('%d Names Scanned\n' % count)
		finally:
			# Persist whatever is still buffered, also when a lookup failed.
			_appendCsvRows(movierating_path, found_rows)
			_appendCsvRows(movierating_notfound_path, missing_rows)

		print('\n')



	def sortlist(movierating_path, movierating_path2):
		"""Write the ratings CSV sorted from highest to lowest rating.

		The rating is taken from the last column of each row and compared as
		a number, so ``10.0`` ranks above ``9.1``. Rows whose rating is not a
		number (for example ``N/A``) are placed after all rated rows, in
		their original order. Blank rows are dropped.

		Args:
			movierating_path: CSV file to read; the rating is its last column.
			movierating_path2: CSV file the sorted rows are appended to.
		"""
		print('INFO: Sorting Data.')

		def rating_key(row):
			# Numeric ratings sort above everything else; ties keep input order.
			try:
				return (1, float(row[-1]))
			except ValueError:
				return (0, 0.0)

		with open(movierating_path) as infile:
			rows = [row for row in csv.reader(infile, delimiter=",") if row]

		rows.sort(key=rating_key, reverse=True)

		with open(movierating_path2, "a") as outfile:
			csv.writer(outfile).writerows(rows)


	def deletefiles(files):
		"""Delete every path in ``files``, ignoring paths that do not exist.

		Args:
			files: Iterable of file paths to remove.

		Raises:
			OSError: If a path exists but cannot be removed (for example it
				is a directory or permission is denied).
		"""
		import errno  # imported here so the block needs no new file-level import

		print('INFO: Removing temp files.')
		for filename in files:
			# Attempt the removal and tolerate "no such file" instead of
			# checking first, which can race with another process.
			try:
				os.remove(filename)
			except OSError as err:
				if err.errno != errno.ENOENT:
					raise




	if __name__=='__main__':
		# Optional positional arguments override the built-in defaults:
		#   1. directory to scan for movies
		#   2. directory that receives the generated files
		#   3. 'Y' to consider only video files, 'N' for every file
		cli_args = sys.argv[1:]

		moviesdir_path = (cli_args[0] if len(cli_args) > 0
			else '/media/yash/fb32bb89-17c2-4d8a-8c91-2f7910434834/English')
		output_dir = cli_args[1] if len(cli_args) > 1 else '/home/yash/Desktop'

		movielist_path = os.path.join(output_dir, 'Movie_List.txt')
		cleanmovielist_path = os.path.join(output_dir, 'CleanMovie_List.txt')
		movierating_path = os.path.join(output_dir, 'MovieRatings_temp.csv')
		movierating_path_sorted = os.path.join(output_dir, 'MovieRatings.csv')
		movierating_notfound_path = os.path.join(output_dir, 'MovieRatings_NotFound.csv')

		# Flag to scan only video files or all files in dir
		scanonly_videofile_name = cli_args[2] if len(cli_args) > 2 else "N"

		# Start from a clean slate: every stage appends to its output file.
		deletefiles([movielist_path, cleanmovielist_path, movierating_path,
			movierating_notfound_path, movierating_path_sorted])
		getMovieList(moviesdir_path, movielist_path)
		cleanMovieList(movielist_path, cleanmovielist_path, scanonly_videofile_name)
		getRatings(cleanmovielist_path, movierating_path, movierating_notfound_path)
		sortlist(movierating_path, movierating_path_sorted)
		# Keep only the sorted ratings and the not-found report.
		deletefiles([movielist_path, cleanmovielist_path, movierating_path])
		print('Over N Out !!')