Skip to content

Instantly share code, notes, and snippets.

@davidyen1124 davidyen1124/movie.py

Last active Aug 29, 2015
Embed
What would you like to do?
Clone https://github.com/richardasaurus/imdb-pie, then put this script into the cloned folder.
import re
import os
from itertools import groupby
from imdbpie import Imdb
import threading
import Queue
# Work queue holding the cleaned-up movie titles that still need a lookup.
queue = Queue.Queue()
# Queue onto which the worker threads put {'title': ..., 'rating': ...} dicts.
out_queue = Queue.Queue()
# Rebound to a fresh list further down, before the results are collected.
results = []
# Root directory that is walked recursively to find movie files.
PATH = '/Volumes/Toshiba/Downloaded'
# filter out useless text
# Tokens are separated by any whitespace character or a dot.
_TITLE_SEPARATOR_RE = re.compile(r'[\s.]')
# A four-digit release year (19xx or 20xx) that is not part of a longer number.
_RELEASE_YEAR_RE = re.compile(r'(?<!\d)(?:19|20)\d{2}(?!\d)')


def filterTitle(titles):
    """Return the words of a file name that belong to the movie title.

    The name is split on whitespace and dots.  Everything from the first
    token containing a release year onwards is dropped, because the text
    after the year (resolution, codec, release group, ...) is not part of
    the title.

    titles -- the raw file name (a single string).

    Returns a list of non-empty tokens; the list is empty when the input
    contains no usable token.
    """
    results = []
    for token in _TITLE_SEPARATOR_RE.split(titles):
        # Consecutive separators ("a..b") produce empty tokens; skip them so
        # the joined title does not contain stray spaces.
        if not token:
            continue
        # A year only ends the title once at least one word was collected;
        # otherwise the year *is* the title (e.g. "2012.2009.720p").
        if results and _RELEASE_YEAR_RE.search(token):
            break
        results.append(token)
    return results
class ImdbThread(threading.Thread):
    """Worker thread that resolves queued movie titles to IMDb ratings.

    Titles are taken from ``queue``.  For every title that resolves to a
    movie, a ``{'title': ..., 'rating': ...}`` dict is put on ``out_queue``.
    """

    def __init__(self, queue, out_queue):
        """Store the input queue of titles and the output queue of results."""
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue

    def run(self):
        """Process titles from the queue in an endless loop.

        Every item taken from the queue is acknowledged with ``task_done()``
        even when processing it fails; otherwise a single failure would make
        ``queue.join()`` in the caller block forever.
        """
        while True:
            title = self.queue.get()
            try:
                found = self.getRating(title)
                if found:
                    movie_title, rating = found
                    # Single-argument print() behaves the same on Python 2
                    # and Python 3.
                    print('%s %s' % (movie_title.encode('utf-8'), rating))
                    self.out_queue.put({
                        'title': movie_title,
                        'rating': rating,
                    })
            except Exception as exc:
                # Keep the worker alive for the remaining titles.
                self._warn(title, exc)
            finally:
                self.queue.task_done()

    # get rating of the movie title
    def getRating(self, title):
        """Look up ``title`` and return ``(movie.title, movie.rating)``.

        Returns None when the search yields nothing, the movie cannot be
        loaded, or the lookup raises.  Lookups are best effort: a failure is
        reported on stderr instead of being raised.
        """
        try:
            imdb = Imdb()
            matches = imdb.find_by_title(title)
            if not matches:
                return None
            # Only the first search hit is considered.
            imdb_id = matches[0]['imdb_id']
            movie = imdb.find_movie_by_id(imdb_id)
            if movie:
                return movie.title, movie.rating
        except Exception as exc:
            # "except Exception" (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            self._warn(title, exc)
        return None

    def _warn(self, title, exc):
        """Report a failed lookup for ``title`` on stderr."""
        import sys
        sys.stderr.write('lookup failed for %r: %s\n' % (title, exc))
# Build the list of movie titles from the files found below PATH
# (change PATH to point at your own download folder).
NON_MOVIE_EXTENSIONS = ('.jpg', '.txt')
# Number of concurrent lookup threads.
WORKER_COUNT = 5

movies = []
for root, dirs, files in os.walk(PATH):
    for name in files:
        path = os.path.join(root, name)
        if not os.path.isfile(path):
            continue
        # Judge a file by its own name rather than by the whole path, so a
        # directory whose name happens to contain "txt" or "jpg" does not
        # hide every movie stored inside it.
        if 'DS_Store' in name:
            continue
        if os.path.splitext(name)[1].lower() in NON_MOVIE_EXTENSIONS:
            continue
        title = ' '.join(part for part in filterTitle(name.lower()) if part)
        # A file name may yield no usable title at all; do not look those up.
        if title:
            movies.append(title)

# Queue every distinct title exactly once.  groupby() on an unsorted list
# only merges *adjacent* duplicates, so deduplicate with a set instead.
for title in sorted(set(movies)):
    queue.put(title)

for _ in range(WORKER_COUNT):
    thread = ImdbThread(queue, out_queue)
    # Daemon threads do not keep the process alive once the main thread ends.
    thread.daemon = True
    thread.start()

# Block until every queued title has been marked as done.
queue.join()

# Drain the output queue without blocking.
results = []
while True:
    try:
        results.append(out_queue.get(block=False))
    except Queue.Empty:
        break

print('-' * 20)
# List the movies from the highest to the lowest rating.
for movie in sorted(results, key=lambda entry: entry['rating'], reverse=True):
    print('%s %s' % (movie['title'], movie['rating']))
@davidyen1124

This comment has been minimized.

Copy link
Owner Author

davidyen1124 commented Apr 3, 2014

Too many threads cause problems.

@davidyen1124

This comment has been minimized.

Copy link
Owner Author

davidyen1124 commented Apr 3, 2014

fix!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.