Skip to content

Instantly share code, notes, and snippets.

@davidyen1124

davidyen1124/movie.py

Last active Aug 29, 2015
Embed
What would you like to do?
Clone https://github.com/richardasaurus/imdb-pie, then put this script into the cloned folder.
import re
import os
from itertools import groupby
from imdbpie import Imdb
import threading
import Queue
# Work queue: cleaned-up movie titles waiting to be looked up by the workers.
queue = Queue.Queue()
# Result queue: the workers put one {'title': ..., 'rating': ...} dict per hit.
out_queue = Queue.Queue()
# NOTE(review): this list is rebound to a fresh one further down, right before
# the results are collected, so this initial value is never read.
results = []
# Root directory that is walked recursively to find movie files.
PATH = '/Volumes/Toshiba/Downloaded'
# Tokens in a file name are separated by any whitespace character or a dot.
_TOKEN_SEPARATOR = re.compile(r'[\s.]')
# A release year (19xx or 20xx) appearing anywhere inside a token.
_RELEASE_YEAR = re.compile(r'(?:19|20)\d{2}')


def filterTitle(titles):
    """Return the leading tokens of a file name that make up the movie title.

    The name is split on whitespace and dots. Everything from the first
    token that contains a release year (19xx or 20xx) onwards is dropped,
    because release tags (resolution, codec, group, ...) follow the year.

    * Empty tokens produced by consecutive separators are skipped so the
      joined title does not contain doubled spaces.
    * A year-like token in the very first position is kept: it is taken to
      be the title itself (e.g. "2012"), not the release year.

    titles -- the (usually lower-cased) file name to clean up.
    Returns a list of title tokens; empty when the input has no tokens.
    """
    results = []
    for token in _TOKEN_SEPARATOR.split(titles):
        if not token:
            continue
        # Only treat a year as the cut-off once some title text was seen.
        if results and _RELEASE_YEAR.search(token):
            break
        results.append(token)
    return results
class ImdbThread(threading.Thread):
    """Worker thread that looks up ratings for titles taken from a queue.

    queue     -- input queue of title strings to look up.
    out_queue -- output queue; receives one {'title': ..., 'rating': ...}
                 dict for every title that could be resolved.
    """

    def __init__(self, queue, out_queue):
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue

    def run(self):
        """Process titles forever; meant to be run as a daemon thread."""
        while True:
            query = self.queue.get()
            try:
                found = self.getRating(query)
                if found:
                    movie_title, rating = found
                    print('%s %s' % (movie_title.encode('utf-8'), rating))
                    self.out_queue.put({
                        'title': movie_title,
                        'rating': rating,
                    })
            except Exception as exc:
                # One bad entry must not kill the worker; report it and go on.
                print('skipping %r: %s' % (query, exc))
            finally:
                # Always acknowledge the item, otherwise queue.join() in the
                # main thread would block forever after a failure.
                self.queue.task_done()

    # get rating of the movie title
    def getRating(self, title):
        """Return (title, rating) of the first search hit, or None.

        Lookups are best-effort: any error raised while searching or
        fetching the movie is treated as "not found".
        """
        try:
            imdb = Imdb()
            result = imdb.find_by_title(title)
            if result:
                imdb_id = result[0]['imdb_id']
                movie = imdb.find_movie_by_id(imdb_id)
                if movie:
                    return movie.title, movie.rating
        except Exception:
            # Deliberately ignored; unlike a bare "except:", this still lets
            # SystemExit and KeyboardInterrupt propagate.
            pass
        return None
# Build the list of cleaned-up titles, one per candidate movie file.
# (change PATH to scan a different directory)
movies = []
for root, dirs, files in os.walk(PATH):
    for filename in files:
        path = os.path.join(root, filename)
        lowered = filename.lower()
        # Decide by the file name only, case-insensitively; testing the whole
        # path would drop every file below a directory named e.g. "txt".
        if 'ds_store' in lowered or lowered.endswith(('.jpg', '.txt')):
            continue
        if not os.path.isfile(path):
            continue
        title = ' '.join(filterTitle(lowered))
        # Nothing left after filtering means there is nothing to search for.
        if title:
            movies.append(title)

# Queue every distinct title exactly once. A set is used because duplicates
# are not necessarily adjacent in os.walk() order.
for title in sorted(set(movies)):
    queue.put(title)

# A small pool of daemon workers performs the lookups concurrently.
for _ in range(5):
    thread = ImdbThread(queue, out_queue)
    thread.setDaemon(True)
    thread.start()

# Block until every queued title has been acknowledged by a worker.
queue.join()

# Drain the result queue into a plain list.
results = []
while True:
    try:
        elem = out_queue.get(block=False)
    except Queue.Empty:
        break
    else:
        results.append(elem)

print('-' * 20)
# List the movies from highest to lowest rating.
for movie in sorted(results, key=lambda entry: entry['rating'], reverse=True):
    # Encode exactly like the worker's progress output does.
    print('%s %s' % (movie['title'].encode('utf-8'), movie['rating']))
@davidyen1124

This comment has been minimized.

Copy link
Owner Author

@davidyen1124 davidyen1124 commented Apr 3, 2014

Too many threads cause problems.

@davidyen1124

This comment has been minimized.

Copy link
Owner Author

@davidyen1124 davidyen1124 commented Apr 3, 2014

fix!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment