Last active
August 7, 2023 09:50
-
-
Save cdtinney/b02160dc34997320af8165e5eb35ffdd to your computer and use it in GitHub Desktop.
Python script for determining the total time you've spent watching movies and TV shows based on your IMDb ratings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import urllib2 | |
# IMDb user id whose ratings page is scraped. Replace the placeholder below
# with your own id, keeping it a quoted string so it can be concatenated
# into the URL.
user_id = "{{USER_ID_HERE}}"
ratings_url = "http://www.imdb.com/user/" + user_id + "/ratings"

# Running total of counted runtime, in minutes; get_movies() adds to it.
total_runtime = 0
def get_ratings_page(url):
    """Download one ratings page and return its body.

    Args:
        url: Full URL of the page to fetch.

    Returns:
        The response body as returned by ``response.read()``.
    """
    print("fetching: " + url)
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
def _parse_runtime_minutes(text):
    """Return the minutes in a "(123 mins.)" style string, or None if unparseable."""
    if text is None:
        return None
    cleaned = text.replace("(", "").replace(" mins.)", "")
    try:
        return int(cleaned)
    except ValueError:
        return None


def get_movies(page):
    """Count the rated items on one page and add their runtime to the total.

    Every ``div.list_item`` that contains a ``div.item_description`` counts as
    one item. The first ``span`` inside the description is read as the
    runtime; runtimes longer than 60 minutes are added to the module-level
    ``total_runtime``.

    Args:
        page: HTML of a ratings page.

    Returns:
        The number of items found on the page (0 when there are none).
    """
    global total_runtime

    soup = BeautifulSoup(page, 'html.parser')
    items = 0
    page_minutes = 0

    # find all list elements
    for movie in soup.find_all("div", {"class": "list_item"}):
        # find the description element within each list item
        desc_element = movie.find("div", {"class": "item_description"})
        if not desc_element:
            continue

        # the item counts even when its runtime is missing or unreadable
        items += 1

        # find the span element that contains the runtime (in minutes)
        min_element = desc_element.find("span")
        if not min_element:
            continue

        # .string is None when the span has nested markup, and the text may
        # not be a runtime at all; skip such items instead of crashing.
        minutes = _parse_runtime_minutes(min_element.string)

        # ignore TV shows (runtimes of 60 minutes or less are not counted)
        if minutes is not None and minutes > 60:
            page_minutes += minutes

    # add the time for the page to the total_runtime var
    total_runtime += page_minutes
    return items
def get_all_movies(url):
    """Walk every ratings page and print the accumulated running time.

    Pages are requested with a ``?start=`` query parameter until
    ``get_movies`` reports a page with no items. The total is then read from
    the module-level ``total_runtime`` (minutes) and printed as hours and
    minutes, with a days component once it reaches a full day.

    Args:
        url: Base URL of the ratings list, without a query string.
    """
    items_seen = 0

    # loop until no more pages
    while True:
        # NOTE(review): assumes ``start`` is the 1-based index of the first
        # item on the page -- confirm against the site. The previous code
        # added an extra 1 per page, so the offset drifted (0, 101, 202, ...)
        # and one item was skipped on each later page.
        page_url = url + "?start=" + str(items_seen + 1)
        page_html = get_ratings_page(page_url)
        items_found = get_movies(page_html)
        if items_found == 0:
            break
        items_seen += items_found

    # calculate hours and minutes; divmod keeps integer results on both
    # Python 2 and Python 3
    hours, minutes = divmod(total_runtime, 60)

    # calculate days (!!!) -- exactly 24 hours is already one full day
    if hours >= 24:
        days, hours = divmod(hours, 24)
        print("\ntotal running time = {}d {}h {}m".format(days, hours, minutes))
    else:
        print("\ntotal running time = {}h {}m".format(hours, minutes))
# Run the scrape only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == "__main__":
    get_all_movies(ratings_url)
Hey! I doubt this is working anymore, but you'd update USER_ID on Line 4 and then run it from the command line.
I have zero knowledge of coding/programming.
I created an account here just to post this.
Can somebody explain to me how to use this? (Explain it to me like I'm 5.)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I signed up to Github to thank you and ask if you could explain how to run it.