Skip to content

Instantly share code, notes, and snippets.

@cdtinney
Last active August 7, 2023 09:50
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save cdtinney/b02160dc34997320af8165e5eb35ffdd to your computer and use it in GitHub Desktop.
Python script for determining the total time you've spent watching movies and TV shows based on your IMDb ratings.
from bs4 import BeautifulSoup
import urllib2
# IMDb user ID whose ratings are scanned. `{{USER_ID_HERE}}` is a template
# placeholder: replace it with your own ID before running. The value must be a
# string, because it is concatenated into the URL on the next line.
user_id = {{USER_ID_HERE}}
# Base URL of that user's ratings list; get_all_movies() appends the page offset.
ratings_url = "http://www.imdb.com/user/" + user_id + "/ratings"
# Running total of counted runtime, in minutes; get_movies() adds to it per page.
total_runtime = 0
def get_ratings_page(url):
    """Download one ratings page and return its body.

    Args:
        url: Full URL of the page to fetch.

    Returns:
        The response body exactly as returned by ``response.read()``.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request fails.
    """
    print("fetching: " + url)
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # urllib2 responses are not context managers on Python 2, so the
        # connection is released explicitly, even if read() raises.
        response.close()
def get_movies(page):
    """Count the rated items on one ratings page and accumulate their runtime.

    Every ``div.list_item`` that contains a ``div.item_description`` counts as
    one item. The first ``span`` inside the description is expected to hold
    the runtime in the form ``(123 mins.)``.

    Side effect: the minutes counted on this page are added to the
    module-level ``total_runtime``.

    Args:
        page: HTML of a ratings page.

    Returns:
        The number of items found on the page, or 0 when there are none
        (``total_runtime`` is left untouched in that case).
    """
    global total_runtime

    soup = BeautifulSoup(page, 'html.parser')
    items = 0
    page_minutes = 0

    # find all list elements
    for movie in soup.find_all("div", {"class": "list_item"}):
        # find the description element within each list item
        desc_element = movie.find("div", {"class": "item_description"})
        if not desc_element:
            continue

        # Count the item even if its runtime turns out to be unusable, so that
        # the caller's paging offset stays correct.
        items += 1

        # find the span element that contains the runtime (in minutes)
        min_element = desc_element.find("span")
        if not min_element:
            continue

        # `.string` is None when the span has no single text child.
        min_text = min_element.string
        if min_text is None:
            continue

        # parse out the actual minutes, e.g. "(123 mins.)" -> "123"
        min_text = min_text.replace("(", "").replace(" mins.)", "")
        try:
            minutes = int(min_text)
        except ValueError:
            # The span held something other than a runtime; skip this item
            # rather than abort the whole scan.
            continue

        # Only runtimes strictly longer than 60 minutes are counted; anything
        # at or below that is treated as a TV show and ignored.
        if minutes > 60:
            page_minutes += minutes

    # no items found
    if items == 0:
        return 0

    # add the time for the page to the total_runtime var
    total_runtime += page_minutes
    return items
def get_all_movies(url):
    """Walk every ratings page under ``url`` and print the total running time.

    Pages are requested as ``url + "?start=<offset>"`` until get_movies()
    reports a page with no items. The total is then read from the
    module-level ``total_runtime`` (minutes) and printed as ``{h}h {m}m``,
    or ``{d}d {h}h {m}m`` once it reaches a full day.

    Args:
        url: Base URL of the ratings list, without a query string.

    Returns:
        None. The result is printed.
    """
    items_seen = 0
    # The first request keeps the original "?start=0".
    start = 0

    # loop until no more pages
    while True:
        page_html = get_ratings_page(url + "?start=" + str(start))
        items_found = get_movies(page_html)
        if items_found == 0:
            break
        items_seen += items_found
        # NOTE(review): assumes `start` is a 1-based item index, so the next
        # page begins at items_seen + 1 -- confirm against the site. The
        # previous code added an extra 1 on *every* page, which drifted the
        # offset and skipped one item per page after the second request.
        start = items_seen + 1

    # calculate hours and minutes; divmod keeps integer results on Python 2 and 3
    hours, minutes = divmod(total_runtime, 60)

    # calculate days (!!!) -- exactly 24 hours is already one full day
    if hours >= 24:
        days, hours = divmod(hours, 24)
        print("\ntotal running time = {}d {}h {}m".format(days, hours, minutes))
    else:
        print("\ntotal running time = {}h {}m".format(hours, minutes))
# Script entry point: runs as soon as the file is executed, scanning every
# ratings page for the configured user and printing the total.
get_all_movies(ratings_url)
@adhamsharkawi
Copy link

I signed up to Github to thank you and ask if you could explain how to run it.

@cdtinney
Copy link
Author

cdtinney commented Feb 8, 2022

Hey! I doubt this is working anymore, but you'd update USER_ID on Line 4 and then run it from the command line.

@rohiljain9826
Copy link

I have zero knowledge of coding/programming.
I created an account here just to post this.
Can somebody explain to me how to use this (explain it to me like I'm 5)?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment