Skip to content

Instantly share code, notes, and snippets.

@CatherineH
Last active December 5, 2023 17:30
Show Gist options
  • Save CatherineH/6b9e9edd7bf8a29564e5d0f62e22d24a to your computer and use it in GitHub Desktop.
Save CatherineH/6b9e9edd7bf8a29564e5d0f62e22d24a to your computer and use it in GitHub Desktop.
import pickle
import re
import os
from time import sleep
import urllib.parse
from pandas import read_csv
from selenium.webdriver.common.by import By
from selenium import webdriver
def name_to_time(game_name):
    """Look up how many hours a game takes to beat on howlongtobeat.com.

    Opens the site's search URL for *game_name* in a fresh Chrome session
    and scans every ``<div>`` on the page for text that starts with
    ``"<N> Hours"``. The first such div is assumed to belong to the correct
    search result.

    Args:
        game_name: Title of the game to search for.

    Returns:
        The whole number of hours from the first matching div. A ``½`` in
        the figure is dropped, so ``"12½ Hours"`` yields ``12`` and a bare
        ``"½ Hours"`` yields ``0``. Returns ``None`` (after printing a
        message) when no div matches.
    """
    query = urllib.parse.quote(game_name)
    query_url = f"https://howlongtobeat.com/?q={query}"
    # Compiled once, outside the loop over every div on the page.
    hours_pattern = re.compile(r"(^[0-9½]+) Hours")
    driver = webdriver.Chrome()
    try:
        driver.get(query_url)
        for div in driver.find_elements(By.TAG_NAME, "div"):
            match = hours_pattern.match(div.text)
            if match:
                # assume the first result is correct
                whole_hours = match.group(1).replace("½", "")
                # A bare "½" leaves an empty string, and int("") would raise
                # ValueError; treat it as zero whole hours instead.
                return int(whole_hours) if whole_hours else 0
    finally:
        # Always shut the browser down, including on the early return above;
        # otherwise every lookup leaves a Chrome/chromedriver process behind.
        driver.quit()
    print(f"could not find: {game_name}")
    return None
if __name__ == "__main__":
    # The games list is a CSV with "Name" and "Hidden" columns. I generate mine by loading my games
    # libraries into https://playnite.link/, hiding the ones I've completed or don't want to play,
    # and then using the "library exporter advanced" Add-on:
    # https://playnite.link/addons.html#LibraryExporter_54bf64c6-c453-4cbc-92f8-4960b56f930e
    games_list = read_csv("games_list.csv")
    # Element-wise pandas comparison: keep only the rows that are not hidden.
    games_list = games_list[games_list["Hidden"] == False]  # noqa: E712

    # Results are cached on disk as {game name: hours} so that an interrupted
    # run can resume without re-querying games that were already looked up.
    cache_filename = "cached_data.pkl"
    cached_data = {}
    if os.path.exists(cache_filename):
        # NOTE: unpickling can execute arbitrary code; only load a cache file
        # that this script wrote itself.
        with open(cache_filename, "rb") as cache_file:
            cached_data = pickle.load(cache_file)
    print(cached_data)

    for game_name in sorted(games_list["Name"]):
        if game_name in cached_data:
            # skip this, we've already read it
            continue
        print(game_name)
        time_to_beat = name_to_time(game_name)
        # Compare against None so a legitimate 0-hour result is cached too
        # instead of being looked up again on every run.
        if time_to_beat is not None:
            cached_data[game_name] = time_to_beat
            # Persist after every successful lookup; the context manager
            # guarantees the file is flushed and closed before continuing.
            with open(cache_filename, "wb") as cache_file:
                pickle.dump(cached_data, cache_file)
        sleep(60)  # time to beat throttles after about 5 page loads without a break

    # NOTE(review): this sums every cached entry, including games that are no
    # longer present (or are now hidden) in games_list.csv - confirm intended.
    print(f"total play time: {sum(cached_data.values())} hours")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment