# Riebart/steam_library_completion_time.sh

## steam_library_completion_time.sh
#!/bin/bash

# DEPENDENCIES:
#  - wget
#  - jq: https://stedolan.github.io/jq/
#  - pup: https://github.com/EricChiang/pup
#  - python-Levenshtein: https://pypi.org/project/python-Levenshtein/
#  - You'll need a Steam API key, which you can get from https://steamcommunity.com/dev/apikey
#    and which must be exported as STEAM_API_KEY.

# USAGE:
#  STEAM_API_KEY=<key> ./steam_library_completion_time.sh <steam_user_id>
#
#  This pulls in the full steam app list from the API, your user's game list via the Steam API,
#  and then parses HTML responses from the HLTB search endpoint using pup into JSON to get
#  completion times. Levenshtein matching is used to resolve the (very common) case where
#  the Steam game name and the HLTB game name aren't exactly matching.
#
#  This script processes games at a rate of 1 to 2 per second.
#
# OUTPUT (written to the current directory):
#  - complete_hltb_results_<steam_user_id>.jsonl : one [steam_app, hltb_results] record per game
#  - selected_hltb_results_<steam_user_id>.jsonl : one [steam_app, chosen_hltb_match] record per game
#  - selected_hltb_results_<steam_user_id>.csv   : time value, Steam game name

# Print an error to stderr and abort.
die() {
    printf '%s: %s\n' "${0##*/}" "$*" >&2
    exit 1
}

# `set -e` is deliberately NOT enabled: a failed lookup for a single game must
# not abort the whole run.
STEAM_USER_ID="${1:?usage: $0 <steam_user_id>   (STEAM_API_KEY must be set in the environment)}"
: "${STEAM_API_KEY:?STEAM_API_KEY must be set (get one from https://steamcommunity.com/dev/apikey)}"

for tool in wget jq pup python3; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
done
python3 -c 'import Levenshtein' 2>/dev/null || die "python module not found: Levenshtein"

# Owned app ids as a JSON object {"<appid>": true, ...} so that membership can
# be tested with a single lookup per app. `[]?` keeps jq quiet when the
# response carries no game list.
ownedAppIds=$(
    wget -qO- "https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?key=${STEAM_API_KEY}&steamid=${STEAM_USER_ID}&format=json" |
    jq -c '[.response.games[]?.appid | {key: (tostring), value: true}] | from_entries')

if [[ -z "$ownedAppIds" || "$ownedAppIds" == '{}' ]]; then
    die "no owned games returned for Steam user ${STEAM_USER_ID}"
fi

# The owned-id table is handed to jq through a file descriptor rather than on
# the command line so that large libraries cannot exceed the argument limit.
wget -qO- 'https://api.steampowered.com/ISteamApps/GetAppList/v2/' |
    jq -c --slurpfile owned <(printf '%s\n' "$ownedAppIds") \
        '.applist.apps[] | (.appid | tostring) as $id | select($owned[0] | has($id))' |
    while IFS= read -r game   # -r: keep JSON backslash escapes intact
    do
        (
            printf '%s\n' "$game"
            appName=$(jq -r '.name' <<<"$game")
            # Progress indicator.
            printf '%s\n' "$appName" >&2
            # Percent-encode the name so characters such as & + % = survive
            # inside the form-encoded POST body.
            query=$(jq -rn --arg q "$appName" '$q | @uri')
            wget -qO- --post-data \
                "queryString=${query}&t=games&sorthead=popular&sortd=Normal%20Order&length_type=main" \
                'https://howlongtobeat.com/search_results?page=1' |
            pup 'li .search_list_details json{}' |
            jq 'map(
                [
                    .children[0].children[0].text,
                    .children[0].children[0].href,
                    [(.children[1].children[0].children | (if . == null then null else .[].text end))]])'
        ) | jq -c --slurp '.'   # -> [steam_app, [[title, href, [detail texts]], ...]]
    done |
    tee "complete_hltb_results_${STEAM_USER_ID}.jsonl" |
    python3 -c '
import json, sys, Levenshtein

# A best match at or beyond this edit distance is treated as unreliable, in
# which case the first search result is used instead.
MAX_DISTANCE = 5

for gameline in sys.stdin:
  gameline = gameline.strip()
  if not gameline:
    continue
  game = json.loads(gameline)
  # Each record is [steam_app, hltb_results]. The second element is missing or
  # empty when the search produced nothing usable.
  if len(game) < 2 or not game[1]:
    continue
  steamName = game[0]["name"]
  if not isinstance(steamName, str):
    continue
  # Ignore results whose title could not be extracted (null), so the distance
  # computation only ever sees strings.
  candidates = [c for c in game[1] if isinstance(c[0], str)]
  if not candidates:
    continue
  # Find the game where the Levenshtein distance between the Steam game name
  # and the HLTB game name is minimal (first one wins on ties).
  distances = [Levenshtein.distance(steamName, c[0]) for c in candidates]
  bestDistance = min(distances)
  bestMatch = candidates[distances.index(bestDistance)]
  if bestDistance >= MAX_DISTANCE:
    bestMatch = candidates[0]
  # Rewrite the one-half glyph as ".5" so the times read as plain decimals.
  details = [p.replace("\u00bd", ".5") for p in bestMatch[2] if p is not None]
  print(json.dumps([game[0], [bestMatch[0], bestMatch[1], details]]))
' |
    tee "selected_hltb_results_${STEAM_USER_ID}.jsonl" |
    # CSV columns: first word of the second detail string (null when there are
    # no details or no second string), then the Steam game name.
    jq -r '[(.[1][2] | if . == [] then null else ((.[1] // "") | split(" "))[0] end), .[0].name] | @csv' \
        > "selected_hltb_results_${STEAM_USER_ID}.csv"