# benosteen/BGG_weights.py

## BGG_weights.py
#!/usr/bin/env python

# BoardGameGeek username whose rated collection is requested.
USER = "benosteen"
# Local JSON file used to keep fetched collection data between runs.
CACHE_FILE = "data.json"

# URL templates for the BGG XML API. The "%s" placeholder is filled with the
# username (rating_t) or with a game's object id (weight_t).
rating_t = "http://www.boardgamegeek.com/xmlapi/collection/%s?rated=1"
weight_t = "http://www.boardgamegeek.com/xmlapi/boardgame/%s?stats=1"

import requests
from xml.etree import ElementTree as ET
import os

# stats stuff
import math

def average(s):
  """Return the arithmetic mean of the numbers in ``s`` as a float.

  ``s`` may be any iterable, including a one-shot iterator such as the
  result of ``map``. It is copied into a list first because iterators
  do not support ``len()``.

  Raises:
    ZeroDivisionError: if ``s`` contains no values.
  """
  values = list(s)
  # "* 1.0" forces float division even where "/" on ints would truncate.
  return sum(values) * 1.0 / len(values)
def variance(s):
  """Return the squared deviation from the mean for every value in ``s``.

  Despite the name, this is not a single variance figure: it is the list
  of per-value squared deviations. Averaging that list (as std_deviation
  does) gives the population variance.

  ``s`` may be any iterable. The result is always a real list, so callers
  can take its length and iterate it more than once.

  Raises:
    ZeroDivisionError: if ``s`` contains no values (raised by average).
  """
  # Materialise once: the values are needed for the mean and again below.
  values = list(s)
  avg = average(values)
  return [(x - avg) ** 2 for x in values]
def std_deviation(s):
  """Return the square root of the mean of variance(s)."""
  squared_deviations = variance(s)
  mean_squared_deviation = average(squared_deviations)
  return math.sqrt(mean_squared_deviation)

# BGG stuff
def get_weight(objid):
  """Fetch the average weight BGG reports for one game.

  Args:
    objid: game object id, substituted into the ``weight_t`` URL template.

  Returns:
    The 'statistics/ratings/averageweight' value of the first element in
    the response, as a float. Returns 0 when the response status is not
    200, or when the document cannot be parsed or lacks a usable value.
  """
  gw = requests.get(weight_t % objid)
  if gw.status_code != 200:
    return 0
  try:
    # Parsing happens inside the try so malformed XML is also treated as
    # "no weight" instead of aborting the whole run.
    game_doc = ET.fromstring(gw.content)
    return float(game_doc[0].find('statistics/ratings/averageweight').text)
  except (ET.ParseError, IndexError, AttributeError, TypeError, ValueError):
    # ParseError: bad XML; IndexError: document has no child element;
    # AttributeError: averageweight element missing (find returned None);
    # TypeError / ValueError: element text is empty or not a number.
    return 0

def get_data():
  """Download the rated collection of USER and look up each game's weight.

  Returns:
    A list with one dict per item in the collection response, holding:
      'objectid' - the item's objectid attribute
      'rating'   - the 'value' attribute of stats/rating, as a float
      'name'     - text of the item's first child element
      'weight'   - result of get_weight() for the item
      'plays'    - the numplays text as a float, or 0 if missing/unreadable
    An empty list is returned when the collection request does not answer
    with HTTP 200.
  """
  r = requests.get(rating_t % USER)
  if r.status_code != 200:
    print("Unable to get the collection list for %s" % USER)
    return []

  print("Got Collection list for %s" % USER)
  collection_list = ET.fromstring(r.content)
  items = []
  for item in collection_list:
    objectid = item.attrib['objectid']
    data = {'objectid': objectid,
            'rating': float(item.find('stats/rating').attrib['value']),
            'name': item[0].text,
            'weight': get_weight(objectid),
            }
    try:
      data['plays'] = float(item.find('numplays').text)
    except (AttributeError, TypeError, ValueError):
      # numplays element missing, empty, or not numeric: count as unplayed.
      data['plays'] = 0
    print("Got %s" % data['name'])
    items.append(data)
  return items

def _print_summary(values):
  """Print the average and standard deviation of ``values``, if any."""
  if not values:
    # average() cannot handle an empty list; say so instead of crashing.
    print("(no data)")
    return
  print("Ave. weight: %s" % average(values))
  print("Std deviation: %s" % std_deviation(values))


def main():
  """Load the collection (from cache or from BGG) and print weight statistics.

  The collection is read from CACHE_FILE when that file exists. Otherwise
  it is fetched with get_data() and, if anything came back, written to
  CACHE_FILE. The report covers raw weights, weight x rating, and
  weight x rating x log2(plays), followed by two point lists for plotting.
  """
  import json

  if os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, "r") as fp:
      d = json.load(fp)
  else:
    d = get_data()
    if d:
      # Only a non-empty result is cached, so a failed fetch is retried
      # on the next run.
      with open(CACHE_FILE, "w") as output:
        json.dump(d, output)

  if not d:
    print("No rated games available for %s" % USER)
    return

  # Real lists (not lazy map objects): they are measured with len() and
  # read several times below.
  weights = [item['weight'] for item in d]
  # Products equal to 0 are dropped from the weight x rating statistics.
  weighted_weights = [ww for ww in
                      (item['weight'] * item['rating'] for item in d) if ww]

  # max() returns the first item holding the maximum.
  most_weighty = max(d, key=lambda item: item['weight'])
  most_plays = max(d, key=lambda item: item['plays'])

  play_weighted_weights = []
  plot_points = []
  weighted_points = []
  for item in d:
    # number of plays can be 0, which is bad for log of course ;)
    if item['plays'] == 0:
      continue
    ww = item['weight'] * item['rating']
    play_weighted_weights.append(ww * math.log(item['plays'], 2))
    weighted_points.append((ww, item['plays']))
    plot_points.append((item['weight'], item['plays']))

  avg_weight = average(weights)
  weight_spread = std_deviation(weights)

  print("How does '%s' like their gaming?" % USER)
  print("Weightiest game rated: %s at %s" % (most_weighty['name'], most_weighty['weight']))
  print("Most played game: %s with a weight of %s" % (most_plays['name'], most_plays['weight']))
  print("\nBy weights of rated games:")
  print("Ave. weight: %s" % avg_weight)
  print("Std deviation: %s" % weight_spread)
  print("Majority of games (~68%%) are between %s and %s in weight" % (avg_weight - weight_spread, avg_weight + weight_spread))
  print("\nBy weight x rating:")
  _print_summary(weighted_weights)
  print("\nBy weight x rating x log2(number of plays):")
  _print_summary(play_weighted_weights)
  print("\n\nWeights (x) vs number of plays (y):")
  print("(Paste into something like http://fooplot.com - add a 'Points' source)")
  print("\n")
  for point in plot_points:
    print("%s,%s" % point)

  print("\n\nWeighted weights (x) vs number of plays (y):")
  print("(Paste into something like http://fooplot.com - add a 'Points' source)")
  print("\n")
  for point in weighted_points:
    print("%s,%s" % point)


if __name__ == "__main__":
  main()
	#!/usr/bin/env python

	# BoardGameGeek username whose rated collection is requested.
	USER = "benosteen"
	# Local JSON file used to keep fetched collection data between runs.
	CACHE_FILE = "data.json"

	# URL templates for the BGG XML API. The "%s" placeholder is filled with the
	# username (rating_t) or with a game's object id (weight_t).
	rating_t = "http://www.boardgamegeek.com/xmlapi/collection/%s?rated=1"
	weight_t = "http://www.boardgamegeek.com/xmlapi/boardgame/%s?stats=1"

	import requests
	from xml.etree import ElementTree as ET
	import os

	# stats stuff
	import math

	def average(s):
	  """Return the arithmetic mean of the numbers in ``s`` as a float.

	  ``s`` may be any iterable, including a one-shot iterator such as the
	  result of ``map``. It is copied into a list first because iterators
	  do not support ``len()``.

	  Raises:
	    ZeroDivisionError: if ``s`` contains no values.
	  """
	  values = list(s)
	  # "* 1.0" forces float division even where "/" on ints would truncate.
	  return sum(values) * 1.0 / len(values)
	def variance(s):
	  """Return the squared deviation from the mean for every value in ``s``.

	  Despite the name, this is not a single variance figure: it is the list
	  of per-value squared deviations. Averaging that list (as std_deviation
	  does) gives the population variance.

	  ``s`` may be any iterable. The result is always a real list, so callers
	  can take its length and iterate it more than once.

	  Raises:
	    ZeroDivisionError: if ``s`` contains no values (raised by average).
	  """
	  # Materialise once: the values are needed for the mean and again below.
	  values = list(s)
	  avg = average(values)
	  return [(x - avg) ** 2 for x in values]
	def std_deviation(s):
	  """Return the square root of the mean of variance(s)."""
	  return math.sqrt(average(variance(s)))

	# BGG stuff
	def get_weight(objid):
	  """Fetch the average weight BGG reports for one game.

	  Args:
	    objid: game object id, substituted into the ``weight_t`` URL template.

	  Returns:
	    The 'statistics/ratings/averageweight' value of the first element in
	    the response, as a float. Returns 0 when the response status is not
	    200, or when the document cannot be parsed or lacks a usable value.
	  """
	  gw = requests.get(weight_t % objid)
	  if gw.status_code != 200:
	    return 0
	  try:
	    # Parsing happens inside the try so malformed XML is also treated as
	    # "no weight" instead of aborting the whole run.
	    game_doc = ET.fromstring(gw.content)
	    return float(game_doc[0].find('statistics/ratings/averageweight').text)
	  except (ET.ParseError, IndexError, AttributeError, TypeError, ValueError):
	    # ParseError: bad XML; IndexError: document has no child element;
	    # AttributeError: averageweight element missing (find returned None);
	    # TypeError / ValueError: element text is empty or not a number.
	    return 0

	def get_data():
	  """Download the rated collection of USER and look up each game's weight.

	  Returns:
	    A list with one dict per item in the collection response, holding:
	      'objectid' - the item's objectid attribute
	      'rating'   - the 'value' attribute of stats/rating, as a float
	      'name'     - text of the item's first child element
	      'weight'   - result of get_weight() for the item
	      'plays'    - the numplays text as a float, or 0 if missing/unreadable
	    An empty list is returned when the collection request does not answer
	    with HTTP 200.
	  """
	  r = requests.get(rating_t % USER)
	  if r.status_code != 200:
	    print("Unable to get the collection list for %s" % USER)
	    return []

	  print("Got Collection list for %s" % USER)
	  collection_list = ET.fromstring(r.content)
	  items = []
	  for item in collection_list:
	    objectid = item.attrib['objectid']
	    data = {'objectid': objectid,
	            'rating': float(item.find('stats/rating').attrib['value']),
	            'name': item[0].text,
	            'weight': get_weight(objectid),
	            }
	    try:
	      data['plays'] = float(item.find('numplays').text)
	    except (AttributeError, TypeError, ValueError):
	      # numplays element missing, empty, or not numeric: count as unplayed.
	      data['plays'] = 0
	    print("Got %s" % data['name'])
	    items.append(data)
	  return items

	def _print_summary(values):
	  """Print the average and standard deviation of ``values``, if any."""
	  if not values:
	    # average() cannot handle an empty list; say so instead of crashing.
	    print("(no data)")
	    return
	  print("Ave. weight: %s" % average(values))
	  print("Std deviation: %s" % std_deviation(values))


	def main():
	  """Load the collection (from cache or from BGG) and print weight statistics.

	  The collection is read from CACHE_FILE when that file exists. Otherwise
	  it is fetched with get_data() and, if anything came back, written to
	  CACHE_FILE. The report covers raw weights, weight x rating, and
	  weight x rating x log2(plays), followed by two point lists for plotting.
	  """
	  import json

	  if os.path.exists(CACHE_FILE):
	    with open(CACHE_FILE, "r") as fp:
	      d = json.load(fp)
	  else:
	    d = get_data()
	    if d:
	      # Only a non-empty result is cached, so a failed fetch is retried
	      # on the next run.
	      with open(CACHE_FILE, "w") as output:
	        json.dump(d, output)

	  if not d:
	    print("No rated games available for %s" % USER)
	    return

	  # Real lists (not lazy map objects): they are measured with len() and
	  # read several times below.
	  weights = [item['weight'] for item in d]
	  # Products equal to 0 are dropped from the weight x rating statistics.
	  weighted_weights = [ww for ww in
	                      (item['weight'] * item['rating'] for item in d) if ww]

	  # max() returns the first item holding the maximum.
	  most_weighty = max(d, key=lambda item: item['weight'])
	  most_plays = max(d, key=lambda item: item['plays'])

	  play_weighted_weights = []
	  plot_points = []
	  weighted_points = []
	  for item in d:
	    # number of plays can be 0, which is bad for log of course ;)
	    if item['plays'] == 0:
	      continue
	    ww = item['weight'] * item['rating']
	    play_weighted_weights.append(ww * math.log(item['plays'], 2))
	    weighted_points.append((ww, item['plays']))
	    plot_points.append((item['weight'], item['plays']))

	  avg_weight = average(weights)
	  weight_spread = std_deviation(weights)

	  print("How does '%s' like their gaming?" % USER)
	  print("Weightiest game rated: %s at %s" % (most_weighty['name'], most_weighty['weight']))
	  print("Most played game: %s with a weight of %s" % (most_plays['name'], most_plays['weight']))
	  print("\nBy weights of rated games:")
	  print("Ave. weight: %s" % avg_weight)
	  print("Std deviation: %s" % weight_spread)
	  print("Majority of games (~68%%) are between %s and %s in weight" % (avg_weight - weight_spread, avg_weight + weight_spread))
	  print("\nBy weight x rating:")
	  _print_summary(weighted_weights)
	  print("\nBy weight x rating x log2(number of plays):")
	  _print_summary(play_weighted_weights)
	  print("\n\nWeights (x) vs number of plays (y):")
	  print("(Paste into something like http://fooplot.com - add a 'Points' source)")
	  print("\n")
	  for point in plot_points:
	    print("%s,%s" % point)

	  print("\n\nWeighted weights (x) vs number of plays (y):")
	  print("(Paste into something like http://fooplot.com - add a 'Points' source)")
	  print("\n")
	  for point in weighted_points:
	    print("%s,%s" % point)


	if __name__ == "__main__":
	  main()