Skip to content

Instantly share code, notes, and snippets.

@allisontharp
Created June 30, 2016 01:38
Show Gist options
  • Save allisontharp/7dbcbf6878c38aa5e1d6633853d74ba9 to your computer and use it in GitHub Desktop.
Save allisontharp/7dbcbf6878c38aa5e1d6633853d74ba9 to your computer and use it in GitHub Desktop.
from xml.dom.minidom import parse
import matplotlib.pyplot as plt
import sqlite3, urllib2, unicodedata, time
import numpy as np
import sys
def strip_accents(s):
return ''.join(c for c in unicodedata.normalize('NFD', s)
if unicodedata.category(c) != 'Mn')
def addgame(db_loc, bggid):
query = "SELECT bggid FROM games WHERE bggid = {bi}".format(bi = int(bggid))
c.execute(query)
try:
bggid = int(c.fetchone()[0])
except TypeError:
gm = url + 'thing?id=' + str(bggid) + "&stats=1"
gmdom = parse(urllib2.urlopen(gm))
minplayers = gmdom.getElementsByTagName('minplayers')
minplayers = minplayers[0].attributes['value'].value
if not minplayers:
minplayers = 'NULL'
maxplayers = gmdom.getElementsByTagName('maxplayers')
maxplayers = maxplayers[0].attributes['value'].value
if not maxplayers:
maxplayers = 'NULL'
nm = gmdom.getElementsByTagName('name')
nm = nm[0].attributes['value'].value
try:
nm = nm.translate(None, "'&-:")
except TypeError:
nm = unicode(nm)
nm = strip_accents(nm)
nm = nm.encode("ascii", "ignore")
nm = nm.replace("'", "")
query = "INSERT INTO games VALUES ({bi}, '{nm}', {mn}, {mx})".format(bi = bggid, nm = nm, mn = minplayers, mx = maxplayers)
c.execute(query)
xmlparam = 'id='
url = 'https://boardgamegeek.com/xmlapi2/'
# TODO check to see if games table exists
# TODO check to see if plays table exists
print "Choose:"
print "(1) Game Name"
print "(2) BGGID"
print "(3) Username"
choice = raw_input()
if choice == "1":
db_loc = 'boardgamecollection.sqlite'
conn = sqlite3.connect(db_loc)
c = conn.cursor()
print "Game name:"
name = raw_input()
query = "SELECT bggid FROM games WHERE name like '%{nm}%'".format(nm = name)
c.execute(query)
bggid = int(c.fetchone()[0])
print bggid
conn.close()
#db_loc = 'boardgameplays.sqlite'
db_loc = 'bgplays.sqlite'
conn = sqlite3.connect(db_loc)
c = conn.cursor()
addgame(db_loc, bggid)
xmlparam = 'id='
xmlval = bggid
elif choice == "2":
print "BGGID:"
bggid = str(raw_input())
db_loc = 'boardgameplays.sqlite'
conn = sqlite3.connect(db_loc)
c = conn.cursor()
addgame(db_loc, bggid)
xmlparam = 'id='
xmlval = bggid
elif choice == "3":
print "Username:"
username = 'mad4hatter'
print username
db_loc = 'userplays.sqlite'
conn = sqlite3.connect(db_loc)
c = conn.cursor()
xmlparam = 'username='
xmlval = username
else:
sys.exit("Goodbye!")
pg = 1
xml = url + 'plays?' + xmlparam + str(xmlval) + "&page=" + str(pg)
print xml
dom = parse(urllib2.urlopen(xml))
nodes = dom.getElementsByTagName('play')
while nodes and pg < 200:
for i in range(len(nodes)):
count = 0
nNode = nodes.item(i)
if nNode.nodeType == 1: # it is an element_node
dur = int(nNode.attributes['length'].value)
if dur > 0:
id = nNode.attributes['id'].value
item = nNode.getElementsByTagName('item')
bggid = int(item[0].attributes['objectid'].value)
players = nNode.getElementsByTagName('player')
for items in players:
try:
test = items.attributes['name'].value
count += 1
except IndexError:
pass
if count > 0:
query = "SELECT numplayers FROM plays, games on plays.bggid = games.bggid WHERE playid = {pi}".format(pi = id)
c.execute(query)
try:
test = int(c.fetchone()[0])
except TypeError:
try:
if choice=="3":
addgame(db_loc, bggid)
query = "INSERT INTO plays VALUES ({pi}, {bi}, {dur}, {np})".format(pi = id, bi = bggid, dur = dur, np = count)
conn.execute(query)
print "id: " + str(id) + " dur: " + str(dur) + " players: " + str(count)
except sqlite3.IntegrityError:
pass
pg += 1
#time.sleep(0.25)
xml = url + 'plays?' + xmlparam + str(xmlval) + "&page=" + str(pg)
print xml
dom = parse(urllib2.urlopen(xml))
nodes = dom.getElementsByTagName('play')
conn.commit()
query = "SELECT plays.numplayers, avg(plays.duration), count(plays.playid) \
FROM plays, games on games.bggid = plays.bggid \
WHERE plays.bggid = {bi} and numplayers >= games.minplayers and numplayers <= games.maxplayers and plays.duration < 10000 \
GROUP BY numplayers ORDER BY numplayers".format(bi = bggid)
c.execute(query)
result = c.fetchall()
numplayers = [int(i[0]) for i in result]
duration = [int(i[1]) for i in result]
plays = [int(i[2]) for i in result]
print numplayers
print duration
print plays
query = "SELECT name FROM games WHERE bggid = {bi}".format(bi = bggid)
c.execute(query)
name = c.fetchone()[0]
fig, ax = plt.subplots()
rects = ax.bar(numplayers, duration, color='DarkBlue', align='center')
labels = ["%d" % i for i in plays]
print labels
ax.set_title("%s : Average Duration (w/ Sample Size)" %name, y = 1.03)
ax.set_xlabel("Number of Players")
ax.set_ylabel("Average Duration (minutes)")
for rect, label in zip(rects, labels):
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width()/2, 5, label, ha='center', va='bottom', color = 'white') # height*0.8
plt.show()
#plt.plot(numplayers, duration)
#plt.show()
print "Completed!"
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment