Skip to content

Instantly share code, notes, and snippets.

@allisontharp
Last active March 29, 2022 23:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save allisontharp/38466fc801d7a9dca96de3b3e5f3213f to your computer and use it in GitHub Desktop.
Save allisontharp/38466fc801d7a9dca96de3b3e5f3213f to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Mon May 09 19:58:56 2016
@author: Allison
In SQLite, to select for a certain month, do this:
select * from collection where strftime('%m', date) = strftime('%m', julianday('2016-03-01'));
select games.name, collection.owned from games join collection on games.bggid = collection.bggid order by owned desc limit 10;
0_3: added support for:
- average weight
- number wanting
- number trading
- number wishing
- average rating
0_4: added support for:
- ability to pickup where it left off it it errors out
"""
# TODO print line number that error occurred on
from datetime import datetime, date
from xml.dom import minidom
import sqlite3, unicodedata, urllib2, time, sys
db_loc = 'boardgamecollection_xml.sqlite' # name of database
startTime = datetime.now() # Used to calculate total time later
def try_toplevel(tag, dom):
try:
#dom = minidom.parse(urllib2.urlopen(url))
out = dom.getElementsByTagName(tag)
except urllib2.HTTPError as e:
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
time.sleep(10)
dom = minidom.parse(urllib2.urlopen(url))
out = dom.getElementsByTagName(tag)
return out
def strip_accents(s):
return ''.join(c for c in unicodedata.normalize('NFD', s)
if unicodedata.category(c) != 'Mn')
def owned_ranked(bggid):
url = 'https://www.boardgamegeek.com/xmlapi2/thing?stats=1&type=boardgame,boardgameexpansion&id='
url += str(bggid)
dom = minidom.parse(urllib2.urlopen(url))
# number of users owning
out = try_toplevel('owned', dom)
try:
owned = int(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
owned = -1
#err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
#f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
# board game rank
rank = dom.getElementsByTagName('rank')
try:
rank = int(rank[0].attributes['value'].value) # change this to look at friendlyname="Board Game Rank"
except (ValueError, IndexError) as e: # TODO: I think IndexError happens when the BGGID is removed. Check this and do something about it
rank = 'NULL'
# board game weight
out = try_toplevel('averageweight', dom)
try:
weight = float(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
weight = -1
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
# Number of users trading
out = try_toplevel('trading', dom)
try:
trading = int(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
trading = -1
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
# Number of users wishing
out = try_toplevel('wishing', dom)
try:
wishing = int(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
wishing = -1
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
# Number of users wanting
out = try_toplevel('wanting', dom)
try:
wanting = int(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
wanting = -1
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
# Average Rating
out = try_toplevel('average', dom)
try:
rating = float(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
rating = -1
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
# BGG rating
out = try_toplevel('bayesaverage', dom)
try:
bggrating = float(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
bggrating = -1
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
# Number of Ratings
out = try_toplevel('usersrated', dom)
try:
numrating = float(out[0].attributes['value'].value)
except (ValueError, IndexError) as e:
numrating = -1
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
return [owned,rank,weight,trading,wishing,wanting,rating,bggrating,numrating, dom]
def add_groups(lst, table, tableid, junctiontable):
for name in lst:
name = translate(name)
#print name
query = "SELECT " + str(tableid) + " FROM " + str(table) + " WHERE name = '{nm}'".format(nm = name)
c.execute(query)
try:
groupid = int(c.fetchone()[0])
except TypeError as e:
query = "INSERT INTO " + str(table) + " VALUES (NULL, '{nm}')".format(nm = name)
c.execute(query)
#print " Added " + str(name) + " to " + str(table) + " table."
query = "SELECT " + str(tableid) + " FROM " + str(table) + " WHERE name = '{nm}'".format(nm = name)
c.execute(query)
groupid = int(c.fetchone()[0])
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
try:
query = "INSERT INTO " + str(junctiontable) + " VALUES ({bg}, {gi})".format(bg = bggid, gi = groupid)
c.execute(query)
#print " Added to " + str(junctiontable) + " table"
except sqlite3.IntegrityError as e:
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
pass
def translate(name):
try:
name = name.translate(None, "'&-:")
except TypeError as e:
name = unicode(name)
name = strip_accents(name)
name = name.encode("ascii", "ignore")
name = name.replace("'", "")
return name
url = 'https://www.boardgamegeek.com/xmlapi2/thing?stats=1&type=boardgame,boardgameexpansion&id='
# Connect to SQLite Database
conn = sqlite3.connect(db_loc)
c = conn.cursor()
f = open('pythonout.txt', 'w')
f.write(str(startTime) + '\n')
# Check to see if games table exists
c.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'games'")
if not c.fetchall(): # if the games table doesn't exist, create it
print "Creating Games Table"
c.execute("CREATE TABLE games(bggid INTEGER PRIMARY KEY, name VARCHAR(50) NOT NULL, year INTEGER, minplayers INTEGER, maxplayers INTEGER)")
# Check to see if collection table exists
c.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'collection'")
if not c.fetchall():
print "Creating Collection Table"
c.execute("CREATE TABLE collection(bggid INTEGER, date datetime default current_date, owned INTEGER NOT NULL, rank INTEGER, \
weight REAL, trading INT, wishing INT, wanting INT, rating REAL, bggrating REAL, numrating INT, PRIMARY KEY(bggid, date))")
# TODO add mechanic table check
# TODO add gamemech table check
# TODO add publisher table check
# TODO add gamepub table check
# # Get list of bggids
# c.execute("SELECT bggid FROM games")
# idlist = c.fetchall()
# idlist = [int(i[0]) for i in idlist] # convert list of tuples to list of INTs
print 'before'
if __name__ == "__main__":
if len(sys.argv) == 1:
c.execute("SELECT bggid FROM games")
idlist = c.fetchall()
idlist = [int(i[0]) for i in idlist] # convert list of tuples to list of INTs
elif str(sys.argv[1]) == 'restart':
c.execute("SELECT bggid FROM games")
idlist = c.fetchall()
idlist = [int(i[0]) for i in idlist] # convert list of tuples to list of INTs
query = "SELECT max(bggid) FROM collection where date like '{dt}%'".format(dt = date.today().strftime('%Y-%m'))
c.execute(query)
maxid = c.fetchone()[0]
idlist = idlist[idlist.index(maxid)+1:]
# print ("max")
# print maxid
# TODO add total number of plays
# for bggid in idlist[idlist.index(24200)+1:]:
for bggid in idlist:
time.sleep(0.25)
#time.sleep(0.5)
[owned,rank,weight,trading,wishing,wanting,rating,bggrating,numrating, dom] = owned_ranked(bggid)
query = "INSERT INTO collection(bggid, owned, rank, weight, trading, wishing, wanting, rating, bggrating, numrating) VALUES ({bi}, {ow}, {rk}, {wt}, {td}, {wish}, {want}, {rt}, {bggr}, {nr})"\
.format(bi = bggid, ow = owned, rk = rank, wt = weight, td = trading, wish = wishing, want = wanting, rt = rating, bggr = bggrating, nr = numrating)
c.execute(query)
# # for june only:
# link = dom.getElementsByTagName('link')
# families = [items.attributes['value'].value for items in link if items.attributes['type'].value == "boardgamefamily"]
# add_groups(families, "families", "famid", "famgame")
today = date.today()
today = today.strftime('%Y-%m')
# query = "SELECT count(bggid)*1.0/84000 FROM collection WHERE bggid <= {id} and date > '2016-06-30'".format(id = bggid)
query = "SELECT count(bggid)*1.0/84000 FROM collection WHERE date like '{dt}%'".format(dt = date.today().strftime('%Y-%m'))
c.execute(query)
tot = c.fetchone()[0]
print str(bggid) + ": " + "%.2f" %(tot*100) + "%"
if bggid%100 == 0:
conn.commit()
check = 0
while check < 100:
bggid += 1
print bggid
url ='https://www.boardgamegeek.com/xmlapi2/thing?stats=1&type=boardgame,boardgameexpansion&id='
url += str(bggid)
time.sleep(0.5)
dom = minidom.parse(urllib2.urlopen(url))
item = dom.getElementsByTagName('item')
bgcheck = [items.attributes['id'].value for items in item if items.attributes['type'].value == "boardgame"]
name = dom.getElementsByTagName('name')
if name and bgcheck: # it is a boardgame or expansion
check = 0
name = name[0].attributes['value'].value
name = translate(name)
#print name
[owned,rank,weight,trading,wishing,wanting,rating,bggrating, numrating, dom] = owned_ranked(bggid)
year = dom.getElementsByTagName('yearpublished')
try:
year = int(year[0].attributes['value'].value)
except ValueError as e:
# err = 'LINE: {} BGGID: '.format(sys.exec_info()[-1].tb_lineno)
# f.write(err + (str(bggid) + ' Error: ' + str(e) + '\n'))
f.write(str(bggid) + ' Error: ' + str(e) + '\n')
year = 'NULL'
link = dom.getElementsByTagName('link')
publishers = [items.attributes['value'].value for items in link if items.attributes['type'].value == "boardgamepublisher"]
mechanics = [items.attributes['value'].value for items in link if items.attributes['type'].value == "boardgamemechanic"]
artists = [items.attributes['value'].value for items in link if items.attributes['type'].value == "boardgameartist"]
designers = [items.attributes['value'].value for items in link if items.attributes['type'].value == "boardgamedesigner"]
families = [items.attributes['value'].value for items in link if items.attributes['type'].value == "boardgamefamily"]
categories = [items.attributes['value'].value for items in link if items.attributes['type'].value == "boardgamecategory"]
minplayers = dom.getElementsByTagName('minplayers')
minplayers = minplayers[0].attributes['value'].value
if not minplayers:
minplayers = 'NULL'
maxplayers = dom.getElementsByTagName('maxplayers')
maxplayers = maxplayers[0].attributes['value'].value
if not maxplayers:
maxplayers = 'NULL'
add_groups(publishers, "publishers", "pubid", "gamepub")
add_groups(mechanics, "mechanics", "mechid", "gamemech")
add_groups(artists, "artists", "artid", "artgame")
add_groups(designers, "designers", "designid", "designgame")
add_groups(categories, "categories", "catid", "catgame")
add_groups(families, "families", "famid", "famgame")
query = "INSERT INTO games(bggid, name, year, minplayers, maxplayers) VALUES \
({bi}, '{nm}', {yr}, {mn}, {mx})".format(bi = bggid, nm = name, yr = year, mn = minplayers, mx = maxplayers)
c.execute(query)
query = "INSERT INTO collection(bggid, owned, rank, weight, trading, wishing, wanting, rating, bggrating, numrating) VALUES ({bi}, {ow}, {rk}, {wt}, {td}, {wish}, {want}, {rt}, {bggr}, {nr})"\
.format(bi = bggid, ow = owned, rk = rank, wt = weight, td = trading, wish = wishing, want = wanting, rt = rating, bggr = bggrating, nr = numrating)
c.execute(query)
today = date.today()
today = today.strftime('%Y-%m')
query = "SELECT count(bggid)*1.0/84000 FROM collection WHERE date(date) like '{td}%'".format(td = today)
c.execute(query)
tot = c.fetchone()[0]
print check
if bggid%100 == 0:
conn.commit()
else:
check += 1
conn.commit()
conn.close()
f.close()
StopTime = datetime.now() # Final Time
print "Completed!"
print str(StopTime - StartTime)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment