Last active
May 8, 2016 22:21
-
-
Save allisontharp/46111f2a398a734a82a6 to your computer and use it in GitHub Desktop.
Added more commenting and the ability to create the database if it does not already exist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from boardgamegeek import BoardGameGeek | |
from datetime import datetime | |
import sqlite3, re, unicodedata | |
# # # Notes: | |
# This script pulls board game play data from www.boardgamegeek.com and places it in a SQLite database. | |
# To use it: | |
# 1. Save this file as a .py file in the folder where you would like the database to be created | |
# 2. Set the board game geek (BGG) username variable, bgg_username, to be the username of the | |
# BGG profile with which you would like to colect the data. | |
# 3. If you would like to change the location of the database, uncomment the first db_loc and | |
# change the location. Additionally, you must comment out the second db_loc | |
# | |
# If you would like to run the script for a specific date range, change the variables | |
# 'firstdate' and 'lastdate' on lines 80-85 | |
# | |
# For more information on this script and others, visit http://www.techtrek.io | |
# Variables | |
bgg_username = 'mad4hatter' # username of plays to grab | |
#db_loc = 'C:\\Users\\Allison\\boardgameplays' # location of sqlite database | |
db_loc = 'boardgameplays.sqlite' # name of database | |
def strip_accents(s): | |
return ''.join(c for c in unicodedata.normalize('NFD', s) | |
if unicodedata.category(c) != 'Mn') | |
# Used for printout at the end of script | |
startTime = datetime.now() # Used to calculate total time later | |
num_games = 0 | |
num_players = 0 | |
num_plays = 0 | |
# Connect to BGG | |
bgg = BoardGameGeek() | |
my_username = bgg.plays(bgg_username) | |
my_plays = my_username.plays | |
# Connect to SQLite Database | |
conn = sqlite3.connect(db_loc) | |
c = conn.cursor() | |
# Check to see if games table exists | |
c.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'games'") | |
if not c.fetchall(): # if the games table doesn't exist, create it | |
print "Creating Games Table" | |
c.execute("CREATE TABLE games(game_id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(50) NOT NULL, bggid INTEGER)") | |
# Check to see if players table exists | |
c.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'players'") | |
if not c.fetchall(): | |
print "Creating Players Table" | |
c.execute("CREATE TABLE players(player_id INTEGER PRIMARY KEY AUTOINCREMENT, firstname VARCHAR(50) NOT NULL, lastname VARCHAR(50), bgg VARCHAR(50));") | |
# Check to see if playersplays table exists | |
c.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'playersplays'") | |
if not c.fetchall(): | |
print "Creating Playersplays Table" | |
c.execute("CREATE TABLE playersplays(play_id INTEGER NOT NULL,player_id INTEGER NOT NULL,score INTEGER NOT NULL,winner INTEGER(0,1), new INTEGER(0,1), PRIMARY KEY (play_id, player_id))") | |
# Check to see if plays table exists | |
c.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'plays'") | |
if not c.fetchall(): | |
print "Creating Plays table" | |
c.execute("CREATE TABLE plays(play_id INTEGER PRIMARY KEY AUTOINCREMENT,game_id INTEGER NOT NULL,date DATETIME NOT NULL,duration INTEGER NOT NULL,location_id INTEGER, bggid INTEGER);") | |
# Get most recent game from database and bgg | |
c.execute("select max(plays.date) from plays") | |
try: | |
print "Most recent game: " | |
#firstgame = c.fetchone()[0] | |
firstgame = datetime.strptime(c.fetchone()[0],'%Y-%m-%d') | |
c.execute("select max(playersplays.play_id) from playersplays") | |
play_id = c.fetchone()[0] + 1 | |
print firstgame, play_id | |
except TypeError: # no plays have been added to the database | |
print "NO PLAYS HAVE BEEN ADDED!" | |
play_id = 0 | |
firstgame = datetime.strptime('1/1/2000', '%m/%d/%Y') | |
play_id = 0 | |
lastgame = datetime.now() | |
# # # --- Use this if you want to specify a date range --- # # # | |
# # First game | |
# firstgame = datetime.strptime('1/11/2014', '%m/%d/%Y') | |
# | |
# # Last game | |
# lastgame = datetime.strptime('2/27/2016', '%m/%d/%Y') | |
for games in my_plays: | |
if firstgame < games.date <= lastgame: | |
bgg_name = games.game_name | |
try: | |
bgg_name = bgg_name.translate(None,"'&-:") | |
except TypeError: | |
bgg_name = unicode(bgg_name) # game name from BGG, converted to unicode from string | |
bgg_name = strip_accents(bgg_name) # remove accents | |
bgg_name = bgg_name.encode("ascii","ignore") | |
bgg_id = games.game_id # game id from BGG | |
duration = games.duration | |
play_date = games.date.strftime('%Y-%m-%d') | |
anon_check = 0 | |
print "Current game: " + bgg_name + " on " + play_date | |
# Check to see if the current game is in the games table | |
query = "SELECT * FROM games WHERE bggid = {bi}".\ | |
format(bi= bgg_id) | |
c.execute(query) | |
allrows = c.fetchall() | |
if not allrows: # bggid has not been added to the games table | |
print " Adding " + bgg_name + " to games table." | |
c.execute("INSERT INTO games (game_id, name, bggid) VALUES (NULL, '{nm}', {bi})".\ | |
format(nm = bgg_name, bi = bgg_id)) | |
num_games += 1 | |
# TODO: figure out how to get locations in here | |
# Check if play has been added | |
c.execute("SELECT game_id FROM games WHERE bggid = {bi}".format(bi = bgg_id)) # game id in the plays table | |
db_game_id = c.fetchone()[0] | |
bgg_play_id = games.id # BGG's unique ID for this play | |
# TODO: This only checks for a game on a date, would skip multiple games on the same date | |
c.execute("SELECT bggid FROM plays WHERE game_id = {gi} and date = {gd}" .\ | |
format(gi = db_game_id, gd = play_date)) | |
db_play_id = c.fetchall() # this errors out when the it selects nothing. either use a try with the below if statement or figure something else out | |
if not db_play_id or not bgg_play_id == db_play_id: # play has not been added | |
print " Adding play to plays table" | |
c.execute("INSERT INTO plays (play_id, game_id, date, duration) VALUES (NULL, \ | |
{gi}, '{dt}', {dur})" .\ | |
format(gi = db_game_id, dt = play_date, dur = duration)) | |
c.execute("SELECT game_id FROM games WHERE bggid = {bi}".format(bi = bgg_id)) # game id in the plays table | |
db_game_id = c.fetchone()[0] | |
num_plays += 1 | |
# Check if each player is in players table | |
for players in games.players: | |
name = players.name | |
firstname = name.split(" ",1)[0] | |
try: # Check for last name | |
lastname = name.split(" ",1)[1] | |
# Check for anonymous players | |
if lastname == "player": #bg stats makes anon player name "Anonymous Player". I want it to be be Anonymous 2, 3, etc | |
lastname = "" | |
if anon_check > 0: | |
lastname = str(anon_check) | |
anon_check += 1 | |
elif firstname == "Interactions": # Change firstname and make last name "" | |
firstname = name.split("- ",1)[1] | |
lastname = "" | |
except IndexError: | |
lastname = "" | |
firstname = firstname.translate(None, "'- ") # remove "'-" from name | |
lastname = lastname.translate(None, "'- ") # remove "'-" from name | |
c.execute("SELECT player_id FROM players WHERE firstname LIKE '{fn}' AND lastname LIKE '{ln}'" .\ | |
format(fn = firstname, ln = lastname)) | |
try: | |
playerid = c.fetchone()[0] | |
except TypeError: # Player is not in Players table | |
print " Adding " + firstname + " " + lastname + " to players table" | |
# Check if bgg id is available | |
bgg_user_id = players.username | |
c.execute("INSERT INTO players VALUES (NULL, '{fn}', '{ln}', '{bggid}')".\ | |
format(fn = firstname, ln = lastname, bggid = bgg_user_id)) | |
c.execute("SELECT player_id FROM players WHERE firstname LIKE '{fn}' AND lastname LIKE '{ln}'" .\ | |
format(fn = firstname, ln = lastname)) | |
playerid = c.fetchone()[0] | |
num_players += 1 | |
pscore = players.score | |
# If score is empty, set it to 0 | |
if not pscore: | |
pscore = 0 | |
elif "e" in pscore: | |
# Convert scientific notation to integer | |
pscore = pscore.replace("e ", "E") | |
pscore = int(float(pscore)) | |
winner = players.win | |
pnew = players.new | |
if not pnew: | |
pnew = 0 | |
# Add to playersplays table | |
print play_id, playerid | |
c.execute("INSERT INTO playersplays VALUES ({play}, {player}, {sc}, {win}, {new})".\ | |
format(play = play_id, player = playerid, sc = pscore, win = winner, new = pnew)) | |
play_id += 1 | |
conn.commit() | |
conn.close() | |
print "Completed!" | |
print " Total time: " + str(datetime.now()-startTime) | |
print " Total games added: " + str(num_games) | |
print " Total plays added: " + str(num_plays) | |
print " Total players added: " + str(num_players) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Script would error out when trying to add new plays to an existing db because the c.fetchall and c.fetchone commands will only work one time, but they were being called multiple times.