Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
# ---- User configuration: edit the three values below before running. ----
batchstart = 0 #first browse-page number to scrape (passed as the start of range(), so it is inclusive)
batchend = 1 #page number at which scraping stops (passed as the end of range(), so this page itself is NOT scraped)
csv_location = '/Users/Dave/Desktop/BoardGameDataDump.csv' #output CSV path; the script opens this file in append mode
###########STOP EDITING HERE.
import requests
from bs4 import BeautifulSoup
from boardgamegeek import BoardGameGeek
import time
import re
import csv
import sys
gamelist = []  # not used by this script; kept so the module-level name still exists
gameid = []  # not used by this script; kept so the module-level name still exists

# Scrape game ids from the browse pages batchstart..batchend-1 into
# game_id_list.
#
# Ids are accumulated across ALL pages of the batch (previously each page
# overwrote the list, so only the last page survived) and de-duplicated while
# keeping first-seen order, which makes the processing order deterministic.
# game_id_list is always defined, even when the page range is empty.
game_id_list = []
seen_game_ids = set()
for page_number in range(batchstart, batchend):
    start_time = time.time()
    url = "https://boardgamegeek.com/browse/boardgame/page/" + str(page_number)
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    anchors = soup.find_all('a')
    # Game links contain ".../boardgame/<digits>/..."; capture just the digits.
    # The ids are kept as strings, exactly as they appear in the page.
    page_ids = re.findall(r'boardgame/(\d+)/', str(anchors))
    new_ids = []
    for game_id in page_ids:
        if game_id not in seen_game_ids:
            seen_game_ids.add(game_id)
            new_ids.append(game_id)
    game_id_list.extend(new_ids)
    # Show the ids newly discovered on this page and how long the page took.
    print(new_ids)
    print("--- %s seconds ---" % (time.time() - start_time))
# Add the header row to the CSV file.
#
# The file is opened in append mode because game rows are appended to it, and
# the header is written only when the file is still empty so that repeated
# batch runs against the same file do not inject extra header lines between
# data rows. The column list matches the 14 values written for each game
# (the "description" column was previously missing and "rating_average" was
# mislabelled "g.rating_average").
with open(csv_location, 'a', newline='') as fd:
    # Move explicitly to the end of the file (whence=2) so that tell()
    # reports the current file size; 0 means there is nothing in it yet.
    fd.seek(0, 2)
    if fd.tell() == 0:
        fd.write("name,id,rank,year,thumbnail,playing_time,image"
                 ",rating_average,max_players"
                 ",min_players,min_age,rating_average_weight,rating_bayes_average"
                 ",description\n")
def _game_row(game):
    """Return the CSV row for *game* as a list of 14 strings.

    *game* is the object returned by ``bgg.game``; every attribute read here
    is converted with ``str`` so missing values show up as e.g. ``"None"``
    rather than raising inside the CSV writer.
    """
    return [
        str(game.name),
        str(game.id),
        str(game.boardgame_rank),
        str(game.year),
        str(game.thumbnail),
        str(game.playing_time),
        str(game.image),
        str(game.rating_average),
        str(game.max_players),
        str(game.min_players),
        str(game.min_age),
        str(game.rating_average_weight),
        str(game.rating_bayes_average),
        str(game.description),
    ]


# Fetch each scraped game id from the BoardGameGeek API and append one CSV row
# per game. Failures are printed and the loop moves on to the next id
# (best-effort scraping); the 0-based position in game_id_list is printed after
# every id as a progress indicator.
bgg = BoardGameGeek()
for count, game_id in enumerate(game_id_list):
    # Pause before every API call -- presumably to avoid hammering the
    # BoardGameGeek servers.
    time.sleep(2)
    try:
        g = bgg.game(None, game_id)
    except Exception as error_info:
        # The lookup failed: report it and write nothing for this id.
        # (Falling through here would re-write the previous game's data, or
        # hit an undefined name on the very first id.)
        print(error_info)
    else:
        try:
            gameinfo = _game_row(g)
            print(gameinfo)
            # Re-open in append mode for each row so that every game is on
            # disk as soon as it has been fetched.
            with open(csv_location, 'a', newline='') as csvfile:
                spamwriter = csv.writer(csvfile,
                                        quotechar='|',
                                        quoting=csv.QUOTE_MINIMAL,
                                        lineterminator='\n')
                spamwriter.writerow(gameinfo)
        except Exception as error_info:
            print(error_info)
    print(count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment