Skip to content

Instantly share code, notes, and snippets.

@bcbwilla
Last active December 17, 2015 07:29
Show Gist options
  • Save bcbwilla/5573572 to your computer and use it in GitHub Desktop.
Save bcbwilla/5573572 to your computer and use it in GitHub Desktop.
Gather Minecraft PvP match data from oc.tc/matches
import urllib
from datetime import timedelta
from operator import itemgetter
import csv
from bs4 import BeautifulSoup
# Scraper for the oc.tc match listing, plus the small helpers it relies on.
def _clock_to_seconds(text):
    """Convert a colon-separated duration such as ``"12:34"`` to whole seconds.

    Each field is folded in base 60, so ``MM:SS`` yields ``MM * 60 + SS``.
    Unlike ``timedelta(...).seconds`` the result never wraps at one day, and an
    ``H:MM:SS`` value would be handled too, should the site ever emit one.
    """
    total = 0
    for field in text.split(':'):
        total = total * 60 + int(field)
    return total


def _fetch_html(url):
    """Download *url* and return the raw response body."""
    # Imported locally so the helper runs on both Python 2 and Python 3
    # without touching the file-level imports.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    response = urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even when read() raises.
        response.close()


def _open_csv_for_writing(path):
    """Open *path* in the mode the ``csv`` module expects on this interpreter."""
    import sys
    if sys.version_info[0] >= 3:
        # Python 3: text mode with newline translation disabled, per csv docs.
        return open(path, "w", newline="", encoding="utf-8")
    # Python 2: the csv module writes bytes.
    return open(path, "wb")


def match_stats(last_page=2, out_file='out.csv', info=False):
    """Scrape match statistics from oc.tc/matches and write them to a CSV file.

    Pages 1 through ``last_page`` are fetched one after another. Every
    finished match found in the listing table becomes one CSV row with the
    columns ``Map``, ``Time (seconds)``, ``Kills`` and ``Deaths``; rows are
    sorted by map name. Matches whose "when" cell reads ``In Progress`` are
    left out.

    Args:
        last_page: the highest match page to scrape data from (inclusive).
            Don't go too high!
        out_file: the name of the output data file.
        info: if True, print a progress line on every 10th page and a final
            "done" so you know what the script is up to.

    Raises:
        IndexError: if a page does not contain the expected match table.
        ValueError: if a match duration is not made of integer fields.

    Using this requires an external library, BeautifulSoup. It can be
    downloaded here: http://www.crummy.com/software/BeautifulSoup/
    """
    base_url = "https://oc.tc/matches?page="
    matches = []

    # +1 so that last_page itself is scraped, as the parameter is documented.
    for page_number in range(1, last_page + 1):
        if info and page_number % 10 == 0:
            print("on page " + str(page_number))

        soup = BeautifulSoup(_fetch_html(base_url + str(page_number)))
        tables = soup.find_all(
            'table', {'class': 'table table-bordered table-striped'})
        # Child 3 of the table holds the data rows (presumably the <tbody>,
        # with whitespace text nodes at the even positions -- confirm against
        # the live markup if the page layout changes).
        rows = tables[0].contents[3].find_all('tr')

        for row in rows:
            cells = row.contents
            when = cells[1].a.contents[0].strip()
            # Drop live matches right away; their remaining cells are never
            # needed, so they are not parsed at all.
            if when == 'In Progress':
                continue
            matches.append({
                'when': when,
                'map': cells[5].contents[0].strip(),
                'server': cells[7].contents[0].strip(),
                'deaths': cells[9].contents[0].strip(),
                'kills': cells[11].contents[0].strip(),
                'players': cells[13].contents[0].strip(),
                # total match time, converted to seconds
                'time': _clock_to_seconds(cells[3].contents[0].strip()),
            })

    # Sort by map name to make the data easier to work with.
    matches.sort(key=itemgetter('map'))

    # The with-block guarantees the file is flushed and closed.
    with _open_csv_for_writing(out_file) as handle:
        writer = csv.writer(handle)
        writer.writerow(['Map', 'Time (seconds)', 'Kills', 'Deaths'])
        for match in matches:
            writer.writerow(
                [match['map'], match['time'], match['kills'], match['deaths']])

    if info:
        print("done")
# Run the scraper: collect match data from the listing pages into maps.csv,
# printing progress messages while it works.
match_stats(
    info=True,
    out_file='maps.csv',
    last_page=200,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment