Last active
December 17, 2015 07:29
-
-
Save bcbwilla/5573572 to your computer and use it in GitHub Desktop.
Gather Minecraft PvP match data from oc.tc/matches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib | |
from datetime import timedelta | |
from operator import itemgetter | |
import csv | |
from bs4 import BeautifulSoup | |
# function that does stuff | |
def _fetch_match_rows(url):
    """Download one match-listing page and return the <tr> rows of its table."""
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even if the read fails part-way through.
        response.close()
    soup = BeautifulSoup(html)
    tables = soup.findAll('table',
                          {'class': 'table table-bordered table-striped'})
    # NOTE(review): contents[3] is presumably the table body that follows the
    # header section -- confirm against the live page markup.
    return tables[0].contents[3].findAll('tr')


def _parse_match_row(row):
    """Turn one match table row into a dict of the scraped fields."""

    def cell_text(index):
        # First child of the cell at the given position, whitespace-trimmed.
        return row.contents[index].contents[0].strip()

    match = {}
    # The "when" cell wraps its text in a link, unlike the other cells.
    match['when'] = row.contents[1].a.contents[0].strip()
    match['map'] = cell_text(5)
    match['server'] = cell_text(7)
    match['deaths'] = cell_text(9)
    match['kills'] = cell_text(11)
    match['players'] = cell_text(13)

    # convert the total match time ("minutes:seconds") to seconds
    parts = cell_text(3).split(':')
    duration = timedelta(minutes=int(parts[0]), seconds=int(parts[1]))
    # total_seconds() rather than .seconds: the latter drops whole days, so a
    # very long match would wrap around to a small number.
    match['time'] = int(duration.total_seconds())
    return match


def match_stats(last_page=2, out_file='out.csv', info=False):
    """Scrape match statistics from oc.tc/matches pages into a CSV file.

    last_page - the highest match page to scrape data from (inclusive).
                don't go too high!
    out_file  - the name of the output data file
    info      - if True, it will print stuff every 10 pages to the console
                as it runs so you know what the script is up to.

    Writes one row per finished match (map, time in seconds, kills, deaths),
    sorted by map name, and returns nothing.

    using this requires an external library, BeautifulSoup. it can be
    downloaded here http://www.crummy.com/software/BeautifulSoup/
    """
    base_url = "https://oc.tc/matches?page="

    matches = []
    # + 1 so that last_page itself is scraped, as documented above.
    for page_number in range(1, last_page + 1):
        # print information
        if info and page_number % 10 == 0:
            print("on page " + str(page_number))

        for row in _fetch_match_rows(base_url + str(page_number)):
            matches.append(_parse_match_row(row))

    # get rid of matches that are in progress
    finished = [m for m in matches if m['when'] != 'In Progress']
    # sort by map name to make data easier to work with
    finished.sort(key=itemgetter('map'))

    # make output csv file; the with-block guarantees the file is flushed and
    # closed. Binary mode is what the Python 2 csv module expects.
    with open(out_file, "wb") as f:
        writer = csv.writer(f)
        writer.writerow(['Map', 'Time (seconds)', 'Kills', 'Deaths'])
        for m in finished:
            writer.writerow([m['map'], m['time'], m['kills'], m['deaths']])

    if info:
        print("done")
# Run the scraper: last_page=200, results go to maps.csv, progress is printed.
match_stats(info=True, out_file='maps.csv', last_page=200)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment