bcbwilla/mapreport.py

## mapreport.py
import urllib
from datetime import timedelta
from operator import itemgetter
import csv
from bs4 import BeautifulSoup

# scrape finished-match statistics from oc.tc/matches and write them to a CSV file
def match_stats(last_page=2, out_file='out.csv', info=False):
    """Scrape match statistics from the oc.tc/matches pages into a CSV file.

    Finished matches are collected from every requested page, sorted by map
    name and written to ``out_file`` with the columns
    ``Map, Time (seconds), Kills, Deaths``.  Rows whose "when" cell reads
    ``In Progress`` are left out.

    last_page - exclusive upper bound of the page range: pages 1 up to and
                including ``last_page - 1`` are fetched, so the default of 2
                fetches page 1 only.  don't go too high!
    out_file  - name of the CSV file to write (an existing file is
                overwritten)
    info      - if True, print a progress line on every 10th page and
                "done" once the file has been written, so you know what
                the script is up to.

    Returns None; the result is the file written to ``out_file``.

    using this requires an external library, BeautifulSoup.  it can be
    downloaded here http://www.crummy.com/software/BeautifulSoup/
    """
    import sys

    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2;
    # try the new location first so the function runs on either interpreter
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    base_url = "https://oc.tc/matches?page="
    matches = []

    for page_number in range(1, last_page):
        # print information
        if info and page_number % 10 == 0:
            print("on page " + str(page_number))

        # fetch the page; release the connection even if reading fails
        response = urlopen(base_url + str(page_number))
        try:
            html = response.read()
        finally:
            response.close()

        soup = BeautifulSoup(html)
        tables = soup.findAll('table', {'class': 'table table-bordered table-striped'})
        # child 3 of the table holds the match rows
        # NOTE(review): presumably the <tbody>; verify against the live markup
        rows = tables[0].contents[3].findAll('tr')

        for row in rows:
            # cell positions used: 1 = when, 3 = duration, 5 = map,
            # 9 = deaths, 11 = kills
            cells = row.contents

            # skip matches that are still in progress
            if cells[1].a.contents[0].strip() == 'In Progress':
                continue

            # convert the total match time to seconds.  the cell is read as
            # "MM:SS"; folding in base 60 gives the same value for that
            # form, does not wrap around at 24 hours, and would also accept
            # an "H:MM:SS" value (assumed format, not confirmed)
            seconds = 0
            for part in cells[3].contents[0].strip().split(':'):
                seconds = seconds * 60 + int(part)

            matches.append({
                'map': cells[5].contents[0].strip(),
                'time': seconds,
                'kills': cells[11].contents[0].strip(),
                'deaths': cells[9].contents[0].strip(),
            })

    # sort by map name to make data easier to work with
    matches.sort(key=itemgetter('map'))

    # the csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] >= 3:
        out = open(out_file, "w", newline="")
    else:
        out = open(out_file, "wb")

    # the with-block guarantees the file is flushed and closed
    with out:
        writer = csv.writer(out)
        writer.writerow(['Map', 'Time (seconds)', 'Kills', 'Deaths'])
        writer.writerows(
            [m['map'], m['time'], m['kills'], m['deaths']] for m in matches
        )

    if info:
        print("done")


# using the function to get data.  guarded so that the (long) network scrape
# only runs when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    match_stats(last_page=200, out_file='maps.csv', info=True)
	import urllib
	from datetime import timedelta
	from operator import itemgetter
	import csv
	from bs4 import BeautifulSoup

	# scrape finished-match statistics from oc.tc/matches and write them to a CSV file
	def match_stats(last_page=2, out_file='out.csv', info=False):
		"""Scrape match statistics from the oc.tc/matches pages into a CSV file.

		Finished matches are collected from every requested page, sorted by
		map name and written to ``out_file`` with the columns
		``Map, Time (seconds), Kills, Deaths``.  Rows whose "when" cell reads
		``In Progress`` are left out.

		last_page - exclusive upper bound of the page range: pages 1 up to
		            and including ``last_page - 1`` are fetched, so the
		            default of 2 fetches page 1 only.  don't go too high!
		out_file  - name of the CSV file to write (an existing file is
		            overwritten)
		info      - if True, print a progress line on every 10th page and
		            "done" once the file has been written, so you know what
		            the script is up to.

		Returns None; the result is the file written to ``out_file``.

		using this requires an external library, BeautifulSoup. it can be
		downloaded here http://www.crummy.com/software/BeautifulSoup/
		"""
		import sys

		# urlopen lives in urllib.request on Python 3 and in urllib on
		# Python 2; try the new location first so the function runs on
		# either interpreter
		try:
			from urllib.request import urlopen
		except ImportError:
			from urllib import urlopen

		base_url = "https://oc.tc/matches?page="
		matches = []

		for page_number in range(1, last_page):
			# print information
			if info and page_number % 10 == 0:
				print("on page " + str(page_number))

			# fetch the page; release the connection even if reading fails
			response = urlopen(base_url + str(page_number))
			try:
				html = response.read()
			finally:
				response.close()

			soup = BeautifulSoup(html)
			tables = soup.findAll('table', {'class': 'table table-bordered table-striped'})
			# child 3 of the table holds the match rows
			# NOTE(review): presumably the <tbody>; verify against the live markup
			rows = tables[0].contents[3].findAll('tr')

			for row in rows:
				# cell positions used: 1 = when, 3 = duration, 5 = map,
				# 9 = deaths, 11 = kills
				cells = row.contents

				# skip matches that are still in progress
				if cells[1].a.contents[0].strip() == 'In Progress':
					continue

				# convert the total match time to seconds.  the cell is read
				# as "MM:SS"; folding in base 60 gives the same value for
				# that form, does not wrap around at 24 hours, and would
				# also accept an "H:MM:SS" value (assumed format, not
				# confirmed)
				seconds = 0
				for part in cells[3].contents[0].strip().split(':'):
					seconds = seconds * 60 + int(part)

				matches.append({
					'map': cells[5].contents[0].strip(),
					'time': seconds,
					'kills': cells[11].contents[0].strip(),
					'deaths': cells[9].contents[0].strip(),
				})

		# sort by map name to make data easier to work with
		matches.sort(key=itemgetter('map'))

		# the csv module wants a binary file on Python 2 and a text file
		# opened with newline='' on Python 3
		if sys.version_info[0] >= 3:
			out = open(out_file, "w", newline="")
		else:
			out = open(out_file, "wb")

		# the with-block guarantees the file is flushed and closed
		with out:
			writer = csv.writer(out)
			writer.writerow(['Map', 'Time (seconds)', 'Kills', 'Deaths'])
			writer.writerows(
				[m['map'], m['time'], m['kills'], m['deaths']] for m in matches
			)

		if info:
			print("done")


	# using the function to get data.  guarded so that the (long) network
	# scrape only runs when this file is executed as a script, not when it
	# is imported
	if __name__ == "__main__":
		match_stats(last_page=200, out_file='maps.csv', info=True)