aucchen/extract_current_ifcomp.py

## extract_current_ifcomp.py
# Extracts the current ifcomp games, and tries to link them to ifdb entries.
import datetime
import time

from bs4 import BeautifulSoup
import urllib.request

import ifdb

new_url = 'https://ifcomp.org/ballot?alphabetize=1'

# Cut-off handed to ifdb.get_ratings: ratings dated after this day are ignored.
end_date = datetime.datetime(2021, 11, 15)

# Ballot playtime phrases mapped to minutes. The first matching phrase wins,
# so 'longer than two hours' must be tested before its substring 'two hours'
# (otherwise every "longer than two hours" game is recorded as 120 minutes).
_PLAYTIME_MINUTES = (
    ('longer than two hours', 150),
    ('two hours', 120),
    ('an hour and a half', 90),
    ('one hour', 60),
    ('half an hour', 30),
    ('15 minutes or less', 15),
)


def _playtime_minutes(playtime):
    """Return the minutes for a ballot playtime string, or 0 if unrecognised."""
    for phrase, minutes in _PLAYTIME_MINUTES:
        if phrase in playtime:
            return minutes
    return 0


with urllib.request.urlopen(new_url) as fp:
    data = fp.read()
    html = data.decode("utf8")

soup = BeautifulSoup(html, 'html.parser')

# Every <div class="well"> is a candidate entry; wells without an <h2> are
# skipped because the title is read from that heading.
rows = soup.find_all('div', attrs={'class': 'well'})
games = []
for i, row in enumerate(rows):
    print(i)
    heading = row.find('h2')
    if heading is None:
        continue
    # Only the first line of the heading text is used as the title.
    title = heading.text.strip().split('\n')[0]
    current_game = {
        'title': title,
        'is_parser': int(row['ifcomp-style'] == 'parser'),
        'time': _playtime_minutes(row['ifcomp-playtime']),
    }
    # Best-effort IFDB lookup: any failure falls back to placeholder values so
    # one unmatched game does not abort the whole scrape.
    try:
        ifdb_id = ifdb.find_ifdb_id(title)
        time.sleep(0.5)  # pause between IFDB requests
        current_game['ifdb_id'] = ifdb_id
        print(ifdb_id)
        rating, count = ifdb.get_ratings(ifdb_id, end_date)
        time.sleep(0.5)
        current_game['ifdb_rating'] = rating
        current_game['ifdb_rating_count'] = count
    except Exception as err:
        # Report why the lookup failed instead of hiding it.
        print('IFDB lookup failed for {0!r}: {1!r}'.format(title, err))
        current_game['ifdb_id'] = '???'
        current_game['ifdb_rating'] = 0
        current_game['ifdb_rating_count'] = 0
    print(current_game)
    games.append(current_game)

import pandas as pd
df = pd.DataFrame(games)
df.to_csv('data_2021.tsv', sep='\t', index=False)

## extract_old_ifcomp.py
# Extracts old ifcomp results (replace 2020 with the relevant year, and change the end_date)
import datetime
import time

from bs4 import BeautifulSoup
import urllib.request

import ifdb

old_url = 'https://ifcomp.org/comp/2020'

# Cut-off handed to ifdb.get_ratings: ratings dated after this day are ignored.
end_date = datetime.datetime(2020, 11, 30)

# Playtime labels searched for (case-sensitively, in this order) in the text
# of a game's info row; the first label found decides the minutes.
_RESULT_PLAYTIMES = (
    ('Two hours', 120),
    ('An hour and a half', 90),
    ('One hour', 60),
    ('Longer than two hours', 150),
    ('Half an hour', 30),
    ('15 minutes or less', 15),
)

with urllib.request.urlopen(old_url) as fp:
    data = fp.read()
    html = data.decode("utf8")

soup = BeautifulSoup(html, 'html.parser')

# The loop treats the <div class="row"> elements as repeating triples:
# a title row, an info row, then a rank row that closes the game record.
rows = soup.find_all('div', attrs={'class': 'row'})
mode = 'title'
games = []
current_game = {}
for row in rows:
    if mode == 'title':
        mode = 'info'
        # The title is the first line of the second <h2> in the row.
        title = row.find_all('h2')[1].text.strip().split('\n')[0]
        current_game['title'] = title
    elif mode == 'info':
        mode = 'rank'
        # The first three table cells are read as score, vote count and stdev.
        entries = row.find_all('td')
        current_game['score'] = float(entries[0].text.strip())
        current_game['rating_count'] = int(entries[1].text.strip())
        current_game['stdev'] = float(entries[2].text.strip())
        row_text = row.text
        current_game['is_parser'] = int('Parser' in row_text)
        current_game['time'] = 0
        for label, minutes in _RESULT_PLAYTIMES:
            if label in row_text:
                current_game['time'] = minutes
                break
        # Use the first link that mentions ifdb. href=True skips anchors with
        # no href attribute, which would otherwise raise KeyError.
        for ref in row.find_all('a', href=True):
            if 'ifdb' in ref['href']:
                ifdb_id = ref['href'].split('=')[1]
                current_game['ifdb_id'] = ifdb_id
                print(ifdb_id)
                rating, count = ifdb.get_ratings(ifdb_id, end_date)
                time.sleep(0.5)  # pause between IFDB requests
                current_game['ifdb_rating'] = rating
                current_game['ifdb_rating_count'] = count
                system = ifdb.get_system(ifdb_id)
                current_game['system'] = system
                break
    elif mode == 'rank':
        # Nothing is read from the rank row; it only ends the current record.
        print(current_game)
        mode = 'title'
        games.append(current_game)
        current_game = {}

import pandas as pd
df = pd.DataFrame(games)
df.to_csv('data_2020.tsv', sep='\t', index=False)

## get_ifdb_rankings.py
import pandas

import ifdb

# Adds an 'ifdb_rank' column to data_2021.tsv, using each title's 1-based
# position in the list returned by ifdb.get_rankings.
filled_data = pandas.read_csv('data_2021.tsv', sep='\t', index_col=None)

games = ifdb.get_rankings('IFComp 2021')
game_ranks = {name: position for position, name in enumerate(games, start=1)}
# The data file uses the short title for this game, so alias it to the rank
# stored under the full IFDB title.
game_ranks['Codex Sadistica'] = game_ranks['Codex Sadistica: A Heavy-Metal Minigame']

new_rows = []
for _, record in filled_data.iterrows():
    ranked = record.copy()
    # A title missing from game_ranks raises KeyError here.
    ranked['ifdb_rank'] = game_ranks[ranked['title']]
    new_rows.append(ranked)

df = pandas.DataFrame(new_rows)
df.to_csv('data_2021.tsv', sep='\t', index=None)

## ifdb.py
import datetime
from bs4 import BeautifulSoup
import urllib.request


def find_ifdb_id(game_name):
    """
    Returns the ifdb id corresponding to a game name.

    Fetches the IFDB search page for game_name. If the page text contains
    'TUID', the id is taken from the first <span class="notes"> that mentions
    it (the text after the last ':'); None is returned if no such span exists.
    Otherwise the id is taken from the href of the first link inside the first
    <td>, and an exception is raised if the page has no such link.
    """
    # Local import so the block does not depend on edits elsewhere in the file.
    from urllib.parse import quote_plus

    # quote_plus escapes every reserved character (space -> '+', '/' -> '%2F',
    # and also '&', '#', '?', '+', non-ASCII, ...), so titles containing them
    # no longer corrupt the query string or make urlopen fail.
    query = quote_plus(game_name)
    url = 'https://ifdb.org/search?searchbar={0}'.format(query)
    print(url)
    with urllib.request.urlopen(url) as fp:
        data = fp.read()
        html = data.decode("ISO-8859-1")
    soup = BeautifulSoup(html, 'lxml')
    if 'TUID' in soup.text:
        spans = soup.find_all('span', attrs={'class': 'notes'})
        for span in spans:
            if 'TUID' in span.text:
                tuid = span.text.split(':')[-1].strip()
                return tuid
        # 'TUID' appeared on the page but not inside any notes span.
        return None
    url = soup.find('td').find('a')['href']
    return url.split('=')[-1]


def _parse_rating_date(text):
    """Parse text such as 'October 3, 2021'; return None if it is not a date."""
    try:
        return datetime.datetime.strptime(text, '%B %d, %Y')
    except ValueError:
        return None


def get_ratings(ifdb_id, end_date=None):
    """
    Given an IFDB game id, returns the game's rating and number of ratings (as of end_date)

    Walks the direct children of the first <div class="indented"> on the
    game's ratings page. The star value is the first character of an image's
    title attribute. When end_date (a datetime.datetime) is given, a rating is
    counted only if a date can be parsed for it and that date is not later
    than end_date; when end_date is None every star image found is counted.

    Returns a (mean, count) tuple; mean is 0 when no ratings were counted.
    Raises IndexError if the page has no 'indented' div.
    """
    url = 'https://ifdb.org/viewgame?id={0}&reviews&sortby=&ratings&pg=all'.format(ifdb_id)
    with urllib.request.urlopen(url) as fp:
        data = fp.read()
        html = data.decode("ISO-8859-1")
    soup = BeautifulSoup(html, 'lxml')
    indented_div = soup.find_all('div', attrs={'class': 'indented'})[0]
    all_stars = []
    # Stars seen most recently; a later dated <span> sibling refers to them.
    current_stars = 0
    for child in indented_div.children:
        if child.name == 'p':
            # A paragraph carries both the star image and the date text.
            image = child.find('img')
            if image is None:
                continue
            current_stars = int(image['title'][0])
            if end_date is not None:
                # The date is the last two comma-separated pieces of the
                # paragraph text, e.g. "..., October 3, 2021".
                text = ','.join(child.text.split(',')[-2:]).strip()
                rated_on = _parse_rating_date(text)
                if rated_on is None or rated_on > end_date:
                    continue
            all_stars.append(current_stars)
        elif child.name == 'img':
            # A bare star image: counted immediately only when no cut-off is
            # in force; otherwise the following dated <span> decides.
            current_stars = int(child['title'][0])
            if end_date is None:
                all_stars.append(current_stars)
        elif child.name == 'span' and end_date is not None:
            rated_on = _parse_rating_date(child.text.strip(', '))
            if rated_on is not None and rated_on <= end_date:
                all_stars.append(current_stars)
    count = len(all_stars)
    mean = 0
    if count > 0:
        mean = float(sum(all_stars))/count
    return mean, count


def get_rankings(tag="IFComp 2021"):
    """
    Returns the titles of the games carrying an IFDB tag, in page order.

    Fetches the IFDB search results for the tag and collects, in document
    order, the text of every link inside <div class="main"> that contains a
    <b> element. Each title is also printed. The caller treats the position
    in the returned list as the game's rank.
    """
    query = '+'.join(tag.split(' '))
    url = 'https://ifdb.org/search?searchfor=tag%3A{0}&sortby=&pg=all'.format(query)
    with urllib.request.urlopen(url) as fp:
        html = fp.read().decode("ISO-8859-1")
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('div', attrs={'class':'main'})
    titles = []
    for anchor in container.find_all('a'):
        # Only links that wrap a bold element are taken as game titles.
        if not anchor.find('b'):
            continue
        name = anchor.text
        print(name)
        titles.append(name)
    return titles


def get_system(ifdb_id):
    """
    Returns the development system.

    Scans the children of the first <span class="notes"> on the game's IFDB
    page for one containing 'Development System' and returns the text of the
    child that immediately follows it. Returns None when the label is absent
    or is the last child.
    """
    url = 'https://ifdb.org/viewgame?id={0}'.format(ifdb_id)
    with urllib.request.urlopen(url) as fp:
        html = fp.read().decode("ISO-8859-1")
    soup = BeautifulSoup(html, 'lxml')
    notes = soup.find('span', attrs={'class':'notes'})
    children = iter(notes)
    for node in children:
        if 'Development System' in node:
            # The value is whatever node comes right after the label.
            following = next(children, None)
            if following is None:
                return None
            return following.text
    return None

## update_ifdb.py
# Updates the IFDB ratings for current ifcomp games.
import datetime
import time

import pandas

import ifdb

# Re-fetches the IFDB rating of every game in data_2021.tsv and rewrites the
# file in place.
filled_data = pandas.read_csv('data_2021.tsv', sep='\t', index_col=None)
new_rows = []

# Cut-off handed to ifdb.get_ratings: ratings dated after this day are ignored.
end_date = datetime.datetime(2021, 11, 15)

for i, row in filled_data.iterrows():
    new_row = row.copy()
    title = row['title']
    print(i, title)
    ifdb_id = row['ifdb_id']
    print(ifdb_id)
    print('old ratings: {0} {1}'.format(row['ifdb_rating'], row['ifdb_rating_count']))
    try:
        rating, count = ifdb.get_ratings(ifdb_id, end_date)
    except Exception as err:
        # Best effort: keep the stored values, but say why the refresh failed
        # instead of swallowing the error silently.
        print('could not refresh ratings for {0}: {1!r}'.format(ifdb_id, err))
    else:
        # Only accept the new numbers if the rating count did not shrink.
        if count >= row['ifdb_rating_count']:
            new_row['ifdb_rating'] = rating
            new_row['ifdb_rating_count'] = count
            print('new ratings: {0} {1}'.format(rating, count))
    # Pause after every request, not only after accepted updates.
    time.sleep(0.5)
    new_rows.append(new_row)

df = pandas.DataFrame(new_rows)
df.to_csv('data_2021.tsv', sep='\t', index=False)
	# Extracts the current ifcomp games, and tries to link them to ifdb entries.
	# (Nested indentation restored: this copy had every statement flattened to
	# one level, which Python rejects.)
	import datetime
	import time

	from bs4 import BeautifulSoup
	import urllib.request

	import ifdb

	new_url = 'https://ifcomp.org/ballot?alphabetize=1'

	# Cut-off handed to ifdb.get_ratings: ratings dated after this day are ignored.
	end_date = datetime.datetime(2021, 11, 15)

	with urllib.request.urlopen(new_url) as fp:
	    data = fp.read()
	    html = data.decode("utf8")

	soup = BeautifulSoup(html, 'html.parser')

	# Every <div class="well"> is a candidate entry; wells without an <h2> are skipped.
	rows = soup.find_all('div', attrs={'class': 'well'})
	games = []
	current_game = {}
	for i, row in enumerate(rows):
	    print(i)
	    if len(row.find_all('h2')) == 0:
	        continue
	    title = row.find('h2').text.strip().split('\n')[0]
	    current_game['title'] = title
	    current_game['is_parser'] = int(row['ifcomp-style'] == 'parser')
	    playtime = row['ifcomp-playtime']
	    # 'longer than two hours' contains 'two hours', so it is tested first;
	    # tested later it could never match.
	    if 'longer than two hours' in playtime:
	        current_game['time'] = 150
	    elif 'two hours' in playtime:
	        current_game['time'] = 120
	    elif 'an hour and a half' in playtime:
	        current_game['time'] = 90
	    elif 'one hour' in playtime:
	        current_game['time'] = 60
	    elif 'half an hour' in playtime:
	        current_game['time'] = 30
	    elif '15 minutes or less' in playtime:
	        current_game['time'] = 15
	    else:
	        current_game['time'] = 0
	    # Best-effort IFDB lookup; any failure falls back to placeholder values.
	    try:
	        ifdb_id = ifdb.find_ifdb_id(title)
	        time.sleep(0.5)
	        current_game['ifdb_id'] = ifdb_id
	        print(ifdb_id)
	        rating, count = ifdb.get_ratings(ifdb_id, end_date)
	        time.sleep(0.5)
	        current_game['ifdb_rating'] = rating
	        current_game['ifdb_rating_count'] = count
	    except:
	        current_game['ifdb_id'] = '???'
	        current_game['ifdb_rating'] = 0
	        current_game['ifdb_rating_count'] = 0
	    print(current_game)
	    games.append(current_game)
	    current_game = {}

	import pandas as pd
	df = pd.DataFrame(games)
	df.to_csv('data_2021.tsv', sep='\t', index=None)
	# Extracts old ifcomp results (replace 2020 with the relevant year, and change the end_date)
	# (Nested indentation restored: this copy had every statement flattened to
	# one level, which Python rejects.)
	import datetime
	import time

	from bs4 import BeautifulSoup
	import urllib.request

	import ifdb

	old_url = 'https://ifcomp.org/comp/2020'

	# Cut-off handed to ifdb.get_ratings: ratings dated after this day are ignored.
	end_date = datetime.datetime(2020, 11, 30)

	with urllib.request.urlopen(old_url) as fp:
	    data = fp.read()
	    html = data.decode("utf8")

	soup = BeautifulSoup(html, 'html.parser')

	# The loop treats the <div class="row"> elements as repeating triples:
	# a title row, an info row, then a rank row that closes the game record.
	rows = soup.find_all('div', attrs={'class': 'row'})
	mode = 'title'
	games = []
	current_game = {}
	for i, row in enumerate(rows):
	    if mode == 'title':
	        mode = 'info'
	        title = row.find_all('h2')[1].text.strip().split('\n')[0]
	        current_game['title'] = title
	    elif mode == 'info':
	        mode = 'rank'
	        entries = row.find_all('td')
	        current_game['score'] = float(entries[0].text.strip())
	        current_game['rating_count'] = int(entries[1].text.strip())
	        current_game['stdev'] = float(entries[2].text.strip())
	        current_game['is_parser'] = int('Parser' in row.text)
	        if 'Two hours' in row.text:
	            current_game['time'] = 120
	        elif 'An hour and a half' in row.text:
	            current_game['time'] = 90
	        elif 'One hour' in row.text:
	            current_game['time'] = 60
	        elif 'Longer than two hours' in row.text:
	            current_game['time'] = 150
	        elif 'Half an hour' in row.text:
	            current_game['time'] = 30
	        elif '15 minutes or less' in row.text:
	            current_game['time'] = 15
	        else:
	            current_game['time'] = 0
	        # Use the first link in the row that mentions ifdb.
	        refs = row.find_all('a')
	        for ref in refs:
	            if 'ifdb' in ref['href']:
	                ifdb_id = ref['href'].split('=')[1]
	                current_game['ifdb_id'] = ifdb_id
	                print(ifdb_id)
	                rating, count = ifdb.get_ratings(ifdb_id, end_date)
	                time.sleep(0.5)
	                current_game['ifdb_rating'] = rating
	                current_game['ifdb_rating_count'] = count
	                system = ifdb.get_system(ifdb_id)
	                current_game['system'] = system
	                break
	    elif mode == 'rank':
	        # Nothing is read from the rank row; it only ends the current record.
	        print(current_game)
	        mode = 'title'
	        games.append(current_game)
	        current_game = {}

	import pandas as pd
	df = pd.DataFrame(games)
	df.to_csv('data_2020.tsv', sep='\t', index=None)
	# Adds an 'ifdb_rank' column to data_2021.tsv, using each title's 1-based
	# position in the list returned by ifdb.get_rankings.
	# (Loop body indentation restored; this copy had it flattened.)
	import pandas

	import ifdb

	filled_data = pandas.read_csv('data_2021.tsv', sep='\t', index_col=None)
	new_rows = []

	games = ifdb.get_rankings('IFComp 2021')
	game_ranks = {x: i+1 for i, x in enumerate(games)}
	# The data file uses the short title for this game; alias it to the full one.
	game_ranks['Codex Sadistica'] = game_ranks['Codex Sadistica: A Heavy-Metal Minigame']

	for i, row in filled_data.iterrows():
	    new_row = row.copy()
	    # A title missing from game_ranks raises KeyError here.
	    ifdb_rank = game_ranks[new_row['title']]
	    new_row['ifdb_rank'] = ifdb_rank
	    new_rows.append(new_row)

	df = pandas.DataFrame(new_rows)
	df.to_csv('data_2021.tsv', sep='\t', index=None)
	# Updates the IFDB ratings for current ifcomp games.
	# (Nested indentation restored: this copy had every statement flattened to
	# one level, which Python rejects.)
	import datetime
	import time

	import pandas

	import ifdb

	filled_data = pandas.read_csv('data_2021.tsv', sep='\t', index_col=None)
	new_rows = []

	# Cut-off handed to ifdb.get_ratings: ratings dated after this day are ignored.
	end_date = datetime.datetime(2021, 11, 15)

	for i, row in filled_data.iterrows():
	    new_row = row.copy()
	    title = row['title']
	    print(i, title)
	    ifdb_id = row['ifdb_id']
	    print(ifdb_id)
	    print('old ratings: {0} {1}'.format(row['ifdb_rating'], row['ifdb_rating_count']))
	    try:
	        rating, count = ifdb.get_ratings(ifdb_id, end_date)
	        # Only accept the new numbers if the rating count did not shrink.
	        if count >= row['ifdb_rating_count']:
	            new_row['ifdb_rating'] = rating
	            new_row['ifdb_rating_count'] = count
	            print('new ratings: {0} {1}'.format(rating, count))
	            time.sleep(0.5)
	    except:
	        # Best effort: on any failure the stored values are kept unchanged.
	        pass
	    new_rows.append(new_row)

	df = pandas.DataFrame(new_rows)
	df.to_csv('data_2021.tsv', sep='\t', index=None)