Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Fetch a TopCoder problem statement together with the test cases and expected results of its system test, and save this data to files.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Kosei Moriyama (contact address missing in this copy of the file)
import BeautifulSoup
import html2text
import urllib, urllib2, cookielib, re, os, sys
from optparse import OptionParser
# Default TopCoder credentials; they are the defaults of the -u / -p
# command-line options defined in the main section below.
# NOTE(review): avoid committing a real password in this file.
tc_user_name = 'write your username of topcoder account here, or input it via command-line'
tc_password = 'write your password of topcoder account here, or input it via command-line'
# NOTE(review): the URL constants below are empty or partial strings in this
# copy of the file -- the scheme/host/path portions appear to have been lost.
# Restore them from the original script before running.
# Presumably the URL prefix of the problem search page -- confirm.
prefix_search_from = ''
# URL prefix that a problem id is appended to (see getProblemStatementPage).
prefix_problem_statement = ''
# URL template; getRoundResultPage substitutes the <rd> and <pm> placeholders.
url_round_result_page = '<rd>&pm=<pm>'
# Presumably the login endpoint used by connection() -- confirm.
url_tc_secure = ''
# URL template; getTopSubmissionPage substitutes <cr>, <rd> and <pm>.
url_problem_solution = '<cr>&rd=<rd>&pm=<pm>'
# Placeholder for the shared URL opener; the main section rebinds it to the
# value returned by connection().
opener = ''
def connection(user_name, password):
    """Log in to TopCoder and return a cookie-aware URL opener.

    Args:
        user_name: TopCoder account name.
        password: Password of that account.

    Returns:
        The ``urllib2`` opener carrying the session cookies, or ``False``
        when the login response contains 'Username or password incorrect'.
    """
    # NOTE(review): this "next page" URL is an empty string in the source;
    # its original value appears to have been lost -- confirm.
    url = ''
    cj = cookielib.CookieJar()
    # This local deliberately shadows the module-level ``opener``; the caller
    # is expected to store the returned value.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible MSIE 6.0 Windows NT 5.1)')]
    params = {
        'module': 'Login',
        'nextpage': url,
        'username': user_name,
        'password': password,
    }
    param = urllib.urlencode(params)
    # NOTE(review): the next two statements were truncated in the source
    # ("res =, param)" and "res_str = str("). They are reconstructed as a POST
    # to url_tc_secure followed by reading the body -- confirm against the
    # original script.
    res = opener.open(url_tc_secure, param)
    res_str = str(res.read())
    if 'Username or password incorrect' in res_str:
        return False
    return opener
def searchProblem(search_query):
    """Request the problem search page for ``search_query`` and return its HTML.

    Uses the module-level ``opener``, which must already hold a logged-in
    session (see ``connection``).
    """
    # NOTE(review): the request line was truncated in the source
    # ("res = + search_query)") and no return statement survived. Reconstructed
    # as a GET of prefix_search_from + query returning the body, because the
    # caller feeds the result to getProblemAndRoundId as HTML -- confirm.
    res = opener.open(prefix_search_from + search_query)
    return res.read()
def getProblemAndRoundId(html):
    """Extract round and problem ids from ProblemDetail links in ``html``.

    Args:
        html: HTML text of a search-result page.

    Returns:
        A list of ``{'round_id': ..., 'problem_id': ...}`` dicts (ids are
        strings), one per matching link, in document order. Empty when no
        link matches.
    """
    # Example of a matching tag:
    # <A HREF="/tc?module=ProblemDetail&rd=10767&pm=7968" class="statText">
    # "&(?:amp;)*" accepts both a raw "&" and the HTML-escaped "&amp;", the
    # same way getTopSubmissionId does.
    pattern = (r'"/tc\?module=ProblemDetail&(?:amp;)*rd=([0-9]+?)'
               r'&(?:amp;)*pm=([0-9]+?)"\sclass="statText"')
    # Build a real list: callers index the result (params[0]).
    return [{'round_id': round_id, 'problem_id': problem_id}
            for round_id, problem_id in re.findall(pattern, html, re.I)]
def getProblemParameters(search_query):
    """Search for ``search_query`` and return the round/problem ids found.

    Returns whatever ``getProblemAndRoundId`` extracts from the page fetched
    by ``searchProblem``.
    """
    search_page = searchProblem(search_query)
    return getProblemAndRoundId(search_page)
def getProblemStatementPage(problem_id):
    """Download the problem-statement page for ``problem_id`` and return its HTML.

    Uses the module-level ``opener``.
    """
    # NOTE(review): the return expression was truncated in the source
    # ("return + problem_id).read()"). Reconstructed from the commented-out
    # urllib2.urlopen(prefix_problem_statement + problem_id) variant that
    # preceded it -- confirm against the original script.
    return opener.open(prefix_problem_statement + problem_id).read()
def getRoundResultPage(problem_id, round_id):
    """Download the round-result page for a problem and return its HTML.

    Args:
        problem_id: Problem id as a string; replaces ``<pm>`` in the template.
        round_id: Round id as a string; replaces ``<rd>`` in the template.
    """
    url = url_round_result_page.replace('<pm>', problem_id).replace('<rd>', round_id)
    # NOTE(review): the request was truncated in the source ("res =") and no
    # return statement survived. Reconstructed as fetching ``url`` with the
    # module-level opener and returning the body, since getPages passes the
    # result to getTopSubmissionId as HTML -- confirm.
    res = opener.open(url)
    return res.read()
def getTopSubmissionId(html):
    """Return the coder ids (``cr`` values) of all solution links in ``html``.

    Args:
        html: HTML text of a round-result page.

    Returns:
        A list of id strings in document order; empty when nothing matches.
    """
    # Example of a matching tag:
    # <a href="/stat?c=problem_solution&amp;cr=14970299&amp;rd=14156&amp;pm=10880" class="statText">view</a>
    pattern = (r'href="/stat\?c=problem_solution&(?:amp;)*cr=([0-9]+?)'
               r'&(?:amp;)*rd=[0-9]+?&(?:amp;)*pm=[0-9]+?" class="statText"')
    return re.findall(pattern, html, re.I)
def getTopSubmissionPage(problem_id, round_id, top_submission_id):
    """Download the solution page of one submission and return its HTML.

    Args:
        problem_id: Problem id as a string; replaces ``<pm>`` in the template.
        round_id: Round id as a string; replaces ``<rd>`` in the template.
        top_submission_id: Coder id as a string; replaces ``<cr>``.
    """
    url = (url_problem_solution
           .replace('<pm>', problem_id)
           .replace('<rd>', round_id)
           .replace('<cr>', top_submission_id))
    # NOTE(review): the request was truncated in the source ("res =") and no
    # return statement survived. Reconstructed as fetching ``url`` with the
    # module-level opener and returning the body, since getPages stores the
    # result as the page later parsed by getSystemTestData -- confirm.
    res = opener.open(url)
    return res.read()
def getPages(search_query):
    """Fetch the statement page and a solution page for the searched problem.

    Only the first search hit and the first solution link on its round-result
    page are used.

    Args:
        search_query: Problem name to search for.

    Returns:
        A dict with the keys ``'problem_statement_page'`` and
        ``'top_submission_page'`` (HTML text), or ``False`` when the search
        yields no problem or the round-result page lists no solution link.
    """
    params = getProblemParameters(search_query)
    if not params:
        return False
    first_hit = params[0]
    problem_id = first_hit['problem_id']
    round_id = first_hit['round_id']

    top_html = getRoundResultPage(problem_id, round_id)
    top_ids = getTopSubmissionId(top_html)
    if not top_ids:
        # Without this guard top_ids[0] raised IndexError; report "not found"
        # the same way as an empty search result.
        return False

    return {
        'problem_statement_page': getProblemStatementPage(problem_id),
        'top_submission_page': getTopSubmissionPage(problem_id, round_id, top_ids[0]),
    }
def getProblemStatement(html):
    """Extract the title and the statement text from a problem-statement page.

    Args:
        html: HTML text of the problem-statement page.

    Returns:
        A dict with ``'title'`` (string) and ``'problem_statement'`` (the
        statement table converted to text by ``html2text``).
    """
    soup = BeautifulSoup.BeautifulSoup(html)
    title = soup.find('td', {'class': 'statTextBig'}).contents[0]
    problem_statement = soup.find('td', {'class': 'problemText'}).table
    # The first 28 characters of the heading are dropped -- presumably a fixed
    # lead-in before the problem name; TODO confirm against a live page.
    return {
        'title': str(title)[28:],
        'problem_statement': html2text.html2text(str(problem_statement)),
    }
def getSystemTestData(html):
    """Extract system-test inputs and expected outputs from a solution page.

    Every ``<tr valign="top">`` row contributes one input (child index 3) and
    one output (child index 7); commas, braces and double quotes are removed
    from both.

    Args:
        html: HTML text of the solution page.

    Returns:
        A dict with the parallel lists ``'system_input'`` and
        ``'system_output'``.
    """
    def _strip_punctuation(text):
        # Same effect as the chained .replace() calls, applied per character.
        for char in ',{}"':
            text = text.replace(char, '')
        return text

    soup = BeautifulSoup.BeautifulSoup(html)
    # Named to avoid shadowing the ``input`` builtin.
    inputs = []
    outputs = []
    for row in soup.findAll('tr', {'valign': 'top'}):
        inputs.append(_strip_punctuation(row.contents[3].contents[0]))
        outputs.append(_strip_punctuation(row.contents[7].contents[0]))
    return {'system_input': inputs, 'system_output': outputs}
def fetchProblemData(query):
    """Collect the statement and system-test data for the problem ``query``.

    Returns:
        A dict merging the results of ``getProblemStatement`` and
        ``getSystemTestData`` (keys ``'title'``, ``'problem_statement'``,
        ``'system_input'``, ``'system_output'``), or ``False`` when
        ``getPages`` finds nothing.
    """
    pages = getPages(query)
    if not pages:
        return False
    res = {}
    # dict.update replaces the manual iteritems() copy loops and behaves the
    # same on Python 2 and 3.
    res.update(getProblemStatement(pages['problem_statement_page']))
    res.update(getSystemTestData(pages['top_submission_page']))
    return res
def saveFiles(data):
    """Write the fetched problem data into a directory named after the problem.

    Creates ``<title>/problem_statement.txt``, ``<title>/system_input.txt``
    and ``<title>/system_output.txt``.

    Args:
        data: Dict with the keys ``'title'``, ``'problem_statement'`` (text),
            ``'system_input'`` and ``'system_output'`` (lists of strings), as
            produced by ``fetchProblemData``.
    """
    directory = data['title']
    if os.path.isdir(directory):
        # As in the visible source, an existing directory is only reported and
        # its files are then overwritten.
        # NOTE(review): the original may have stopped here instead -- confirm.
        print('The directory %s already exists.' % directory)
    else:
        # The source never created the directory, so open() failed on first use.
        os.makedirs(directory)

    # NOTE(review): the source opened the three files but the statements that
    # wrote and closed them are missing. The content is inferred from the file
    # names; joining the test-case lists one case per line is an assumption.
    contents = (
        ('problem_statement.txt', data['problem_statement']),
        ('system_input.txt', '\n'.join(data['system_input'])),
        ('system_output.txt', '\n'.join(data['system_output'])),
    )
    for file_name, text in contents:
        # ``with`` guarantees every handle is flushed and closed.
        with open(os.path.join(directory, file_name), 'w') as f:
            f.write(text)
def tcget(query):
    """Fetch the data of the problem ``query`` and save it to files.

    Returns:
        ``True`` when the problem was found and saved, ``False`` when
        ``fetchProblemData`` found nothing.
    """
    res = fetchProblemData(query)
    if not res:
        return False
    # NOTE(review): the visible source returned True without ever using
    # ``res``; the saveFiles call appears to have been lost and is restored
    # here -- confirm against the original script.
    saveFiles(res)
    return True
# ==============
# main
# ==============
# Guarded so that importing this module does not parse argv or log in.
if __name__ == '__main__':
    ## parse options
    usage = 'usage: %prog <PROBLEM NAME> [options]'
    description = 'Search the specified problem of TopCoder, and save the problem statement and it\'s input/output of system test as a file.'
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-u', '--user_name', dest='user_name', help='user name of topcoder account', default=tc_user_name)
    parser.add_option('-p', '--password', dest='password', help='password of topcoder account', default=tc_password)
    (options, args) = parser.parse_args()
    # NOTE(review): the body of the original "if argc <= 1:" check is missing
    # from the source; showing the help text and exiting is assumed. Testing
    # the parsed positional arguments also covers "options but no name".
    if not args:
        parser.print_help()
        sys.exit(1)
    # Take the name from the parsed positionals rather than argv[1], which is
    # an option flag when options are given first.
    problem_name = args[0]
    user_name = options.user_name
    password = options.password

    ## search problem and save to file
    # Rebinds the module-level opener used by the fetch helpers.
    opener = connection(user_name, password)
    if not opener:
        print('Username or password incorrect.')
        # Stop here: continuing would call .open() on False.
        sys.exit(1)
    if not tcget(problem_name):
        print("The problem '%s' did not find" % problem_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment