Created
May 6, 2022 23:10
-
-
Save adamant-pwn/74d86762849aefabc5f892e7b3b3e969 to your computer and use it in GitHub Desktop.
Helper functions that simplify scraping Codeforces (CF)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from seleniumrequests import Chrome | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.common.keys import Keys | |
import selenium.webdriver.support.ui as ui | |
import getpass | |
import time | |
def init():
    """Start the shared headless Chrome driver and open the Codeforces home page.

    Run this once before calling any other function in this file: the other
    functions rely on the module-level ``driver`` that is created here.
    """
    global driver
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = Chrome(options=chrome_options)
    driver.get('https://codeforces.com')
def check_login():
    """Report whether the browser session is currently logged in.

    Opens the login page and looks at where the browser ends up: staying on
    the login page means nobody is logged in, while being redirected away
    from it means a session is active. The status is printed, and the last
    path segment of the redirect target is shown as the logged-in name.

    Returns:
        True when logged in, False otherwise.
    """
    login_url = 'https://codeforces.com/enter'
    driver.get(login_url)
    if driver.current_url == login_url:
        print('logged out')
        return False
    print('logged in as ' + driver.current_url.split('/')[-1])
    return True
def try_login(handle, password):
    """Try to log in through the login form and print the outcome.

    Args:
        handle: value typed into the ``handleOrEmail`` field.
        password: value typed into the ``password`` field; the form is
            submitted by sending ENTER right after it.

    If the login page redirects away immediately, a session is already
    active and nothing is submitted.
    """
    login_url = 'https://codeforces.com/enter'
    driver.get(login_url)
    if driver.current_url != login_url:
        # redirects to https://codeforces.com/profile
        print('already logged in')
        return

    driver.find_element('id', 'handleOrEmail').send_keys(handle)
    driver.find_element('id', 'password').send_keys(password + Keys.ENTER)

    def login_settled(drv):
        # successful login redirects to a different page,
        # unsuccessful brings up an error message
        return (drv.current_url != login_url
                or drv.find_element('css selector', '.error.for__password'))

    # the wait is created with a timeout of 10 (seconds)
    ui.WebDriverWait(driver, 10).until(login_settled)

    if driver.current_url == login_url:
        print('unsuccessful login attempt')
    else:
        print('successfully logged in as ' + handle)
def request_login():
    """Prompt for a handle and a password, then attempt to log in with them.

    The password is read with ``getpass`` so it is not echoed back.
    """
    handle = input('handle: ')
    password = getpass.getpass('password: ')
    try_login(handle, password)
def logout():
    """Log out of the current session, if there is one.

    The login status is checked (and printed) first; when a session is
    active the 'Logout' link is clicked and the status is checked again so
    the result of the logout is printed.
    """
    if not check_login():
        return
    print('logging out...')
    driver.find_element('link text', 'Logout').click()
    check_login()
def submitSource(sId):
    """Request the source of submission ``sId`` from the submitSource endpoint.

    The CSRF token is read from the currently loaded page via JavaScript and
    sent along with the submission id in a POST request.

    Returns:
        The response object produced by ``driver.request``.
    """
    csrf_token = driver.execute_script("return Codeforces.getCsrfToken()")
    payload = {'submissionId': str(sId), 'csrf_token': csrf_token}
    return driver.request(
        'POST', 'https://codeforces.com/data/submitSource', data=payload)
def cf_get(method, params):
    """Send a GET request to the Codeforces API.

    Args:
        method: API method name appended to ``https://codeforces.com/api/``.
        params: query parameters passed through to the request.

    Returns:
        The response object produced by ``driver.request``.
    """
    api_url = 'https://codeforces.com/api/' + method
    return driver.request('get', api_url, params=params)
def ac_subs(user):
    """List all submissions on which ``user`` got AC.

    Args:
        user: handle passed to the ``user.status`` API method.

    Returns:
        A list of ``(contestId, submissionId)`` tuples, one per submission
        whose verdict is ``'OK'``. ``contestId`` is None when the submission
        record carries no ``'contestId'`` field.
    """
    response = cf_get('user.status', {'handle': user})
    # 'verdict' can be missing from a submission record (the API documents
    # it as optional), so look it up with .get() instead of indexing to
    # avoid a KeyError; such submissions are simply not counted as accepted.
    return [
        (sub.get('contestId'), sub['id'])
        for sub in response.json()['result']
        if sub.get('verdict') == 'OK'
    ]
def scrape_ac(user):
    """Collect the source code of every accepted submission of ``user``.

    If the session is not logged in, a hint is printed and credentials are
    requested before the sources are fetched.

    Returns:
        A dictionary mapping ``(contestId, submissionId)`` to source code.
        Submissions whose source request yields a falsy response are left out.
    """
    accepted = ac_subs(user)
    if not check_login():
        print('it is recommended to be logged in to get restricted submissions')
        print('you can skip authentication process if you don\'t feel like it')
        request_login()

    sources = {}
    for contest_id, submission_id in accepted:
        response = submitSource(submission_id)
        if response:
            sources[(contest_id, submission_id)] = response.json()['source']
        # recommended wait so that submitSource keeps working
        time.sleep(0.3)
    return sources
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment