Download all your accepted code from openjudge.cn
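
Dependencies (inferred from the imports; lxml is the parser handed to BeautifulSoup): pip install requests beautifulsoup4 lxml
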
import json
import os

import bs4
import requests

email = input("> enter email: ")
passwd = input("> enter password: ")
userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
headers = {
    "Referer": "http://openjudge.cn/auth/login/",
    "User-Agent": userAgent,
    "accept": "application/json, text/javascript, */*; q=0.01",
    "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
    "cache-control": "no-cache",
    "content-type": "application/x-www-form-urlencoded",
    "pragma": "no-cache",
    "x-requested-with": "XMLHttpRequest",
}
postData = {
    "email": email,
    "password": passwd,
}
session = requests.session()


def login():
    """Log in to openjudge.cn; return True on success."""
    try:
        loginRes = session.post(
            "http://openjudge.cn/api/auth/login/", data=postData, headers=headers)
        if loginRes.status_code != 200 or json.loads(loginRes.content)['result'] != 'SUCCESS':
            print("Login failed")
            return False
    except Exception:
        print("Login failed")
        return False
    return True


def getUserID():
    """Return the logged-in user's profile URL, taken from the top menu."""
    page = session.get("http://openjudge.cn/", headers=headers)
    s = str(page.content, encoding="utf-8")  # decode the page into string s
    soup = bs4.BeautifulSoup(s, "lxml")
    user = soup.select("#userMenu > li:nth-child(2) > a")
    return user[0]['href']


def download(file_name, submission_url, retry=0):
    """Save the source code of one submission into ./res/."""
    if retry >= 2:
        print(f"Retried {retry} times, skipping now")
        return
    try:
        page = session.get(submission_url, headers=headers)
        s = str(page.content, encoding="utf-8")  # decode the page into string s
        soup = bs4.BeautifulSoup(s, "lxml")
        code = soup.select('pre')
        with open(os.path.join('./res/', file_name), 'w') as f:
            f.write(code[0].contents[0].strip())
        print(f"Finished processing {file_name} from {submission_url}")
    except Exception:
        print(f"Downloading {file_name} from {submission_url} failed, retrying")
        download(file_name, submission_url, retry + 1)


def processOnePage(url):
    """Download every accepted submission on one listing page.

    Returns the URL of the next page, or None when there is no next page.
    """
    try:
        print(f"Starting to crawl page {url}")
        page = session.get(url, headers=headers)
        s = str(page.content, encoding="utf-8")  # decode the page into string s
        soup = bs4.BeautifulSoup(s, "lxml")
        li = soup.select(
            "#main > div.user-group > div.recently-submit > table > tbody > tr")
        for cur in li:
            if cur.select(".result")[0].a.contents[0] == 'Accepted':
                # strip the prefix before the first colon and save as .py
                file_name = cur.select(".title")[0].a.contents[0]
                if ':' in file_name:
                    file_name = ':'.join(file_name.split(':')[1:]).strip() + '.py'
                submission_url = cur.select(".result")[0].a['href']
                download(file_name, submission_url)
        print(f"Finished crawling page {url}")
        # return the url of the next page if there is one
        nx = soup.select(
            "#main > div.user-group > div.recently-submit > p > span > a.nextprev")
        for button in nx:
            if button['rel'][0] == 'next':
                return button['href']
        return None
    except Exception:
        print(f"Crawling page {url} failed")
        return None


def solve():
    if not login():
        return
    os.makedirs('./res', exist_ok=True)  # download() writes into ./res/
    cur_url = getUserID()
    base = cur_url
    while True:
        cur_url = processOnePage(cur_url)
        if cur_url is None:
            break
        # the next-page link is relative, so prepend the profile URL
        cur_url = base + cur_url


if __name__ == "__main__":
    solve()
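
Usage: save the script (e.g. as openjudge_dl.py; the filename is arbitrary), run python openjudge_dl.py, and enter your openjudge.cn email and password at the prompts. Accepted submissions are written into ./res/, one file per problem.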