Skip to content

Instantly share code, notes, and snippets.

@nullableVoidPtr
Last active August 24, 2020 06:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nullableVoidPtr/fc19a11da962fde98895ac4582bcc715 to your computer and use it in GitHub Desktop.
Save nullableVoidPtr/fc19a11da962fde98895ac4582bcc715 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import asyncio
import aiohttp
import aiofiles
from pathlib import Path
import datetime
import os
import time
from sys import argv
from bs4 import BeautifulSoup, SoupStrainer
# Root of the AtCoder site; the hrefs scraped from its pages are appended to it.
BASE_URL = "https://atcoder.jp"

# Maps a submission's language name (the text before any "(...)" suffix on the
# submission page) to the file extension used when the source is saved.
# Each key appears once: a dict literal silently keeps only the last value of
# a repeated key, so duplicates only hide future typos.
extensions = {
    "C": ".c",
    "C++": ".cpp",
    "C++14": ".cpp",
    "Java": ".java",
    "Python": ".py",
    "Python3": ".py",
    "Bash": ".sh",
    "bc": ".bc",
    "Awk": ".awk",
    "C#": ".cs",
    "Clojure": ".clj",
    "Crystal": ".cr",
    "D": ".d",
    "Dart": ".dart",
    "dc": ".dc",
    "Erlang": ".erl",
    "Elixir": ".ex",
    "F#": ".fs",
    "Forth": ".fs",
    "Fortran": ".f08",
    "Go": ".go",
    "Haskell": ".hs",
    "Haxe": ".hx",
    "JavaScript": ".js",
    "Julia": ".jl",
    "Kotlin": ".kt",
    "Lua": ".lua",
    "Dash": ".sh",
    "Nim": ".nim",
    "Objective-C": ".m",
    "Common Lisp": ".lisp",
    "OCaml": ".ml",
    "Octave": ".m",
    "Pascal": ".pas",
    "Perl": ".pl",
    "Raku": ".p6",
    "PHP": ".php",
    "Prolog": ".pl",
    "PyPy2": ".py",
    "PyPy3": ".py",
    "Racket": ".rkt",
    "Ruby": ".rb",
    "Rust": ".rs",
    "Scala": ".scala",
    "Scheme": ".scm",
    "Standard ML": ".sml",
    "Swift": ".swift",
    "Text": ".txt",
    "TypeScript": ".ts",
    "Visual Basic": ".vb",
    "Zsh": ".sh",
    "COBOL - Fixed": ".cob",
    "COBOL - Free": ".cob",
    "Brainfuck": ".bf",
    "Ada2012": ".adb",
    "Unlambda": ".unl",
    "Cython": ".pyx",
    "Sed": ".sed",
    "Vim": ".vim",
}
async def fetch(session, url):
    """GET *url* through *session* and return the response body as text.

    *session* must provide ``get(url)`` returning an async context manager
    whose value has an awaitable ``text()`` method (as an aiohttp client
    session does).
    """
    async with session.get(url) as resp:
        body = await resp.text()
    return body
async def getContests(session):
    """Yield the absolute URL of every contest linked from the contest archive.

    The first archive page is parsed in full, both to collect its contest
    links and to read the number of the last page from the pagination bar.
    The remaining pages are then fetched one at a time, parsing only their
    ``<td>`` elements.

    If the first page has no pagination bar, only its own contests are
    yielded instead of failing with ``AttributeError`` on ``None`` (the same
    guard ``getSubmissions`` applies).
    """
    archive_url = f"{BASE_URL}/contests/archive"

    first_page = BeautifulSoup(
        await fetch(session, f"{archive_url}?page=1"),
        "html.parser",
    )
    for contest in first_page.select(
        "table > tbody > tr > td:nth-child(2) > a:nth-child(2)"
    ):
        yield BASE_URL + contest["href"]

    # The last pagination entry's text is used as the total page count.
    last_page = first_page.select_one("ul.pagination > li:last-child > a")
    if last_page is None:
        return

    for page_no in range(2, int(last_page.text) + 1):
        page = BeautifulSoup(
            await fetch(session, f"{archive_url}?page={page_no}"),
            "html.parser",
            # Only table cells are needed from the follow-up pages.
            parse_only=SoupStrainer("td"),
        )
        for contest in page.select("a:nth-child(2)"):
            yield BASE_URL + contest["href"]
async def getSubmissions(session, contest):
    """Yield absolute URLs of the submissions made by ``argv[1]`` in *contest*.

    *contest* is the contest's absolute URL. Page 1 of the filtered
    submission list is always fetched; when it carries a pagination bar,
    pages 2..last are fetched as well, parsing only their ``<tr>`` elements.
    """
    listing_url = f"{contest}/submissions?f.User={argv[1]}"

    first_page = BeautifulSoup(
        await fetch(session, f"{listing_url}&page=1"), "html.parser"
    )
    for link in first_page.select("table > tbody > tr > td:last-child > a"):
        yield BASE_URL + link["href"]

    last_page = first_page.select_one("ul.pagination > li:last-child > a")
    if not last_page:
        # No pagination bar: page 1 was the only page.
        return

    page_count = int(last_page.text)
    for page_no in range(2, page_count + 1):
        rows = BeautifulSoup(
            await fetch(session, f"{listing_url}&page={page_no}"),
            "html.parser",
            parse_only=SoupStrainer("tr"),
        )
        for link in rows.select("td:last-child > a"):
            yield BASE_URL + link["href"]
async def processSubmission(session, submission):
    """Download one submission page and save its source code to disk.

    *submission* is the absolute URL of the submission's detail page. The
    code is written to
    ``./<argv[1]>/<contest>/<task>/<id> - <status><ext>``, creating parent
    directories as needed. ``<ext>`` comes from ``extensions``; for a
    language missing from that table the full language string, preceded by
    a space, is used instead. The path is printed, and the file's access and
    modification times are set to the submission time shown on the page.
    """
    soup = BeautifulSoup(await fetch(session, submission), "html.parser")

    # The <title> is split at the first " - ": the left part ends in
    # " #<id>", the right part is used as the contest directory name.
    heading, contest = soup.find("title").get_text(strip=True).split(" - ", 1)
    submission_id = heading.split(" #", 1)[-1]

    # First seven cells of the first table on the page; the underscored
    # ones are not needed for saving the file.
    when, task, _user, lang, _score, _size, status = [
        td.get_text(strip=True) for td in soup.find("table").find_all("td")[:7]
    ]

    # "%z" yields a timezone-aware datetime, so timestamp() honours the
    # offset printed on the page. Going through timetuple()/time.mktime()
    # would discard the offset and reinterpret the wall-clock time in the
    # local zone of the machine running the script.
    submitted_at = datetime.datetime.strptime(
        when, "%Y-%m-%d %H:%M:%S%z"
    ).timestamp()

    # Look up the language name without its "(...)" suffix.
    suffix = extensions.get(lang.split("(")[0].strip(), " " + lang)
    filename = Path(
        f"./{argv[1]}/{contest}/{task}/{submission_id} - {status}{suffix}"
    )
    filename.parent.mkdir(parents=True, exist_ok=True)
    print(filename)

    # Explicit UTF-8 so non-ASCII source text does not depend on the locale's
    # default encoding.
    async with aiofiles.open(filename, mode="w", encoding="utf-8") as f:
        await f.write(soup.find("pre", id="submission-code").get_text(strip=True))

    # Set the times only after the file is closed, so the final flush cannot
    # overwrite the modification time.
    os.utime(filename, (submitted_at, submitted_at))
async def main():
    """Download every submission of the target user across all archived contests.

    Contest and submission listings are walked one after the other to build
    one ``processSubmission`` coroutine per submission; the coroutines are
    then awaited together with ``asyncio.gather`` on a single shared HTTP
    session.
    """
    async with aiohttp.ClientSession() as session:
        pending = []
        async for contest in getContests(session):
            async for submission in getSubmissions(session, contest):
                pending.append(processSubmission(session, submission))
        await asyncio.gather(*pending)
if __name__ == "__main__":
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() with no
    # running loop is deprecated in current Python versions.
    asyncio.run(main())
#!/usr/bin/python3
import re
from pathlib import Path
import datetime
import requests
import os
import time
from sys import argv
from bs4 import BeautifulSoup
# Root of the AtCoder site; the hrefs scraped from its pages are appended to it.
BASE_URL = "https://atcoder.jp"

# Maps a submission's language name (the text before any "(...)" suffix on the
# submission page) to the file extension used when the source is saved.
# Each key appears once: a dict literal silently keeps only the last value of
# a repeated key, so duplicates only hide future typos.
extensions = {
    "C": ".c",
    "C++": ".cpp",
    "C++14": ".cpp",
    "Java": ".java",
    "Python": ".py",
    "Python3": ".py",
    "Bash": ".sh",
    "bc": ".bc",
    "Awk": ".awk",
    "C#": ".cs",
    "Clojure": ".clj",
    "Crystal": ".cr",
    "D": ".d",
    "Dart": ".dart",
    "dc": ".dc",
    "Erlang": ".erl",
    "Elixir": ".ex",
    "F#": ".fs",
    "Forth": ".fs",
    "Fortran": ".f08",
    "Go": ".go",
    "Haskell": ".hs",
    "Haxe": ".hx",
    "JavaScript": ".js",
    "Julia": ".jl",
    "Kotlin": ".kt",
    "Lua": ".lua",
    "Dash": ".sh",
    "Nim": ".nim",
    "Objective-C": ".m",
    "Common Lisp": ".lisp",
    "OCaml": ".ml",
    "Octave": ".m",
    "Pascal": ".pas",
    "Perl": ".pl",
    "Raku": ".p6",
    "PHP": ".php",
    "Prolog": ".pl",
    "PyPy2": ".py",
    "PyPy3": ".py",
    "Racket": ".rkt",
    "Ruby": ".rb",
    "Rust": ".rs",
    "Scala": ".scala",
    "Scheme": ".scm",
    "Standard ML": ".sml",
    "Swift": ".swift",
    "Text": ".txt",
    "TypeScript": ".ts",
    "Visual Basic": ".vb",
    "Zsh": ".sh",
    "COBOL - Fixed": ".cob",
    "COBOL - Free": ".cob",
    "Brainfuck": ".bf",
    "Ada2012": ".adb",
    "Unlambda": ".unl",
    "Cython": ".pyx",
    "Sed": ".sed",
    "Vim": ".vim",
}
# Synchronous downloader: walks the contest history of the user named in
# argv[1], then every page of that user's submissions in each contest, and
# saves each submission's source under ./<user>/<contest>/<task>/.

# href pattern of a history row's link to the user's submissions in that
# contest. A raw string, so "\?" and "\." reach the regex engine as written
# instead of being invalid string escapes.
_SUBMISSIONS_HREF = re.compile(r"^/contests/.+/submissions\?f\.User=")

s = requests.session()

history = BeautifulSoup(
    s.get(f"{BASE_URL}/users/{argv[1]}/history").text, "html.parser"
)
for row in history.select("table#history > tbody > tr"):
    anchor = row.find("a", attrs={"href": _SUBMISSIONS_HREF})
    if anchor is None:
        # Row without a submissions link: nothing to download for it.
        continue
    contest_link = BASE_URL + anchor["href"]

    # The last pagination entry's text is used as the page count. When no
    # pagination bar is found, fall back to a single page rather than
    # failing on None.
    last_page = BeautifulSoup(s.get(contest_link).text, "html.parser").select_one(
        "div.text-center:last-child > ul.pagination > li:last-child > a"
    )
    page_count = int(last_page.get_text(strip=True)) if last_page else 1

    for pageNo in range(1, page_count + 1):
        listing = BeautifulSoup(
            s.get(f"{contest_link}&page={pageNo}").text, "html.parser"
        )
        for detail in listing.select("table > tbody > tr > td:last-child > a"):
            submission_link = BASE_URL + detail["href"]
            soup = BeautifulSoup(s.get(submission_link).text, "html.parser")

            # The <title> is split at the first " - ": the left part ends in
            # " #<id>", the right part is used as the contest directory name.
            submission, contest = (
                soup.find("title").get_text(strip=True).split(" - ", 1)
            )
            submission = submission.split(" #", 1)[-1]

            # First seven cells of the first table on the page.
            when, task, user, lang, score, size, status = [
                td.get_text(strip=True)
                for td in soup.find("table").find_all("td")[:7]
            ]

            # "%z" yields a timezone-aware datetime, so timestamp() honours
            # the offset printed on the page. timetuple()/time.mktime() would
            # drop the offset and reinterpret the time in the local zone.
            when = datetime.datetime.strptime(
                when, "%Y-%m-%d %H:%M:%S%z"
            ).timestamp()

            # Look up the language name without its "(...)" suffix; a
            # language missing from the table falls back to " <language>".
            suffix = extensions.get(lang.split("(")[0].strip(), " " + lang)
            filename = Path(
                f"./{argv[1]}/{contest}/{task}/{submission} - {status}{suffix}"
            )
            filename.parent.mkdir(parents=True, exist_ok=True)
            print(filename)

            # Explicit UTF-8 so non-ASCII source text does not depend on the
            # locale's default encoding.
            with open(filename, "w", encoding="utf-8") as f:
                f.write(soup.find("pre", id="submission-code").get_text(strip=True))

            # Set the times after the file is closed so the final flush
            # cannot overwrite the modification time.
            os.utime(filename, (when, when))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment