Skip to content

Instantly share code, notes, and snippets.

@NovaSagittarii
Last active December 12, 2022 23:09
Show Gist options
  • Save NovaSagittarii/8c997682db537e41f54df496ed017986 to your computer and use it in GitHub Desktop.
Save NovaSagittarii/8c997682db537e41f54df496ed017986 to your computer and use it in GitHub Desktop.
CodeForces Status submissions scraper (JS)
// Yoink all AC (accepted) solutions from the Status page of Codeforces, using JS to interact with the site
// Submissions are output as {problemChar}-{contestantName}-{submissionId}.{language-suffix}
/** HOW TO USE
* 1. Go to status page - https://codeforces.com/group/----/contest/----/status
* 2. Open JS console (three dots in top right -> [More Tools] -> [Developer Tools] -> [Console])
* 3. Paste code snippet into JS console
* 4. [ALLOW] multiple file downloads
* 5. Hold [Enter] on your keyboard so each file is saved immediately once its prompt shows up (the filename should be highlighted); there are likely to be many submissions
* NOTE: To abort, you must refresh the page in between download prompts, since the download prompt has the highest priority!
*/
// toss result into variable so you can interact with data
let __output__ = []; // global on purpose: lets you inspect the collected [filename, code] pairs from the console
/**
 * Walks every page of the current Codeforces status listing, fetches each
 * linked submission, records it in `__output__` (when enabled) and triggers a
 * browser download named `{problem}-{author}-{submissionId}.{language}`.
 *
 * Must be run from the browser console on a status page: it relies on
 * `document`, `location`, `fetch`, `DOMParser`, `alert` and `Blob` downloads.
 *
 * Resolves to the `__output__` array, or to `undefined` when the page had to be
 * reloaded to apply the verdict filter (rerun the script after the reload).
 */
(async function () {
  // Page size assumed by the original progress estimate; only used for the
  // "N of <=M" / ETA log line, never for control flow.
  const SUBMISSIONS_PER_PAGE = 50;

  const output = __output__;
  const config = {
    acceptedOnly: true, // only take AC submissions
    delay: 1000, // delay between fetch requests (to avoid ratelimit)
    limit: 3, // only take this many submissions before stopping
    retry: 3, // make this many attempts to fetch again if it fails
    writeToOutput: true, // append results to __output__ array
  };

  // https://stackoverflow.com/a/39914235 - needed to slow down requests so we don't hit the ratelimit
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  /**
   * Fetches `url` as text, waiting `config.delay` ms before every attempt and
   * making at most `config.retry` attempts.
   *
   * @param {string} url - address to fetch
   * @returns {Promise<string>} the response body
   * @throws {Error} when every attempt failed (network error or non-2xx status)
   */
  async function fetchText(url) {
    let lastError = "no attempts were made";
    for (let attempt = 1; attempt <= config.retry; attempt++) {
      try {
        await sleep(config.delay ?? 0);
        const response = await fetch(url);
        // fetch() only rejects on network failure; an error/ratelimit status
        // must be turned into a retry explicitly.
        if (!response.ok) {
          throw new Error(`HTTP ${response.status} ${response.statusText}`);
        }
        return await response.text();
      } catch (err) {
        lastError = err;
        console.warn(err);
      }
    }
    throw new Error(`failed to fetch ${url}: ${lastError}`);
  }

  /** Parses an HTML string into a detached document. */
  const parseHtml = (html) => new DOMParser().parseFromString(html, "text/html");

  /**
   * Pulls author, id, problem, language and source code out of a parsed
   * submission page.
   *
   * @param {Document} page - parsed submission page
   * @returns {?{filename: string, problem: string, authorUsername: string,
   *            submissionId: string, language: string, code: string}}
   *          `null` when the page does not have the expected structure
   */
  function extractSubmission(page) {
    const infoRow = page.querySelectorAll("tr")[1]; // row 0 is the table header
    const codeBlock = page.querySelector("pre.linenums");
    const authorLink = infoRow?.querySelector("td>a");
    const idCell = infoRow?.querySelector("td");
    if (!codeBlock || !authorLink || !idCell) {
      return null;
    }
    const authorUsername = authorLink.innerText.trim();
    const submissionId = idCell.innerText.trim();
    const problem = (infoRow.querySelectorAll("td>a")[1]?.innerText || "?").replace(/[^A-Za-z\?]/g, '');
    const languageClass = [...codeBlock.classList].find((name) => name.startsWith("lang-")) || "txt";
    const language = languageClass.replace("lang-", "");
    return {
      filename: `${problem}-${authorUsername}-${submissionId}.${language}`,
      problem,
      authorUsername,
      submissionId,
      language,
      code: codeBlock.innerText,
    };
  }

  /**
   * Triggers a browser download of `data` under `filename`.
   * https://stackoverflow.com/a/30832210
   */
  function downloadTextFile(data, filename, type) {
    const file = new Blob([data], { type });
    const objectUrl = URL.createObjectURL(file);
    const anchor = document.createElement("a");
    anchor.href = objectUrl;
    anchor.download = filename;
    document.body.appendChild(anchor);
    anchor.click();
    // Clean up on the next tick, after the click has been dispatched.
    setTimeout(() => {
      document.body.removeChild(anchor);
      URL.revokeObjectURL(objectUrl);
    }, 0);
  }

  // Only touch the verdict filter when AC-only mode is requested; with
  // acceptedOnly=false whatever filter the user picked is left alone.
  if (config.acceptedOnly) {
    const verdictFilter = document.getElementById("verdictName");
    if (!verdictFilter) {
      throw new Error("verdict filter (#verdictName) not found - run this script on a status page");
    }
    if (verdictFilter.value !== "OK") {
      alert("Applying VERDICT=ACCEPTED filter to submissions, this will reload the page. You will need to rerun after the page finishes reloading");
      verdictFilter.value = "OK";
      document.querySelector("input[value='Apply']").click();
      return;
    }
  }

  // Highest page number in the pagination widget (1 when there is none).
  const pageNumbers = [...document.querySelectorAll("span.page-index")]
    .map((el) => Number(el.innerText))
    .filter(Number.isFinite);
  const finalPage = Math.max(1, ...pageNumbers);
  const expectedTotalSubmissions = finalPage * SUBMISSIONS_PER_PAGE;
  const statusBaseUrl = location.href.replace(/status.*$/, 'status');

  let submissionsCount = 0; // submissions attempted so far (including skipped ones)
  const limitReached = () => submissionsCount >= config.limit;

  for (let pageNumber = 1; pageNumber <= finalPage && !limitReached(); pageNumber++) {
    const pageUrl = `${statusBaseUrl}/page/${pageNumber}?order=BY_JUDGED_DESC`;
    let statusHtml;
    try {
      statusHtml = await fetchText(pageUrl);
    } catch (err) {
      console.warn("skip page (retry limit exceeded)", pageUrl, err);
      continue;
    }
    const statusPage = parseHtml(statusHtml);

    for (const link of statusPage.querySelectorAll("a.view-source,.id-cell>a[href]")) {
      // Checked before each attempt so a skipped submission cannot push the
      // run past config.limit.
      if (limitReached()) {
        break;
      }
      ++submissionsCount;

      let submissionHtml;
      try {
        submissionHtml = await fetchText(link.href);
      } catch (err) {
        // just go to next one if it breaks (due to retry limit exceeded)
        console.warn("skip submission (retry limit exceeded)", link.href, err);
        continue;
      }

      const submission = extractSubmission(parseHtml(submissionHtml));
      if (!submission) {
        // e.g. a login/redirect page was returned instead of the source view
        console.warn("skip submission (unexpected page structure)", link.href);
        continue;
      }

      if (config.writeToOutput) {
        output.push([submission.filename, submission.code]);
      }
      const remaining = Math.min(config.limit, expectedTotalSubmissions) - submissionsCount;
      console.log(
        `(ETA: ~${(remaining * config.delay / 60e3).toFixed(1)} min)`,
        `${submissionsCount} of <=${expectedTotalSubmissions}`,
        submission.problem,
        submission.authorUsername,
        submission.submissionId,
        submission.language,
      );
      downloadTextFile(submission.code, submission.filename, "text/plain");
    }
  }

  console.log("Script finished running, have a nice day");
  return output;
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment