Last active
December 12, 2022 23:09
-
-
Save NovaSagittarii/8c997682db537e41f54df496ed017986 to your computer and use it in GitHub Desktop.
CodeForces Status submissions scraper (JS)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Download ("yoink") all accepted (AC) solutions from the Status page of a Codeforces contest, using JS to interact with the site.
// Submissions are saved as {problemChar}-{contestantName}-{submissionId}.{language-suffix}
/** HOW TO USE
 * 1. Go to the status page - https://codeforces.com/group/----/contest/----/status
 * 2. Open the JS console (three dots in top right -> [More Tools] -> [Developer Tools] -> [Console])
 * 3. Paste the code snippet into the JS console
 * 4. [ALLOW] multiple file downloads
 * 5. Hold [Enter] on your keyboard so each file is saved immediately once the prompt shows up (the filename should be highlighted); there are likely to be many submissions
 * NOTE: To abort, you must refresh the page in between download prompts, since the download prompt has the highest priority!
 */
// toss result into variable so you can interact with data | |
let __output__ = []; | |
(async function(){ | |
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms)); // https://stackoverflow.com/a/39914235 // this is needed to slow down requests so we dont get bonked by ratelimit | |
const output = __output__; | |
const config = { | |
acceptedOnly: true, // only take AC submissions | |
delay: 1000, // delay between fetch requests (to avoid ratelimit) | |
limit: 3, // only take this many submissions before stopping | |
retry: 3, // make this many attempts to fetch again if it fails | |
writeToOutput: true, // append results to __output__ array | |
}; | |
/**
 * Fetch `url` and resolve with the response body as text.
 *
 * Waits `config.delay` ms before every attempt (to stay under the rate limit)
 * and makes at most `config.retry` attempts. A network error or a non-2xx HTTP
 * status counts as a failed attempt and is reported with console.warn.
 *
 * @param {string} url - address to request
 * @returns {Promise<string>} the response body
 * @throws {Error} once every attempt has failed; the last failure is attached as `cause`
 */
const fetchText = async url => {
  let lastError;
  for(let attempt = 0; attempt < config.retry; attempt ++){
    try {
      await sleep(config.delay || 0);
      const response = await fetch(url);
      // fetch() only rejects on network failure; an HTTP error status (e.g. a
      // rate-limit response) still resolves, so it must be checked explicitly
      if(!response.ok) throw new Error(`HTTP ${response.status} for ${url}`);
      return await response.text();
    }catch(err){
      lastError = err;
      console.warn(err);
    }
  }
  throw new Error(`failed to fetch ${url}`, { cause: lastError });
};
if((document.getElementById("verdictName").value === "OK") !== config.acceptedOnly){ | |
alert("Applying VERDICT=ACCEPTED filter to submissions, this will reload the page. You will need to rerun after the page finishes reloading"); | |
document.getElementById("verdictName").value = "OK" | |
document.querySelector("input[value='Apply']").click() | |
return; | |
} | |
// Highest page number found in the pagination of the current status page
// (1 when no page-index elements are present).
const finalPage = Math.max(1, ...[...document.querySelectorAll("span.page-index")].map(x => +x.innerText));
// Number of submission links visited so far, including ones that were skipped.
let submissionsCount = 0;
// Upper bound used for progress output only; assumes a status page lists at
// most 50 submissions - TODO confirm
const expectedTotalSubmissions = finalPage*50;
const statusBaseUrl = location.href.replace(/status.*$/, 'status');
const htmlParser = new DOMParser();

// Offer `data` to the user as a download named `filename`
// (https://stackoverflow.com/a/30832210).
const saveAsFile = (data, filename, type) => {
  const blobUrl = URL.createObjectURL(new Blob([data], {type: type}));
  const link = document.createElement("a");
  link.href = blobUrl;
  link.download = filename;
  document.body.appendChild(link);
  link.click();
  // remove the temporary link and release the blob once the click was dispatched
  setTimeout(function() {
    document.body.removeChild(link);
    window.URL.revokeObjectURL(blobUrl);
  }, 0);
};

// Walk every status page, then every submission linked from it, until
// config.limit submissions have been visited.
for(let page = 1; page <= finalPage && submissionsCount < config.limit; page ++){
  const pageUrl = `${statusBaseUrl}/page/${page}?order=BY_JUDGED_DESC`;
  let pageHtml;
  try { pageHtml = await fetchText(pageUrl); } catch(e) {
    console.warn("skip page (retry limit exceeded)", pageUrl);
    continue;
  }
  const statusPage = htmlParser.parseFromString(pageHtml, "text/html");
  for(const sourceLink of statusPage.querySelectorAll("a.view-source,.id-cell>a[href]")){
    // checked at the top so that skipped submissions cannot overshoot the limit
    if(submissionsCount >= config.limit) break;
    ++ submissionsCount;
    let submissionHtml;
    try { submissionHtml = await fetchText(sourceLink.href); } catch (e) {
      // just go to next one if it breaks (due to retry limit exceeded)
      console.warn("skip submission (retry limit exceeded)", sourceLink.href);
      continue;
    }
    const submissionPage = htmlParser.parseFromString(submissionHtml, "text/html");
    // The second table row is read as the submission's info row: its first
    // cell holds the id, its first link the author, its second link the problem.
    const infoRow = submissionPage.querySelectorAll("tr")[1];
    const codeBlock = submissionPage.querySelector("pre.linenums");
    const rowLinks = infoRow ? infoRow.querySelectorAll("td>a") : [];
    if(!codeBlock || rowLinks.length === 0){
      // e.g. an error or login page was returned instead of the submission
      console.warn("skip submission (unexpected page layout)", sourceLink.href);
      continue;
    }
    const authorUsername = rowLinks[0].innerText.trim();
    const submissionId = infoRow.querySelector("td").innerText.trim();
    // keep only the letters of the problem label; "?" when the link is missing
    const problem = (rowLinks[1]?.innerText||"?").replace(/[^A-Za-z\?]/g, '');
    // the language suffix comes from the code block's "lang-xxx" class, if any
    const language = ([...codeBlock.classList].find(x => x.startsWith("lang-")) || "txt").replace("lang-", "");
    const submissionCode = codeBlock.innerText;
    const filename = `${problem}-${authorUsername}-${submissionId}.${language}`;
    if(config.writeToOutput) output.push([filename, submissionCode]);
    console.log(`(ETA: ~${((Math.min(config.limit, expectedTotalSubmissions) - submissionsCount)*config.delay/60e3).toFixed(1)} min)`, `${submissionsCount} of <=${expectedTotalSubmissions}`, problem, authorUsername, submissionId, language);
    saveAsFile(submissionCode, filename, "text/plain");
  }
}
// console.log(output); | |
console.log("Script finished running, have a nice day"); | |
return output; | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment