Skip to content

Instantly share code, notes, and snippets.

@rikumi

rikumi/scraper.js

Last active Jan 10, 2018
Embed
What would you like to do?
微机原理与接口技术 答案抓取器
/*
微机原理与接口技术 答案抓取器
用法:
1. 使用 Chrome 浏览器;
2. 登录进入作业列表页面,要求所有作业都为已完成、且可查看答案的状态;
3. 地址栏输入地址进入框架内部:223.3.65.249/student/Home_Management/Show_All_Homework_CN.aspx;
4. 打开 F12 控制台,复制粘贴执行下列代码,待显示结果后,保存页面为 PDF。
*/
/**
 * Reset the page for output: empty the current <body>, then write the
 * <style> tag that formats the <h1>/<h2> headings used by the scraped report.
 */
let initBody = () => {
  const headingStyles = '<style>h1,h2{display:block;margin:0;padding:15px 20px;font-size:24px;border-bottom:1px solid #f0f0f0}h2{font-weight:normal;font-size:18px}</style>'
  const page = document
  // Clear first, then write, so the stylesheet is the first thing emitted.
  page.body.innerHTML = ''
  page.write(headingStyles)
}
// Main script: runs only when the current URL is the homework list page.
// It fetches every homework linked from the page, then the marked-answer page
// of every question type of each homework, and finally replaces the document
// with the concatenated result.
if (/Show_All_Homework/i.test(location.href)) {
  const RESULT_PAGE = 'http://223.3.65.249/student/Question_Bank_Management/Show_Mark_Question_CN.aspx'

  // Return capture group 1 of `pattern` in `text`, or throw a descriptive
  // Error instead of the bare TypeError that `exec(...)[1]` gives on a miss.
  const captureFirst = (pattern, text, what) => {
    const match = pattern.exec(text)
    if (!match) {
      throw new Error('scraper: could not find ' + what)
    }
    return match[1]
  }

  // GET `url` with same-origin credentials and resolve to the response text.
  const fetchText = url => fetch(url, { credentials: 'same-origin' }).then(response => response.text())

  // Fetch the marked page for one question type of one homework. Resolves to
  // an <h2> holding the type title followed by the contents of that page's
  // <body> element.
  const scrapeType = (typeAnchor, hno, schoolnum) => {
    const typeTitle = typeAnchor.replace(/<.*?>/g, '').trim()
    // The `type` query parameter is the number in the anchor's id minus one.
    const typeId = parseInt(captureFirst(/id="coursest(\d+)"/, typeAnchor, 'question type id'), 10) - 1
    const resultLink = `${RESULT_PAGE}?type=${typeId}&hno=${hno}&sno=${schoolnum}`
    return fetchText(resultLink).then(html =>
      '<h2>' + typeTitle + '</h2>' + captureFirst(/<body>([\s\S]*?)<\/body>/im, html, '<body> of ' + resultLink)
    )
  }

  // Fetch one homework page and all of its question types. Resolves to an
  // <h1> holding the homework title followed by every type section.
  const scrapeHomework = (link, schoolnum) => {
    const homeworkTitle = link.innerText
    const homeworkLink = link.href
    const hno = captureFirst(/hno=(\d+)/, homeworkLink, 'hno in ' + homeworkLink)
    return fetchText(homeworkLink).then(html => {
      // String.prototype.match returns null (not []) when nothing matches;
      // a homework without question types then yields only its heading.
      // NOTE: `.` does not match newlines here (the `m` flag only changes
      // ^ and $), so only anchors written on a single line are found.
      const types = html.match(/<a class="courses_0 courses_1".*?\/a>/mg) || []
      return Promise.all(types.map(type => scrapeType(type, hno, schoolnum)))
    }).then(sections => '<h1>' + homeworkTitle + '</h1>' + sections.join(''))
  }

  // Anchors are collected before initBody() empties the page.
  const links = [].slice.call(document.getElementsByTagName('a'))
  // A missing `sno` cookie must reach the alert below rather than throw.
  const snoMatch = /sno=(\d+)/.exec(document.cookie)
  const schoolnum = snoMatch ? snoMatch[1] : ''
  if (schoolnum.length !== 8) {
    alert('抓取学号失败,你需要重新登录')
  } else {
    initBody()
    document.write('<h1>抓取中…</h1>')
    // Skip anchors that are not homework links (no `hno` parameter) instead
    // of letting a single unrelated link abort the whole run.
    const homeworkLinks = links.filter(link => /hno=\d+/.test(link.href))
    Promise.all(homeworkLinks.map(link => scrapeHomework(link, schoolnum)))
      .then(pages => {
        // Strip iframes, the per-question result tables, the answer-status
        // images and the `checked` state of inputs from the collected HTML.
        // NOTE: as above, `.` stops at newlines, so these patterns only
        // match tags that sit on a single line.
        let k = pages.join('').replace(/<iframe.*?\/iframe>/img, '')
        k = k.replace(/<table id="DataList\d+_ctl\d+_results".*?\/table>/img, '')
        k = k.replace(/<img id="DataList\d+_ctl\d+_answerstatus".*?>/img, '')
        k = k.replace(/(<input.*?) checked(="checked")?(.*?>)/img, '$1$3')
        initBody()
        document.write(k)
      })
      .catch(err => {
        // Without this handler a failed request or an unparsable page would
        // leave the "抓取中…" placeholder on screen forever.
        console.error(err)
        initBody()
        document.write('<h1>抓取失败,请打开控制台查看错误信息</h1>')
      })
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.