Skip to content

Instantly share code, notes, and snippets.

@alexcu
Last active March 31, 2017 01:09
Show Gist options
  • Save alexcu/9204ea1c7ccfa4e42d443ab35f3d406e to your computer and use it in GitHub Desktop.
Save alexcu/9204ea1c7ccfa4e42d443ab35f3d406e to your computer and use it in GitHub Desktop.
FutureLearn Scraper
/* Bookmarklet: injects a <script> element whose src is the gist's raw put.js URL, appended to document.body of the current page. Keep the expression on one line. */
(function(){var s=document.createElement('script');s.src='https://gist.githubusercontent.com/alexcu/9204ea1c7ccfa4e42d443ab35f3d406e/raw/b11a51f15a7656b8746000616435ae779a5d21ed/put.js';document.body.appendChild(s);})();
/* Bookmarklet: same loader, pointing at the gist's raw scrape.js URL. NOTE(review): the path segment after /raw/ looks like a pinned revision id -- confirm it is updated when the scripts change. */
(function(){var s=document.createElement('script');s.src='https://gist.githubusercontent.com/alexcu/9204ea1c7ccfa4e42d443ab35f3d406e/raw/b11a51f15a7656b8746000616435ae779a5d21ed/scrape.js';document.body.appendChild(s);})();
// FutureLearn "put" script: adds a file <input> to the top of the page and,
// when a markdown file is chosen, copies its Title / Short Description /
// Content sections into the matching form fields.

// Get elements - change this if FL changes its elements.
// Top-level `var` is deliberate: the scrape script declares the same global
// name, and `var` tolerates redeclaration if both are injected into one page.
var docElements = {
  title: document.querySelector("#article_title"),
  shortDesc: document.querySelector("#article_short_description"),
  content: document.querySelector("#article_body")
};
// MD file titles -- change this if the MD structure title changes
var mdTitles = {
  title: '# Title',
  shortDesc: '# Short Description',
  content: '# Content'
};
// Regexes from md titles. Capture group 1 is the section's value.
// - JavaScript has no \Z (EOF) escape: "[^\Z]" in a string literal is just
//   "[^Z]", which cut the content at the first capital "Z". "[\s\S]*" is the
//   portable "everything to the end of the input".
// - The heading may be followed by one line break or by a blank line, using
//   either LF or CRLF line endings.
var mdRegexes = (function () {
  var gap = "\\r?\\n(?:\\r?\\n)?";
  return {
    title: new RegExp(mdTitles.title + gap + "([^\\r\\n]*)"),
    shortDesc: new RegExp(mdTitles.shortDesc + gap + "([^\\r\\n]*)"),
    content: new RegExp(mdTitles.content + gap + "([\\s\\S]*)")
  };
})();
// Create file element with callback
var file = document.createElement("input");
file.type = "file";
file.id = file.name = "__fl_scraper_upload_file";
document.body.insertBefore(file, document.body.firstChild);
file.onchange = function () {
  var selected = this.files[0];
  // The dialog was cancelled or the selection was cleared: nothing to read.
  if (!selected) {
    return;
  }
  var reader = new FileReader();
  reader.onload = function () {
    var text = String(this.result);
    // Fill each field independently so one missing section or missing form
    // element does not abort the others with a TypeError.
    Object.keys(mdRegexes).forEach(function (key) {
      var match = text.match(mdRegexes[key]);
      var target = docElements[key];
      if (!match) {
        console.warn('FutureLearn put: section "' + mdTitles[key] + '" not found in file');
        return;
      }
      if (!target) {
        console.warn('FutureLearn put: no form element found for "' + key + '"');
        return;
      }
      target.value = match[1];
    });
  };
  reader.readAsText(selected);
};
/*! @source http://purl.eligrey.com/github/FileSaver.js/blob/master/FileSaver.js */
// Vendored, minified FileSaver.js. Defines the global `saveAs(blob, name, noAutoBom)`:
//   blob      - Blob to save
//   name      - download file name; falls back to blob.name, then "download"
//   noAutoBom - when falsy, a BOM (U+FEFF) is prepended to blobs whose type
//               declares a text/XML MIME type with charset=utf-8
// It also exports `saveAs` through module.exports or an AMD `define` when present.
// Do NOT re-wrap these lines mid-statement: a line break directly after a
// `return` keyword makes the parser insert a semicolon, which previously left
// the navigator.msSaveOrOpenBlob(...) call unreachable. Break only at
// statement boundaries.
var saveAs=saveAs||function(e){"use strict";if(typeof e==="undefined"||typeof navigator!=="undefined"&&/MSIE [1-9]\./.test(navigator.userAgent)){return}var t=e.document,n=function(){return e.URL||e.webkitURL||e},r=t.createElementNS("http://www.w3.org/1999/xhtml","a"),o="download"in r,a=function(e){var t=new MouseEvent("click");e.dispatchEvent(t)},i=/constructor/i.test(e.HTMLElement)||e.safari,f=/CriOS\/[\d]+/.test(navigator.userAgent),u=function(t){(e.setImmediate||e.setTimeout)(function(){throw t},0)},s="application/octet-stream",d=1e3*40,c=function(e){var t=function(){if(typeof e==="string"){n().revokeObjectURL(e)}else{e.remove()}};setTimeout(t,d)},l=function(e,t,n){t=[].concat(t);var r=t.length;while(r--){var o=e["on"+t[r]];if(typeof o==="function"){try{o.call(e,n||e)}catch(a){u(a)}}}},p=function(e){if(/^\s*(?:text\/\S*|application\/xml|\S*\/\S*\+xml)\s*;.*charset\s*=\s*utf-8/i.test(e.type)){return new Blob([String.fromCharCode(65279),e],{type:e.type})}return e},v=function(t,u,d){if(!d){t=p(t)}var v=this,w=t.type,m=w===s,y,h=function(){l(v,"writestart progress write writeend".split(" "))},S=function(){if((f||m&&i)&&e.FileReader){var r=new FileReader;r.onloadend=function(){var t=f?r.result:r.result.replace(/^data:[^;]*;/,"data:attachment/file;");var n=e.open(t,"_blank");if(!n)e.location.href=t;t=undefined;v.readyState=v.DONE;h()};r.readAsDataURL(t);v.readyState=v.INIT;return}if(!y){y=n().createObjectURL(t)}if(m){e.location.href=y}else{var o=e.open(y,"_blank");if(!o){e.location.href=y}}v.readyState=v.DONE;h();c(y)};v.readyState=v.INIT;if(o){y=n().createObjectURL(t);setTimeout(function(){r.href=y;r.download=u;a(r);h();c(y);v.readyState=v.DONE});return}S()},w=v.prototype,m=function(e,t,n){return new v(e,t||e.name||"download",n)};if(typeof navigator!=="undefined"&&navigator.msSaveOrOpenBlob){return function(e,t,n){t=t||e.name||"download";if(!n){e=p(e)}return navigator.msSaveOrOpenBlob(e,t)}}w.abort=function(){};w.readyState=w.INIT=0;w.WRITING=1;w.DONE=2;w.error=w.onwritestart=w.onprogress=w.onwrite=w.onabort=w.onerror=w.onwriteend=null;return m}(typeof self!=="undefined"&&self||typeof window!=="undefined"&&window||this.content);
if(typeof module!=="undefined"&&module.exports){module.exports.saveAs=saveAs}else if(typeof define!=="undefined"&&define!==null&&define.amd!==null){define("FileSaver.js",function(){return saveAs})}
// Future learn scrape: reads the article form fields on the current page,
// renders them into a markdown file and triggers a download via saveAs().

// Get elements - change this if FL changes its elements.
// Top-level `var` is deliberate: the put script declares the same global
// name, and `var` tolerates redeclaration if both are injected into one page.
var docElements = {
  title: document.querySelector("#article_title"),
  shortDesc: document.querySelector("#article_short_description"),
  content: document.querySelector("#article_body")
};
// Template markdown file.
// Each heading is followed by a blank line because the put script's regexes
// look for "<heading>\n\n<value>"; without it the exported file cannot be
// loaded back. A field that is empty, or whose element is missing from the
// page, falls back to an HTML-comment placeholder instead of throwing.
var markdown = `# Title

${(docElements.title && docElements.title.value) || '<!-- Insert document title -->'}

# Short Description

${(docElements.shortDesc && docElements.shortDesc.value) || '<!-- Insert short description here -->'}

# Content

${(docElements.content && docElements.content.value) || '<!-- Insert document content here -->'}
`;
var markdownBlob = new Blob([markdown], {type: "text/plain;charset=utf-8"});
// Save the file, named after the article title when there is one.
saveAs(markdownBlob, ((docElements.title && docElements.title.value) || "file") + '.md');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment