Skip to content

Instantly share code, notes, and snippets.

@srghma
Last active January 11, 2022 12:16
Show Gist options
  • Save srghma/ab57d62080ad052f4f0938deb3d60787 to your computer and use it in GitHub Desktop.
Save srghma/ab57d62080ad052f4f0938deb3d60787 to your computer and use it in GitHub Desktop.
(function (console) {
  // How long the blob URL is kept alive after the click, so the browser has
  // time to start reading the blob before the URL is released.
  const REVOKE_DELAY_MS = 40000;

  /**
   * Downloads `data` as a file from the browser (intended for the DevTools console).
   *
   * Objects are serialised with `JSON.stringify` (4-space indent); any other
   * value is handed to `Blob` as-is. Logs an error and does nothing when
   * `data` is `null` or `undefined`; other falsy values (`0`, `false`, `''`)
   * are saved like any other payload.
   *
   * @param {*} data - Value to save.
   * @param {string} [filename='console.json'] - Suggested download file name.
   * @returns {void}
   */
  console.save = function (data, filename) {
    if (data == null) {
      console.error('Console.save: No data');
      return;
    }

    const targetName = filename || 'console.json';
    const payload =
      typeof data === 'object' ? JSON.stringify(data, undefined, 4) : data;

    const blob = new Blob([payload], { type: 'text/json' });
    const objectUrl = window.URL.createObjectURL(blob);

    const a = document.createElement('a');
    a.download = targetName;
    a.href = objectUrl;
    a.dataset.downloadurl = ['text/json', a.download, a.href].join(':');
    a.click();

    // Release the blob once the download has had time to start; without this
    // every saved blob stays in memory for the lifetime of the page.
    setTimeout(() => {
      window.URL.revokeObjectURL(objectUrl);
    }, REVOKE_DELAY_MS);
  };
})(console);
///////////////
/**
 * Returns a promise that settles once the timer scheduled for
 * `milliseconds` fires.
 *
 * @param {number} milliseconds - Timer duration passed to `setTimeout`.
 * @returns {Promise<void>}
 */
const delay = (milliseconds) =>
  new Promise((done) => {
    setTimeout(done, milliseconds);
  });
/**
 * Starts a browser download of `url`, suggesting `name` as the file name.
 *
 * A hidden anchor is attached to the page body, clicked, and detached again
 * after a short pause.
 *
 * @param {string} url - Address of the resource to download.
 * @param {string} name - Value for the anchor's `download` attribute.
 * @returns {Promise<void>} Settles after the anchor has been removed.
 */
async function download(url, name) {
  const anchor = Object.assign(document.createElement('a'), {
    download: name,
    href: url,
  });
  anchor.style.display = 'none';

  document.body.append(anchor);
  anchor.click();

  // Chrome requires the timeout
  await delay(100);

  anchor.remove();
}
// TODO: handle Chrome's limit of 10 simultaneous requests
/**
 * Downloads several files one after another via `download`.
 *
 * Before the item at position `index` the function waits `index * 1000` ms,
 * then waits for that item's `download` call to finish, so a failing
 * download rejects the returned promise instead of being lost.
 *
 * NOTE(review): because the items are already processed sequentially, the
 * pause grows with every item (0 s, 1 s, 2 s, ...). Confirm whether a fixed
 * pause was intended.
 *
 * @param {Array<{url: string, name: string}>} urlAndNames - Files to fetch.
 * @returns {Promise<void>} Settles after the last download call has finished.
 * @throws {Error} When `urlAndNames` is missing.
 */
async function multiDownload(urlAndNames) {
  if (!urlAndNames) {
    throw new Error('`urlAndNames` required');
  }

  for (const [index, { url, name }] of urlAndNames.entries()) {
    await delay(index * 1000);
    await download(url, name);
  }
}
///////////////
// Browser-console step: for every visualization section, fetch its index page
// and collect the text and target of each thumbnail link.
// `links` first holds one pending promise per section, then the resolved
// `{ page, pageLinks }` records.
links = [
  "vectorfields",
  "electrostatics",
  "magnetostatics",
  "faraday",
  "light",
].map(async (page) => {
  const pageHref = `https://ocw.mit.edu/courses/physics/8-02-physics-ii-electricity-and-magnetism-spring-2007/visualizations/${page}`;

  const response = await fetch(pageHref);
  const html = await response.text();

  const pageDom = new DOMParser().parseFromString(html, "text/html");
  const anchors = pageDom.documentElement.querySelectorAll('#course_inner_section .thumbnail a');
  const pageLinks = Array.from(anchors, (anchor) => ({
    text: anchor.textContent,
    href: anchor.href,
  }));

  return { page, pageLinks };
  // Earlier experiment, kept for reference: fetch every linked page as well.
  // x = await Promise.all(pageLinks.map(async x => {
  // let href = x.href.replace(/^http/g, 'https')
  // let t = await fetch(href)
  // t = await t.text()
  // return { link: href, text: t, name: x.textContent.replace('.', '') }
  // }))
});
links = await Promise.all(links);
// console.save(JSON.stringify(links, null, 2))
//////////////////////
// Node step: mirror every collected visualization page with httrack.
// `links` is read from the JSON file below; each entry is destructured as
// `{ page, pageLinks }` and each page link as `{ href, text }`
// (presumably the data saved by the browser step — verify before running).
links = require(`/home/srghma/Downloads/console.json`);
output_dir_path = '/home/srghma/Downloads/mit-magnetisc-visualizations';

// Start from a clean slate. `force` keeps this a no-op when the directory
// does not exist yet, like `rm -f`.
require('fs').rmSync(output_dir_path, { recursive: true, force: true });

for (const { page, pageLinks } of links) {
  for (const [subpageIndex, { href, text }] of pageLinks.entries()) {
    // One directory per visualization: "<position>-<link text>", with the
    // first "." of the link text dropped.
    const outputPath = `${output_dir_path}/${page}/${subpageIndex + 1}-${text.replace('.', '')}`;
    require('fs').mkdirSync(outputPath, { recursive: true });

    // console.log({ outputPath })
    // command = `wget --recursive --page-requisites --adjust-extension --span-hosts --convert-links --restrict-file-names=windows --no-parent -P "${outputPath}" "${href}"`
    // command = `wget --mirror --no-clobber --page-requisites --adjust-extension --span-hosts --convert-links --restrict-file-names=windows --no-parent -P "${outputPath}" "${href}"`

    // Shown for the operator only; the process below is started without a
    // shell, so scraped text/href values cannot break quoting or inject
    // commands, and the "*.jnlp" filter reaches httrack unexpanded.
    const command = `cd "${outputPath}" && httrack --continue "${href}" +web.mit.edu/*.jnlp`;
    console.log(command);

    try {
      require('child_process').execFileSync(
        'httrack',
        ['--continue', href, '+web.mit.edu/*.jnlp'],
        { cwd: outputPath, stdio: 'inherit' },
      );
    } catch (error) {
      // Keep mirroring the remaining pages, but report the failure and make
      // the process exit with a non-zero status.
      console.error(`httrack failed for ${href}: ${error.message}`);
      process.exitCode = 1;
    }
  }
}
// z = x.map((x, index) => {
// // const oParser = new DOMParser();
// // const dom = oParser.parseFromString(x.text, "text/html");
// // let baseEl = dom.createElement('base');
// // baseEl.setAttribute('href', x.link);
// // dom.head.append(baseEl);
// let link = x.text.matchAll(/VISUALIZATION.*?\<a href="([^"]+)/g)
// link = (Array.from(link) || []).map(x => x[1])
// link = link[0]
// // let a = dom.documentElement.querySelectorAll('a')
// // a = Array.from(a)
// // a = a.find(x => x.attributes.href.value === link)
// // if (!a) { throw new Error() }
// // const downloadHref = a.href
// const downloadHref = new URL(link, x.link).href
// const downloadHrefExt = downloadHref.split('.').pop()
// const downloadName = `${index + 1}-${x.name}.${downloadHrefExt}`
// console.log(link, x.link)
// return { url: downloadHref, name: downloadName }
// })
// console.log(z)
// multiDownload(z)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment