Skip to content

Instantly share code, notes, and snippets.

@bmoren
Created July 25, 2019 19:36
Show Gist options
  • Save bmoren/b1df13f61d68ce9eac1ce6ba24984ae7 to your computer and use it in GitHub Desktop.
Save bmoren/b1df13f61d68ce9eac1ce6ba24984ae7 to your computer and use it in GitHub Desktop.
a nodejs script to convert indexhibit sites to a flat file markdown hierarchy
/*
use this to convert an indexhibit over and over site to a flatfile markdown structure.
change the input path for each indexhibit subsection folder, and run it for each section.
Don't forget to copy each output elsewhere after a run completes, because the script deletes and recreates the output folder at the start of the next run.
2019 – MIT
Ben Moren
http://benmoren.com
*/
const cheerio = require('cheerio')
const fs = require("fs")
const junk = require('junk');
const del = require('del');
// ---- configuration -------------------------------------------------------
// Folder that receives the generated markdown tree; it lives next to this
// script (i.e. "<script dir>/output/").
const outputPath = `${__dirname}/output/`;
// Root of the locally scraped site, with a trailing slash,
// eg. "/Users/bmoren/Desktop/benmorenscrape/benmoren.com/"
const projectPath = "/Users/bmoren/Desktop/benmorenscrape/benmoren.com/";
// Section folder inside the scraped site to convert on this run,
// with a trailing slash, eg. "projects/"
const subsectionPath = "video/";
// Full path of the section whose sub-folders will be converted.
const inputPath = `${projectPath}${subsectionPath}`;

// ---- output directory ----------------------------------------------------
// Start every run from an empty output folder: remove whatever the previous
// run left behind, then create the folder again.
console.log("generating output directory");
del.sync(outputPath);
fs.mkdirSync(outputPath);

console.log("opening input directory");
// Convert every project folder found in inputPath into
// <outputPath>/<folder>/index.md (front matter + body html + vimeo embeds)
// and copy the images referenced by that project's page next to it.
fs.readdir(inputPath, function(err, files){
  // Fail with the real filesystem error (e.g. ENOENT for a wrong inputPath)
  // instead of a confusing TypeError on an undefined file list.
  if (err) throw err;

  // ignore system files and the section's own index.html
  const projectFolders = files
    .filter(junk.not)
    .filter((entry) => entry !== 'index.html');

  //main loop to go through each project folder
  for (const file of projectFolders) {
    console.log("~+~+~+~+~+~+~ " + file + " ~+~+~+~+~+~+~");

    // Stray files or folders without a page should not abort the whole run.
    const indexPath = inputPath + file + "/index.html";
    if (!fs.existsSync(indexPath)) {
      console.warn("skipping " + file + ": no index.html found");
      continue;
    }

    //get the index.html and load it into cheerio for jquery operations!
    const $ = cheerio.load(fs.readFileSync(indexPath));

    // Collect the embed urls once; iframes without a src are ignored so they
    // can neither crash the run nor end up as src="undefined" in the output.
    const vimeoSources = [];
    $('.vimeo iframe').each((i, selection) => {
      const src = $(selection).attr('src');
      if (src) vimeoSources.push(src);
    });

    // The video number is the last non-empty path segment of the embed url;
    // any query string or fragment (e.g. "?title=0") is dropped first.
    const videoList = [];
    for (const src of vimeoSources) {
      const segments = src.split(/[?#]/)[0].split('/').filter(Boolean);
      const number = segments[segments.length - 1];
      if (number) videoList.push(number);
    }

    //+~++~+~+~+~+~+~+~+~+~+ frontmatter +~+~+~+~+~+~+~+~+~+~~+
    // NOTE(review): the title is written unquoted, exactly as scraped.
    let markdown = "---\n";
    markdown += "title: " + $('.highlight').text() + "\n";
    markdown += "layout: project "; //for later use!
    //list of the vimeo video numbers (comma separated)
    markdown += `\nvimeo: ${videoList.join(',')}`;
    markdown += "\n---\n";

    //+~+~+~+~+~+~+~+~+~+~+~+~~+~+ body +~+~+~+~+~+~+~+~+~+~+~+~+~
    // Every sibling between the title's parent and #img-container becomes a
    // paragraph of the body; an empty selection simply adds nothing.
    $(".highlight").parent().nextUntil("#img-container").each((i, selection) => {
      if (i > 0) markdown += "\n\n";
      markdown += $(selection).html();
    });

    //add the vimeo embeds to the markdown body
    markdown += "\n\n";
    for (const src of vimeoSources) {
      markdown += `<iframe src="${src}"></iframe> \n`;
    }

    //make a new folder and markdown file and add the contents
    console.log(markdown);
    fs.mkdirSync(outputPath + file);
    fs.writeFileSync(outputPath + file + "/index.md", markdown);

    //+~+~+~+~+~+~+~+~+~+~+~+ copy images +~+~+~+~+~+~+~+~+~+~+~+~+~+~+~
    $('.asset img').each((i, selection) => {
      const src = $(selection).attr('src');
      if (!src) return; // nothing to copy for an <img> without a src

      // assumes src looks like "../../<dir>/<dir>/<file>", so segments 2-4
      // are the image's location relative to projectPath — TODO confirm
      const segments = src.split('/');
      const sourcePath = `${projectPath}${segments[2]}/${segments[3]}/${segments[4]}`;
      const targetPath = outputPath + file + "/" + segments[segments.length - 1];

      // Copies run asynchronously; a failed copy is reported but does not
      // stop the conversion of the remaining projects.
      fs.copyFile(sourcePath, targetPath, function(copyErr){
        if (copyErr) console.error(copyErr);
      });
    });
  } //end main loop
}); //end readdir callback
{
"name": "scrapetoflat",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"del": "^5.0.0",
"junk": "^3.1.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment