Created
August 24, 2021 23:06
-
-
Save wbamberg/d3802faafd1b6ee80cd6ef95c4e740d5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
const fs = require("fs") | |
const path = require("path") | |
const jsdom = require("jsdom"); | |
const { JSDOM } = jsdom; | |
/*
Recursively collect every ".html" file under the given directory.

dirPath: directory to scan. A relative path is interpreted relative to the
         current working directory, which is how fs.readdirSync and
         fs.statSync resolve it during the traversal.
files:   optional accumulator array; matches are appended to it and the same
         array is returned. A new array is created when it is omitted.

Returns an array of absolute file paths.
Throws whatever fs.readdirSync / fs.statSync throw (for example when dirPath
does not exist).
*/
function getAllFiles(dirPath, files) {
  const collected = files || [];
  for (const entry of fs.readdirSync(dirPath)) {
    const entryPath = path.join(dirPath, entry);
    if (fs.statSync(entryPath).isDirectory()) {
      // The recursive call appends into the same accumulator.
      getAllFiles(entryPath, collected);
    } else if (entry.endsWith(".html")) {
      // Resolve against the working directory, i.e. the same base the
      // traversal used, so the returned path always names the file that was
      // actually visited (also when dirPath is absolute).
      collected.push(path.resolve(entryPath));
    }
  }
  return collected;
}
/* The directory to scan is the first command-line argument. Fail with a
   readable message rather than an opaque TypeError from fs.readdirSync when
   it is missing. */
const rootDir = process.argv[2];
if (!rootDir) {
  console.error("Usage: node <script> <directory>");
  process.exit(1);
}
const allDocPaths = getAllFiles(rootDir);
/* CSS selector for the elements to inspect in each document. */
const query = "div.note";
/*
Scrape off the front matter.
We're considering this very roughly as everything up to and including the
second occurrence of "---" in the file.

doc: the full text of a document.

Returns the text that follows the second "---". A document containing fewer
than two "---" markers is returned unchanged.
*/
function removeFrontMatter(doc) {
  const delimiters = [...doc.matchAll(/---/g)];
  if (delimiters.length < 2) {
    // No closing delimiter, so there is nothing to strip.
    return doc;
  }
  const closing = delimiters[1];
  return doc.slice(closing.index + closing[0].length);
}
/*
Matches the correct format for a note: the note's first element child is a
<p> whose own first element child is a <strong> with the exact text "Note:".

note: the element to test.

Returns true when the note has that shape, false otherwise.
*/
function isPNote(note) {
  const paragraph = note.firstElementChild;
  if (!paragraph || paragraph.tagName !== "P") {
    return false;
  }
  const label = paragraph.firstElementChild;
  if (!label) {
    return false;
  }
  return label.tagName === "STRONG" && label.textContent === "Note:";
}
/*
Matches any note whose first element child is a paragraph.

note: the element to test.

Returns true when the first element child is a <p>, false otherwise
(including when the note has no element children).
*/
function isP(note) {
  const first = note.firstElementChild;
  return Boolean(first) && first.tagName === "P";
}
/*
Matches the format for a note which uses H4: the note's first element child
is an <h4> with the exact text "Note" (no trailing colon).

note: the element to test.

Returns true when the note has that shape, false otherwise.
*/
function isHNote(note) {
  const heading = note.firstElementChild;
  if (!heading) {
    return false;
  }
  return heading.tagName === "H4" && heading.textContent === "Note";
}
/*
Matches the format for a note which uses `<strong>` without a wrapping
paragraph: the note's first element child is a <strong> with the exact text
"Note:".

note: the element to test.

Returns true when the note has that shape, false otherwise.
*/
function isStrong(note) {
  const label = note.firstElementChild;
  if (!label) {
    return false;
  }
  return label.tagName === "STRONG" && label.textContent === "Note:";
}
/*
Parse every collected document and print one line for each element matching
`query` that fails the check below: the first 50 characters of the element's
inner HTML followed by the document path.
*/
for (const docPath of allDocPaths) {
  const rawDoc = fs.readFileSync(docPath, { encoding: "utf-8" });
  const { window } = new JSDOM(removeFrontMatter(rawDoc));
  try {
    for (const item of window.document.querySelectorAll(query)) {
      // obviously, change this condition to log different sorts of notes,
      // such as all notes which use the heading style
      if (!isPNote(item)) {
        // this is weird but gives me a quick way to review the note contents,
        // to decide if it really fits a particular pattern
        console.log(`${item.innerHTML.substring(0, 50)} ${docPath}`);
      }
    }
  } finally {
    // Always release the jsdom window, even if inspecting the document threw.
    window.close();
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment