Skip to content

Instantly share code, notes, and snippets.

@wbamberg
Created August 24, 2021 23:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wbamberg/d3802faafd1b6ee80cd6ef95c4e740d5 to your computer and use it in GitHub Desktop.
Save wbamberg/d3802faafd1b6ee80cd6ef95c4e740d5 to your computer and use it in GitHub Desktop.
"use strict";
const fs = require("fs")
const path = require("path")
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
/**
 * Recursively collect every ".html" file under a directory.
 *
 * @param {string} dirPath - Directory to search. A relative path is
 *   interpreted against the current working directory, exactly as the `fs`
 *   calls below interpret it.
 * @param {string[]} [files] - Accumulator used by the recursion; matching
 *   paths are appended to it. A new array is created when it is omitted.
 * @returns {string[]} Absolute paths of all ".html" files found (the same
 *   array as `files` when one was supplied).
 */
function getAllFiles(dirPath, files) {
  const found = files || [];
  for (const entry of fs.readdirSync(dirPath)) {
    const entryPath = path.join(dirPath, entry);
    if (fs.statSync(entryPath).isDirectory()) {
      // Descend, sharing the accumulator so results end up in one array.
      getAllFiles(entryPath, found);
    } else if (entry.endsWith(".html")) {
      // Resolve against the working directory, because that is where
      // readdirSync/statSync just found the entry. Prefixing __dirname
      // produced a non-existent path whenever dirPath was absolute or the
      // script was started from a different directory.
      found.push(path.resolve(entryPath));
    }
  }
  return found;
}
// Root directory to scan, taken from the first command-line argument.
const rootDir = process.argv[2];
if (!rootDir) {
  // Fail with a readable message instead of the opaque TypeError that
  // fs.readdirSync(undefined) would raise inside getAllFiles.
  console.error("Usage: node <script> <directory>");
  process.exit(1);
}
// Every ".html" document found under the root directory.
const allDocPaths = getAllFiles(rootDir);
// CSS selector for the note elements inspected in each document.
const query = "div.note";
/**
 * Strip the front matter from a document's source text.
 *
 * The front matter is treated very roughly as everything up to and including
 * the second occurrence of "---" in the text.
 *
 * @param {string} doc - Full text of the document.
 * @returns {string} The text following the second "---". When the text
 *   contains fewer than two "---" markers there is nothing to strip, so it is
 *   returned unchanged.
 */
function removeFrontMatter(doc) {
  const marker = "---";
  const opening = doc.indexOf(marker);
  if (opening === -1) {
    return doc;
  }
  // Start after the first marker so the two matches never overlap.
  const closing = doc.indexOf(marker, opening + marker.length);
  if (closing === -1) {
    return doc;
  }
  return doc.slice(closing + marker.length);
}
/*
Checks for the correct note format: the note's first element child is a
paragraph whose own first element child is a <strong> containing exactly
"Note:".
*/
function isPNote(note) {
  const paragraph = note.firstElementChild;
  if (!paragraph || paragraph.tagName !== "P") {
    return false;
  }
  const label = paragraph.firstElementChild;
  if (!label) {
    return false;
  }
  return label.tagName === "STRONG" && label.textContent === "Note:";
}
/*
True for any note whose first element child is a paragraph.
*/
function isP(note) {
  const head = note.firstElementChild;
  return Boolean(head) && head.tagName === "P";
}
/*
Checks for the heading-style note: the first element child is an H4 whose
text is exactly "Note".
*/
function isHNote(note) {
  const heading = note.firstElementChild;
  return heading
    ? heading.tagName === "H4" && heading.textContent === "Note"
    : false;
}
/*
Checks for a note that opens with a bare `<strong>` (no wrapping paragraph)
whose text is exactly "Note:".
*/
function isStrong(note) {
  const lead = note.firstElementChild;
  if (!lead || lead.tagName !== "STRONG" || lead.textContent !== "Note:") {
    return false;
  }
  return true;
}
/* Scan every document and print each note that fails the format check. */
for (const docPath of allDocPaths) {
  const source = fs.readFileSync(docPath, {encoding: "utf-8"});
  const { window } = new JSDOM(removeFrontMatter(source));
  for (const note of window.document.querySelectorAll(query)) {
    // Swap this predicate to report a different sort of note, such as all
    // notes which use the heading style.
    if (isPNote(note)) {
      continue;
    }
    // Printing the start of the note's markup beside the file path is a
    // quick way to review whether the note really fits a particular pattern.
    console.log(`${note.innerHTML.substring(0, 50)} ${docPath}`);
  }
  window.close();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment