Created
December 12, 2019 20:56
-
-
Save thomaswilburn/fc7c8dabb7962941b629b7b3e78da414 to your computer and use it in GitHub Desktop.
annotatedDocs.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var { google } = require("googleapis"); | |
var async = require("async"); | |
var os = require("os"); | |
var path = require("path"); | |
var { authenticate } = require("./googleauth"); | |
module.exports = function(grunt) { | |
grunt.registerTask("docs", "Load Google Docs into the data folder", function() { | |
var config = grunt.file.readJSON("project.json"); | |
var auth = null; | |
try { | |
auth = authenticate(); | |
} catch (err) { | |
console.log(err); | |
return grunt.fail.warn("Couldn't load access token for Docs, try running `grunt google-auth`"); | |
} | |
var done = this.async(); | |
var drive = google.drive({ auth, version: "v3" }); | |
var docs = google.docs({ auth, version: "v1" }).documents; | |
var formatters = { | |
link: text => `[${text.content}](${text.textStyle.link.url})`, | |
// underline: text => `_${text.content}_`, | |
bold: text => `**${text.content}**`, | |
italic: text => `*${text.content}*` | |
}; | |
var normalize = function(text) { | |
return text.trim().replace(/"/g, '"'); | |
}; | |
async.eachLimit( | |
config.docs, | |
2, // adjust this up or down based on rate limiting | |
async function(fileId) { | |
var documentId = fileId; | |
var meta = await drive.files.get({ fileId }); | |
var commentResponse = await drive.comments.list({ fileId, fields: "*", pageSize: 100 }); | |
// console.log(commentResponse.data.comments); | |
var docResponse = await docs.get({ documentId }); | |
var name = meta.data.name.replace(/\s+/g, "_"); | |
console.log(`Writing document as data/${name}`); | |
grunt.file.write(path.join("data", name + ".raw.json"), JSON.stringify(docResponse.data, null, 2)); | |
grunt.file.write(path.join("data", name + ".comments.json"), JSON.stringify(commentResponse.data, null, 2)); | |
var parsed = ""; | |
docResponse.data.body.content.forEach(function(block) { | |
if (!block.paragraph) return; | |
var text = block.paragraph.elements.map(function(element) { | |
// can't use formatters if we want to match comments | |
if (false) for (var f in formatters) { | |
if (f in element.textRun.textStyle) { | |
element.textRun.content = formatters[f](element.textRun); | |
} | |
} | |
return element.textRun.content; | |
}).join(""); | |
// if (block.paragraph.bullet) text = "* " + text; | |
parsed += text; | |
}); | |
parsed = parsed.replace(/skip:[\n\s\S]+:endskip/, ""); | |
var comments = []; | |
for (var comment of commentResponse.data.comments.slice().reverse()) { | |
var { id, anchor, content, author, resolved, deleted, replies } = comment; | |
if (resolved || deleted) continue; | |
var quote = normalize(comment.quotedFileContent.value); | |
var match = parsed.indexOf(quote); | |
var last = replies.pop(); | |
if (last && last.content) { | |
content = last.content; | |
} | |
var [ commentText, tag ] = content.split("|"); | |
tag = (tag || "").trim(); | |
content = commentText.trim(); | |
if (match > -1) { | |
comments.push({ | |
start: match, | |
end: match + quote.length, | |
anchor, | |
author, | |
content, | |
tag, | |
id, | |
quote | |
}); | |
} else { | |
console.log(`Unable to find a match for comment #${anchor}`); | |
} | |
} | |
comments.sort((a, b) => a.start - b.start); | |
comments.forEach(function(comment, i) { | |
var next = comments[i + 1]; | |
if (!next) return; | |
if (comment.end > next.start) comment.end = next.start; | |
}); | |
comments.slice().reverse().forEach(function(comment) { | |
parsed = parsed.slice(0, comment.start) | |
+ `<a class="comment-anchor ${comment.tag}" href="#${comment.id}" id="${comment.id}-anchor">${comment.quote}</a>` | |
+ parsed.slice(comment.end); | |
}); | |
var clean = text => text.trim().replace(/^(.+):/gm, "\\$1:"); | |
var output = ` | |
document: | |
${clean(parsed)} | |
:end | |
[comments] | |
${comments.map(c => ` | |
id: ${c.id} | |
anchor: ${c.anchor} | |
author: ${c.author.displayName} | |
tag: ${c.tag} | |
text: | |
${clean(c.content)} | |
:end | |
`).join("")} | |
[] | |
` | |
grunt.file.write(path.join("data", name + ".parsed.txt"), output); | |
}, | |
done | |
); | |
}); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment