thomaswilburn/loadDocs.js

## loadDocs.js
var { google } = require("googleapis");
var async = require("async");
var os = require("os");
var path = require("path");
var { authenticate } = require("./googleauth");

module.exports = function(grunt) {

  grunt.registerTask("docs", "Load Google Docs into the data folder", function() {

    var config = grunt.file.readJSON("project.json");
    var auth = null;
    try {
      auth = authenticate();
    } catch (err) {
      console.log(err);
      return grunt.fail.warn("Couldn't load access token for Docs, try running `grunt google-auth`");
    }

    var done = this.async();

    var drive = google.drive({ auth, version: "v3" });
    var docs = google.docs({ auth, version: "v1" }).documents;

    var formatters = {
      link: text => `[${text.content}](${text.textStyle.link.url})`,
      // underline: text => `_${text.content}_`,
      bold: text => `**${text.content}**`,
      italic: text => `*${text.content}*`
    };

    var normalize = function(text) {
      return text.trim().replace(/&quot;/g, '"');
    };

    async.eachLimit(
      config.docs,
      2, // adjust this up or down based on rate limiting
      async function(fileId) {
        var documentId = fileId;
        var meta = await drive.files.get({ fileId });
        var commentResponse = await drive.comments.list({ fileId, fields: "*", pageSize: 100 });
        // console.log(commentResponse.data.comments);
        var docResponse = await docs.get({ documentId });
        var name = meta.data.name.replace(/\s+/g, "_");
        console.log(`Writing document as data/${name}`);

        grunt.file.write(path.join("data", name + ".raw.json"), JSON.stringify(docResponse.data, null, 2));
        grunt.file.write(path.join("data", name + ".comments.json"), JSON.stringify(commentResponse.data, null, 2));

        var parsed = "";
        docResponse.data.body.content.forEach(function(block) {
          if (!block.paragraph) return;
          var text = block.paragraph.elements.map(function(element) {
            // can't use formatters if we want to match comments
            if (false) for (var f in formatters) {
              if (f in element.textRun.textStyle) {
                element.textRun.content = formatters[f](element.textRun);
              }
            }
            return element.textRun.content;
          }).join("");
          // if (block.paragraph.bullet) text = "* " + text;
          parsed += text;
        });

        parsed = parsed.replace(/skip:[\n\s\S]+:endskip/, "");

        var comments = [];
        for (var comment of commentResponse.data.comments.slice().reverse()) {
          var { id, anchor, content, author, resolved, deleted, replies } = comment;
          if (resolved || deleted) continue;
          var quote = normalize(comment.quotedFileContent.value);
          var match = parsed.indexOf(quote);
          var last = replies.pop();
          if (last && last.content) {
            content = last.content;
          }
          var [ commentText, tag ] = content.split("|");
          tag = (tag || "").trim();
          content = commentText.trim();
          if (match > -1) {
            comments.push({
              start: match,
              end: match + quote.length,
              anchor,
              author,
              content,
              tag,
              id,
              quote
            });
          } else {
            console.log(`Unable to find a match for comment #${anchor}`);
          }
        }

        comments.sort((a, b) => a.start - b.start);
        comments.forEach(function(comment, i) {
          var next = comments[i + 1];
          if (!next) return;
          if (comment.end > next.start) comment.end = next.start;
        });

        comments.slice().reverse().forEach(function(comment) {
          parsed = parsed.slice(0, comment.start)
            + `<a class="comment-anchor ${comment.tag}" href="#${comment.id}" id="${comment.id}-anchor">${comment.quote}</a>`
            + parsed.slice(comment.end);
        });

        var clean = text => text.trim().replace(/^(.+):/gm, "\\$1:");

        var output = `
document:
${clean(parsed)}
:end

[comments]
${comments.map(c => `
id: ${c.id}
anchor: ${c.anchor}
author: ${c.author.displayName}
tag: ${c.tag}
text:
${clean(c.content)}
:end

`).join("")}
[]
        `

        grunt.file.write(path.join("data", name + ".parsed.txt"), output);

      },
      done
    );

  });
}
	var { google } = require("googleapis");
	var async = require("async");
	var os = require("os");
	var path = require("path");
	var { authenticate } = require("./googleauth");

	module.exports = function(grunt) {

	grunt.registerTask("docs", "Load Google Docs into the data folder", function() {

	var config = grunt.file.readJSON("project.json");
	var auth = null;
	try {
	auth = authenticate();
	} catch (err) {
	console.log(err);
	return grunt.fail.warn("Couldn't load access token for Docs, try running `grunt google-auth`");
	}

	var done = this.async();

	var drive = google.drive({ auth, version: "v3" });
	var docs = google.docs({ auth, version: "v1" }).documents;

	var formatters = {
	link: text => `[${text.content}](${text.textStyle.link.url})`,
	// underline: text => `_${text.content}_`,
	bold: text => `${text.content}`,
	italic: text => `${text.content}`
	};

	var normalize = function(text) {
	return text.trim().replace(/"/g, '"');
	};

	async.eachLimit(
	config.docs,
	2, // adjust this up or down based on rate limiting
	async function(fileId) {
	var documentId = fileId;
	var meta = await drive.files.get({ fileId });
	var commentResponse = await drive.comments.list({ fileId, fields: "*", pageSize: 100 });
	// console.log(commentResponse.data.comments);
	var docResponse = await docs.get({ documentId });
	var name = meta.data.name.replace(/\s+/g, "_");
	console.log(`Writing document as data/${name}`);

	grunt.file.write(path.join("data", name + ".raw.json"), JSON.stringify(docResponse.data, null, 2));
	grunt.file.write(path.join("data", name + ".comments.json"), JSON.stringify(commentResponse.data, null, 2));

	var parsed = "";
	docResponse.data.body.content.forEach(function(block) {
	if (!block.paragraph) return;
	var text = block.paragraph.elements.map(function(element) {
	// can't use formatters if we want to match comments
	if (false) for (var f in formatters) {
	if (f in element.textRun.textStyle) {
	element.textRun.content = formatters[f](element.textRun);
	}
	}
	return element.textRun.content;
	}).join("");
	// if (block.paragraph.bullet) text = "* " + text;
	parsed += text;
	});

	parsed = parsed.replace(/skip:[\n\s\S]+:endskip/, "");

	var comments = [];
	for (var comment of commentResponse.data.comments.slice().reverse()) {
	var { id, anchor, content, author, resolved, deleted, replies } = comment;
	if (resolved \|\| deleted) continue;
	var quote = normalize(comment.quotedFileContent.value);
	var match = parsed.indexOf(quote);
	var last = replies.pop();
	if (last && last.content) {
	content = last.content;
	}
	var [ commentText, tag ] = content.split("\|");
	tag = (tag \|\| "").trim();
	content = commentText.trim();
	if (match > -1) {
	comments.push({
	start: match,
	end: match + quote.length,
	anchor,
	author,
	content,
	tag,
	id,
	quote
	});
	} else {
	console.log(`Unable to find a match for comment #${anchor}`);
	}
	}

	comments.sort((a, b) => a.start - b.start);
	comments.forEach(function(comment, i) {
	var next = comments[i + 1];
	if (!next) return;
	if (comment.end > next.start) comment.end = next.start;
	});

	comments.slice().reverse().forEach(function(comment) {
	parsed = parsed.slice(0, comment.start)
	+ `<a class="comment-anchor ${comment.tag}" href="#${comment.id}" id="${comment.id}-anchor">${comment.quote}</a>`
	+ parsed.slice(comment.end);
	});

	var clean = text => text.trim().replace(/^(.+):/gm, "\\$1:");

	var output = `
	document:
	${clean(parsed)}
	:end

	[comments]
	${comments.map(c => `
	id: ${c.id}
	anchor: ${c.anchor}
	author: ${c.author.displayName}
	tag: ${c.tag}
	text:
	${clean(c.content)}
	:end

	`).join("")}
	[]
	`

	grunt.file.write(path.join("data", name + ".parsed.txt"), output);

	},
	done
	);

	});
	}