undrafted/*setup.md

## *setup.md

      
    Raw
  

              *setup.md
            
          
    Google Doc Clean HTML Output, powered by Apps Script

Add this function to your Apps Script project and pass it the ID of a Google Doc.
If you need this for many Docs, you may want to create a separate Apps Script project to use as a library: copy and paste this code into the new project, open the script that's tied to your Doc, add the new project as a Library (using its script ID), and call `var html = NameOfYourLibraryProject.getContent(DocumentApp.getActiveDocument().getId());` (replace `NameOfYourLibraryProject` with the identifier of your library).

  
## code.gs
/**
 * Takes in a Google Doc ID, gets that doc in HTML format, cleans up the markup, and returns the resulting HTML string.
 *
 * The cleaned HTML is stored in the script cache under the doc ID for 15 minutes.
 * Passing useCaching = false skips the cache lookup and re-fetches the doc; the
 * fresh result is still written back to the cache. Responses that are not
 * HTTP 200 are returned to the caller but never cached.
 *
 * @param {string} id the id of the google doc
 * @param {boolean} [useCaching] enable or disable caching. default true.
 * @return {string} the doc's body in html format
 * @throws {string} if id is missing, if useCaching is given but is not a boolean,
 *     or if DriveApp cannot open a file with the given id
 */
function getContent(id, useCaching) {

  if (!id) {
    throw "Please call this API with a valid Google Doc ID";
  }

  // null and undefined both mean "not specified"
  if (useCaching == null) {
    useCaching = true;
  }

  if (typeof useCaching !== "boolean") {
    throw "If you're going to specify useCaching, it must be boolean.";
  }

  var cache = CacheService.getScriptCache();
  // only consult the cache when the caller allows it
  var html = useCaching ? cache.get(id) : null;

  if (html) {
    Logger.log("Pulling doc html from cache...");
  } else {

    Logger.log("Grabbing and parsing fresh html from the doc...");

    // fail early, with a readable message, if the id does not resolve to a Drive file
    try {
      DriveApp.getFileById(id);
    } catch (err) {
      throw "Please call this API with a valid Google Doc ID. " + err.message;
    }

    DriveApp.getStorageUsed(); // needed to get Drive Scope requested in ScriptApp.getOAuthToken();
    var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + id + "&exportFormat=html";
    var param = {
      method: "get",
      headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
      muteHttpExceptions: true
    };

    var response = UrlFetchApp.fetch(url, param);
    html = response.getContentText();

    // nuke the whole head section, including the stylesheet and meta tag
    // ([\s\S] instead of . so a head that spans several lines is removed too)
    html = html.replace(/<head\b[^>]*>[\s\S]*?<\/head>/, '');
    // remove almost all html attributes
    html = html.replace(/ (id|class|style|start|colspan|rowspan)="[^"]*"/g, '');
    // remove all of the spans, as well as the outer html and body
    html = html.replace(/<(span|\/span|body|\/body|html|\/html)>/g, '');
    // clearly the superior way of denoting line breaks
    html = html.replace(/<br>/g, '<br />');

    // muteHttpExceptions hands error pages back as ordinary content; do not keep those around
    if (response.getResponseCode() === 200) {
      try {
        cache.put(id, html, 900); // cache doc contents for 15 minutes, in case we get a lot of requests
      } catch (err) {
        // the cache rejects oversized values; the html is still good, so just skip caching
        Logger.log("Could not cache doc html: " + err.message);
      }
    } else {
      Logger.log("Export returned HTTP " + response.getResponseCode() + "; not caching the response.");
    }

  }

  Logger.log(html);

  return html;

}
	/**
	 * Takes in a Google Doc ID, gets that doc in HTML format, cleans up the markup, and returns the resulting HTML string.
	 *
	 * The cleaned HTML is stored in the script cache under the doc ID for 15 minutes.
	 * Passing useCaching = false skips the cache lookup and re-fetches the doc; the
	 * fresh result is still written back to the cache. Responses that are not
	 * HTTP 200 are returned to the caller but never cached.
	 *
	 * @param {string} id the id of the google doc
	 * @param {boolean} [useCaching] enable or disable caching. default true.
	 * @return {string} the doc's body in html format
	 * @throws {string} if id is missing, if useCaching is given but is not a boolean,
	 *     or if DriveApp cannot open a file with the given id
	 */
	function getContent(id, useCaching) {

		if (!id) {
			throw "Please call this API with a valid Google Doc ID";
		}

		// null and undefined both mean "not specified"
		if (useCaching == null) {
			useCaching = true;
		}

		if (typeof useCaching !== "boolean") {
			throw "If you're going to specify useCaching, it must be boolean.";
		}

		var cache = CacheService.getScriptCache();
		// only consult the cache when the caller allows it
		var html = useCaching ? cache.get(id) : null;

		if (html) {
			Logger.log("Pulling doc html from cache...");
		} else {

			Logger.log("Grabbing and parsing fresh html from the doc...");

			// fail early, with a readable message, if the id does not resolve to a Drive file
			try {
				DriveApp.getFileById(id);
			} catch (err) {
				throw "Please call this API with a valid Google Doc ID. " + err.message;
			}

			DriveApp.getStorageUsed(); // needed to get Drive Scope requested in ScriptApp.getOAuthToken();
			var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + id + "&exportFormat=html";
			var param = {
				method: "get",
				headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
				muteHttpExceptions: true
			};

			var response = UrlFetchApp.fetch(url, param);
			html = response.getContentText();

			// nuke the whole head section, including the stylesheet and meta tag
			// ([\s\S] instead of . so a head that spans several lines is removed too)
			html = html.replace(/<head\b[^>]*>[\s\S]*?<\/head>/, '');
			// remove almost all html attributes
			// (alternation must be a bare |; an escaped \| only matches a literal pipe character)
			html = html.replace(/ (id|class|style|start|colspan|rowspan)="[^"]*"/g, '');
			// remove all of the spans, as well as the outer html and body
			html = html.replace(/<(span|\/span|body|\/body|html|\/html)>/g, '');
			// clearly the superior way of denoting line breaks
			html = html.replace(/<br>/g, '<br />');

			// muteHttpExceptions hands error pages back as ordinary content; do not keep those around
			if (response.getResponseCode() === 200) {
				try {
					cache.put(id, html, 900); // cache doc contents for 15 minutes, in case we get a lot of requests
				} catch (err) {
					// the cache rejects oversized values; the html is still good, so just skip caching
					Logger.log("Could not cache doc html: " + err.message);
				}
			} else {
				Logger.log("Export returned HTTP " + response.getResponseCode() + "; not caching the response.");
			}

		}

		Logger.log(html);

		return html;

	}