Skip to content

Instantly share code, notes, and snippets.

@bafta-benj
Last active February 15, 2019 09:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save bafta-benj/a5c432e5957ba48f98007fb802f48ca4 to your computer and use it in GitHub Desktop.
Save bafta-benj/a5c432e5957ba48f98007fb802f48ca4 to your computer and use it in GitHub Desktop.
/*
Credit:
The vast majority of this code comes from:
https://github.com/mangini/gdocs2md/issues
Ben Jefferson added the following to create this fork
- Replaced emailing of markdown files with creating files in Google Drive in a "markdown" directory alongside the file being converted.
- Added support for linking to (and automatic conversion) other Google docs in the same directory or a subdirectory
License:
The original file and Ben Jefferson's subsequent edits are all released under the Apache2 License see https://github.com/mangini/gdocs2md for more details
Usage:
Adding this script to your doc:
- Tools > Script Manager > New
- Select "Blank Project", then paste this code in and save as "Export Markdown.gs"
- Run > Test as add-on
- Choose the document you want to test this on - this will open the document in a new tab
Running the script:
- Generated files go into a "<document name>_markdown" directory in the same directory as the document you are editing - the script creates it if it does not already exist
- Add-ons > Convert To Markdown > Update Markdown
- On successful completion a pop-up will appear saying "All done!"
*/
// Suffix appended to the document name to form the output folder name.
var outputDirSuffix='_markdown';
// Folder that contains the active document; assigned in processActiveDocument.
var parentFolder;
// Folder that receives the generated files; assigned in processActiveDocument.
var outputBaseDir;
// Map of document ID -> generated markdown filename; reset at the start of each run.
var processedFiles;
// Map of output filename -> true for every name claimed by getFilename during the run.
var usedFilenames;
/**
 * Install hook: builds the add-on menu right away by delegating to onOpen.
 *
 * @param {Object} e Event object, forwarded unchanged to onOpen.
 */
function onInstall(e) {
  onOpen(e);
}
/**
 * Open hook: registers the add-on menu with a single 'Update Markdown' entry
 * that runs processActiveDocument.
 *
 * @param {Object} e Event object (not used here).
 */
function onOpen(e) {
  var addonMenu = DocumentApp.getUi().createAddonMenu();
  // addItem's return value is used for the final call, exactly like the chained form.
  var menuWithItem = addonMenu.addItem('Update Markdown', 'processActiveDocument');
  menuWithItem.addToUi();
}
/**
 * Menu entry point: converts the active document to markdown.
 *
 * Resets the per-run bookkeeping, locates (or creates) the
 * "<document name>_markdown" folder next to the document, runs the
 * conversion, removes files in that folder that this run did not produce,
 * and finally shows an 'All done!' alert.
 */
function processActiveDocument() {
  // Fresh bookkeeping for every run.
  processedFiles = {};
  usedFilenames = {};

  var activeDoc = DocumentApp.getActiveDocument();
  var docFile = DriveApp.getFileById(activeDoc.getId());
  // A file can live in multiple directories - only the first one is used.
  parentFolder = docFile.getParents().next();

  var outputName = activeDoc.getName() + outputDirSuffix;
  var existingDirs = parentFolder.getFoldersByName(outputName);
  // Reuse the output folder when present, otherwise create it.
  outputBaseDir = existingDirs.hasNext()
    ? existingDirs.next()
    : parentFolder.createFolder(outputName);

  ConvertToMarkdown(activeDoc);

  // Remove any file in the output folder whose name was not claimed during this run.
  for (var leftovers = outputBaseDir.getFiles(); leftovers.hasNext(); ) {
    var candidate = leftovers.next();
    if (usedFilenames[candidate.getName()]) continue;
    outputBaseDir.removeFile(candidate);
  }

  DocumentApp.getUi().alert('All done!');
}
/**
 * Reserves an output filename that has not been used yet in this run.
 *
 * Tries "<filename><extension>" first, then "<filename>_1<extension>",
 * "<filename>_2<extension>", ... until a name is found that is not recorded
 * in usedFilenames. The chosen name is recorded, and one existing file with
 * that name (if any) is removed from the output folder.
 *
 * @param {string} filename Base name.
 * @param {string} [extension] Suffix appended after the optional counter.
 * @return {string} The reserved filename.
 */
function getFilename( filename, extension ){
  // Assembles the candidate for a given attempt number (0 = no counter suffix).
  var buildName = function (attempt) {
    var name = filename;
    if (attempt) name += '_' + attempt;
    if (extension) name += extension;
    return name;
  };

  var attempt = 0;
  var candidate = buildName(attempt);
  while (usedFilenames[candidate]) {
    attempt += 1;
    candidate = buildName(attempt);
  }
  usedFilenames[candidate] = true;

  // Clear out a previous file of the same name, if there is one.
  var stale = outputBaseDir.getFilesByName(candidate);
  if (stale.hasNext()) {
    outputBaseDir.removeFile(stale.next());
  }
  return candidate;
}
/**
 * Converts one Google Doc to a markdown file in the output folder.
 *
 * Each document is converted at most once per run: the filename is recorded
 * in processedFiles before the body is walked, so a document that links back
 * to itself (directly or through other documents) resolves to the recorded
 * name instead of recursing forever.
 *
 * "--- src" / "--- srcp" / "--- class NAME" marker paragraphs open a <pre>,
 * <pre class="prettyprint"> or <div class="NAME"> block and a bare "---"
 * closes the currently open one. Images reported by processParagraph are
 * written next to the markdown file.
 *
 * @param {Document} theDocument Document to convert.
 * @return {string} Filename of the generated markdown file.
 */
function ConvertToMarkdown( theDocument ) {
  if (processedFiles[theDocument.getId()]) return processedFiles[theDocument.getId()];
  var processedFilename = getFilename(theDocument.getName(),".md");
  processedFiles[theDocument.getId()]=processedFilename;
  // Image names are derived from the markdown name without its ".md" suffix.
  var imageBaseFilename = processedFilename.slice(0,-3);
  var body = theDocument.getBody();
  var numChildren = body.getNumChildren();
  var text = "";
  var inSrc = false;
  var inClass = false;
  var globalListCounters = {};
  // edbacher: added a variable for indent in src <pre> block. Let style sheet do margin.
  var srcIndent = "";

  // Walk through all the child elements of the doc.
  for (var i = 0; i < numChildren; i++) {
    var child = body.getChild(i);
    var result = processParagraph(i, child, inSrc, imageBaseFilename, globalListCounters);

    if (result === null) {
      // Support empty lines inside source code.
      if (inSrc) text += '\n';
      continue;
    }

    // A result may legitimately carry no text (e.g. a paragraph holding only
    // ignored children, or a marker that does not apply in the current
    // state). Such results must not be escaped or concatenated as "undefined".
    var hasText = typeof result.text === 'string';

    if (result.sourcePretty==="start" && !inSrc) {
      inSrc=true;
      text+="<pre class=\"prettyprint\">\n";
    } else if (result.sourcePretty==="end" && inSrc) {
      inSrc=false;
      text+="</pre>\n\n";
    } else if (result.source==="start" && !inSrc) {
      inSrc=true;
      text+="<pre>\n";
    } else if (result.source==="end" && inSrc) {
      inSrc=false;
      text+="</pre>\n\n";
    } else if (result.inClass==="start" && !inClass) {
      inClass=true;
      text+="<div class=\""+result.className+"\">\n";
    } else if (result.inClass==="end" && inClass) {
      inClass=false;
      text+="</div>\n\n";
    } else if (inClass) {
      if (hasText) text+=result.text+"\n\n";
    } else if (inSrc) {
      if (hasText) text+=(srcIndent+escapeHTML(result.text)+"\n");
    } else if (hasText && result.text.length>0) {
      text+=result.text+"\n\n";
    }

    if (result.images && result.images.length>0) {
      for (var j=0; j<result.images.length; j++) {
        var image = result.images[j];
        createOutputFile(outputBaseDir, getFilename(image.name), image.bytes, image.type, true);
      }
    }
  }

  createOutputFile(outputBaseDir,processedFilename, text, "text/plain" );
  return processedFilename;
}
/**
 * Writes a file into the given folder, first removing every existing file
 * in that folder that has the same name.
 *
 * @param {Folder} outputBaseDir Destination folder.
 * @param {string} filename Name of the file to create.
 * @param {*} content Data handed to Utilities.newBlob.
 * @param {string} type Content type handed to Utilities.newBlob.
 * @param {boolean} [isBlob] Accepted but not used by this function.
 */
function createOutputFile( outputBaseDir, filename, content, type, isBlob ) {
  for (var duplicates = outputBaseDir.getFilesByName(filename); duplicates.hasNext(); ) {
    outputBaseDir.removeFile(duplicates.next());
  }
  var blob = Utilities.newBlob(content, type, filename);
  outputBaseDir.createFile(blob);
}
/**
 * Escapes text for literal display inside HTML (used for <pre> blocks).
 *
 * '&' is replaced first so that the entities produced for '<' and '>' are
 * not escaped a second time, and so that source text such as "&lt;" or
 * "a && b" is shown verbatim instead of being read as an entity.
 *
 * @param {string} text Raw text.
 * @return {string} Text with &, < and > replaced by HTML entities.
 */
function escapeHTML(text) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
/**
 * Processes one child element of the document body (not just paragraphs).
 *
 * @param {number} index Position of the element in the body (used in error messages).
 * @param {Element} element Body child: paragraph, list item, table or table of contents.
 * @param {boolean} inSrc True while inside a source-code block.
 * @param {string} imageBaseFilename Prefix for the names of extracted images.
 * @param {Object} listCounters Ordered-list counters, passed through to findPrefix.
 * @return {Object} null for an element without children; otherwise an object
 *     that may carry `text`, `images` ({bytes, type, name} entries), or the
 *     block markers `source` / `sourcePretty` / `inClass` (+ `className`).
 * @throws {Error} On an unsupported image content type or child element type.
 */
function processParagraph(index, element, inSrc, imageBaseFilename, listCounters) {
  var i, j;

  // First, check for things that require no processing.
  if (element.getNumChildren()==0) {
    return null;
  }
  // Punt on TOC.
  if (element.getType() === DocumentApp.ElementType.TABLE_OF_CONTENTS) {
    return {"text": "[[TOC]]"};
  }

  // Set up for real results.
  var result = {};
  var rawText = "";       // plain text of the element, used only for marker detection
  var textElements = [];  // Text elements and ready-made strings, in document order
  var imageCounter = 0;

  // Handle Table elements. Pretty simple-minded now, but works for simple tables.
  // Note that Markdown does not process within block-level HTML, so it probably
  // doesn't make sense to add markup within tables.
  if (element.getType() === DocumentApp.ElementType.TABLE) {
    textElements.push("<table>\n");
    for (i = 0; i < element.getNumChildren(); i++) {
      var row = element.getChild(i);
      // Use each row's own cell count instead of assuming every row matches row 0.
      var cellCount = row.getNumCells();
      textElements.push(" <tr>\n");
      for (j = 0; j < cellCount; j++) {
        textElements.push(" <td>" + row.getChild(j).getText() + "</td>\n");
      }
      textElements.push(" </tr>\n");
    }
    textElements.push("</table>\n");
  }

  // Process various types (ElementType).
  for (i = 0; i < element.getNumChildren(); i++) {
    var child = element.getChild(i);
    var t = child.getType();
    if (t === DocumentApp.ElementType.TABLE_ROW) {
      // do nothing: already handled TABLE_ROW
    } else if (t === DocumentApp.ElementType.TEXT) {
      rawText += child.getText();
      textElements.push(child);
    } else if (t === DocumentApp.ElementType.INLINE_IMAGE) {
      result.images = result.images || [];
      // Fetch the blob once and reuse it for both type and bytes.
      var blob = child.getBlob();
      var contentType = blob.getContentType();
      var extension = "";
      if (/\/png$/.test(contentType)) {
        extension = ".png";
      } else if (/\/gif$/.test(contentType)) {
        extension = ".gif";
      } else if (/\/jpe?g$/.test(contentType)) {
        extension = ".jpg";
      } else {
        throw new Error("Unsupported image type: "+contentType);
      }
      var name = imageBaseFilename + '_' + imageCounter + extension;
      imageCounter++;
      textElements.push('![image alt text]('+name+')');
      result.images.push( {
        "bytes": blob.getBytes(),
        "type": contentType,
        "name": name});
    } else if (t === DocumentApp.ElementType.PAGE_BREAK) {
      // ignore
    } else if (t === DocumentApp.ElementType.HORIZONTAL_RULE) {
      textElements.push('* * *\n');
    } else if (t === DocumentApp.ElementType.FOOTNOTE) {
      textElements.push(' (NOTE: '+child.getFootnoteContents().getText()+')');
    } else {
      throw new Error("Paragraph "+index+" of type "+element.getType()+" has an unsupported child: "
        +t+" "+(child["getText"] ? child.getText():'')+" index="+index);
    }
  }

  if (textElements.length==0) {
    // Only ignored children (e.g. page breaks): nothing to emit.
    return result;
  }

  // evb: Add source pretty too. (And abbreviations: src and srcp.)
  // Marker paragraphs are recognised on the raw text; captures are read from
  // the match object rather than the deprecated RegExp.$1 static.
  var classMatch;
  var jsperfMatch;
  if (/^\s*---\s+srcp\s*$/.test(rawText) || /^\s*---\s+source pretty\s*$/.test(rawText)) {
    result.sourcePretty = "start";
  } else if (/^\s*---\s+src\s*$/.test(rawText) || /^\s*---\s+source code\s*$/.test(rawText)) {
    result.source = "start";
  } else if ((classMatch = /^\s*---\s+class\s+([^ ]+)\s*$/.exec(rawText))) {
    result.inClass = "start";
    result.className = classMatch[1];
  } else if (/^\s*---\s*$/.test(rawText)) {
    // A bare "---" closes whichever block is open; the caller decides which.
    result.source = "end";
    result.sourcePretty = "end";
    result.inClass = "end";
  } else if ((jsperfMatch = /^\s*---\s+jsperf\s*([^ ]+)\s*$/.exec(rawText))) {
    result.text = '<iframe style="width: 100%; height: 340px; overflow: hidden; border: 0;" '+
      'src="http://www.html5rocks.com/static/jsperfview/embed.html?id='+jsperfMatch[1]+
      '"></iframe>';
  } else {
    var prefix = findPrefix(inSrc, element, listCounters);
    var converted = "";
    for (i = 0; i < textElements.length; i++) {
      converted += processTextElement(inSrc, textElements[i]);
    }
    // Replace every Unicode double quotation mark; a string pattern would
    // only replace the first occurrence.
    converted = converted.replace(/[\u201c\u201d]/g, '"');
    result.text = prefix+converted;
  }
  return result;
}
/**
 * Builds the markdown prefix for a paragraph or list item.
 *
 * Headings get one '#' per heading level followed by a space. List items get
 * four spaces of indentation per nesting level (enough for both "* " and
 * "1. " parents to treat the item as nested) followed by "* " for bullet
 * glyphs or "<n>. " for anything else, where n is a running counter kept per
 * list ID and nesting level in listCounters.
 *
 * @param {boolean} inSrc True while inside a source-code block; no prefix is added then.
 * @param {Element} element Paragraph or list item (other types yield "").
 * @param {Object} listCounters Map "<listId>.<nestingLevel>" -> last number used; updated in place.
 * @return {string} Prefix to put in front of the element's text.
 */
function findPrefix(inSrc, element, listCounters) {
  var prefix = "";
  if (inSrc) {
    return prefix;
  }

  var elementType = element.getType();
  if (elementType === DocumentApp.ElementType.PARAGRAPH) {
    // Position in this array + 1 is the number of '#' characters to emit.
    var headings = [
      DocumentApp.ParagraphHeading.HEADING1,
      DocumentApp.ParagraphHeading.HEADING2,
      DocumentApp.ParagraphHeading.HEADING3,
      DocumentApp.ParagraphHeading.HEADING4,
      DocumentApp.ParagraphHeading.HEADING5,
      DocumentApp.ParagraphHeading.HEADING6
    ];
    var level = headings.indexOf(element.getHeading()) + 1;
    if (level > 0) {
      prefix = new Array(level + 1).join("#") + " ";
    }
  } else if (elementType === DocumentApp.ElementType.LIST_ITEM) {
    var nesting = element.getNestingLevel();
    for (var i = 0; i < nesting; i++) {
      // A single space per level does not nest in Markdown; use four.
      prefix += "    ";
    }
    var gt = element.getGlyphType();
    if (gt === DocumentApp.GlyphType.BULLET
        || gt === DocumentApp.GlyphType.HOLLOW_BULLET
        || gt === DocumentApp.GlyphType.SQUARE_BULLET) {
      // Bullet list (<ul>):
      prefix += "* ";
    } else {
      // Ordered list (<ol>):
      var key = element.getListId() + '.' + nesting;
      var counter = (listCounters[key] || 0) + 1;
      listCounters[key] = counter;
      prefix += counter + ". ";
    }
  }
  return prefix;
}
/**
 * Rewrites a link to another Google Doc so that it points at that document's
 * generated markdown file, converting the linked document on the way.
 *
 * Any URL that is not a docs.google.com link, has no recognisable file ID,
 * cannot be opened through DriveApp, or is not a Google Docs document is
 * returned unchanged.
 *
 * @param {string} url Link target found in the document.
 * @return {string} Markdown filename of the linked document, or the original URL.
 */
function processLinkedFile( url ) {
  var docsPrefix = 'https://docs.google.com/';
  if (url.indexOf(docsPrefix) !== 0) return url;

  var idMatch = url.match(/.*[^-\w]([-\w]{25,})[^-\w]?.*/);
  if (!idMatch || !idMatch[1]) return url;
  var fileId = idMatch[1];

  // getFileById throws when the file does not exist or is not accessible.
  // A dead or private link must not abort the whole export, so the link is
  // simply left as it is in that case.
  var linkedFile;
  try {
    linkedFile = DriveApp.getFileById(fileId);
  } catch (lookupError) {
    return url;
  }
  if (!linkedFile) return url;
  if (linkedFile.getMimeType()!=MimeType.GOOGLE_DOCS) return url;

  // Errors raised while converting the linked document are deliberately not caught.
  var newUrl = ConvertToMarkdown( DocumentApp.openById(fileId) );
  if (newUrl && newUrl.length) return newUrl;
  return url;
}
/**
 * Converts one text run of a paragraph to markdown.
 *
 * Plain strings (already-rendered fragments such as table HTML or image
 * links) are returned untouched. For Text elements the attribute runs are
 * walked from the end of the text towards the start, so that inserting
 * markup never shifts the offsets of runs still to be processed. Links become
 * [text](url), Courier New runs become `code` (outside source blocks only),
 * and bold / italic runs are wrapped in ** / * markers.
 *
 * @param {boolean} inSrc True while inside a source-code block.
 * @param {Text|string} txt Text element or ready-made string.
 * @return {string} Markdown for the element.
 */
function processTextElement(inSrc, txt) {
  if (typeof(txt) === 'string') {
    return txt;
  }
  var pOut = txt.getText();
  if (! txt.getTextAttributeIndices) {
    return pOut;
  }

  // getFontFamily reports the font by name, so compare against the name.
  // (The previous `font.COURIER_NEW` was always undefined, which meant
  // inline code was never detected.)
  var CODE_FONT = 'Courier New';

  var attrs=txt.getTextAttributeIndices();
  var lastOff=pOut.length;
  for (var i=attrs.length-1; i>=0; i--) {
    var off=attrs[i];
    var url=txt.getLinkUrl(off);
    var font=txt.getFontFamily(off);
    if (url) { // start of link
      if (i>=1 && attrs[i-1]==off-1 && txt.getLinkUrl(attrs[i-1])===url) {
        // detect links that are in multiple pieces because of errors on formatting:
        i-=1;
        off=attrs[i];
        url=txt.getLinkUrl(off);
      }
      url = processLinkedFile( url );
      pOut=pOut.substring(0, off)+'['+pOut.substring(off, lastOff)+']('+url+')'+pOut.substring(lastOff);
    } else if (font) {
      if (!inSrc && font===CODE_FONT) {
        while (i>=1 && txt.getFontFamily(attrs[i-1])===CODE_FONT) {
          // detect fonts that are in multiple pieces because of errors on formatting:
          i-=1;
          off=attrs[i];
        }
        pOut=pOut.substring(0, off)+'`'+pOut.substring(off, lastOff)+'`'+pOut.substring(lastOff);
      }
    }
    if (txt.isBold(off)) {
      // Both delimiters are declared locally (no implicit global).
      var d1 = "**";
      var d2 = "**";
      if (txt.isItalic(off)) {
        // edbacher: changed this to handle bold italic properly.
        d1 = "**_"; d2 = "_**";
      }
      pOut=pOut.substring(0, off)+d1+pOut.substring(off, lastOff)+d2+pOut.substring(lastOff);
    } else if (txt.isItalic(off)) {
      pOut=pOut.substring(0, off)+'*'+pOut.substring(off, lastOff)+'*'+pOut.substring(lastOff);
    }
    lastOff=off;
  }
  return pOut;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment