-
-
Save Getinwiththem/3bfa55104330aa6c0b2897f43c5b4d26 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Credit:
The vast majority of this code comes from:
https://github.com/mangini/gdocs2md/issues
Ben Jefferson added the following to create this fork:
- Replaced emailing of markdown files with creating files in Google Drive, in a "<document name>_markdown" directory alongside the file being converted.
- Added support for linking to (and automatically converting) other Google Docs in the same directory or a subdirectory.
License:
The original file and Ben Jefferson's subsequent edits are all released under the Apache2 License; see https://github.com/mangini/gdocs2md for more details.
Usage:
Adding this script to your doc:
- Tools > Script Manager > New
- Select "Blank Project", then paste this code in and save as "Export Markdown.gs"
- Run > Test as add-on
- Choose the document you want to test this on - this will open the document in a new tab
Running the script:
- The generated files go into a "<document name>_markdown" directory in the same directory as the document you are editing; it is created automatically if it does not exist yet
- Add-ons > Convert To Markdown > Update Markdown
- On successful completion a pop-up will appear saying "All done!"
*/
// Suffix appended to the document name to build the output folder name.
var outputDirSuffix = "_markdown";

// State shared by the functions below during one conversion run.
var parentFolder,    // folder that contains the document being converted
    outputBaseDir,   // folder the generated files are written to
    processedFiles,  // document id -> markdown filename generated for it
    usedFilenames;   // filename -> true for every name handed out by getFilename()
/**
 * Install hook: forwards the event straight to onOpen() so the menu is
 * built as soon as the add-on is installed.
 *
 * @param {Object} e event object, passed through unchanged
 */
function onInstall(e) {
  onOpen(e);
}
/**
 * Open hook: adds an "Update Markdown" entry to the add-on menu that runs
 * processActiveDocument when selected.
 *
 * @param {Object} e event object (not used)
 */
function onOpen(e) {
  var addonMenu = DocumentApp.getUi().createAddonMenu();
  addonMenu = addonMenu.addItem('Update Markdown', 'processActiveDocument');
  addonMenu.addToUi();
}
/**
 * Menu handler: converts the active document (and every Google Doc it links
 * to) into markdown files inside "<document name>_markdown", a folder that
 * sits next to the document and is created on demand. Files in that folder
 * whose names were not produced by this run are removed afterwards.
 *
 * Resets the per-run state (processedFiles, usedFilenames) and sets
 * parentFolder / outputBaseDir for the helpers it calls.
 */
function processActiveDocument() {
  processedFiles = {};
  usedFilenames = {};
  var theDocument = DocumentApp.getActiveDocument();
  var parents = DriveApp.getFileById(theDocument.getId()).getParents();
  // The parent iterator can be empty (no folder visible to the current user);
  // calling next() on it would abort with an opaque iterator exception.
  if (!parents.hasNext()) {
    DocumentApp.getUi().alert('Cannot export: this document is not inside a Drive folder you can access.');
    return;
  }
  // A file can live in multiple directories - this code just uses the first
  parentFolder = parents.next();
  var outputBaseDirName = theDocument.getName() + outputDirSuffix;
  var existingDirs = parentFolder.getFoldersByName(outputBaseDirName);
  // Create the output dir if it doesn't already exist
  if (existingDirs.hasNext()) {
    outputBaseDir = existingDirs.next();
  } else {
    outputBaseDir = parentFolder.createFolder(outputBaseDirName);
  }
  ConvertToMarkdown( theDocument );
  // Remove any old unused files from the output directory
  var outputDirFiles = outputBaseDir.getFiles();
  while (outputDirFiles.hasNext()) {
    var file = outputDirFiles.next();
    if (!usedFilenames[file.getName()]) {
      outputBaseDir.removeFile(file);
    }
  }
  DocumentApp.getUi().alert('All done!');
}
/**
 * Hands out a filename that has not been used yet during this run and
 * records it in usedFilenames. When "filename + extension" is already taken,
 * "_1", "_2", ... is inserted between the two until a free name is found.
 * One pre-existing file with the chosen name, if any, is removed from
 * outputBaseDir.
 *
 * @param {string} filename base name
 * @param {string} [extension] suffix appended after the optional counter
 * @return {string} the reserved filename
 */
function getFilename( filename, extension ){
  var suffix = extension ? extension : '';
  var candidate = filename + suffix;
  for (var attempt = 1; usedFilenames[candidate]; attempt++) {
    candidate = filename + '_' + attempt + suffix;
  }
  usedFilenames[candidate] = true;

  var stale = outputBaseDir.getFilesByName(candidate);
  if (stale.hasNext()) {
    outputBaseDir.removeFile(stale.next());
  }
  return candidate;
}
/**
 * Converts one Google Doc into a markdown file (plus one file per inline
 * image) in outputBaseDir.
 *
 * The result is remembered in processedFiles by document id, so a document
 * that is reached again (for example through a link) is not converted twice.
 *
 * @param {Document} theDocument the document to convert
 * @return {string} name of the generated markdown file
 */
function ConvertToMarkdown( theDocument ) {
  var docId = theDocument.getId();
  if (processedFiles[docId]) return processedFiles[docId];

  var processedFilename = getFilename(theDocument.getName(), ".md");
  // Registered before the body is walked: a linked document that links back
  // here then gets the cached name instead of starting another conversion.
  processedFiles[docId] = processedFilename;
  // Image files are named after the markdown file minus its ".md" suffix.
  var imageBaseFilename = processedFilename.slice(0, -3);

  var body = theDocument.getBody();
  var numChildren = body.getNumChildren();
  var text = "";
  var inSrc = false;    // currently inside a <pre> block
  var inClass = false;  // currently inside a <div class="..."> block
  var globalListCounters = {};

  // Walk through all the child elements of the doc.
  for (var i = 0; i < numChildren; i++) {
    var child = body.getChild(i);
    var result = processParagraph(i, child, inSrc, imageBaseFilename, globalListCounters);
    if (result === null) {
      // support empty lines inside source code
      if (inSrc) text += '\n';
      continue;
    }

    // A result may carry no text at all (marker paragraphs, paragraphs that
    // only hold a page break); normalise so the branches below never see
    // undefined.
    var lineText = result.text || "";

    if (result.sourcePretty === "start" && !inSrc) {
      inSrc = true;
      text += "<pre class=\"prettyprint\">\n";
    } else if (result.sourcePretty === "end" && inSrc) {
      inSrc = false;
      text += "</pre>\n\n";
    } else if (result.source === "start" && !inSrc) {
      inSrc = true;
      text += "<pre>\n";
    } else if (result.source === "end" && inSrc) {
      inSrc = false;
      text += "</pre>\n\n";
    } else if (result.inClass === "start" && !inClass) {
      inClass = true;
      text += "<div class=\"" + result.className + "\">\n";
    } else if (result.inClass === "end" && inClass) {
      inClass = false;
      text += "</div>\n\n";
    } else if (inClass) {
      if (lineText.length > 0) text += lineText + "\n\n";
    } else if (inSrc) {
      // Every paragraph is one line of the listing, even when it is empty.
      text += escapeHTML(lineText) + "\n";
    } else if (lineText.length > 0) {
      text += lineText + "\n\n";
    }

    if (result.images && result.images.length > 0) {
      for (var j = 0; j < result.images.length; j++) {
        var image = result.images[j];
        createOutputFile(outputBaseDir, getFilename(image.name), image.bytes, image.type, true);
      }
    }
  }

  createOutputFile(outputBaseDir, processedFilename, text, "text/plain");
  return processedFilename;
}
/**
 * Writes a file into the given folder, first removing every file in that
 * folder that already has the same name.
 *
 * @param {Folder} outputBaseDir folder to write into
 * @param {string} filename name of the file to create
 * @param {*} content data handed to Utilities.newBlob
 * @param {string} type content type handed to Utilities.newBlob
 * @param {boolean} [isBlob] accepted but not consulted
 */
function createOutputFile( outputBaseDir, filename, content, type, isBlob ) {
  for (var stale = outputBaseDir.getFilesByName( filename ); stale.hasNext(); ) {
    outputBaseDir.removeFile(stale.next());
  }
  var blob = Utilities.newBlob(content, type, filename);
  outputBaseDir.createFile(blob);
}
/**
 * Escapes the characters that are significant in HTML (ampersand,
 * less-than, greater-than) so that text can be placed verbatim inside a
 * <pre> block.
 *
 * @param {string} text raw text; null/undefined is treated as empty
 * @return {string} text with the three characters replaced by their
 *     named character references
 */
function escapeHTML(text) {
  if (text === null || text === undefined) return '';
  // The references are assembled from two pieces so that this source keeps
  // working even after passing through a tool that decodes HTML entities.
  var AMP = '&';
  // The ampersand goes first, otherwise the references produced by the
  // other two replacements would be escaped a second time.
  return String(text)
    .replace(/&/g, AMP + 'amp;')
    .replace(/</g, AMP + 'lt;')
    .replace(/>/g, AMP + 'gt;');
}
/**
 * Process one top-level body element (not just paragraphs).
 *
 * @param {number} index position of the element in the body (used in error messages)
 * @param {Element} element the body child to convert
 * @param {boolean} inSrc whether the caller is currently inside a source block
 * @param {string} imageBaseFilename prefix for the names of extracted images
 * @param {Object} listCounters running ordered-list counters, passed on to findPrefix
 * @return {?Object} null when the element has no children; otherwise an
 *     object that may carry `text`, `images` (array of {bytes, type, name}),
 *     or the block markers `source`, `sourcePretty`, `inClass`, `className`
 * @throws {Error} on an unsupported image type or child element type
 */
function processParagraph(index, element, inSrc, imageBaseFilename, listCounters) {
  // First, check for things that require no processing.
  var childCount = element.getNumChildren();
  if (childCount == 0) {
    return null;
  }
  var ElementType = DocumentApp.ElementType;
  var elementType = element.getType();
  // Punt on TOC.
  if (elementType === ElementType.TABLE_OF_CONTENTS) {
    return {"text": "[[TOC]]"};
  }

  // Set up for real results.
  var result = {};
  var markerText = "";    // raw text of the TEXT children, only used to spot "---" markers
  var textElements = [];  // Text elements and ready-made strings, in document order
  var imageCounter = 0;
  var i, j;

  // Handle Table elements. Pretty simple-minded now, but works for simple tables.
  // Note that Markdown does not process within block-level HTML, so it probably
  // doesn't make sense to add markup within tables.
  if (elementType === ElementType.TABLE) {
    textElements.push("<table>\n");
    for (i = 0; i < childCount; i++) {
      var row = element.getChild(i);
      textElements.push(" <tr>\n");
      // Each row is asked for its own cell count, so a row that is shorter
      // than the first one cannot cause an out-of-range child lookup.
      var nCols = row.getNumCells();
      for (j = 0; j < nCols; j++) {
        textElements.push(" <td>" + row.getChild(j).getText() + "</td>\n");
      }
      textElements.push(" </tr>\n");
    }
    textElements.push("</table>\n");
  }

  // Process various types (ElementType).
  for (i = 0; i < childCount; i++) {
    var child = element.getChild(i);
    var t = child.getType();
    if (t === ElementType.TABLE_ROW) {
      // do nothing: already handled TABLE_ROW
    } else if (t === ElementType.TEXT) {
      markerText += child.getText();
      textElements.push(child);
    } else if (t === ElementType.INLINE_IMAGE) {
      result.images = result.images || [];
      var blob = child.getBlob();
      var contentType = blob.getContentType();
      var extension = "";
      if (/\/png$/.test(contentType)) {
        extension = ".png";
      } else if (/\/gif$/.test(contentType)) {
        extension = ".gif";
      } else if (/\/jpe?g$/.test(contentType)) {
        extension = ".jpg";
      } else {
        throw new Error("Unsupported image type: " + contentType);
      }
      var name = imageBaseFilename + '_' + imageCounter + extension;
      imageCounter++;
      textElements.push('![image alt text](' + name + ')');
      result.images.push({
        "bytes": blob.getBytes(),
        "type": contentType,
        "name": name
      });
    } else if (t === ElementType.PAGE_BREAK) {
      // ignore
    } else if (t === ElementType.HORIZONTAL_RULE) {
      textElements.push('* * *\n');
    } else if (t === ElementType.FOOTNOTE) {
      textElements.push(' (NOTE: ' + child.getFootnoteContents().getText() + ')');
    } else {
      throw new Error("Paragraph " + index + " of type " + elementType + " has an unsupported child: "
        + t + " " + (child["getText"] ? child.getText() : '') + " index=" + index);
    }
  }

  if (textElements.length == 0) {
    // Nothing convertible was found (e.g. only a page break): empty result.
    return result;
  }

  // evb: Add source pretty too. (And abbreviations: src and srcp.)
  // process source code block:
  var match;
  if (/^\s*---\s+srcp\s*$/.test(markerText) || /^\s*---\s+source pretty\s*$/.test(markerText)) {
    result.sourcePretty = "start";
  } else if (/^\s*---\s+src\s*$/.test(markerText) || /^\s*---\s+source code\s*$/.test(markerText)) {
    result.source = "start";
  } else if ((match = /^\s*---\s+class\s+([^ ]+)\s*$/.exec(markerText))) {
    result.inClass = "start";
    result.className = match[1];
  } else if (/^\s*---\s*$/.test(markerText)) {
    // A bare "---" closes whichever block is open; the caller decides which.
    result.source = "end";
    result.sourcePretty = "end";
    result.inClass = "end";
  } else if ((match = /^\s*---\s+jsperf\s*([^ ]+)\s*$/.exec(markerText))) {
    result.text = '<iframe style="width: 100%; height: 340px; overflow: hidden; border: 0;" ' +
      'src="http://www.html5rocks.com/static/jsperfview/embed.html?id=' + match[1] +
      '"></iframe>';
  } else {
    var prefix = findPrefix(inSrc, element, listCounters);
    var converted = "";
    for (i = 0; i < textElements.length; i++) {
      converted += processTextElement(inSrc, textElements[i]);
    }
    // replace Unicode quotation marks (every occurrence, hence the g flag)
    converted = converted.replace(/\u201d/g, '"').replace(/\u201c/g, '"');
    result.text = prefix + converted;
  }
  return result;
}
/**
 * Add correct prefix to headings and list items.
 *
 * @param {boolean} inSrc true while inside a source block; no prefix is produced then
 * @param {Element} element the paragraph or list item being converted
 * @param {Object} listCounters running ordered-list counters keyed by
 *     "<listId>.<nestingLevel>"; updated in place
 * @return {string} the markdown prefix ("" when none applies)
 */
function findPrefix(inSrc, element, listCounters) {
  if (inSrc) {
    return "";
  }
  var prefix = "";
  var type = element.getType();
  if (type === DocumentApp.ElementType.PARAGRAPH) {
    var Heading = DocumentApp.ParagraphHeading;
    // Position in this list + 1 is the number of '#' characters to emit.
    var headings = [
      Heading.HEADING1, Heading.HEADING2, Heading.HEADING3,
      Heading.HEADING4, Heading.HEADING5, Heading.HEADING6
    ];
    var level = headings.indexOf(element.getHeading()) + 1;
    if (level > 0) {
      prefix = new Array(level + 1).join("#") + " ";
    }
  } else if (type === DocumentApp.ElementType.LIST_ITEM) {
    var nesting = element.getNestingLevel();
    // Four spaces per level: a nested item has to be indented at least to
    // the content column of its parent ("* " or "1. ") to be parsed as a
    // child rather than a sibling.
    for (var i = 0; i < nesting; i++) {
      prefix += "    ";
    }
    var gt = element.getGlyphType();
    if (gt === DocumentApp.GlyphType.BULLET
        || gt === DocumentApp.GlyphType.HOLLOW_BULLET
        || gt === DocumentApp.GlyphType.SQUARE_BULLET) {
      // Bullet list (<ul>):
      prefix += "* ";
    } else {
      // Ordered list (<ol>): one counter per list and nesting level.
      var key = element.getListId() + '.' + nesting;
      var counter = (listCounters[key] || 0) + 1;
      listCounters[key] = counter;
      prefix += counter + ". ";
    }
  }
  return prefix;
}
/**
 * Resolves a link target. A link that points at a Google Doc is converted
 * to markdown (via ConvertToMarkdown) and replaced by the name of the
 * generated file; every other URL is returned unchanged.
 *
 * @param {string} url link target found in the document
 * @return {string} generated markdown filename, or the original url
 */
function processLinkedFile( url ) {
  if (url.indexOf('https://docs.google.com/') !== 0) return url;
  // The file id is the first run of 25+ id characters in the URL.
  var idMatch = url.match(/.*[^-\w]([-\w]{25,})[^-\w]?.*/);
  if (!idMatch || !idMatch[1]) return url;
  var fileId = idMatch[1];

  var linkedFile;
  try {
    linkedFile = DriveApp.getFileById(fileId);
  } catch (e) {
    // The lookup throws for files that are missing or not accessible.
    // Such a link is left pointing at the original URL instead of
    // aborting the whole export.
    return url;
  }
  if (!linkedFile) return url;
  // Loose comparison kept on purpose: it also matches if the MimeType
  // member is not a primitive string.
  if (linkedFile.getMimeType() != MimeType.GOOGLE_DOCS) return url;

  var newUrl = ConvertToMarkdown( DocumentApp.openById(fileId) );
  return newUrl.length ? newUrl : url;
}
/**
 * Converts one piece of paragraph content to markdown.
 *
 * Plain strings (markup that was already generated) are returned as they
 * are. For Text elements the formatting runs are walked from the last to the
 * first, so inserting markup never shifts an offset that is still to be
 * processed. Links become [text](url), Courier New text outside source
 * blocks becomes `code`, and bold / italic wrap the result of either.
 *
 * @param {boolean} inSrc true while inside a source block (no inline code markup)
 * @param {string|Text} txt a ready-made string or a Text element
 * @return {string} markdown for this piece
 */
function processTextElement(inSrc, txt) {
  if (typeof(txt) === 'string') {
    return txt;
  }
  var pOut = txt.getText();
  if (!txt.getTextAttributeIndices) {
    return pOut;
  }

  // getFontFamily() yields the font name as a string.
  var CODE_FONT = 'Courier New';
  var attrs = txt.getTextAttributeIndices();
  var lastOff = pOut.length;

  for (var i = attrs.length - 1; i >= 0; i--) {
    var off = attrs[i];
    var url = txt.getLinkUrl(off);
    var font = txt.getFontFamily(off);
    var isLink = false;
    var isCode = false;

    if (url) { // start of link
      if (i >= 1 && attrs[i - 1] === off - 1 && txt.getLinkUrl(attrs[i - 1]) === url) {
        // detect links that are in multiple pieces because of errors on formatting:
        i -= 1;
        off = attrs[i];
        url = txt.getLinkUrl(off);
      }
      url = processLinkedFile( url );
      isLink = true;
    } else if (font) {
      if (!inSrc && font === CODE_FONT) {
        while (i >= 1 && txt.getFontFamily(attrs[i - 1]) === CODE_FONT) {
          // detect fonts that are in multiple pieces because of errors on formatting:
          i -= 1;
          off = attrs[i];
        }
        isCode = true;
      }
    }

    // Everything before lastOff is still untouched source text, so the run
    // is cut out once and all markup is applied to that copy. Re-slicing
    // pOut after inserting markup would misplace the closing delimiters.
    var segment = pOut.substring(off, lastOff);
    if (isLink) {
      segment = '[' + segment + '](' + url + ')';
    } else if (isCode) {
      segment = '`' + segment + '`';
    }
    if (txt.isBold(off)) {
      // edbacher: changed this to handle bold italic properly.
      if (txt.isItalic(off)) {
        segment = "**_" + segment + "_**";
      } else {
        segment = "**" + segment + "**";
      }
    } else if (txt.isItalic(off)) {
      segment = '*' + segment + '*';
    }
    pOut = pOut.substring(0, off) + segment + pOut.substring(lastOff);
    lastOff = off;
  }
  return pOut;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment