Skip to content

Instantly share code, notes, and snippets.

@simonw
Last active January 3, 2024 00:02
Show Gist options
  • Star 29 You must be signed in to star a gist
  • Fork 13 You must be signed in to fork a gist
  • Save simonw/0acc8b879787ee30ddfdc5c4d9998e5d to your computer and use it in GitHub Desktop.
Save simonw/0acc8b879787ee30ddfdc5c4d9998e5d to your computer and use it in GitHub Desktop.
Google Apps Script to convert a Google Docs document into reStructuredText
/**
 * Trigger run when the document is opened: adds a "Convert to .RST" menu
 * whose single item invokes ConvertToRestructuredText.
 */
function onOpen() {
  var menu = DocumentApp.getUi().createMenu('Convert to .RST');
  // addItem returns the menu it was called on, so keep using its return value.
  menu = menu.addItem('Convert to .RST and email me the result', 'ConvertToRestructuredText');
  menu.addToUi();
}
// Adapted from https://github.com/mangini/gdocs2md by Renato Mangini
// License: Apache License Version 2.0
// Fallback for runtimes without a native String.prototype.repeat:
// returns the string concatenated with itself `num` times.
if (!String.prototype.repeat) {
  String.prototype.repeat = function(num) {
    var pieces = [];
    for (var count = 0; count < num; count++) {
      pieces.push(String(this));
    }
    return pieces.join('');
  };
}
/**
 * Menu handler: converts the active document and emails the result.
 *
 * Walks every child of the document body, converts each one with
 * processParagraph, and concatenates the pieces into one text attachment
 * named "<document name>.rst". Inline images reported by processParagraph are
 * attached alongside it. The mail is sent to the active user's address.
 *
 * "--- src" / "--- srcp" / "--- class NAME" marker paragraphs open a
 * <pre>, <pre class="prettyprint"> or <div class="NAME"> wrapper; a bare
 * "---" paragraph closes whichever wrapper is open.
 */
function ConvertToRestructuredText() {
  var doc = DocumentApp.getActiveDocument();
  // getBody() replaces the deprecated getActiveSection().
  var body = doc.getBody();
  var numChildren = body.getNumChildren();
  var text = "";
  var inSrc = false;
  var inClass = false;
  var globalImageCounter = 0;
  var globalListCounters = {};
  // edbacher: added a variable for indent in src <pre> block. Let style sheet do margin.
  var srcIndent = "";
  var attachments = [];

  // Walk through all the child elements of the doc.
  for (var i = 0; i < numChildren; i++) {
    var child = body.getChild(i);
    var result = processParagraph(i, child, inSrc, globalImageCounter, globalListCounters);

    if (result === null) {
      // Support empty lines inside source code.
      if (inSrc) {
        text += '\n';
      }
      continue;
    }

    var images = result.images || [];
    globalImageCounter += images.length;

    // A result may legitimately carry no text (for example a paragraph that
    // holds only a page break). Treat that as an empty string so we neither
    // emit the word "undefined" nor pass undefined to escapeHTML.
    var resultText = result.text || "";

    if (result.sourcePretty === "start" && !inSrc) {
      inSrc = true;
      text += "<pre class=\"prettyprint\">\n";
    } else if (result.sourcePretty === "end" && inSrc) {
      inSrc = false;
      text += "</pre>\n\n";
    } else if (result.source === "start" && !inSrc) {
      inSrc = true;
      text += "<pre>\n";
    } else if (result.source === "end" && inSrc) {
      inSrc = false;
      text += "</pre>\n\n";
    } else if (result.inClass === "start" && !inClass) {
      inClass = true;
      text += "<div class=\"" + result.className + "\">\n";
    } else if (result.inClass === "end" && inClass) {
      inClass = false;
      text += "</div>\n\n";
    } else if (inClass) {
      if (resultText.length > 0) {
        text += resultText + "\n\n";
      }
    } else if (inSrc) {
      text += (srcIndent + escapeHTML(resultText) + "\n");
    } else if (resultText.length > 0) {
      text += resultText + "\n\n";
    }

    for (var j = 0; j < images.length; j++) {
      attachments.push({
        "fileName": images[j].name,
        "mimeType": images[j].type,
        "content": images[j].bytes
      });
    }
  }

  // The converted text is reStructuredText, so name and describe it as such.
  attachments.push({"fileName": doc.getName() + ".rst", "mimeType": "text/plain", "content": text});
  MailApp.sendEmail(Session.getActiveUser().getEmail(),
    "[RST_MAKER] " + doc.getName(),
    "Your converted reStructuredText document is attached (converted from " + doc.getUrl() + ")" +
    "\n\nDon't know how to use the format options? See http://github.com/mangini/gdocs2md\n",
    { "attachments": attachments });
}
/**
 * Escapes text for safe inclusion inside an HTML element such as <pre>.
 *
 * @param {string} text Raw text.
 * @return {string} The text with &, < and > replaced by HTML entities.
 */
function escapeHTML(text) {
  // Ampersands must be escaped first; otherwise the entities produced for
  // < and > would themselves be re-escaped.
  return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}
// Process each child element (not just paragraphs).
/**
 * Converts one top-level document element into an intermediate result.
 *
 * @param {number} index Position of the element in the document (used only
 *     in error messages).
 * @param {Object} element The document element to convert.
 * @param {boolean} inSrc Whether the caller is currently inside a source
 *     block; forwarded to findAdornments and processTextElement.
 * @param {number} imageCounter Number used to name the next inline image.
 * @param {Object} listCounters Ordered-list counters, forwarded to
 *     findAdornments.
 * @return {?Object} null when the element has no children; otherwise an
 *     object that may carry `text`, `images` (array of {bytes, type, name}),
 *     and the marker fields `source`, `sourcePretty`, `inClass`, `className`.
 * @throws {Error} On an unsupported inline image type or child element type.
 */
function processParagraph(index, element, inSrc, imageCounter, listCounters) {
  // First, check for things that require no processing. Elements that do not
  // expose getNumChildren at all are skipped instead of raising a TypeError.
  if (typeof element.getNumChildren !== 'function' || element.getNumChildren() === 0) {
    return null;
  }
  // Punt on TOC.
  if (element.getType() === DocumentApp.ElementType.TABLE_OF_CONTENTS) {
    return {"text": "[[TOC]]"};
  }

  // Set up for real results.
  var result = {};
  var pOut = "";
  var textElements = [];
  var imagePrefix = "image_";
  var numChildren = element.getNumChildren();
  var i;

  // Handle Table elements. Pretty simple-minded now, but works for simple tables.
  if (element.getType() === DocumentApp.ElementType.TABLE) {
    textElements.push("<table>\n");
    for (i = 0; i < numChildren; i++) {
      var row = element.getChild(i);
      // Use each row's own cell count rather than assuming every row
      // matches the first one.
      var nCells = row.getNumCells();
      textElements.push(" <tr>\n");
      for (var j = 0; j < nCells; j++) {
        textElements.push(" <td>" + row.getChild(j).getText() + "</td>\n");
      }
      textElements.push(" </tr>\n");
    }
    textElements.push("</table>\n");
  }

  // Process various types (ElementType).
  for (i = 0; i < numChildren; i++) {
    var child = element.getChild(i);
    var t = child.getType();
    if (t === DocumentApp.ElementType.TABLE_ROW) {
      // do nothing: already handled TABLE_ROW
    } else if (t === DocumentApp.ElementType.TEXT) {
      pOut += child.getText();
      textElements.push(child);
    } else if (t === DocumentApp.ElementType.INLINE_IMAGE) {
      result.images = result.images || [];
      var blob = child.getBlob();
      var contentType = blob.getContentType();
      var extension = "";
      if (/\/png$/.test(contentType)) {
        extension = ".png";
      } else if (/\/gif$/.test(contentType)) {
        extension = ".gif";
      } else if (/\/jpe?g$/.test(contentType)) {
        extension = ".jpg";
      } else {
        throw new Error("Unsupported image type: " + contentType);
      }
      var name = imagePrefix + imageCounter + extension;
      imageCounter++;
      textElements.push('.. image:: ' + name + '\n');
      result.images.push({
        "bytes": blob.getBytes(),
        "type": contentType,
        "name": name
      });
    } else if (t === DocumentApp.ElementType.PAGE_BREAK) {
      // ignore
    } else if (t === DocumentApp.ElementType.HORIZONTAL_RULE) {
      textElements.push('------------\n');
    } else if (t === DocumentApp.ElementType.FOOTNOTE) {
      textElements.push(' (NOTE: ' + child.getFootnoteContents().getText() + ')');
    } else {
      throw new Error("Paragraph " + index + " of type " + element.getType() + " has an unsupported child: "
        + t + " " + (child["getText"] ? child.getText() : '') + " index=" + index);
    }
  }

  if (textElements.length === 0) {
    // Nothing convertible was found (e.g. only a page break): the result
    // carries no text and no markers.
    return result;
  }

  // evb: Add source pretty too. (And abbreviations: src and srcp.)
  // Marker paragraphs are recognised on the raw (unformatted) text.
  var classMatch;
  var jsperfMatch;
  if (/^\s*---\s+srcp\s*$/.test(pOut) || /^\s*---\s+source pretty\s*$/.test(pOut)) {
    result.sourcePretty = "start";
  } else if (/^\s*---\s+src\s*$/.test(pOut) || /^\s*---\s+source code\s*$/.test(pOut)) {
    result.source = "start";
  } else if ((classMatch = /^\s*---\s+class\s+([^ ]+)\s*$/.exec(pOut)) !== null) {
    result.inClass = "start";
    result.className = classMatch[1];
  } else if (/^\s*---\s*$/.test(pOut)) {
    // A bare "---" closes whichever kind of block is open; the caller decides.
    result.source = "end";
    result.sourcePretty = "end";
    result.inClass = "end";
  } else if ((jsperfMatch = /^\s*---\s+jsperf\s*([^ ]+)\s*$/.exec(pOut)) !== null) {
    result.text = '<iframe style="width: 100%; height: 340px; overflow: hidden; border: 0;" ' +
      'src="http://www.html5rocks.com/static/jsperfview/embed.html?id=' + jsperfMatch[1] +
      '"></iframe>';
  } else {
    var adornments = findAdornments(inSrc, element, listCounters);
    var rendered = "";
    for (i = 0; i < textElements.length; i++) {
      rendered += processTextElement(inSrc, textElements[i]);
    }
    // Replace every Unicode curly double quote, not just the first of each.
    rendered = rendered.replace(/[\u201c\u201d]/g, '"');
    result.text = adornments.overline + adornments.prefix + rendered + adornments.underline;
  }
  return result;
}
// Figure out adornments for headings and list items
/**
 * Works out the decoration that surrounds a paragraph or list item.
 *
 * Headings get an underline of a level-specific character as long as the
 * paragraph text; TITLE and SUBTITLE additionally get a matching overline
 * (two characters longer) and a one-space prefix. List items get a prefix
 * built from one space per nesting level followed by "* " for bullets or
 * "<n>. " for anything else, where <n> is counted per list id and nesting
 * level in listCounters (which this function updates).
 *
 * @return {{overline: string, prefix: string, underline: string}} A
 *     non-empty overline ends with a newline; a non-empty underline starts
 *     with one. All three are empty when inSrc is true.
 */
function findAdornments(inSrc, element, listCounters) {
  var marks = { overline: "", prefix: "", underline: "" };
  if (inSrc) {
    return marks;
  }

  var kind = element.getType();
  if (kind === DocumentApp.ElementType.PARAGRAPH) {
    var width = element.getText().length;
    var H = DocumentApp.ParagraphHeading;
    // [heading, adornment character, framed with overline + prefix?]
    var styles = [
      [H.HEADING6, '~', false],
      [H.HEADING5, '^', false],
      [H.HEADING4, '+', false],
      [H.HEADING3, '#', false],
      [H.HEADING2, '-', false],
      [H.HEADING1, '=', false],
      [H.SUBTITLE, '-', true],
      [H.TITLE, '=', true]
    ];
    var heading = element.getHeading();
    for (var s = 0; s < styles.length; s++) {
      var style = styles[s];
      if (style[0] !== heading) {
        continue;
      }
      if (style[2]) {
        marks.overline = style[1].repeat(width + 2);
        marks.prefix = ' ';
        marks.underline = style[1].repeat(width + 2);
      } else {
        marks.underline = style[1].repeat(width);
      }
      break;
    }
  } else if (kind === DocumentApp.ElementType.LIST_ITEM) {
    // One space of indentation per nesting level.
    for (var depth = element.getNestingLevel(); depth > 0; depth--) {
      marks.prefix += " ";
    }
    var glyph = element.getGlyphType();
    var G = DocumentApp.GlyphType;
    var isBullet = glyph === G.BULLET || glyph === G.HOLLOW_BULLET || glyph === G.SQUARE_BULLET;
    if (isBullet) {
      marks.prefix += "* ";
    } else {
      // Ordered list: number items per (list id, nesting level).
      var key = element.getListId() + '.' + element.getNestingLevel();
      var count = listCounters[key] || 0;
      count++;
      listCounters[key] = count;
      marks.prefix += count + ". ";
    }
  }

  if (marks.overline) {
    marks.overline += '\n';
  }
  if (marks.underline) {
    marks.underline = '\n' + marks.underline;
  }
  return marks;
}
/**
 * Renders one text fragment with inline markup for links, monospace runs,
 * bold and italic.
 *
 * @param {boolean} inSrc True inside a source block; monospace runs are then
 *     left unmarked.
 * @param {(string|Object)} txt Either an already-rendered string (returned
 *     unchanged) or a text element exposing getText() and, optionally,
 *     getTextAttributeIndices(), getLinkUrl(), getFontFamily(), isBold() and
 *     isItalic().
 * @return {string} The text with inline markup inserted.
 */
function processTextElement(inSrc, txt) {
  if (typeof txt === 'string') {
    return txt;
  }
  var pOut = txt.getText();
  if (!txt.getTextAttributeIndices) {
    return pOut;
  }
  var attrs = txt.getTextAttributeIndices();
  var lastOff = pOut.length;
  // Walk the formatting runs from last to first so that offsets of the runs
  // still to be processed are not shifted by markup already inserted.
  for (var i = attrs.length - 1; i >= 0; i--) {
    var off = attrs[i];
    var url = txt.getLinkUrl(off);
    var font = txt.getFontFamily(off);
    var isLink = false;
    var isCode = false;
    if (url) { // start of link
      if (i >= 1 && attrs[i - 1] === off - 1 && txt.getLinkUrl(attrs[i - 1]) === url) {
        // detect links that are in multiple pieces because of errors on formatting:
        i -= 1;
        off = attrs[i];
        url = txt.getLinkUrl(off);
      }
      isLink = true;
    } else if (font && !inSrc && isCourierNew_(font)) {
      while (i >= 1 && isCourierNew_(txt.getFontFamily(attrs[i - 1]))) {
        // detect fonts that are in multiple pieces because of errors on formatting:
        i -= 1;
        off = attrs[i];
      }
      isCode = true;
    }

    // Build the markup around the run's own text and splice it back once.
    // Slicing pOut again after markup has been inserted would cut the run at
    // the wrong place when it is both linked and bold/italic.
    var segment = pOut.substring(off, lastOff);
    if (isLink) {
      // Double underscores gives us an "anonymous" link reference, avoids errors for duplicate link text
      segment = '`' + segment + ' <' + url + '>`__';
    } else if (isCode) {
      // reStructuredText inline literals use double backticks.
      segment = '``' + segment + '``';
    }
    if (txt.isBold(off)) {
      var d1 = "**";
      var d2 = "**";
      if (txt.isItalic(off)) {
        // edbacher: changed this to handle bold italic properly.
        // NOTE(review): reStructuredText has no nested inline markup, so the
        // underscores will likely render literally — confirm desired output.
        d1 = "**_";
        d2 = "_**";
      }
      segment = d1 + segment + d2;
    } else if (txt.isItalic(off)) {
      segment = '*' + segment + '*';
    }
    pOut = pOut.substring(0, off) + segment + pOut.substring(lastOff);
    lastOff = off;
  }
  return pOut;
}

// Private helper: true when a font family value denotes Courier New, given
// either as the family name string or as the legacy FontFamily enum value.
function isCourierNew_(font) {
  if (!font) {
    return false;
  }
  if (font === 'Courier New') {
    return true;
  }
  return typeof DocumentApp !== 'undefined' &&
    !!DocumentApp.FontFamily &&
    font === DocumentApp.FontFamily.COURIER_NEW;
}
@fazlerabbi37
Copy link

any instruction on how to use this? @simonw

Copy link

ghost commented Jul 26, 2019

How to use it?

@adelleolson
Copy link

Trying to use this script. Getting an error: Cannot call DocumentApp.getUi() from this context. (line 3, file "Code")

@smiile8888
Copy link

smiile8888 commented May 15, 2020

@simonw Thank you for this useful script!

To try this apps-script:

  1. Go to your Google Doc, any docs that you create
  2. Click Tool --> Script editor
  3. Copy and paste the code on script editor, then save
  4. Go back to the docs and refresh, you will see the Convert to .RST on the menu bar
  5. When you click for the first time, a prompt window asking for the authorization will pop up

Then you are good to go!

Click again on the menu, the content on the docs will be converted to .md and send to your email (the one you are logging in and using for the authorization)

Hope this helps!

@Anastasiia-J
Copy link

Hi, thanks. It worked in the beginning.
However, after some time it started to give this error:
TypeError: element.getNumChildren is not a function (line 93, file "Code")

Why could it be, how can I fix it?

@Shark-with-a-Drill
Copy link

Just ran this now and it works beautifully , thanks!

@lemketron
Copy link

Click again on the menu, the content on the docs will be converted to .md and send to your email (the one you are logging in and using for the authorization)

Since the point of this script is to convert to rst instead of md, I believe line 77 should be changed to email the file as a .rst file not .md.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment