public
Last active

Grabbing full text from URLs and creating a .doc/HTML file from a template using Google Apps Script

  • Download Gist
gistfile1.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
/**
 * Web-app GET handler: builds a report from the posts listed in one spreadsheet
 * row and returns it as a downloadable ".doc" file.
 *
 * Each cell of the first row of the requested range is treated as a post URL
 * (or the literal "NONE"). The post body is fetched through a feed-to-JSON
 * proxy, cached for an hour, and handed to the "reportTemplate" HTML template
 * as the `content` array; the project title is handed over as `title`.
 *
 * @param {Object} e Apps Script request event. Reads the query-string values
 *     e.parameter.title (report title / file name), e.parameter.range (A1
 *     range holding the URLs) and e.parameter.sheet (sheet name).
 * @return {TextOutput} The evaluated template, served as a file download.
 */
function doGet(e) {
  // get some variables passed from the querystring
  var project = e.parameter.title;
  var range = e.parameter.range;
  var sheetName = e.parameter.sheet;

  // Grab a basic html template to fill in the blanks - see https://developers.google.com/apps-script/html_service
  var t = HtmlService.createTemplateFromFile("reportTemplate");

  // The id of the spreadsheet holding the source urls is stored in the script property 'active'.
  // PropertiesService replaces the deprecated ScriptProperties service.
  var spreadsheetId = PropertiesService.getScriptProperties().getProperty('active');
  var doc = SpreadsheetApp.openById(spreadsheetId);
  var sheet = doc.getSheetByName(sheetName);
  var data = sheet.getRange(range).getValues();
  var row = data[0]; // only the first row of the range is used
  t.title = project; // exposing the project title to the template

  var options = {"method" : "get"};
  // One cache handle for the whole request; getUserCache replaces the deprecated getPrivateCache.
  var cache = CacheService.getUserCache();
  var output = [];

  // for each url fetch the post content
  for (var i = 0; i < row.length; i++) {
    var source = row[i];
    if (source == "NONE") {
      // The template addresses sections by position (content[0], content[1], ...),
      // so a skipped cell must still occupy its slot or later sections shift up.
      output.push("");
      continue;
    }

    // some caching to improve performance and to prevent too many urlfetch calls
    var cached = cache.get(source);
    if (cached != null) { // if value in cache use it
      output.push(cached);
      continue;
    }

    // if no cache fetch the data
    var url = source + "?feed=rss2&withoutcomments=1";
    // cheating by using a Yahoo Pipe to convert xml into json
    // NOTE(review): Yahoo Pipes has been shut down; this endpoint needs replacing for the fetch to succeed.
    url = "http://pipes.yahoo.com/pipes/pipe.run?_id=6792be788747534d87c0f6ebdadd0a5e&_render=json&url=" + encodeURIComponent(url);
    var response = UrlFetchApp.fetch(url, options);
    var item = JSON.parse(response.getContentText()); // native JSON replaces deprecated Utilities.jsonParse
    var items = item && item.value && item.value.items;
    var res = (items && items.length) ? items[0]["content:encoded"] : null;

    output.push(res || "Error getting content"); // collecting the data ready to pass to the template
    if (res) {
      // Only cache real content: caching a failed lookup would hide the error
      // placeholder behind a bogus cached value for the next hour.
      cache.put(source, res, 3600);
    }
    Utilities.sleep(5000); // sleep to prevent service invoked too many times
  }

  t.content = output; // exposing the collected post bodies to the template

  // app evaluates template with passed data, sets as XML (not really needed) and forces download with .doc extension
  return ContentService.createTextOutput(t.evaluate().getContent())
      .setMimeType(ContentService.MimeType.XML)
      .downloadAsFile((project || "report") + ".doc"); // avoid "undefined.doc" when no title is passed
}
reportTemplate.html
HTML
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
<html xmlns="http://www.w3.org/TR/REC-html40">
<head>
<!-- Report template evaluated by the doGet handler. It expects two template
     variables: "title" (the project title) and "content" (an array of post
     bodies, addressed by position under the section headings in the body). -->
<!-- NOTE(review): the charset below is windows-1252; confirm it matches the
     encoding of the served output, otherwise non-ASCII characters in the
     fetched posts may be garbled when the file is opened. -->
<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
<title>JISC final report template</title>
<style>
<!--
h1 {
page-break-after:avoid;
font-size:16.0pt;
font-family:"Arial", "sans-serif";
}
h2 {
page-break-after:avoid;
font-size:14.0pt;
font-family:"Arial", "sans-serif";
font-style:italic;
}
h3 {
page-break-after:avoid;
font-size:13.0pt;
font-family:"Arial", "sans-serif";
}
p.MsoHeader, li.MsoHeader, div.MsoHeader {
tab-stops:center 216.0pt right 432.0pt;
font-size:10.0pt;
font-family:"Arial", "sans-serif";
}
p.MsoFooter, li.MsoFooter, div.MsoFooter {
tab-stops:center 216.0pt right 432.0pt;
font-size:10.0pt;
font-family:"Arial", "sans-serif";
}
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate {
font-size:8.0pt;
font-family:"Tahoma", "sans-serif";
}
-->
</style>
</head>
 
<body lang=EN-GB>
<div class=WordSection1>
<h1><img width=99 height=57
src="http://www.jisc.ac.uk/images/printLogo.gif" v:shapes="_x0000_i1025"></h1>
<h1>JISC Final Report </h1>
<p style='line-height:12.0pt'><b><i>Before completing this template
please note:</i></b></p>
<ul type=disc>
<li><i>The <u>Project Management Guidelines</u> explain the purpose of final reports.</i></li>
<li><i>Fill in the information for the header,
e.g. project acronym, version, and date.</i></li>
<li><i>Prepare a cover sheet using the <u>cover
sheet template</u> and attach to final report.</i></li>
<li style='line-height:12.0pt; '><i>This template
is for completion by JISC funded project managers</i></li>
<li style='line-height:12.0pt; '><i>Text in
italics is explanatory and should be deleted in completed documents.</i></li>
</ul>
<h2>Title Page</h2>
<!-- The title is printed with the escaping scriptlet and falls back to "N/A". -->
<p><i><?= title || "N/A" ?></i></p>
<!-- The sections below use the force-printing (unescaped) scriptlet because the
     post bodies are HTML. Each one falls back to an empty string so that a
     missing entry never renders as the literal text "undefined". -->
<h2>Project Plan</h2>
<div><?!= content[0] || "" ?></div>
<h2>Aims</h2>
<div><?!= content[1] || "" ?></div>
<h2>Usecase</h2>
<div><?!= content[2] || "" ?></div>
<h2>Nutshell</h2>
<div><?!= content[3] || "" ?></div>
<h2>Outputs</h2>
<div><?!= content[4] || "" ?></div>
<h2>Outputs List</h2>
<div><?!= content[5] || "" ?></div>
<h2>Lessons Learned</h2>
<div><?!= content[6] || "" ?></div>
<h2>Impact</h2>
<div><?!= content[7] || "" ?></div>
<h2>Grand Finale</h2>
<div><?!= content[8] || "" ?></div>
</div>
</body>
</html>

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.