Last active
February 17, 2024 11:30
-
-
Save ashd97/b260eeafec9dac70e9475b89860da565 to your computer and use it in GitHub Desktop.
Get latest news by emails on google apps script trigger
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Data feed URLs (RSS 2.0 or Atom). Feeds listed in the OPML file referenced
// by `url` below are appended to this list at run time.
var dataSources = [
  "http://blog.qt.io/feed/",
  "http://feed.rutracker.cc/atom/f/1426.atom",
  "http://feeds2.feedburner.com/blogspot/hsDu",
];

// Settings

// Subject of every digest email; also used as the <h1> heading of the body.
var EMAIL_TITLE = "News digest";

// Number of days back to scan for items. Set the same period in the project's
// trigger (Day timer / Week timer / Month timer) so that you receive only new
// messages.
var daystimer = 31;

// Rough upper bound for one email body, compared against roughSizeOfObject()
// results (2 bytes per character). Try to decrease it if the message does not
// pass: there is a limit on the body size per email. (Gmail clips emails that
// have a message size larger than 102kb, and hides the full content behind a
// "View entire message" link.)
var maxmsgsize = 300000;

// When true, the description/content of every feed item is included as well.
var showDescription = true;

// Recipient of the digest. getActiveUser().getEmail() can return a blank
// string in some execution contexts, so fall back to the user under whose
// authority the script runs. Replace with a literal "username@domain" to send
// the digest elsewhere.
var deliverAddress = Session.getActiveUser().getEmail() || Session.getEffectiveUser().getEmail();

// Paste the OPML URL here (optional); leave empty to use only dataSources.
var url = '';
/**
 * Sends one HTML email to `deliverAddress` with the subject `EMAIL_TITLE`.
 *
 * A footer with the value of MailApp.getRemainingDailyQuota() (read before
 * the message is sent) is appended to the body.
 *
 * @param {string} Message HTML body of the email, without the quota footer.
 * @returns {void} Nothing; errors raised by GmailApp propagate to the caller.
 */
function SendEmail(Message) {
  // Build the final body in a local instead of reassigning the parameter.
  var htmlBody = Message + "<br/><a>Emails left for today: </a>" + MailApp.getRemainingDailyQuota();
  GmailApp.sendEmail(deliverAddress, EMAIL_TITLE, "", {
    htmlBody: htmlBody
  });
}
/**
 * Delivers all updates from the configured feeds in one or several emails
 * (there is a limit on the body size per email). Run it to test the script.
 *
 * Feeds are appended to the current email while the rough size of the body
 * stays below `maxmsgsize`. A feed that does not fit is carried over to the
 * next email. A feed that cannot fit even in an otherwise empty email is
 * replaced by a short notice, so the loop always makes progress.
 *
 * If sending fails, one email containing the error text is attempted and the
 * run stops. An error from that second attempt propagates to the caller.
 *
 * @returns {void}
 */
function deliverNews() {
  var header = "<h1>" + EMAIL_TITLE + "</h1>\n";
  var headerSize = roughSizeOfObject(header);
  var body = header;
  // Parsed HTML of the feed at index i that did not fit into the previous
  // email; kept so the feed is not fetched and parsed a second time.
  var pending = null;
  var i = 0;

  if (url !== '') {
    opmlparser(url);
  }

  do {
    body = header;
    while (i < dataSources.length) {
      var parsed = (pending !== null) ? pending : retrieveFeedItems(dataSources[i]);
      pending = null;
      var itemSize = roughSizeOfObject(parsed);

      if (headerSize + itemSize >= maxmsgsize) {
        // Would not fit even in an email of its own: report it and move on,
        // otherwise the same feed would be retried forever.
        body += "<p>This feed " + dataSources[i] + " is too large, try to decrease daystimer value or increase maxmsgsize value.</p>";
        i++;
        break;
      }
      if (roughSizeOfObject(body) + itemSize >= maxmsgsize) {
        // Current email is full; send it and start the next one with this feed.
        pending = parsed;
        break;
      }
      body += parsed;
      i++;
    }

    try {
      SendEmail(body);
    } catch (err) {
      body = err + "<br/>Try to decrease maxmsgsize value. There is a limit on the body size per email.";
      SendEmail(body);
      break;
    }
  } while (i < dataSources.length);

  // Length (in characters) of the last body handed to SendEmail.
  Logger.log(body.length);
}
/**
 * Retrieves one feed and renders its recent items as an HTML fragment.
 *
 * Atom feeds (root element `feed`) and RSS feeds whose `version` attribute is
 * "2.0" are rendered; any other document yields an empty `<div></div>`.
 * Items for which limitheadlines() returns true are skipped. Fetch and parse
 * failures are reported inside the returned fragment instead of being thrown,
 * so one broken feed does not abort the whole digest.
 *
 * @param {string} feedUrl URL of the feed to fetch.
 * @returns {string} HTML fragment wrapped in a single `<div>`.
 */
function retrieveFeedItems(feedUrl) {
  var parsedItems = "<div>";
  var root = null;

  try {
    var feedSrc = UrlFetchApp.fetch(feedUrl).getContentText();
    var feedDoc = XmlService.parse(feedSrc);
    if (feedDoc) {
      root = feedDoc.getRootElement();
    }
  } catch (err) {
    parsedItems += "Can't fetch feed " + feedUrl + " " + err + "<br/>";
  }

  if (root) {
    try {
      if (root.getName() == "feed") {
        parsedItems += atomEntriesToHtml_(root);
      } else if (root.getName() == "rss") {
        // The version attribute may be absent; treat that as "not RSS 2.0".
        var versionAttr = root.getAttribute("version");
        if (versionAttr && versionAttr.getValue() == "2.0") {
          parsedItems += rssItemsToHtml_(root);
        }
      }
    } catch (err) {
      parsedItems += "Can't parse feed " + feedUrl + " " + err + "<br/>";
    }
  }

  parsedItems += "</div>";
  return parsedItems;
}

/** Renders the recent entries of an Atom feed root element as `<li>` items. */
function atomEntriesToHtml_(root) {
  var atom = XmlService.getNamespace('http://www.w3.org/2005/Atom');
  var feedTitleEl = root.getChild('title', atom);
  var feedTitle = feedTitleEl ? feedTitleEl.getText() : "";
  var entries = root.getChildren('entry', atom);
  var html = "";

  for (var i = 0; i < entries.length; i++) {
    var entry = entries[i];
    // Prefer <published>, fall back to <updated>.
    var timeEl = entry.getChild('published', atom) || entry.getChild('updated', atom);
    var pubTime = timeEl ? timeEl.getValue() : "";
    if (limitheadlines(parseFeedDate_(pubTime))) {
      continue;
    }

    var linkEl = entry.getChild("link", atom);
    var hrefAttr = linkEl ? linkEl.getAttribute('href') : null;
    var href = hrefAttr ? hrefAttr.getValue() : "";

    html += "<li>" +
      feedTitle + " " +
      "<a href='" + escapeHtmlAttr_(href) + "'>" +
      (entry.getChildText("title", atom) || "") + "</a> " + pubTime + "<br/>";
    if (showDescription) {
      html += entry.getChildText('summary', atom) || "";
      html += entry.getChildText('content', atom) || "";
    }
    html += "</li><br/>";
  }
  return html;
}

/** Renders the recent items of an RSS 2.0 root element as `<li>` items. */
function rssItemsToHtml_(root) {
  var channel = root.getChild("channel");
  if (!channel) {
    return "";
  }
  var feedTitle = channel.getChildText("title") || "";
  var contentNS = XmlService.getNamespace('http://purl.org/rss/1.0/modules/content/');
  var items = channel.getChildren("item");
  var html = "";

  for (var i = 0; i < items.length; i++) {
    var item = items[i];
    var pubTime = item.getChildText("pubDate");
    if (limitheadlines(new Date(pubTime))) {
      continue;
    }

    html += "<li>" +
      feedTitle + " " +
      "<a href='" + escapeHtmlAttr_(item.getChildText("link") || "") + "'>" +
      (item.getChildText("title") || "") + "</a> " +
      pubTime + "\n<br/>";
    if (showDescription) {
      html += item.getChildText('description') || "";
      html += item.getChildText('encoded', contentNS) || "";
    }
    html += "</li><br/>";
  }
  return html;
}

/**
 * Parses an Atom timestamp. The native parser is tried first because it
 * honours the timezone offset; the manual split (which reads the fields as
 * local time and ignores any offset) is kept as a fallback.
 */
function parseFeedDate_(text) {
  var parsed = new Date(text);
  if (!isNaN(parsed.getTime())) {
    return parsed;
  }
  var parts = String(text).split(/[: T-]/).map(parseFloat);
  return new Date(parts[0], parts[1] - 1, parts[2], parts[3] || 0, parts[4] || 0, parts[5] || 0, 0);
}

/** Escapes a value so it can be placed inside a quoted HTML attribute. */
function escapeHtmlAttr_(text) {
  return String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
/**
 * Tells whether an item is too old to be included in the digest.
 *
 * The window is `daystimer` days plus one extra hour, counted back from now.
 *
 * @param {Date} date Publication date of the item.
 * @returns {boolean} true when `date` is at or before the start of the window
 *     (the item must be skipped), false otherwise. An invalid Date compares
 *     as false, so such an item is not skipped.
 */
function limitheadlines(date) {
  var MS_PER_DAY = 86400000;
  var MS_PER_HOUR = 3600000;
  var windowStart = Date.now() - (MS_PER_DAY * daystimer + MS_PER_HOUR);
  return date.valueOf() <= windowStart;
}
/**
 * Calculates a rough size of a JavaScript value; it is not an accurate value.
 *
 * Booleans count as 4, numbers as 8 and strings as 2 per character. Objects
 * and arrays contribute the sum of their own enumerable properties; each
 * object is visited once, so cyclic structures terminate. Every other value
 * (null, undefined, functions, ...) counts as 0.
 *
 * @param {*} object Value to measure.
 * @returns {number} Estimated size.
 */
function roughSizeOfObject(object) {
  var seen = [];
  var stack = [object];
  var bytes = 0;

  while (stack.length) {
    var value = stack.pop();
    switch (typeof value) {
      case 'boolean':
        bytes += 4;
        break;
      case 'string':
        bytes += value.length * 2;
        break;
      case 'number':
        bytes += 8;
        break;
      case 'object':
        // typeof null is 'object'; it has nothing to traverse.
        if (value === null || seen.indexOf(value) !== -1) {
          break;
        }
        seen.push(value);
        for (var key in value) {
          // Count only the value's own properties, not inherited ones.
          if (Object.prototype.hasOwnProperty.call(value, key)) {
            stack.push(value[key]);
          }
        }
        break;
    }
  }
  return bytes;
}
/**
 * Fetches an OPML document and appends its feed URLs to `dataSources`.
 *
 * Every `outline` element below `body` that has a non-empty `xmlUrl`
 * attribute is collected, at any nesting depth, so both flat lists and
 * folder-grouped lists are handled. URLs already present in `dataSources`
 * are not added again.
 *
 * @param {string} url URL of the OPML document.
 * @returns {void}
 */
function opmlparser(url) {
  var xml = UrlFetchApp.fetch(url).getContentText();
  var document = XmlService.parse(xml);
  var body = document.getRootElement().getChild('body');
  if (!body) {
    return;
  }

  var feeds = collectOutlineFeeds_(body, []);
  for (var i = 0; i < feeds.length; i++) {
    if (dataSources.indexOf(feeds[i]) === -1) {
      dataSources.push(feeds[i]);
    }
  }
}

/** Collects, in document order, the xmlUrl of every outline below `parent`. */
function collectOutlineFeeds_(parent, feeds) {
  var outlines = parent.getChildren('outline');
  for (var i = 0; i < outlines.length; i++) {
    // Folder outlines carry no xmlUrl attribute; only their children do.
    var xmlUrl = outlines[i].getAttribute('xmlUrl');
    if (xmlUrl && xmlUrl.getValue() !== '') {
      feeds.push(xmlUrl.getValue());
    }
    collectOutlineFeeds_(outlines[i], feeds);
  }
  return feeds;
}
/******* | |
> *Note*: Set the number of days to look back in the script variable “daystimer”, and use the same period for the trigger: under the Resources menu, select “Project Triggers” (Day timer / Week timer / Month timer). Then run the deliverNews function to preview your news digest.
It’s not ideal yet, but it already saves time.
Google user-agents: | |
"Mozilla/5.0 (compatible; Google-Apps-Script)" | |
"Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko Firefox/11.0 (via ggpht.com GoogleImageProxy)" | |
## APIs Used | |
* [UrlFetchApp](https://developers.google.com/apps-script/reference/url-fetch/url-fetch-app)
* [XmlService](https://developers.google.com/apps-script/reference/xml-service/xml-service)
* [GmailApp](https://developers.google.com/apps-script/reference/gmail/gmail-app)
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment