Skip to content

Instantly share code, notes, and snippets.

@dnicolson
Last active April 20, 2023 16:23
Show Gist options
  • Save dnicolson/165168672b52bac2a3ff82199c4392db to your computer and use it in GitHub Desktop.
Save dnicolson/165168672b52bac2a3ff82199c4392db to your computer and use it in GitHub Desktop.
Lambda to provide an RSS feed of Exberliner's daily news blog
const { parse } = require('node-html-parser');
const { Feed } = require('feed');
const fetch = require('node-fetch');
// Index page of Exberliner's Berlin news section. The handler fetches it to
// discover article links and also passes it on as the feed's own link.
const BERLIN_DAILY_NEWS_URL = 'https://www.exberliner.com/english-news-berlin/';
/**
 * Extracts feed metadata and article links from the news index page.
 *
 * The text of the page's `<title>` is split on ' - ': the part before the
 * separator becomes the feed description, the part after it the feed title.
 * Links are taken from the `href` attribute of every `.hero-head__box` element.
 *
 * @param {string} html - Raw HTML of the index page.
 * @returns {{title: (string|undefined), description: (string|undefined), articleLinks: string[]}}
 *   `title`/`description` are undefined when the page has no `<title>` (or no
 *   ' - ' separator, for `title`).
 */
const parseHomeHtml = (html) => {
  const root = parse(html);

  // A page without a <title> (e.g. an unexpected error page) must not crash the parser.
  const titleNode = root.querySelector('title');
  const [description, title] = titleNode ? titleNode.rawText.split(' - ') : [];

  const articleLinks = root
    .querySelectorAll('.hero-head__box')
    .map((node) => node.getAttribute('href'))
    // Drop teasers without an href so callers never try to fetch `undefined`.
    .filter(Boolean);

  return { title, description, articleLinks };
};
/**
 * Reads a `DD.MM.YYYY` date, optionally followed by an `HH:MM` time, from the
 * text of a node and returns it as a Date in the runtime's local time zone.
 *
 * @param {{innerText: string}|undefined} dateNode - Node whose text holds the date.
 * @returns {Date|undefined} The parsed date, or undefined when the node is
 *   missing or its text does not contain a date in that format.
 */
const parseArticleDate = (dateNode) => {
  if (!dateNode) {
    return undefined;
  }
  const match = (dateNode.innerText || '').match(/(\d\d)\.(\d\d)\.(\d{4})(?:\D+(\d{1,2}):(\d\d))?/);
  if (!match) {
    return undefined;
  }
  const [, day, month, year, hours = '0', minutes = '0'] = match;
  // Built from numeric parts: parsing a non-ISO date string is implementation-dependent.
  return new Date(Number(year), Number(month) - 1, Number(day), Number(hours), Number(minutes));
};

/**
 * Extracts the pieces of an article page that end up in a feed item.
 *
 * From the direct children of `.entry-header`:
 *   - `title`    raw text of the `<h1>`
 *   - `date`     parsed from the second child node of the `user-actions` element
 *   - `headline` the `single-excerpt` paragraph, wrapped in `<p>…</p>`
 * From the direct children of `.entry-content`:
 *   - `body`     concatenated markup of the accepted paragraphs (see below)
 *   - `image`    markup of the last `<figure>`, with `src` replaced by `data-src`
 *
 * @param {string} html - Raw HTML of the article page.
 * @returns {{title?: string, date?: Date, headline?: string, image?: string, body: string}}
 *   Only `body` is always present; the other fields are set when the
 *   corresponding element was found.
 */
const parseArticleHtml = (html) => {
  const root = parse(html);

  const headerNodes = root.querySelectorAll('.entry-header > h1, .entry-header > div, .entry-header > p');
  const articleHeader = headerNodes.reduce((acc, node) => {
    if (node.rawTagName === 'h1') {
      acc.title = node.rawText;
    }
    // classList.contains still matches when the element carries additional classes.
    if (node.classList.contains('user-actions')) {
      const date = parseArticleDate(node.childNodes[1]);
      if (date) {
        acc.date = date;
      }
    }
    if (node.rawTagName === 'p' && node.classList.contains('single-excerpt')) {
      acc.headline = `<p>${node.rawText}</p>`;
    }
    return acc;
  }, {});

  const bodyNodes = root.querySelectorAll('.entry-content > figure, .entry-content > p');
  return bodyNodes.reduce((acc, node) => {
    if (node.rawTagName === 'p') {
      const firstChild = node.childNodes[0];
      // Only paragraphs that start with a text node (no tag name) are kept.
      const startsWithText = Boolean(firstChild) && !firstChild.rawTagName;
      // Paragraphs whose text parses as a date are skipped — presumably
      // date-stamp lines rather than article copy.
      if (startsWithText && !Date.parse(node.innerText)) {
        acc.body += node.toString();
      }
    } else if (node.rawTagName === 'figure') {
      // Swap the `src` for the real URL kept in `data-src`.
      acc.image = node.toString().replace(/src=.*data-src\="(.*?)"/, 'src="$1"');
    }
    return acc;
  }, Object.assign(articleHeader, { body: '' }));
};
/**
 * Builds an RSS 2.0 document from the parsed articles.
 *
 * @param {string} title - Feed title.
 * @param {string} description - Feed description.
 * @param {string} link - URL the feed points back to.
 * @param {Array<{title?: string, headline?: string, image?: string, body?: string, date?: Date, link: string}>} articles
 *   One entry per feed item.
 * @returns {string} The feed as returned by `Feed#rss2()`.
 */
const generateRssFeed = (title, description, link, articles) => {
  const feed = new Feed({ title, description, link });

  articles.forEach((article) => {
    // An article may lack an excerpt, image or body; join only the parts that
    // exist so the literal text "undefined" never ends up in the description.
    const itemDescription = [article.headline, article.image, article.body]
      .filter(Boolean)
      .join('');

    feed.addItem({
      title: article.title,
      description: itemDescription,
      date: article.date,
      link: article.link,
    });
  });

  return feed.rss2();
};
exports.handler = async (_event) => {
const date = new Date();
if (date.getHours() < 8 || date.getHours() > 16 || date.getDay() === 0 || date.getDay() === 6) {
const response = {
statusCode: 503,
};
return response;
}
const news = await fetch(BERLIN_DAILY_NEWS_URL);
const html = await news.text();
const { title, description, articleLinks } = parseHomeHtml(html);
const articles = await Promise.all(articleLinks.map(async (link) => {
const article = await fetch(link);
const html = await article.text();
return Object.assign(parseArticleHtml(html), { link });
}));
const rss = generateRssFeed(title, description, BERLIN_DAILY_NEWS_URL, articles);
const response = {
statusCode: 200,
body: rss,
};
return response;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment