Skip to content

Instantly share code, notes, and snippets.

@imolein
Created February 5, 2021 11:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save imolein/79b3fadb8f54ad6988551f49d6b44d22 to your computer and use it in GitHub Desktop.
Save imolein/79b3fadb8f54ad6988551f49d6b44d22 to your computer and use it in GitHub Desktop.
Generiert einen RSS feed aus den Pressemitteilungen des Landkreis Meissen
#!/usr/bin/env lua
-- Generates an RSS feed from the press releases of the Landkreis Meißen (Meissen district).
-- A pity that the site does not offer one itself...
--
-- Dependencies:
-- * lua - https://lua.org
-- * luasocket - https://github.com/diegonehab/luasocket
-- * lua-htmlparser - https://github.com/msva/lua-htmlparser
-- * etlua - https://github.com/leafo/etlua
-- * date - https://github.com/Tieske/date
local http = require('socket.http')
local htmlparser = require('htmlparser')
local etlua = require('etlua')
local date = require('date')
-- Base URL of the district website; page paths and article hrefs are appended to it.
local URL = 'http://www.kreis-meissen.org'
-- Channel description written into the feed (also shown by feed readers).
local DESC = 'RSS Feed der Pressemitteilungen Kreis Meißen.'
-- Value for the feed's <generator> element.
local GEN = 'km_press_to_rss.lua'
-- Path of the generated RSS file.
local OUT = './km_pressemitteilungen.xml'
-- Path of the log file; opened in append mode below.
local LOG_FILE = './km_press_to_rss.log'
-- Path of the etlua template used to render the feed.
local TEMPLATE_FILE = './template.xml'
-- User agent sent with every request; the German remark asks the site
-- operators to provide an RSS feed themselves.
http.USERAGENT = 'km_press_to_rss/0.0.0 (Bitte stellt einen RSS Feed bereit)'
-- Log file handle shared by logger(); the script aborts here if it cannot be opened.
local logfile_fh = assert(io.open(LOG_FILE, 'a'))
--- Appends one timestamped line to the log file.
-- The line has the form "<os.date()> <LEVEL> - <message>".
-- @param level  severity label (string); written upper-cased
-- @param msg    message text; treated as a string.format template only when
--               at least one extra value is passed, so a plain message may
--               contain a literal '%' without raising
-- @param ...    values for the format directives in msg
local function logger(level, msg, ...)
  if select('#', ...) > 0 then
    msg = msg:format(...)
  end
  logfile_fh:write(('%s %s - %s\n'):format(os.date(), level:upper(), msg))
end
--- Turns the date captured from an article title into an RFC 822 date-time string.
-- @param raw  sequence of three captures; raw[3], raw[2] and raw[1] are handed
--             to date() in that order (presumably year, month, day of a
--             DD.MM.YYYY title prefix -- confirm against the caller)
-- @return the formatted string, or nothing when date() raises for these values
local function rfc_822_date_time(raw)
  local first, second, third = raw[1], raw[2], raw[3]
  local built, stamp = pcall(date, third, second, first)
  if built then
    return stamp:fmt('%a, %d %b %Y %T %z')
  end
end
--- Builds one table per article and appends it to data.articles.
-- Every 'div.inhaltsbereich-box' element is treated as one article. A box
-- without a heading or without a link target is logged and skipped instead of
-- aborting the whole run; a box without a paragraph yields an article whose
-- content is nil.
-- @param parsed  root node returned by htmlparser.parse
-- @param data    feed table; its articles list is extended in place
-- @return data
local function get_articles(parsed, data)
  for _, element in ipairs(parsed:select('div.inhaltsbereich-box')) do
    local heading = element:select('div.border_o_r > h2')[1]
    local paragraph = element:select('p')[1]
    local anchor = element:select('a')[1]
    local href = anchor and anchor.attributes.href

    if heading and href then
      local title = heading:getcontent()
      logger('info', 'Found article %q', title)

      local content
      if paragraph then
        -- NOTE(review): the search literal in the original is a whitespace
        -- character that cannot be told apart from a plain space in review.
        -- It is spelled here as the UTF-8 bytes of U+00A0 (non-breaking
        -- space), assuming that was the intent -- confirm.
        -- Assigning to a single local keeps only gsub's first result.
        content = paragraph:getcontent():gsub('\194\160', ' ')
      end

      data.articles[#data.articles + 1] = {
        title = title,
        content = content,
        link = URL .. href,
        -- nil when the title does not start with three dot-separated numbers
        date = rfc_822_date_time({ title:match('^(%d+)%.(%d+)%.(%d+)%s+.*$') }),
      }
    else
      logger('warn', 'Skipping article box without heading or link')
    end
  end
  return data
end
--- Parses the downloaded page and collects the feed data from it.
-- @param raw_html  HTML source of the press release page (string)
-- @return the feed table (title, url, description, generator, articles),
--         or nothing when htmlparser.parse raises
local function parse_html(raw_html)
  local ok, parsed = pcall(htmlparser.parse, raw_html)
  if not ok then
    -- tostring() because an error value is not guaranteed to be a string
    logger('error', 'Parsing received raw HTML failed: %s', tostring(parsed))
    return
  end
  logger('info', 'Successful parsed raw HTML, now picking needed data out of it')

  -- A page without a <title> element must not crash the run; the channel
  -- still needs a title, so fall back to the static description.
  local title_node = parsed:select('title')[1]
  local title
  if title_node then
    title = title_node:getcontent()
  else
    logger('warn', 'Page has no title element, using the feed description instead')
    title = DESC
  end

  local data = {
    title = title,
    url = URL .. '/61.html',
    description = DESC,
    generator = GEN,
    articles = {}
  }
  return get_articles(parsed, data)
end
--- Downloads the press release page and hands it to parse_html.
-- Exits the process with status 1 when the request does not yield HTTP 200.
-- @return whatever parse_html returns for the downloaded body
local function get_data_from_url()
  -- On a transport failure the first result is nil and the second one holds
  -- an error message instead of a status code, so log it in both cases.
  local body, code = http.request(URL .. '/61.html')
  if code ~= 200 then
    logger('error', 'Failed to receive website: %s', tostring(code))
    os.exit(1)
  end
  logger('info', 'Successful received press website')
  return parse_html(body)
end
--- Reads the template file and compiles it with etlua.
-- Raises when the file cannot be opened or when etlua rejects the template,
-- so a broken template is reported here rather than as a call on nil later.
-- @return the compiled template function
local function load_template()
  local fh = assert(io.open(TEMPLATE_FILE, 'r'))
  local template_xml = fh:read('*a')
  fh:close()
  -- etlua.compile returns nil plus an error message on failure
  return assert(etlua.compile(template_xml))
end
local template = load_template()
local data = get_data_from_url()
logfile_fh:close()
if not data then os.exit(1) end

-- Writes the generated RSS feed to a file.
-- The feed is rendered before the output file is opened: opening with 'w'
-- truncates the file, so a failing render must not destroy an existing feed.
local feed = assert(template(data))
local fh = assert(io.open(OUT, 'w'))
fh:write(feed)
fh:close()
<?xml version="1.0" encoding="utf-8"?>
<!-- Article content is HTML taken from the source page, so it is written
     with the escaping tag; raw markup or entities would break the XML. -->
<rss version="2.0">
  <channel>
    <title><%= title %></title>
    <description><%= description %></description>
    <link><%- url %></link>
    <generator><%= generator %></generator>
    <% if articles then %>
    <% for _, article in ipairs(articles) do %>
    <item>
      <title><%= article.title %></title>
      <description>
        <% if article.content then %>
        <%= article.content %>
        <% end %>
      </description>
      <link><%- article.link %></link>
      <% if article.date then %>
      <pubDate><%= article.date %></pubDate>
      <% end %>
    </item>
    <% end %>
    <% end %>
  </channel>
</rss>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment