Last active
January 16, 2022 12:18
-
-
Save selfawaresoup/a303df880c13821c8dcebc6485834022 to your computer and use it in GitHub Desktop.
Magnus Archives Archive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
<<~WHY | |
Because who knows how long the RSS feed for The Magnus Archives will remain available. | |
This script will download all the episodes titled "MAG ...", so none of the table-top gaming sessions and behind the scenes stuff they did after the show ended and none of the promo stuff for other podcasts. | |
It'll also create a nice little HTML file that lists them in the right order from beginning to end with embedded audio players for each episode. | |
What you need: | |
- Ruby | |
- Bundler | |
WHY | |
require 'net/http'
require 'bundler/inline'
require 'fileutils'
require 'erb'
require 'date'
gemfile(true) do | |
source 'https://rubygems.org' | |
gem 'nokogiri' | |
gem 'down' | |
gem 'reverse_markdown' | |
gem 'kramdown' | |
end | |
# Fetch the podcast RSS feed and build one metadata hash per main-series
# episode (titles starting with "MAG <number>"). Items without an audio
# enclosure, items whose title does not match, and the "Liveshow" entry are
# skipped.
feed_url = "https://feeds.acast.com/public/shows/b6085bcd-3542-4a43-b6a8-021e3fd251b8"
xml = Net::HTTP.get(URI(feed_url)) # => String
doc = Nokogiri::Slop(xml)
items = doc.xpath("//rss//channel//item")

# Captures: (1) episode number, (2) optional dotted sub-number, (3) title.
# Anchored with \A so only the very beginning of the title can match
# (^ would also match after an embedded newline).
mag_title_regex = /\AMAG (\d{1,3})([\.\d]*)[ -]+([\w ]+)/

episodes = []
items.each do |item|
  enclosure = item.at("enclosure")
  next unless enclosure

  match = item.at("title")&.content&.match(mag_title_regex)
  next if match.nil?

  ep_nr = match[1]
  ep_sub_nr = match[2]
  title = match[3]
  next if title == "Liveshow"

  # Coerce to a String so a missing <description> becomes empty text instead
  # of handing nil to the converter.
  description_html = (item.at("description")&.content).to_s
  description = ReverseMarkdown.convert(description_html)
  description = description.gsub(/\* \* \*.+/m, '') # remove ACast snippet at the bottom

  episodes.push({
    number: ep_nr,
    sub_number: ep_sub_nr,
    title: title,
    description: description,
    pubDate: Date.parse(item.at("pubDate")&.content),
    # Plain String URL rather than an attribute node tied to the parsed document.
    file_url: enclosure["url"],
    # to_i parses base 10; passing the String straight to %d would apply
    # prefix detection (e.g. a leading 0 is read as octal).
    file_path: format("./episodes/MAG %03d%s - %s.mp3", ep_nr.to_i, ep_sub_nr, title)
  })
end
# Download every episode that is not on disk yet into archive/episodes and
# set each file's access/modification time to its publication date.
FileUtils.mkdir_p "archive/episodes"
Dir.chdir "archive" # the episode file paths are relative to this directory

# Reverse the feed order so the list runs from the first episode to the last
# (presumably the feed lists the newest episode first).
episodes = episodes.reverse

episodes.each do |ep|
  file_path = ep[:file_path]
  # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
  next if File.exist?(file_path)

  puts "Downloading %s" % [file_path]
  Down.download(ep[:file_url], destination: file_path)

  # Same effect as `touch -t YYYYMMDD0000` (local midnight of the publication
  # date) but without spawning an external command.
  published_at = ep[:pubDate].to_time
  File.utime(published_at, published_at, file_path)
end
# Render index.html inside the archive directory: one section per episode
# with its heading, an audio player pointing at the downloaded file, and the
# description converted from Markdown to HTML.
css = <<~CSS
  body {
    font-size: 14px;
    font-family: sans-serif;
    max-width: 60rem;
    margin: 5rem auto;
  }
  audio {
    width: 100%;
  }
CSS

# ERB template; rendered with the locals `episodes` and `css`.
HTML = <<~HTML
  <!DOCTYPE html>
  <html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width" />
    <style type="text/css"><%= css %></style>
    <title>The Magnus Archives Archive</title>
  </head>
  <body>
    <h1>The Magnus Archives Archive</h1>
    <% episodes.each_with_index do |ep, i| %>
      <% if i > 0 %>
        <hr/>
      <% end %>
      <h2><%= ep[:number]%><%= ep[:sub_number]%> - <%= ep[:title] %></h2>
      <audio src="<%= ep[:file_path] %>" controls preload="none"></audio>
      <div class="description">
        <%= Kramdown::Document.new(ep[:description]).to_html %>
      </div>
    <% end %>
  </body>
  </html>
HTML

result = ERB.new(HTML).result_with_hash({episodes: episodes, css: css})
File.write "index.html", result

# Return to the directory the script was started from.
Dir.chdir ".."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment