-
-
Save martymcguire/57bc398d74557904d1eea5ebc01c8c3e to your computer and use it in GitHub Desktop.
Import overcast.fm full data export OPML as Hugo posts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true

# Collects podcast cover art for every feed in an Overcast OPML export.
#
# USAGE: $0 path/to/overcast-detailed-export.xml
#
# Loads the existing cache from data/cards_cache.json, fetches the RSS feed of
# each podcast whose htmlUrl is not cached yet, records the feed's
# <itunes:image> href under that htmlUrl, and finally prints the whole cache
# as JSON on stdout.
#
# NOTE(review): progress messages are also written to stdout, so the output is
# not pure JSON -- confirm how the result is meant to be captured.
require 'fileutils'
require 'json'
require 'nokogiri'
require 'open-uri'

abort "USAGE: #{$PROGRAM_NAME} path/to/overcast-detailed-export.xml" unless ARGV[0]

puts ARGV[0]
# Block forms close the file handles as soon as parsing is done.
doc = File.open(ARGV[0]) { |f| Nokogiri::XML(f) }
# JSON.load yields nil for a blank file; fall back to an empty cache then.
cards_cache = File.open('data/cards_cache.json') { |f| JSON.load(f) } || {}

# don't make posts for listens from these feeds
skip_feeds = [
  # secret feed URLs here!
]

doc.xpath('//outline[@type="rss"]').each do |ep|
  feed_url = ep.attr('xmlUrl')
  site_url = ep.attr('htmlUrl')
  next if skip_feeds.include?(feed_url)

  if cards_cache.include?(site_url)
    puts "#{site_url} already in cache."
    next
  end

  puts "Fetching card data for #{site_url} from #{feed_url}."
  begin
    # URI#open (from open-uri) only handles real URLs, unlike Kernel#open,
    # which would also open local paths or spawn "|command" strings.
    feed_doc = URI.parse(feed_url).open { |io| Nokogiri::XML(io) }
    # If a feed has several channel images, the last one wins.
    feed_doc.xpath('//channel/itunes:image').each do |img|
      puts "Found: #{img.attr('href')}"
      cards_cache[site_url] = {
        'photo' => img.attr('href')
      }
    end
  rescue StandardError => e
    # Best effort: report the failure and carry on with the next feed.
    # StandardError (not Exception) so Ctrl-C and exit still stop the script.
    puts e.inspect
  end
  # Pause between feed requests (presumably to be gentle on the hosts).
  sleep 1
end

puts cards_cache.to_json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true

# Turns every played episode in an Overcast OPML export into a Hugo post.
#
# USAGE: $0 path/to/overcast-detailed-export.xml
#
# For each <outline type="podcast-episode" played="1"> a YAML front-matter
# file is written to content/listens/YYYY/MM/DD/<slug>.md, where the date
# comes from the episode's userUpdatedDate attribute.
require 'date'
require 'fileutils'
require 'nokogiri'
require 'json'
require 'yaml'

abort "USAGE: #{$PROGRAM_NAME} path/to/overcast-detailed-export.xml" unless ARGV[0]

puts ARGV[0]
# Block form closes the export file once it has been parsed.
doc = File.open(ARGV[0]) { |f| Nokogiri::XML(f) }

# don't make posts for listens from these feeds
skip_feeds = [
  # secret feed URLs here!
]

doc.xpath('//outline[@type="podcast-episode" and @played="1"]').each do |ep|
  # ep.parent is the podcast (aka outline type="rss")
  podcast = ep.parent
  next if skip_feeds.include?(podcast.attr('xmlUrl'))

  episode_url = ep.attr('url')
  puts episode_url

  # use the included episode URL if it "makes sense": it must be present, must
  # not point at an mp3, and must differ from the podcast's home page.
  # Otherwise fall back to the Overcast URL.
  sensible = episode_url &&
             !episode_url.end_with?('mp3') &&
             episode_url != podcast.attr('htmlUrl')
  listen_url = sensible ? episode_url : ep.attr('overcastUrl')

  listened_at = ep.attr('userUpdatedDate')
  if listened_at.nil?
    # Without a date there is no target directory; skip instead of aborting
    # the whole import.
    warn "Skipping #{ep.attr('title').inspect}: no userUpdatedDate."
    next
  end

  # Interpolation (rather than +) keeps a missing title from raising.
  slug = "Listened to #{ep.attr('title')}"
         .downcase
         .gsub(/[\W[:punct:]]+/, '-')
         .sub(/-\z/, '')

  post = {
    'h' => 'entry',
    'date' => listened_at,
    'listen-of' => listen_url,
    'slug' => slug,
    'refs' => {
      # to_s keeps the key a string even when no URL is available.
      listen_url.to_s => {
        'url' => listen_url,
        'data' => {
          'type' => 'entry',
          'published' => ep.attr('pubDate'),
          'url' => listen_url,
          'audio' => [ep.attr('enclosureUrl')],
          'name' => ep.attr('title'),
          'author' => {
            'type' => 'h-card',
            'name' => podcast.attr('title'),
            'url' => podcast.attr('htmlUrl')
          }
        }
      }
    }
  }

  post_date = DateTime.parse(listened_at)
  file = "content/listens/#{post_date.strftime('%Y/%m/%d')}/#{slug}.md"
  FileUtils.mkdir_p(File.dirname(file))
  # to_yaml emits the opening "---"; the trailing one closes the front matter.
  File.write(file, "#{post.to_yaml}---\n")
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment