Skip to content

Instantly share code, notes, and snippets.

@ledermann
Created August 2, 2010 15:03
Show Gist options
  • Save ledermann/504775 to your computer and use it in GitHub Desktop.
Save ledermann/504775 to your computer and use it in GitHub Desktop.
# Import a posterous.com Blog into nanoc (http://nanoc.stoneship.org)
# Uses Nokogiri to parse the XML data from the posterous API
require 'rubygems'
require 'nanoc3'
require 'nokogiri'
require 'open-uri'
module Nanoc3::Extra
  module Importers
    # Imports the posts of a posterous.com blog into the nanoc site located in
    # the current directory. Posts are read page by page from the posterous
    # "readposts" API; each post becomes a text item, and every file linked
    # from the post body through a "getfile" URL is downloaded and stored as
    # an additional item next to it.
    class Posterous
      # @param hostname [String] posterous hostname passed to the API
      # @param user [String] user name for HTTP basic authentication
      # @param password [String] password for HTTP basic authentication
      def initialize(hostname, user, password)
        @hostname = hostname
        @user = user
        @password = password
        @site = Nanoc3::Site.new('.')
      end

      # Fetches API result pages, starting at page 1, until a page yields no
      # posts, and prints a summary of how many articles were imported.
      def run
        page = 1
        total = 0
        loop do
          puts "Processing page #{page}..."
          xml = fetch(api_url(page), :http_basic_authentication => [@user, @password])
          count = process(Nokogiri::XML(xml))
          break if count.zero?
          total += count
          page += 1
        end
        puts "Finished. Imported #{total} articles."
      end

      # Creates nanoc items for every post found under //rsp/post.
      #
      # @param xml [Nokogiri::XML::Document] one parsed API response page
      # @return [Integer] number of posts found in the document
      def process(xml)
        posts = xml.xpath('//rsp/post')
        posts.each { |post| import_post(post) }
        posts.length
      end

      private

      # Builds the API URL for the given result page.
      def api_url(page)
        "http://posterous.com/api/readposts?hostname=#{@hostname}&page=#{page}"
      end

      # Downloads a remote resource and returns its content as a string.
      #
      # Goes through URI#open (provided by open-uri) instead of Kernel#open:
      # Kernel#open would run a subprocess for a string starting with "|",
      # and media URLs come from remote post HTML. The block form makes sure
      # the underlying IO is closed again.
      #
      # @raise [ArgumentError] if the URL is not of a scheme open-uri can fetch
      def fetch(url, options = {})
        uri = URI.parse(url)
        unless uri.respond_to?(:open)
          raise ArgumentError, "Cannot download non-remote URL: #{url}"
        end
        uri.open(options) { |io| io.read }
      end

      # Imports a single <post> element: the text item plus its media files.
      def import_post(post)
        title = post.xpath('title').text
        author = post.xpath('author').text
        body = Nokogiri::HTML(post.xpath('body').text)
        media = body.xpath("//a[contains(@href, 'getfile')]").map { |link| link['href'] }

        puts "- #{title}"

        base = base_identifier(Date.parse(post.xpath('date').text), slugify(title))

        # Create text item
        attributes = { :title => title, :author => author }
        data_source.create_item(clean_text(body.text), attributes, base + 'index/', :extension => '.txt')

        # Download media and create items
        media.each { |url| import_media(url, base) }
      end

      # Downloads one media file and stores it below the post's identifier.
      def import_media(url, base)
        puts "Download #{url}"
        data = fetch(url)
        # Drop posterous' ".scaled.<size>.jpg" suffix and the ".jpg" extension
        # from the name; the extension is handed to nanoc separately.
        file_name = url.split('/').last.gsub(/\.scaled.+\.jpg/i, '').gsub(/\.jpg/i, '')
        data_source.create_item(data, {}, base + file_name + '/', :extension => File.extname(url))
      end

      # Turns a post title into an identifier-safe name: lower-cased, every
      # character outside a-z, 0-9, "-" and "_" replaced by a dash, runs of
      # dashes collapsed and leading/trailing dashes removed. Digits are kept
      # so that titles differing only in a number stay distinct.
      def slugify(title)
        slug = title.downcase.gsub(/[^a-z0-9_-]/, '-').squeeze('-').gsub(/\A-+|-+\z/, '')
        slug.empty? ? 'untitled' : slug
      end

      # Common identifier prefix "/articles/<year>/<mm>/<dd>-<name>/".
      def base_identifier(date, post_name)
        '/articles/%d/%02d/%02d-%s/' % [date.year, date.month, date.day, post_name]
      end

      # Removes the boilerplate phrases posterous adds to the post text.
      def clean_text(text)
        text.gsub('See the full gallery on posterous', '').
          gsub('Watch on posterous', '').
          gsub(/\(\d+ KB\)/, '').
          strip
      end

      # The site's first data source receives all created items.
      def data_source
        @site.data_sources.first
      end
    end
  end
end
# Run the import right away. The hostname and credentials below look like
# placeholders; replace them with real values before use.
Nanoc3::Extra::Importers::Posterous.new('my_hostname', 'steve', 'secret').run
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment