Skip to content

Instantly share code, notes, and snippets.

@mnbi
Created September 14, 2010 12:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mnbi/578963 to your computer and use it in GitHub Desktop.
Save mnbi/578963 to your computer and use it in GitHub Desktop.
#!/opt/local/bin/ruby1.9 -w
# -*- coding: utf-8 -*-
# get_titles.rb: get post titles from Blogger RSS feed.
require 'cgi'
require 'net/http'
require 'rss'
require 'uri'
# ID of the Blogger blog to read; it is interpolated into the feed path below.
# Replace the placeholder with a real blog ID before running the script.
BLOGID = "put your blog ID here"

# Fetch one page of the blog's RSS feed from www.blogger.com.
#
# start_index - index of the first post to include, sent as the feed's
#               "start-index" query parameter (defaults to 1).
#
# Returns the response body (the feed XML) as a String.
# Raises the Net::HTTP exception produced by Net::HTTPResponse#value when the
# server answers with a non-2xx status, so an error or redirect page is never
# handed to the caller as if it were feed XML.
def get_rss(start_index = 1)
  url = URI.parse('http://www.blogger.com/')
  path = "/feeds/#{BLOGID}/posts/default?alt=rss&start-index=#{start_index}"
  res = Net::HTTP.start(url.host, url.port) { |http| http.get(path) }
  res.value # raises unless the response is a Net::HTTPSuccess
  res.body
end
# Download the whole feed, one page per request.
#
# Values left for the rest of the script:
#   posts       - Array of pages; each page is the Array of RSS items
#                 returned by one request.
#   total_posts - post count reported by the feed's openSearch:totalResults
#                 element.
#   total_pages - number of pages actually downloaded.
posts = []

# Parse one page of feed XML and return its items as a fresh Array.
page_items = lambda { |xml|
  feed = RSS::Parser.parse(xml, false)
  # Guard against a response that did not parse into an RSS document.
  abort "get_titles: response is not an RSS feed" unless feed.respond_to?(:channel)
  feed.channel.items.dup
}

# get first page & extract total number of posts.
raw_data = get_rss(1)
total_match = %r{<openSearch:totalResults>(\d+)</openSearch:totalResults>}.match(raw_data)
abort "get_titles: openSearch:totalResults not found in the feed" unless total_match
total_posts = total_match[1].to_i

posts << page_items.call(raw_data)
current_post = posts.first.length

# get more pages, each one starting just after the last post seen so far.
while current_post < total_posts
  items = page_items.call(get_rss(current_post + 1))
  # An empty page means no further posts can be fetched even though the
  # reported total has not been reached; stop instead of requesting the same
  # page forever.
  break if items.empty?
  posts << items
  current_post += items.length
end

total_pages = posts.length
STDERR.puts "#{total_pages} pages"
STDERR.puts "#{current_post} posts"
# construct output HTML page.
# The document is written to STDOUT and progress messages go to STDERR.
# It reads `total_posts` (the count shown in the header) and `posts` (an Array
# of pages, each an Array of feed items responding to #link and #title).
STDOUT.print <<HTML1
<!DOCTYPE HTML>
<html lang="ja">
<head>
<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>
<title>Blog Posts</title>
</head>
<body>
<div>
<p>Total Posts: #{total_posts}</p>
HTML1

# One <div class="page"> per downloaded page; page numbers are zero-based.
posts.each_with_index do |page, page_no|
  STDERR.puts "Now processing page \##{page_no}."
  STDOUT.print <<PAGE1
<div class="page">
<h2>Page #{page_no}</h2>
<ol>
PAGE1
  page.each do |item|
    # Feed text is plain text, not HTML: escape it so characters such as
    # &, <, > and quotes cannot break the markup or the quoted href value.
    # to_s keeps a missing link/title rendering as an empty string.
    href = CGI.escapeHTML(item.link.to_s)
    label = CGI.escapeHTML(item.title.to_s)
    STDOUT.puts "<li><a href='#{href}'>#{label}</a></li>"
  end
  STDOUT.print <<PAGE2
</ol>
</div>
PAGE2
end

STDOUT.print <<HTML2
</div>
</body>
</html>
HTML2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment