Skip to content

Instantly share code, notes, and snippets.

@beathyate
Created January 22, 2010 15:30
Show Gist options
  • Save beathyate/283841 to your computer and use it in GitHub Desktop.
Save beathyate/283841 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'sinatra'
require "mongo_mapper"
# One-time application setup: connect MongoMapper to MongoDB, load the model
# definitions and make sure their indexes exist.
#
# This used to live in a `before` filter, which Sinatra runs ahead of every
# request: each request opened a fresh Mongo connection (never closing the
# previous one), re-authenticated, reloaded models.rb and re-ran
# ensure_indexes!. A `configure` block runs once when the app boots.
#
# NOTE(review): if this app is served by a preforking server, the connection
# may need to be re-established in each worker after fork — confirm against
# the deployment setup.
configure do
  if ENV['RACK_ENV'] == 'production'
    # Production settings come from the environment.
    MongoMapper.connection = Mongo::Connection.new(ENV['DB_HOST'], ENV['DB_PORT'])
    MongoMapper.database = ENV['DB_NAME']
    MongoMapper.database.authenticate(ENV['DB_USER'], ENV['DB_PASS'])
  else
    # Anything else talks to an unauthenticated local database.
    MongoMapper.connection = Mongo::Connection.new('localhost', '27017')
    MongoMapper.database = 'dev_db'
  end

  # Models are loaded only after the connection is configured.
  load "models.rb"
  MongoMapper.ensure_indexes!
end
# Front page: fetches all stored posts and renders the `index` ERB template.
get '/' do
  # The query can be narrowed, e.g. :tags => 'whatever', :type => 'photo', etc.
  @posts = Post.all
  erb(:index)
end
# MongoMapper document holding one imported Tumblr post.
class Post
  include MongoMapper::Document

  key :tumblelog, String   # name of the tumblelog the post was imported from
  key :type,      String
  key :format,    String
  key :url,       String
  key :slug,      String
  key :tumblr_id, Integer  # the post's id as reported by the Tumblr API
  # NOTE(review): declared as Date while the importer assigns the API's
  # 'date-gmt' string — confirm that dropping the time of day is intended.
  key :date_gmt,  Date
  key :tags,      Array
  key :content,   Hash     # type-specific fields, filled in by the importer

  # Returns the post URL with the slug appended as a final path segment.
  #
  # When no slug has been stored (nil) the plain URL is returned instead of
  # raising a TypeError from String#+.
  def url_with_slug
    return url if slug.nil?

    url + '/' + slug
  end
end
require 'rubygems'
require 'mongo_mapper'
require 'open-uri'
require 'nokogiri'
# Tumblelog names to import; each one is used as the subdomain of tumblr.com.
TUMBLELOGS = %w[demo]
# Prerequisite task: points MongoMapper at the right database, then loads the
# models and ensures their indexes exist.
task :env do
  production = ENV['RACK_ENV'] == 'production'

  # Production reads its settings from the environment; everything else uses
  # a local database.
  host = production ? ENV['DB_HOST'] : 'localhost'
  port = production ? ENV['DB_PORT'] : '27017'
  name = production ? ENV['DB_NAME'] : 'dev_db'

  MongoMapper.connection = Mongo::Connection.new(host, port)
  MongoMapper.database = name
  # Only the production database is authenticated.
  MongoMapper.database.authenticate(ENV['DB_USER'], ENV['DB_PASS']) if production

  load 'models.rb'
  MongoMapper.ensure_indexes!
end
# Imports every post of each tumblelog in TUMBLELOGS into the Post collection.
#
# For each tumblelog the Tumblr read API is asked for the total number of
# posts, then the posts are fetched in pages of 50 and each one is created or
# updated (matched on its Tumblr id). Depends on :env for the database setup.
task :cron => :env do
  page_size = 50 # posts requested per API call

  # Fetches a URL and parses it as XML. The block form of URI#open closes the
  # underlying handle once parsing is done, and avoids Kernel#open, which no
  # longer accepts URLs on current Rubies.
  fetch_xml = lambda do |url|
    URI.parse(url).open { |io| Nokogiri::XML.parse(io) }
  end

  TUMBLELOGS.each do |tl|
    api_url = "http://#{tl}.tumblr.com/api/read"

    # pull one post just to read the total post count
    summary = fetch_xml.call("#{api_url}?num=1")
    total_posts = summary.css('tumblr posts').first['total'].to_i

    # walk the page offsets 0, 50, 100, ... that lie below total_posts
    0.step(total_posts - 1, page_size) do |start|
      xml = fetch_xml.call("#{api_url}?start=#{start}&num=#{page_size}")

      # go through each post on the page
      xml.css('tumblr post').each do |post|
        # tumblr_id is declared as an Integer key on Post; pass an Integer so
        # the lookup does not rely on the finder coercing a String.
        record = Post.find_or_create_by_tumblr_id(post['id'].to_i)
        record.tumblelog = tl
        record.type = post['type']
        record.format = post['format']
        record.url = post['url']
        record.slug = post['slug']
        record.date_gmt = post['date-gmt']

        # Reset both collections before refilling them; without clearing
        # tags, re-importing an existing post appended its tags again.
        record.content.clear
        record.tags.clear

        post.children.each do |child|
          case child.name
          when 'photo-url'
            # one entry per photo size, keyed like :photo_url_500
            record.content["photo_url_#{child['max-width']}".to_sym] = child.content
          when 'tag'
            record.tags << child.content
          when 'conversation'
            record.content[:conversation] = []
            child.children.each do |line|
              record.content[:conversation] << { :label => line['name'], :phrase => line.text }
            end
          when 'conversation-text', 'conversation-line'
            # deliberately not stored
          else
            record.content[child.name.tr('-', '_').to_sym] = child.content
          end
        end

        record.save!
        puts "Importing: #{record.url}\n"
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment