Skip to content

Instantly share code, notes, and snippets.

@larryfox
Created June 3, 2014 21:11
Show Gist options
  • Save larryfox/30ffa6cfb4c8a0fa4746 to your computer and use it in GitHub Desktop.
Save larryfox/30ffa6cfb4c8a0fa4746 to your computer and use it in GitHub Desktop.
tumblr 2 siteleaf
#!/usr/bin/env ruby
require 'tumblr_client'
require 'siteleaf'
require 'sanitize'
require 'pry'
require 'json'
##
## User Configuration
##
# All user-supplied settings for the import, read by the rest of the script.
# Frozen so nothing can modify the configuration by accident mid-run.
CONF = {
  ## Tumblr configuration
  total_posts: 300, # number of posts to collect.
  t_user: 'your tumblr blog name',
  t_key: 'your tumblr api key',
  t_secret: 'your tumblr api secret',
  ## Siteleaf configuration
  sl_page_title: 'the siteleaf page name to import into',
  site_id: 'your siteleaf site id',
  sl_key: 'your siteleaf api key',
  sl_secret: 'your siteleaf api secret',
  ## Useful if the process fails along the way: this value is added to the
  ## offset of every Tumblr request, so already-imported posts can be skipped.
  offset: 0
}.freeze
##
## Client Configuration
##
# Hand the Tumblr consumer credentials from CONF to the tumblr_client gem.
tumblr_key, tumblr_secret = CONF.values_at(:t_key, :t_secret)
Tumblr.configure do |config|
  config.consumer_key = tumblr_key
  config.consumer_secret = tumblr_secret
end

# Hand the Siteleaf credentials from CONF to the siteleaf gem.
Siteleaf.api_key, Siteleaf.api_secret = CONF.values_at(:sl_key, :sl_secret)
##
## Post Class
##
# Wraps a single post hash as returned by the Tumblr client and derives the
# pieces needed to create a Siteleaf post from it: title, body, meta fields,
# photo URLs and tags. Every derived value is computed once and memoized.
class Post
  attr_reader :post, :published_at, :type

  # post - Hash with string keys ('timestamp', 'type', 'caption', 'tags', ...).
  #        'timestamp' is passed straight to Time.at.
  def initialize(post)
    @post = post
    @published_at = Time.at(post['timestamp'])
    @type = post['type']
  end

  # Plain-text title taken from the caption, with a trailing attribution
  # (" — ...", " -..." or " (...)") removed. Falls back to the post id when
  # the caption is missing or empty after sanitizing.
  #
  # Returns a String.
  def title
    return @title if defined? @title

    # Lots of junk characters: strip all markup, then collapse whitespace runs.
    caption = Sanitize.clean(post['caption'].to_s).to_s.gsub(/\s+/, ' ').strip
    @title = if caption.empty?
               post['id'].to_s
             else
               caption.sub(/( —.+| -.+| \(.+\))$/, '').strip
             end
  end

  # Body built from the caption, keeping only basic inline formatting and
  # links, with the first "(... via ...)" group removed. When the post has no
  # caption the raw 'text' field is used instead.
  #
  # Returns a String, or nil when the post has neither caption nor text.
  def body
    return @body if defined? @body

    # Branch on the raw caption rather than on Sanitize's return value, so the
    # text fallback is reachable no matter what Sanitize returns for nil.
    @body = if post['caption']
              sanitized = Sanitize.clean(post['caption'], {
                elements: %w[b em i strong u a],
                attributes: { 'a' => ['href'] },
                add_attributes: { 'a' => {'rel' => 'nofollow'} },
                protocols: { 'a' => {'href' => ['http', 'https', :relative]}}
              })
              sanitized.sub(/(\(.+via.+\))/, '').strip
            elsif post['text']
              post['text']
            end
  end

  # Meta fields for the Siteleaf post: always the post type, plus source,
  # cite and video-embed entries when the post provides them.
  #
  # Returns an Array of { key:, value: } Hashes.
  def meta
    return @meta if defined? @meta

    @meta = [{ key: 'type', value: type }]
    add_source_meta
    add_cite_meta
    add_embed_meta
    @meta
  end

  # URLs of the original-size version of every photo in the post.
  #
  # Returns an Array of Strings (empty when the post has no photos).
  def photos
    return @photos if defined? @photos

    @photos = (post['photos'] || []).map { |photo| photo['original_size']['url'] }
  end

  # Tags in the taxonomy structure passed to Siteleaf.
  #
  # Returns { taxonomy: [...] }, or an empty Hash when the post has no tags
  # (including when the 'tags' key is missing altogether).
  def taxonomy
    return @taxonomy if defined? @taxonomy

    tags = post['tags'] || []
    @taxonomy = if tags.empty?
                  {}
                else
                  { taxonomy: [{ key: 'Tags', values: tags }] }
                end
  end

  # Attribute Hash for creating the Siteleaf post.
  def params
    return @params if defined? @params

    @params = {
      title: title.to_s,
      body: body.to_s,
      meta: meta,
      published_at: published_at
    }.merge(taxonomy)
  end

  private

  # Adds a 'source' entry from 'source_url', or failing that from a
  # "(<a ...>via</a>)" link embedded in the caption.
  def add_source_meta
    if post['source_url']
      @meta << { key: 'source', value: post['source_url'] }
    elsif post['caption']
      via = post['caption'].match(/\(<a href=\"(.+)\" target=\"_blank\">via<\/a>\)/)
      @meta << { key: 'source', value: via[1] } if via
    end
  end

  # Adds a 'cite' entry when the post has a 'source' field.
  def add_cite_meta
    @meta << { key: 'cite', value: post['source'] } if post['source']
  end

  # Adds an "embed_<service>" entry holding the video id when 'permalink_url'
  # is recognised as a YouTube or Vimeo URL.
  def add_embed_meta
    return unless post['permalink_url']

    service, id = parse_video_url(post['permalink_url'])
    # An empty String is truthy, so check for it explicitly; otherwise a URL
    # with a blank id would produce an empty embed entry.
    @meta << { key: "embed_#{service}", value: id } if service && id && !id.empty?
  end

  # Extracts the video service and id from a URL.
  #
  # Returns ['youtube', id] or ['vimeo', id], or nil when neither pattern
  # matches.
  def parse_video_url(video_url)
    if (m = video_url.match(/^.*(?:youtu\.be\/|v\/|u\/\w\/|embed\/|watch\?v=|&v=)([^#&?]*).*/))
      ['youtube', m[1]]
    elsif (m = video_url.match(/^.+vimeo\.com\/(?:.*\/)?([^#?]*)/))
      ['vimeo', m[1]]
    end
  end
end
##
## Fetch Tumblr posts
##
# Collect CONF[:total_posts] posts from the blog, one page per request.
client = Tumblr::Client.new
page_size = 20 # posts requested per page
# Round up so a partial last page is still fetched. The range below is
# exclusive: an inclusive 0..(total / 20) would issue one request too many
# (e.g. 16 pages for 300 posts).
total_pages = (CONF[:total_posts] / page_size.to_f).ceil
limit = [CONF[:total_posts], page_size].min
tumblr_posts = (0...total_pages).flat_map { |page|
  response = client.posts("#{CONF[:t_user]}.tumblr.com", {
    limit: limit,
    offset: page * page_size + CONF[:offset]
  })
  # A response without a 'posts' key contributes nothing rather than a nil
  # entry that would break Post.new later on.
  response['posts'] || []
}.first(CONF[:total_posts]) # trim the overshoot from a partial last page
##
## Siteleaf
##
# Find the Siteleaf page to import into, creating it when it does not exist.
site = Siteleaf::Site.find(CONF[:site_id])
sl_page = site.pages.find { |page| page.title == CONF[:sl_page_title] }
sl_page ||= Siteleaf::Page.create({
  site_id: CONF[:site_id],
  title: CONF[:sl_page_title]
})

# Create one Siteleaf post per Tumblr post, then one asset per photo.
tumblr_posts.each_with_index do |tumblr_post, index|
  post = Post.new(tumblr_post)

  # I’m skipping link and text posts. Sorry.
  next if %w[text link].include? post.type

  position = index + 1 + CONF[:offset]
  grand_total = tumblr_posts.length + CONF[:offset]
  puts "Importing post #{tumblr_post['id']} (#{position} of #{grand_total})"

  sl_post = Siteleaf::Post.create(post.params.merge({ parent_id: sl_page.id }))

  photo_count = post.photos.length
  post.photos.each_with_index do |photo_url, photo_index|
    puts " creating asset #{photo_index + 1} of #{photo_count}"
    Siteleaf::Asset.create({
      post_id: sl_post.id,
      url: photo_url
    })
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment