Created
June 3, 2014 21:11
-
-
Save larryfox/30ffa6cfb4c8a0fa4746 to your computer and use it in GitHub Desktop.
tumblr 2 siteleaf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'tumblr_client' | |
require 'siteleaf' | |
require 'sanitize' | |
require 'pry' | |
require 'json' | |
##
## User Configuration
##

# Settings read by the rest of this script. Replace the placeholder strings
# with real values before running. Frozen so the configuration cannot be
# modified by accident once the script is running.
CONF = {
  ## Tumblr configuration
  total_posts: 300, # number of posts to collect.
  t_user: 'your tumblr blog name',
  t_key: 'your tumblr api key',
  t_secret: 'your tumblr api secret',

  ## Siteleaf configuration
  sl_page_title: 'the siteleaf page name to import into',
  site_id: 'your siteleaf site id',
  sl_key: 'your siteleaf api key',
  sl_secret: 'your siteleaf api secret',

  ## Number of Tumblr posts to skip before collecting. Useful for resuming
  ## if the process fails along the way…
  offset: 0
}.freeze
##
## Client Configuration
##

# Siteleaf credentials are assigned directly on the Siteleaf module.
Siteleaf.api_key = CONF[:sl_key]
Siteleaf.api_secret = CONF[:sl_secret]

# Tumblr credentials are supplied through the client's configure block.
Tumblr.configure do |config|
  config.consumer_key = CONF[:t_key]
  config.consumer_secret = CONF[:t_secret]
end
##
## Post Class
##

# Wraps one raw Tumblr post hash (string keys) and derives the values this
# script sends to Siteleaf: title, body, meta fields, photo URLs and tags.
# Every derived value is computed on first use and cached.
class Post
  # Sanitize settings used for the post body: keep basic inline formatting
  # and links, mark links nofollow, and allow only http(s)/relative hrefs.
  BODY_SANITIZE_CONFIG = {
    elements: %w[b em i strong u a],
    attributes: { 'a' => ['href'] },
    add_attributes: { 'a' => { 'rel' => 'nofollow' } },
    protocols: { 'a' => { 'href' => ['http', 'https', :relative] } }
  }.freeze

  attr_reader :post, :published_at, :type

  # @param post [Hash] raw Tumblr post; 'timestamp' and 'type' are read here
  def initialize(post)
    @post = post
    @published_at = Time.at(post['timestamp'])
    @type = post['type']
  end

  # Plain-text title taken from the caption, with a trailing " — …",
  # " -…" or " (…)" suffix removed. Falls back to the post id when the
  # caption is missing or empty.
  #
  # @return [String]
  def title
    return @title if defined? @title

    # Lots of junk characters…
    caption = Sanitize.clean(post['caption']).to_s.gsub(/\s+/, ' ').strip
    @title = if caption.empty?
               post['id'].to_s
             else
               caption.sub(/( —.+| -.+| \(.+\))$/, '').strip
             end
  end

  # Caption reduced to the whitelisted inline markup, with the first
  # "(… via …)" credit removed. When the sanitizer returns nil (no caption),
  # the post's 'text' value is used instead.
  #
  # @return [String, nil]
  def body
    return @body if defined? @body

    caption = Sanitize.clean(post['caption'], BODY_SANITIZE_CONFIG)
    @body = if caption
              caption.sub(/(\(.+via.+\))/, '').strip
            else
              post['text']
            end
  end

  # Meta fields: always the post type, plus source, cite and embed entries
  # when the corresponding data is present on the post.
  #
  # @return [Array<Hash>] entries of the form { key:, value: }
  def meta
    return @meta if defined? @meta

    @meta = [{ key: 'type', value: type }]
    add_source_meta
    add_cite_meta
    add_embed_meta
    @meta
  end

  # URLs of the original-size version of every photo on the post.
  #
  # @return [Array<String>] empty when the post has no photos
  def photos
    return @photos if defined? @photos

    @photos = Array(post['photos']).map { |photo| photo['original_size']['url'] }
  end

  # Tag taxonomy fragment that is merged into #params.
  #
  # @return [Hash] empty when the post has no tags (or no 'tags' key at all)
  def taxonomy
    return @taxonomy if defined? @taxonomy

    tags = Array(post['tags'])
    @taxonomy = if tags.empty?
                  {}
                else
                  { taxonomy: [{ key: 'Tags', values: tags }] }
                end
  end

  # Attributes used to create the Siteleaf post.
  #
  # @return [Hash]
  def params
    return @params if defined? @params

    @params = {
      title: title.to_s,
      body: body.to_s,
      meta: meta,
      published_at: published_at
    }.merge!(taxonomy)
  end

  private

  # Records the source URL: the explicit 'source_url' when present,
  # otherwise the href of the first "(via)" link found in the raw caption.
  def add_source_meta
    if post['source_url']
      @meta << { key: 'source', value: post['source_url'] }
    elsif post['caption']
      # [^"]+ keeps the capture inside one href even when the caption
      # contains several "(via)" links.
      post['caption'].match(/\(<a href="([^"]+)" target="_blank">via<\/a>\)/) do |m|
        @meta << { key: 'source', value: m[1] }
      end
    end
  end

  # Records the post's 'source' value as the cite field when present.
  def add_cite_meta
    @meta << { key: 'cite', value: post['source'] } if post['source']
  end

  # Records an embed_<service> field when 'permalink_url' is recognised as a
  # YouTube or Vimeo URL.
  def add_embed_meta
    return unless post['permalink_url']

    service, id = parse_video_url(post['permalink_url'])
    @meta << { key: "embed_#{service}", value: id } if service && id
  end

  # @param video_url [String]
  # @return [Array(String, String), nil] [service, video id], or nil when the
  #   URL matches neither pattern
  def parse_video_url(video_url)
    if (m = video_url.match(/^.*(?:youtu\.be\/|v\/|u\/\w\/|embed\/|watch\?v=|\&v=)([^#\&\?]*).*/))
      ['youtube', m[1]]
    elsif (m = video_url.match(/^.+vimeo\.com\/(?:.*\/)?([^#\?]*)/))
      ['vimeo', m[1]]
    end
  end
end
##
## Fetch Tumblr posts
##

# Collects CONF[:total_posts] posts from the blog, one page per request,
# starting at CONF[:offset].
client = Tumblr::Client.new

# Posts requested per page (Tumblr API v2 documents 20 as the maximum `limit`).
page_size = 20

# Round up so a partial final page is still requested, and use an exclusive
# range below so no extra page is fetched beyond what was asked for.
total_pages = (CONF[:total_posts] / page_size.to_f).ceil
limit = [CONF[:total_posts], page_size].min

fetched_posts = (0...total_pages).flat_map do |page|
  response = client.posts("#{CONF[:t_user]}.tumblr.com", {
    limit: limit,
    offset: page * page_size + CONF[:offset]
  })
  # A response without a 'posts' entry contributes nothing instead of a nil.
  Array(response['posts'])
end

# The last page may overshoot; keep exactly the requested number of posts.
tumblr_posts = fetched_posts.first(CONF[:total_posts])
##
## Siteleaf
##

# Look up the target page by title; create it when the site has no such page.
site = Siteleaf::Site.find(CONF[:site_id])
sl_page = site.pages.detect { |page| page.title == CONF[:sl_page_title] }
sl_page ||= Siteleaf::Page.create({
  site_id: CONF[:site_id],
  title: CONF[:sl_page_title]
})

# Create one Siteleaf post per Tumblr post, then one asset per photo.
tumblr_posts.each_with_index do |raw_post, index|
  post = Post.new(raw_post)

  # I’m skipping link and text posts. Sorry.
  next if %w[text link].include?(post.type)

  puts "Importing post #{raw_post['id']} (#{index + 1 + CONF[:offset]} of #{tumblr_posts.length + CONF[:offset]})"

  sl_post = Siteleaf::Post.create(post.params.merge({
    parent_id: sl_page.id
  }))

  post.photos.each_with_index do |photo_url, photo_index|
    puts " creating asset #{photo_index + 1} of #{post.photos.length}"
    Siteleaf::Asset.create({
      post_id: sl_post.id,
      url: photo_url
    })
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment