larryfox/tumblr_2_siteleaf.rb

## tumblr_2_siteleaf.rb
#!/usr/bin/env ruby

require 'tumblr_client'
require 'siteleaf'
require 'sanitize'
require 'pry'
require 'json'


##
## User Configuration
##

# Settings for both services. Replace the placeholder strings before running.
# Frozen so that nothing later in the script can mutate the configuration.
CONF = {

  ## Tumblr configuration
  total_posts: 300, # number of posts to collect.
  t_user: 'your tumblr blog name',
  t_key: 'your tumblr api key',
  t_secret: 'your tumblr api secret',

  ## Siteleaf configuration
  sl_page_title: 'the siteleaf page name to import into',
  site_id: 'your siteleaf site id',
  sl_key: 'your siteleaf api key',
  sl_secret: 'your siteleaf api secret',

  ## Added to every Tumblr request offset. Useful for resuming if the
  ## process fails along the way…
  offset: 0,
}.freeze


##
## Client Configuration
##

# Pass the Tumblr consumer key and secret from CONF to the Tumblr client.
Tumblr.configure do |config|
  config.consumer_key = CONF[:t_key]
  config.consumer_secret = CONF[:t_secret]
end

# Siteleaf takes its key and secret as module-level settings.
Siteleaf.api_key, Siteleaf.api_secret = CONF.values_at(:sl_key, :sl_secret)


##
## Post Class
##

# Wraps one Tumblr post hash and derives the attributes sent to Siteleaf
# (title, body, meta, taxonomy and photo URLs). Derived values are memoized.
class Post
  attr_reader :post, :published_at, :type

  # post - Hash describing one Tumblr post. 'timestamp' and 'type' are read
  #        immediately; the other keys are read lazily by the methods below.
  def initialize(post)
    @post = post
    @published_at = Time.at(post['timestamp'])
    @type = post['type']
  end

  # Plain-text title: the sanitized caption with whitespace collapsed and an
  # attribution suffix removed, or the post id when there is no caption text.
  #
  # Returns a String.
  def title
    return @title if defined? @title
    # Lots of junk characters…
    caption = Sanitize.clean(post['caption']).to_s.gsub(/\s+/, ' ').strip
    @title = if caption.empty?
        post['id'].to_s
      else
        # Cut everything from the first " —", " -" or " (…)" to the end.
        caption.sub(/( —.+| -.+| \(.+\))$/, '').strip
      end
  end

  # Body markup: the caption reduced to a small set of inline tags with the
  # "(… via …)" credit removed, falling back to the post's 'text'.
  #
  # Returns a String, or nil when the post has neither caption nor text.
  def body
    return @body if defined? @body
    # Branch on the raw caption rather than on the sanitizer's return value so
    # the 'text' fallback does not depend on how Sanitize.clean treats nil.
    @body = if post['caption']
        caption = Sanitize.clean(post['caption'], {
          elements: %w[b em i strong u a],
          attributes: { 'a' => ['href'] },
          add_attributes: { 'a' => {'rel' => 'nofollow'} },
          protocols: { 'a' => {'href' => ['http', 'https', :relative]}}
        })
        caption.to_s.sub(/(\(.+via.+\))/, '').strip
      elsif post['text']
        post['text']
      end
  end

  # Returns an Array of { key:, value: } hashes: always the post type, plus
  # source, cite and embed entries when the post provides them.
  def meta
    return @meta if defined? @meta
    @meta = [{ key: 'type', value: type }]
    add_source_meta
    add_cite_meta
    add_embed_meta
    @meta
  end

  # Returns an Array with the original-size URL of every photo (may be empty).
  def photos
    return @photos if defined? @photos
    @photos = Array(post['photos']).map { |photo| photo['original_size']['url'] }
  end

  # Returns a Hash holding a 'Tags' taxonomy, or an empty Hash when the post
  # has no tags (a missing 'tags' key is treated like an empty list).
  def taxonomy
    return @taxonomy if defined? @taxonomy
    tags = Array(post['tags'])
    @taxonomy = if tags.empty?
        {}
      else
        { taxonomy: [{ key: 'Tags', values: tags }] }
      end
  end

  # Returns the attribute Hash handed to Siteleaf::Post.create.
  def params
    return @params if defined? @params
    @params = {
      title: title.to_s,
      body: body.to_s,
      meta: meta,
      published_at: published_at
    }.merge(taxonomy)
  end

  private

    # Records the source link: 'source_url' when present, otherwise the href
    # of a "(via)" anchor embedded in the caption.
    def add_source_meta
      if post['source_url']
        @meta << { key: 'source', value: post['source_url'] }
      elsif post['caption']
        # [^"]+ keeps the capture inside a single href even when the caption
        # contains several links.
        post['caption'].match(/\(<a href="([^"]+)" target="_blank">via<\/a>\)/) do |m|
          @meta << { key: 'source', value: m[1] }
        end
      end
    end

    # Records the post's 'source' value as the citation, when present.
    def add_cite_meta
      @meta << { key: 'cite', value: post['source'] } if post['source']
    end

    # Records an embed_youtube / embed_vimeo entry when 'permalink_url' is
    # recognised as a video link.
    def add_embed_meta
      if post['permalink_url']
        service, id = parse_video_url(post['permalink_url'])
        @meta << { key: "embed_#{service}", value: id } if service && id
      end
    end

    # Returns [service, id] for YouTube or Vimeo URLs, nil for anything else.
    def parse_video_url(video_url)
      if m = video_url.match(/^.*(?:youtu.be\/|v\/|u\/\w\/|embed\/|watch\?v=|\&v=)([^#\&\?]*).*/)
        ['youtube', m[1]]
      elsif m = video_url.match(/^.+vimeo.com\/(?:.*\/)?([^#\?]*)/)
        ['vimeo', m[1]]
      end
    end
end


##
## Fetch Tumblr posts
##

# Page through the blog's posts until CONF[:total_posts] have been requested.
# Each request starts at a multiple of page_size (shifted by CONF[:offset]),
# and the last one is shortened so the total never exceeds total_posts.
client = Tumblr::Client.new
page_size = 20 # posts asked for per request

tumblr_posts = (0...CONF[:total_posts]).step(page_size).flat_map do |start|
  response = client.posts("#{CONF[:t_user]}.tumblr.com", {
    limit: [page_size, CONF[:total_posts] - start].min,
    offset: start + CONF[:offset]
  })
  # Stop with a readable message rather than letting a nil entry reach
  # Post.new further down.
  response['posts'] ||
    abort("Tumblr request at offset #{start + CONF[:offset]} returned no posts: #{response.inspect}")
end


##
## Siteleaf
##

# Import into the page titled CONF[:sl_page_title]; create it when the site
# has no page with that title yet.
site = Siteleaf::Site.find(CONF[:site_id])
sl_page = site.pages.detect { |page| page.title == CONF[:sl_page_title] } ||
  Siteleaf::Page.create({
    site_id: CONF[:site_id],
    title:   CONF[:sl_page_title]
  })

# Create one Siteleaf post under sl_page for every fetched Tumblr post, then
# attach each of its photos as an asset.
skipped_types = %w[text link]

tumblr_posts.each_with_index do |raw_post, index|
  post = Post.new(raw_post)

  # I’m skipping link and text posts. Sorry.
  next if skipped_types.include?(post.type)

  puts "Importing post #{raw_post['id']} (#{index+1+CONF[:offset]} of #{tumblr_posts.length+CONF[:offset]})"

  attributes = post.params.merge({ parent_id: sl_page.id })
  sl_post = Siteleaf::Post.create(attributes)

  # Count assets from 1 for the progress line.
  post.photos.each.with_index(1) do |photo_url, number|
    puts "    creating asset #{number} of #{post.photos.length}"

    Siteleaf::Asset.create({
      post_id: sl_post.id,
      url: photo_url
    })
  end
end
	#!/usr/bin/env ruby

	require 'tumblr_client'
	require 'siteleaf'
	require 'sanitize'
	require 'pry'
	require 'json'


	##
	## User Configuration
	##

	# Settings for both services. Replace the placeholder strings before running.
	# Frozen so that nothing later in the script can mutate the configuration.
	CONF = {

	  ## Tumblr configuration
	  total_posts: 300, # number of posts to collect.
	  t_user: 'your tumblr blog name',
	  t_key: 'your tumblr api key',
	  t_secret: 'your tumblr api secret',

	  ## Siteleaf configuration
	  sl_page_title: 'the siteleaf page name to import into',
	  site_id: 'your siteleaf site id',
	  sl_key: 'your siteleaf api key',
	  sl_secret: 'your siteleaf api secret',

	  ## Added to every Tumblr request offset. Useful for resuming if the
	  ## process fails along the way…
	  offset: 0,
	}.freeze



	##
	## Client Configuration
	##

	# Pass the Tumblr consumer key and secret from CONF to the Tumblr client.
	Tumblr.configure do |c|
	  c.consumer_key = CONF[:t_key]
	  c.consumer_secret = CONF[:t_secret]
	end

	# Siteleaf takes its key and secret as module-level settings.
	Siteleaf.api_key = CONF[:sl_key]
	Siteleaf.api_secret = CONF[:sl_secret]



	##
	## Post Class
	##

	# Wraps one Tumblr post hash and derives the attributes sent to Siteleaf
	# (title, body, meta, taxonomy and photo URLs). Derived values are memoized.
	class Post
	  attr_reader :post, :published_at, :type

	  # post - Hash describing one Tumblr post. 'timestamp' and 'type' are read
	  #        immediately; the other keys are read lazily by the methods below.
	  def initialize(post)
	    @post = post
	    @published_at = Time.at(post['timestamp'])
	    @type = post['type']
	  end

	  # Plain-text title: the sanitized caption with whitespace collapsed and an
	  # attribution suffix removed, or the post id when there is no caption text.
	  #
	  # Returns a String.
	  def title
	    return @title if defined? @title
	    # Lots of junk characters…
	    caption = Sanitize.clean(post['caption']).to_s.gsub(/\s+/, ' ').strip
	    @title = if caption.empty?
	        post['id'].to_s
	      else
	        # Cut everything from the first " —", " -" or " (…)" to the end.
	        caption.sub(/( —.+| -.+| \(.+\))$/, '').strip
	      end
	  end

	  # Body markup: the caption reduced to a small set of inline tags with the
	  # "(… via …)" credit removed, falling back to the post's 'text'.
	  #
	  # Returns a String, or nil when the post has neither caption nor text.
	  def body
	    return @body if defined? @body
	    # Branch on the raw caption rather than on the sanitizer's return value so
	    # the 'text' fallback does not depend on how Sanitize.clean treats nil.
	    @body = if post['caption']
	        caption = Sanitize.clean(post['caption'], {
	          elements: %w[b em i strong u a],
	          attributes: { 'a' => ['href'] },
	          add_attributes: { 'a' => {'rel' => 'nofollow'} },
	          protocols: { 'a' => {'href' => ['http', 'https', :relative]}}
	        })
	        caption.to_s.sub(/(\(.+via.+\))/, '').strip
	      elsif post['text']
	        post['text']
	      end
	  end

	  # Returns an Array of { key:, value: } hashes: always the post type, plus
	  # source, cite and embed entries when the post provides them.
	  def meta
	    return @meta if defined? @meta
	    @meta = [{ key: 'type', value: type }]
	    add_source_meta
	    add_cite_meta
	    add_embed_meta
	    @meta
	  end

	  # Returns an Array with the original-size URL of every photo (may be empty).
	  def photos
	    return @photos if defined? @photos
	    @photos = Array(post['photos']).map { |photo| photo['original_size']['url'] }
	  end

	  # Returns a Hash holding a 'Tags' taxonomy, or an empty Hash when the post
	  # has no tags (a missing 'tags' key is treated like an empty list).
	  def taxonomy
	    return @taxonomy if defined? @taxonomy
	    tags = Array(post['tags'])
	    @taxonomy = if tags.empty?
	        {}
	      else
	        { taxonomy: [{ key: 'Tags', values: tags }] }
	      end
	  end

	  # Returns the attribute Hash handed to Siteleaf::Post.create.
	  def params
	    return @params if defined? @params
	    @params = {
	      title: title.to_s,
	      body: body.to_s,
	      meta: meta,
	      published_at: published_at
	    }.merge(taxonomy)
	  end

	  private

	    # Records the source link: 'source_url' when present, otherwise the href
	    # of a "(via)" anchor embedded in the caption.
	    def add_source_meta
	      if post['source_url']
	        @meta << { key: 'source', value: post['source_url'] }
	      elsif post['caption']
	        # [^"]+ keeps the capture inside a single href even when the caption
	        # contains several links.
	        post['caption'].match(/\(<a href="([^"]+)" target="_blank">via<\/a>\)/) do |m|
	          @meta << { key: 'source', value: m[1] }
	        end
	      end
	    end

	    # Records the post's 'source' value as the citation, when present.
	    def add_cite_meta
	      @meta << { key: 'cite', value: post['source'] } if post['source']
	    end

	    # Records an embed_youtube / embed_vimeo entry when 'permalink_url' is
	    # recognised as a video link.
	    def add_embed_meta
	      if post['permalink_url']
	        service, id = parse_video_url(post['permalink_url'])
	        @meta << { key: "embed_#{service}", value: id } if service && id
	      end
	    end

	    # Returns [service, id] for YouTube or Vimeo URLs, nil for anything else.
	    def parse_video_url(video_url)
	      if m = video_url.match(/^.*(?:youtu.be\/|v\/|u\/\w\/|embed\/|watch\?v=|\&v=)([^#\&\?]*).*/)
	        ['youtube', m[1]]
	      elsif m = video_url.match(/^.+vimeo.com\/(?:.*\/)?([^#\?]*)/)
	        ['vimeo', m[1]]
	      end
	    end
	end



	##
	## Fetch Tumblr posts
	##

	# Page through the blog's posts until CONF[:total_posts] have been requested.
	# Each request starts at a multiple of page_size (shifted by CONF[:offset]),
	# and the last one is shortened so the total never exceeds total_posts.
	client = Tumblr::Client.new
	page_size = 20 # posts asked for per request

	tumblr_posts = (0...CONF[:total_posts]).step(page_size).flat_map do |start|
	  response = client.posts("#{CONF[:t_user]}.tumblr.com", {
	    limit: [page_size, CONF[:total_posts] - start].min,
	    offset: start + CONF[:offset]
	  })
	  # Stop with a readable message rather than letting a nil entry reach
	  # Post.new further down.
	  response['posts'] ||
	    abort("Tumblr request at offset #{start + CONF[:offset]} returned no posts: #{response.inspect}")
	end


	##
	## Siteleaf
	##

	# Import into the page titled CONF[:sl_page_title]; create it when the site
	# has no page with that title yet.
	site = Siteleaf::Site.find(CONF[:site_id])
	unless sl_page = site.pages.detect { |p| p.title == CONF[:sl_page_title] }
	  sl_page = Siteleaf::Page.create({
	    site_id: CONF[:site_id],
	    title: CONF[:sl_page_title]
	  })
	end

	# Create one Siteleaf post under sl_page for every fetched Tumblr post, then
	# attach each of its photos as an asset.
	tumblr_posts.each_with_index do |p, i|
	  post = Post.new(p)

	  # I’m skipping link and text posts. Sorry.
	  next if %w[text link].include? post.type

	  puts "Importing post #{p['id']} (#{i+1+CONF[:offset]} of #{tumblr_posts.length+CONF[:offset]})"

	  sl_post = Siteleaf::Post.create(post.params.merge({
	    parent_id: sl_page.id
	  }))

	  # Separate index name so the post counter above is not shadowed.
	  post.photos.each_with_index do |photo, photo_index|
	    puts "    creating asset #{photo_index+1} of #{post.photos.length}"

	    Siteleaf::Asset.create({
	      post_id: sl_post.id,
	      url: photo
	    })
	  end
	end