koozie/typepad_to_jekyll.rb Secret

## typepad_to_jekyll.rb
#!/usr/bin/env ruby

# C. Stansbury            28 JUN 2014
#
# Script to convert Typepad Backup (MTIF Format) to
# Jekyll posts.  MTIF -> Jekyll _posts
#
# Creates one file per post found in the Typepad backup:
# published posts go to _posts, all other posts go to _drafts.

require 'pp'
require 'date'

# Plain attribute holder for a single comment on a post.
# Every field is readable and writable; nothing is set on construction.
class Comment
  # Who wrote the comment and where it came from.
  attr_accessor :author, :email, :ip, :url
  # When it was written and what it says.
  attr_accessor :date, :body
end

# One blog entry read from the backup: its header fields, its text
# sections, and the collections attached to it.
class Post
  # Header fields (one value per post).
  attr_accessor :author, :title, :status, :allow_comments, :convert_breaks
  attr_accessor :allow_pings, :basename, :unique_url, :date
  # Text sections; each starts out as an empty string.
  attr_accessor :body, :extended_body, :excerpt

  # Collections; each starts out as an empty array and is read-only as a
  # reference (callers append to the array itself).
  attr_reader :categories, :keywords, :comments

  # Starts with empty collections and empty text sections; every header
  # field is nil until assigned.
  def initialize
    @categories, @keywords, @comments = [], [], []
    @body, @extended_body, @excerpt = '', '', ''
  end

  # File name for this post: "<YYYY-MM-DD>-<basename>.html".
  # Requires +date+ to be set (it must respond to +strftime+).
  def filename
    day_stamp = date.strftime("%Y-%m-%d")
    "#{day_stamp}-#{basename}.html"
  end
end

# Command-line application that reads a Typepad / Movable Type export
# (MTIF) and writes one Jekyll file per post.
#
# Set +source_filename+ and +destination_dir+, then call +process+.
# Posts whose STATUS is "publish" are written to <destination_dir>/_posts,
# every other post to <destination_dir>/_drafts. Both directories must
# already exist.
class Application

  attr_accessor   :source_filename, :destination_dir
  attr_reader     :posts_dir, :drafts_dir, :import_status, :current_line, :last_line
  attr_reader     :current_post

  # Initializes the derived paths and line buffers. The paths are
  # recomputed by +process+ once +destination_dir+ has been assigned.
  def initialize
    setup
  end

  # Validates the inputs (exits with status 1 on failure) and converts
  # every post found in the source file.
  def process
    setup
    test_inputs
    process_typepad_file
  end

  # Prints the command-line synopsis to stdout.
  def usage
    puts "usage: #{File.basename(__FILE__)} <typepad_backup_filename> <jekyll base directory>"
  end

  private

  # Feeds the source file to the line-based state machine one line at a time.
  def process_typepad_file
    new_post
    File.open(source_filename, 'r') do |file|
      file.each do |line|
        @last_line = current_line
        @current_line = line
        process_line
      end
    end
    # A file without a closing post boundary would otherwise silently lose
    # its last post. After a proper closing boundary the pending post is
    # empty (no date), so nothing extra is written.
    write_post if current_post.date
  end

  # Starts a fresh post and returns the state machine to header parsing.
  # States: :post_header, :body, :extended_body, :excerpt, :keywords,
  # :boundary_5 (section keyword expected next), :boundary_7, :skipped.
  def new_post
    @import_status = :post_header
    @current_post = Post.new
  end

  # Writes the current post in Jekyll format (YAML front matter followed by
  # body and extended body) to _posts or _drafts depending on its status.
  # A post without a DATE cannot be named, so it is skipped; a warning is
  # printed when it at least had a title.
  def write_post
    cp = current_post
    if cp.date.nil?
      warn "Skipping post without a DATE header [#{cp.title}]" unless cp.title.to_s.empty?
      return
    end
    pp cp
    dir = cp.status == :publish ? posts_dir : drafts_dir
    File.open(File.join(dir, cp.filename), 'w') do |file|
      file.puts '---'
      file.puts 'layout: post'
      file.puts "title: #{clean_yaml(cp.title)}"
      file.puts "date: #{cp.date.strftime("%Y-%m-%d %H:%M:%S")}"
      if cp.categories.size == 1
        file.puts "category: #{cp.categories.first}"
      elsif cp.categories.size > 1
        file.puts "categories: #{cp.categories.join(' ')}"
      end
      file.puts '---'
      file.puts cp.body
      file.puts cp.extended_body
    end
  end

  # Returns +str+ as a YAML scalar for the front matter. Values containing
  # a colon, quote or '#', or starting with a YAML indicator character, are
  # wrapped in single quotes. Inside a single-quoted YAML scalar the only
  # escape is a doubled single quote; double quotes and backslashes are
  # literal, so double quotes are left untouched. nil becomes "".
  def clean_yaml(str)
    text = str.to_s
    return text unless text =~ /[:'"#]|\A[\-?\[\]{}&*!|>%@`,]/
    "'" + text.gsub("'", "''") + "'"
  end

  # Detects a boundary line and records it in +import_status+.
  # Only a line made up solely of dashes counts: 5 or 6 dashes mark a
  # section boundary (:boundary_5), 7 or more mark the end of a post
  # (:boundary_7). Content lines that merely begin with dashes are ignored.
  # Returns true when the current line is a boundary, false otherwise.
  def check_boundary
    dashes = current_line.rstrip
    return false unless dashes =~ /\A-{5,}\z/
    @import_status = dashes.size >= 7 ? :boundary_7 : :boundary_5
    true
  end

  # Advances the state machine by one line.
  def process_line
    if check_boundary
      # The boundary line itself carries no content; a post boundary
      # flushes the finished post and starts the next one.
      if import_status == :boundary_7
        write_post
        new_post
      end
      return
    end

    case import_status
    when :post_header
      process_header_line
    when :body
      process_body
    when :extended_body
      process_extended_body
    when :excerpt
      process_excerpt
    when :boundary_5
      process_boundary5_item
    end
    # :keywords and :skipped sections are read but not stored.
  end

  # Interprets the first non-blank line after a section boundary as the
  # section keyword. Sections this importer does not store (for example
  # comments) switch to :skipped so their content is never mistaken for a
  # section keyword.
  def process_boundary5_item
    return if current_line.strip.empty?
    @import_status =
      case current_line
      when /^BODY/          then :body
      when /^EXTENDED BODY/ then :extended_body
      when /^EXCERPT/       then :excerpt
      when /^KEYWORDS/      then :keywords
      else :skipped
      end
  end

  # Appends the current line to the post body.
  def process_body
    current_post.body += current_line
  end

  # Appends the current line to the extended body.
  def process_extended_body
    current_post.extended_body += current_line
  end

  # Appends the current line to the excerpt, dropping blank lines.
  def process_excerpt
    current_post.excerpt += current_line unless current_line.strip.empty?
  end

  # Parses one "KEY: value" header line into the current post.
  # The line is split at the first colon only, so values that contain
  # colons (URLs, or a title that repeats its own key) are kept intact.
  # Blank lines, lines without a colon and unknown keys are ignored.
  def process_header_line
    key, value = current_line.split(':', 2)
    return if value.nil?
    value = value.strip
    case key
    when 'AUTHOR'
      current_post.author = value
    when 'TITLE'
      current_post.title = value
    when 'STATUS'
      current_post.status = value.downcase.to_sym
    when 'ALLOW COMMENTS'
      current_post.allow_comments = (value == '1')
    when 'CONVERT BREAKS'
      current_post.convert_breaks = value
    when 'ALLOW PINGS'
      current_post.allow_pings = (value == '1')
    when 'BASENAME'
      current_post.basename = value
    when 'CATEGORY'
      current_post.categories << clean_category_name(value)
    when 'UNIQUE URL'
      current_post.unique_url = value
    when 'DATE'
      current_post.date = parse_date(value)
    end
  end

  # Parses a DATE header value, keeping the time of day (the front matter
  # prints hours, minutes and seconds). Tries the 12-hour AM/PM layout
  # first, then a 24-hour layout.
  # Raises ArgumentError when neither layout matches.
  def parse_date(text)
    ['%m/%d/%Y %I:%M:%S %p', '%m/%d/%Y %H:%M:%S'].each do |layout|
      begin
        return DateTime.strptime(text, layout)
      rescue ArgumentError
        next
      end
    end
    raise ArgumentError, "Unrecognized DATE value [#{text}]"
  end

  # Lower-cases a category name and replaces spaces with dashes.
  def clean_category_name(cat)
    cat.downcase.gsub(/ /,'-')
  end

  # Derives the output directories from +destination_dir+ and resets the
  # line buffers.
  def setup
    @posts_dir = File.join(destination_dir.to_s, '_posts')
    @drafts_dir = File.join(destination_dir.to_s, '_drafts')
    @current_line = ''
    @last_line = ''
  end

  # Exits with status 1 (after printing the reason) unless the source file
  # is readable and the destination, _posts and _drafts directories exist.
  def test_inputs
    unless File.readable?(source_filename.to_s)
      puts "Unable to read file [#{source_filename}]"
      exit 1
    end

    [destination_dir, posts_dir, drafts_dir].each do |dir|
      # to_s: an unset destination_dir is reported instead of raising TypeError.
      next if File.directory?(dir.to_s)
      puts "Not a directory [#{dir}]"
      exit 1
    end
  end
end


#================= MAIN ====================

# Entry point: expects exactly two arguments, the backup file and the
# Jekyll base directory; prints the usage line and exits 1 otherwise.
app = Application.new

unless ARGV.size == 2
  app.usage
  exit 1
end

backup_path, jekyll_root = ARGV
app.source_filename = backup_path
app.destination_dir = jekyll_root
app.process
	#!/usr/bin/env ruby

	# C. Stansbury 28 JUN 2014
	#
	# Script to convert Typepad Backup (MTIF Format) to
	# Jekyll posts. MTIF -> Jekyll _posts
	#
	# Creates one file per post found in the Typepad backup:
	# published posts go to _posts, all other posts go to _drafts.

	require 'pp'
	require 'date'

	# Plain attribute holder for a single comment on a post.
	# Every field is readable and writable; nothing is set on construction.
	class Comment
	  # Who wrote the comment and where it came from.
	  attr_accessor :author, :email, :ip, :url
	  # When it was written and what it says.
	  attr_accessor :date, :body
	end

	# One blog entry read from the backup: its header fields, its text
	# sections, and the collections attached to it.
	class Post
	  # Header fields (one value per post).
	  attr_accessor :author, :title, :status, :allow_comments, :convert_breaks
	  attr_accessor :allow_pings, :basename, :unique_url, :date
	  # Text sections; each starts out as an empty string.
	  attr_accessor :body, :extended_body, :excerpt

	  # Collections; each starts out as an empty array and is read-only as a
	  # reference (callers append to the array itself).
	  attr_reader :categories, :keywords, :comments

	  # Starts with empty collections and empty text sections; every header
	  # field is nil until assigned.
	  def initialize
	    @categories, @keywords, @comments = [], [], []
	    @body, @extended_body, @excerpt = '', '', ''
	  end

	  # File name for this post: "<YYYY-MM-DD>-<basename>.html".
	  # Requires +date+ to be set (it must respond to +strftime+).
	  def filename
	    day_stamp = date.strftime("%Y-%m-%d")
	    "#{day_stamp}-#{basename}.html"
	  end
	end

	# Command-line application that reads a Typepad / Movable Type export
	# (MTIF) and writes one Jekyll file per post.
	#
	# Set +source_filename+ and +destination_dir+, then call +process+.
	# Posts whose STATUS is "publish" are written to <destination_dir>/_posts,
	# every other post to <destination_dir>/_drafts. Both directories must
	# already exist.
	class Application

	  attr_accessor :source_filename, :destination_dir
	  attr_reader :posts_dir, :drafts_dir, :import_status, :current_line, :last_line
	  attr_reader :current_post

	  # Initializes the derived paths and line buffers. The paths are
	  # recomputed by +process+ once +destination_dir+ has been assigned.
	  def initialize
	    setup
	  end

	  # Validates the inputs (exits with status 1 on failure) and converts
	  # every post found in the source file.
	  def process
	    setup
	    test_inputs
	    process_typepad_file
	  end

	  # Prints the command-line synopsis to stdout.
	  def usage
	    puts "usage: #{File.basename(__FILE__)} <typepad_backup_filename> <jekyll base directory>"
	  end

	  private

	  # Feeds the source file to the line-based state machine one line at a time.
	  def process_typepad_file
	    new_post
	    File.open(source_filename, 'r') do |file|
	      file.each do |line|
	        @last_line = current_line
	        @current_line = line
	        process_line
	      end
	    end
	    # A file without a closing post boundary would otherwise silently lose
	    # its last post. After a proper closing boundary the pending post is
	    # empty (no date), so nothing extra is written.
	    write_post if current_post.date
	  end

	  # Starts a fresh post and returns the state machine to header parsing.
	  # States: :post_header, :body, :extended_body, :excerpt, :keywords,
	  # :boundary_5 (section keyword expected next), :boundary_7, :skipped.
	  def new_post
	    @import_status = :post_header
	    @current_post = Post.new
	  end

	  # Writes the current post in Jekyll format (YAML front matter followed by
	  # body and extended body) to _posts or _drafts depending on its status.
	  # A post without a DATE cannot be named, so it is skipped; a warning is
	  # printed when it at least had a title.
	  def write_post
	    cp = current_post
	    if cp.date.nil?
	      warn "Skipping post without a DATE header [#{cp.title}]" unless cp.title.to_s.empty?
	      return
	    end
	    pp cp
	    dir = cp.status == :publish ? posts_dir : drafts_dir
	    File.open(File.join(dir, cp.filename), 'w') do |file|
	      file.puts '---'
	      file.puts 'layout: post'
	      file.puts "title: #{clean_yaml(cp.title)}"
	      file.puts "date: #{cp.date.strftime("%Y-%m-%d %H:%M:%S")}"
	      if cp.categories.size == 1
	        file.puts "category: #{cp.categories.first}"
	      elsif cp.categories.size > 1
	        file.puts "categories: #{cp.categories.join(' ')}"
	      end
	      file.puts '---'
	      file.puts cp.body
	      file.puts cp.extended_body
	    end
	  end

	  # Returns +str+ as a YAML scalar for the front matter. Values containing
	  # a colon, quote or '#', or starting with a YAML indicator character, are
	  # wrapped in single quotes. Inside a single-quoted YAML scalar the only
	  # escape is a doubled single quote; double quotes and backslashes are
	  # literal, so double quotes are left untouched. nil becomes "".
	  def clean_yaml(str)
	    text = str.to_s
	    return text unless text =~ /[:'"#]|\A[\-?\[\]{}&*!|>%@`,]/
	    "'" + text.gsub("'", "''") + "'"
	  end

	  # Detects a boundary line and records it in +import_status+.
	  # Only a line made up solely of dashes counts: 5 or 6 dashes mark a
	  # section boundary (:boundary_5), 7 or more mark the end of a post
	  # (:boundary_7). Content lines that merely begin with dashes are ignored.
	  # Returns true when the current line is a boundary, false otherwise.
	  def check_boundary
	    dashes = current_line.rstrip
	    return false unless dashes =~ /\A-{5,}\z/
	    @import_status = dashes.size >= 7 ? :boundary_7 : :boundary_5
	    true
	  end

	  # Advances the state machine by one line.
	  def process_line
	    if check_boundary
	      # The boundary line itself carries no content; a post boundary
	      # flushes the finished post and starts the next one.
	      if import_status == :boundary_7
	        write_post
	        new_post
	      end
	      return
	    end

	    case import_status
	    when :post_header
	      process_header_line
	    when :body
	      process_body
	    when :extended_body
	      process_extended_body
	    when :excerpt
	      process_excerpt
	    when :boundary_5
	      process_boundary5_item
	    end
	    # :keywords and :skipped sections are read but not stored.
	  end

	  # Interprets the first non-blank line after a section boundary as the
	  # section keyword. Sections this importer does not store (for example
	  # comments) switch to :skipped so their content is never mistaken for a
	  # section keyword.
	  def process_boundary5_item
	    return if current_line.strip.empty?
	    @import_status =
	      case current_line
	      when /^BODY/          then :body
	      when /^EXTENDED BODY/ then :extended_body
	      when /^EXCERPT/       then :excerpt
	      when /^KEYWORDS/      then :keywords
	      else :skipped
	      end
	  end

	  # Appends the current line to the post body.
	  def process_body
	    current_post.body += current_line
	  end

	  # Appends the current line to the extended body.
	  def process_extended_body
	    current_post.extended_body += current_line
	  end

	  # Appends the current line to the excerpt, dropping blank lines.
	  def process_excerpt
	    current_post.excerpt += current_line unless current_line.strip.empty?
	  end

	  # Parses one "KEY: value" header line into the current post.
	  # The line is split at the first colon only, so values that contain
	  # colons (URLs, or a title that repeats its own key) are kept intact.
	  # Blank lines, lines without a colon and unknown keys are ignored.
	  def process_header_line
	    key, value = current_line.split(':', 2)
	    return if value.nil?
	    value = value.strip
	    case key
	    when 'AUTHOR'
	      current_post.author = value
	    when 'TITLE'
	      current_post.title = value
	    when 'STATUS'
	      current_post.status = value.downcase.to_sym
	    when 'ALLOW COMMENTS'
	      current_post.allow_comments = (value == '1')
	    when 'CONVERT BREAKS'
	      current_post.convert_breaks = value
	    when 'ALLOW PINGS'
	      current_post.allow_pings = (value == '1')
	    when 'BASENAME'
	      current_post.basename = value
	    when 'CATEGORY'
	      current_post.categories << clean_category_name(value)
	    when 'UNIQUE URL'
	      current_post.unique_url = value
	    when 'DATE'
	      current_post.date = parse_date(value)
	    end
	  end

	  # Parses a DATE header value, keeping the time of day (the front matter
	  # prints hours, minutes and seconds). Tries the 12-hour AM/PM layout
	  # first, then a 24-hour layout.
	  # Raises ArgumentError when neither layout matches.
	  def parse_date(text)
	    ['%m/%d/%Y %I:%M:%S %p', '%m/%d/%Y %H:%M:%S'].each do |layout|
	      begin
	        return DateTime.strptime(text, layout)
	      rescue ArgumentError
	        next
	      end
	    end
	    raise ArgumentError, "Unrecognized DATE value [#{text}]"
	  end

	  # Lower-cases a category name and replaces spaces with dashes.
	  def clean_category_name(cat)
	    cat.downcase.gsub(/ /,'-')
	  end

	  # Derives the output directories from +destination_dir+ and resets the
	  # line buffers.
	  def setup
	    @posts_dir = File.join(destination_dir.to_s, '_posts')
	    @drafts_dir = File.join(destination_dir.to_s, '_drafts')
	    @current_line = ''
	    @last_line = ''
	  end

	  # Exits with status 1 (after printing the reason) unless the source file
	  # is readable and the destination, _posts and _drafts directories exist.
	  def test_inputs
	    unless File.readable?(source_filename.to_s)
	      puts "Unable to read file [#{source_filename}]"
	      exit 1
	    end

	    [destination_dir, posts_dir, drafts_dir].each do |dir|
	      # to_s: an unset destination_dir is reported instead of raising TypeError.
	      next if File.directory?(dir.to_s)
	      puts "Not a directory [#{dir}]"
	      exit 1
	    end
	  end
	end


	#================= MAIN ====================

	# Entry point: expects exactly two arguments, the backup file and the
	# Jekyll base directory; prints the usage line and exits 1 otherwise.
	app = Application.new

	unless ARGV.size == 2
	  app.usage
	  exit 1
	end

	backup_path, jekyll_root = ARGV
	app.source_filename = backup_path
	app.destination_dir = jekyll_root
	app.process