-
-
Save koozie/5de72c600630999c414d to your computer and use it in GitHub Desktop.
Typepad to Jekyll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# C. Stansbury 28 JUN 2014
#
# Script to convert a Typepad backup (MTIF format) to
# Jekyll posts. MTIF -> Jekyll _posts
#
# Will create a post file in the _posts directory for
# each post found in the Typepad backup.
require 'pp' | |
require 'date' | |
# Plain value holder for one reader comment: who wrote it (author, email,
# ip, url), when (date) and what (body). All fields start out nil.
class Comment
  attr_accessor :author, :email, :ip, :url, :date, :body
end
# One blog entry collected from the backup file.
#
# Scalar fields are filled in through the accessors; +categories+, +keywords+
# and +comments+ are arrays that callers append to. The text fields (+body+,
# +extended_body+, +excerpt+) start as empty strings so they can be appended
# to line by line.
class Post
  attr_accessor :author, :title, :status, :allow_comments, :convert_breaks
  attr_accessor :allow_pings, :basename, :unique_url, :date, :body
  attr_accessor :extended_body, :excerpt
  attr_reader :categories, :keywords, :comments

  def initialize
    @categories = []
    @keywords = []
    @comments = []
    @body = ''
    @extended_body = ''
    @excerpt = ''
  end

  # File name for this post in the "YYYY-MM-DD-name.html" form.
  #
  # +date+ must already be set (anything responding to #strftime); a nil
  # date raises NoMethodError.
  def filename
    "#{date.strftime('%Y-%m-%d')}-#{slug}.html"
  end

  private

  # Name part of the file name: +basename+ when present, otherwise a
  # lower-case, dash-separated version of +title+ so that posts without a
  # basename do not all collapse onto the same "DATE-.html" file.
  def slug
    return basename unless basename.to_s.strip.empty?

    title.to_s.downcase.gsub(/[^a-z0-9]+/, '-').gsub(/\A-+|-+\z/, '')
  end
end
# Drives the conversion of a Typepad (MTIF) backup into Jekyll post files.
#
# Assign +source_filename+ and +destination_dir+, then call #process. The
# backup is read line by line through a small state machine whose state is
# +import_status+:
#
#   :post_header    reading "KEY: value" metadata lines of an entry
#   :boundary_5     a "-----" line was seen; waiting for a section name
#   :body, :extended_body, :excerpt
#                   collecting the text of that section
#   :keywords       inside a KEYWORDS section (its lines are not collected)
#   :boundary_7     the end-of-entry line was seen; the post is written
#
# Sections that are not recognised after a "-----" line leave the state at
# :boundary_5, so their lines are skipped. Posts whose status is :publish are
# written to <destination_dir>/_posts, all others to <destination_dir>/_drafts.
class Application
  # A line made of exactly five dashes separates sections inside an entry.
  SECTION_BOUNDARY = /\A-{5}\s*\z/
  # A line made only of seven or more dashes ends an entry.
  ENTRY_BOUNDARY = /\A-{7,}\s*\z/
  # Titles containing any of these cannot be written as plain YAML scalars:
  # a colon, hash or quote anywhere, an indicator character in first
  # position, or an empty value.
  YAML_NEEDS_QUOTING = /[:#'"]|\A[\[\]{}&*!|>%@`,?-]|\A\z/
  # DATE header layouts: 12-hour clock with AM/PM first, then 24-hour clock.
  DATE_FORMATS = ['%m/%d/%Y %I:%M:%S %p', '%m/%d/%Y %H:%M:%S'].freeze

  attr_accessor :source_filename, :destination_dir
  attr_reader :posts_dir, :drafts_dir, :import_status, :current_line, :last_line
  attr_reader :current_post

  def initialize
    setup
  end

  # Validates the inputs (exits with status 1 on failure) and converts the
  # whole backup file.
  def process
    setup
    test_inputs
    process_typepad_file
  end

  # Prints the command-line synopsis to stdout.
  def usage
    puts "usage: #{File.basename(__FILE__)} <typepad_backup_filename> <jekyll base directory>"
  end

  private

  # Feeds every line of the source file through the state machine.
  def process_typepad_file
    new_post
    File.foreach(source_filename) do |line|
      @last_line = current_line
      @current_line = line
      process_line
    end
    # A final entry without its closing boundary line would otherwise be
    # dropped; a dated post at this point has not been written yet.
    write_post if current_post.date
  end

  # Starts a fresh post and goes back to reading header lines.
  def new_post
    @import_status = :post_header
    @current_post = Post.new
  end

  # Writes the current post, with Jekyll front matter, to _posts (published)
  # or _drafts (anything else). Also dumps the post to stdout via pp.
  def write_post
    pp current_post
    post = current_post
    dir = post.status == :publish ? posts_dir : drafts_dir
    File.open(File.join(dir, post.filename), 'w') do |file|
      file.puts '---'
      file.puts 'layout: post'
      # to_s: an entry without a TITLE header leaves the title nil.
      file.puts "title: #{clean_yaml(post.title.to_s)}"
      file.puts "date: #{post.date.strftime('%Y-%m-%d %H:%M:%S')}"
      if post.categories.size == 1
        file.puts "category: #{post.categories.first}"
      elsif post.categories.size > 1
        file.puts "categories: #{post.categories.join(' ')}"
      end
      file.puts '---'
      file.puts post.body
      file.puts post.extended_body
    end
  end

  # Returns +str+ ready to be placed after "title: " in YAML front matter.
  #
  # Values that would be misread as YAML syntax are wrapped in single quotes.
  # Inside a single-quoted YAML scalar the only escape is a doubled single
  # quote, so double quotes are left as they are (a backslash would become
  # part of the title).
  def clean_yaml(str)
    return str unless str =~ YAML_NEEDS_QUOTING

    "'#{str.gsub("'", "''")}'"
  end

  # Switches state when the current line is a boundary line. Only lines that
  # consist solely of dashes count, so post text that merely begins with
  # dashes is kept as content.
  def check_boundary
    case current_line
    when ENTRY_BOUNDARY then @import_status = :boundary_7
    when SECTION_BOUNDARY then @import_status = :boundary_5
    end
  end

  # Dispatches the current line according to the state machine.
  def process_line
    check_boundary
    case import_status
    when :post_header then process_header_line
    when :body then process_body
    when :extended_body then process_extended_body
    when :excerpt then process_excerpt
    when :boundary_7
      write_post
      new_post
    when :boundary_5 then process_boundary5_item
    end
  end

  # After a section boundary, picks the state for the section that starts.
  # The line must be exactly the section name plus a colon, so ordinary text
  # that happens to start with "BODY" or "EXCERPT" cannot change the state.
  def process_boundary5_item
    case current_line
    when /\ABODY:\s*\z/ then @import_status = :body
    when /\AEXTENDED BODY:\s*\z/ then @import_status = :extended_body
    when /\AEXCERPT:\s*\z/ then @import_status = :excerpt
    when /\AKEYWORDS:\s*\z/ then @import_status = :keywords
    end
  end

  def process_body
    current_post.body += current_line
  end

  def process_extended_body
    current_post.extended_body += current_line
  end

  # Blank lines are left out of the excerpt.
  def process_excerpt
    current_post.excerpt += current_line unless current_line.strip.empty?
  end

  # Stores one "KEY: value" header line on the current post. Lines without
  # a colon (including blank lines) and unknown keys are ignored.
  def process_header_line
    # Split on the first colon only, so values may contain colons and even
    # the key text itself.
    key, value = current_line.split(':', 2)
    return if value.nil?

    value = value.strip
    case key.strip
    when 'AUTHOR' then current_post.author = value
    when 'TITLE' then current_post.title = value
    when 'STATUS' then current_post.status = value.downcase.to_sym
    when 'ALLOW COMMENTS' then current_post.allow_comments = (value == '1')
    when 'CONVERT BREAKS' then current_post.convert_breaks = value
    when 'ALLOW PINGS' then current_post.allow_pings = (value == '1')
    when 'BASENAME' then current_post.basename = value
    when 'CATEGORY' then current_post.categories << clean_category_name(value)
    when 'UNIQUE URL' then current_post.unique_url = value
    when 'DATE' then current_post.date = parse_date(value)
    end
  end

  # Parses a DATE header value, keeping the time of day.
  #
  # DateTime is used because it keeps the wall-clock time exactly as written;
  # a plain Date would drop the time and every post would be stamped 00:00:00.
  # Raises ArgumentError when the text matches none of DATE_FORMATS.
  def parse_date(text)
    DateTime.strptime(text, DATE_FORMATS.first)
  rescue ArgumentError
    DateTime.strptime(text, DATE_FORMATS.last)
  end

  # Lower-cases a category and replaces spaces with dashes.
  def clean_category_name(cat)
    cat.downcase.tr(' ', '-')
  end

  # (Re)computes the output directories from destination_dir and clears the
  # line trackers. Runs at construction, before destination_dir is set, and
  # again at the start of #process.
  def setup
    @posts_dir = File.join(destination_dir.to_s, '_posts')
    @drafts_dir = File.join(destination_dir.to_s, '_drafts')
    @current_line = ''
    @last_line = ''
  end

  # Exits with status 1 unless the source file is readable and the
  # destination, _posts and _drafts directories all exist.
  def test_inputs
    unless File.readable?(source_filename.to_s)
      puts "Unable to read file [#{source_filename}]"
      exit 1
    end

    [destination_dir, posts_dir, drafts_dir].each do |dir|
      next if File.directory?(dir.to_s)

      puts "Not a directory [#{dir}]"
      exit 1
    end
  end
end
#================= MAIN ====================
# Command-line entry point: expects the backup file name and the Jekyll base
# directory. Runs only when this file is executed directly, so loading it
# from other code does not start a conversion or exit the process.
if __FILE__ == $PROGRAM_NAME
  app = Application.new
  unless ARGV.size == 2
    app.usage
    exit 1
  end
  app.source_filename, app.destination_dir = ARGV
  app.process
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment