Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Script I wrote and used to migrate my SquareSpace blog to Octopress.
#!/bin/env ruby
require 'rubygems'
require 'fileutils'
require 'pathname'
require 'pp'
require 'date'
require 'uri'
require 'html2markdown'
# usage: ruby squarespace_to_octopress.rb <squarespace_export.txt> <octopress _posts directory> [crawled site root directory]
# squarespace_export.txt is a file from JournalPage -> Configuration -> Export Data in Squarespace.
# octopress _posts directory is your octopress/source/_posts directory
# crawled site root directory is an optional argument that specifies a directory on local disk that is a crawled
# mirror of your site - used for extracting tags
# The script takes two mandatory arguments (export file, _posts directory)
# and one optional argument (crawled site root); anything else prints the
# usage line and exits with status 2.
unless [2, 3].include?(ARGV.length)
  $stderr.puts "Usage: #{$0} <squarespace_export.txt> <octopress _posts directory> [ crawled site root directory ]"
  exit 2
end

# Entries parsed from the export file, in file order.
@posts = []
# Begin a new, empty entry in @current and put the line parser back into
# header mode. Called before the first line and after every entry separator.
def reset_state
  @current = {
    date: "",
    title: "",
    body: "",
    markdown: "",
    author: "",
    allow_comments: false,
    published: false,
    categories: []
  }
  @inHeader = true # every entry opens with its header section
  @inBody = @inComments = false
end
# Parse the SquareSpace export named by ARGV[0] into @posts.
#
# The file is read line by line through a small state machine:
#   * "--------" ends the current entry; it is kept only if it has a title.
#   * "-----" ends the current section (header, body or comment).
#   * Header lines ("KEY: value") fill in the fields of @current.
#   * "BODY:" starts the HTML body; when that section closes, the collected
#     HTML is handed to HTMLPage and its #markdown result is stored.
#   * "COMMENT:" sections are not migrated; a warning is printed for every
#     line inside one.
#
# Raises whatever DateTime.strptime raises when a DATE value does not match
# the expected "month/day/year hour:minute:second AM/PM" format.
File.open(ARGV[0]) do |f|
  reset_state
  f.each_line do |line|
    stripped = line.strip
    if stripped == "--------"
      @posts.push @current if @current[:title] != ""
      reset_state
    elsif @inBody
      if stripped == "-----"
        page = HTMLPage.new :contents => @current[:body]
        @current[:markdown] = page.markdown
        @inBody = false
      else
        @current[:body] += line
      end
    elsif @inHeader
      if stripped == "-----"
        @inHeader = false
      else
        # The value is captured after the colon and stripped afterwards, so a
        # header with an empty value ("TITLE: ") yields "" instead of the
        # literal text "TITLE:".
        case line
        when /^AUTHOR:(.*)/
          @current[:author] = Regexp.last_match(1).strip
        when /^TITLE:(.*)/
          @current[:title] = Regexp.last_match(1).strip
        when /^DATE:(.*)/
          # DateTime (a Date subclass) keeps the time of day that the format
          # string parses; Date.strptime discards it, which made every post's
          # written timestamp come out as 00:00.
          @current[:date] = DateTime.strptime(Regexp.last_match(1).strip, '%m/%d/%Y %I:%M:%S %p')
        when /^CATEGORY:(.*)/
          category = Regexp.last_match(1).strip
          @current[:categories].push(category) unless category.empty?
        when /^ALLOW COMMENTS:(.*)/
          @current[:allow_comments] = (Regexp.last_match(1).strip == "1")
        when /^STATUS:(.*)/
          @current[:published] = (Regexp.last_match(1).strip == "Publish")
        end
      end
    elsif @inComments
      if stripped == "-----"
        @inComments = false
      else
        $stderr.puts "Warning: Skipping unsupported comment"
      end
    elsif stripped == "BODY:"
      @inBody = true
    elsif stripped == "COMMENT:"
      @inComments = true
    end
  end
  # The last entry may not be followed by a "--------" separator.
  @posts.push @current if @current[:title] != ""
end
# Write one Octopress post (YAML front matter followed by the markdown body)
# into the ARGV[1] directory for every entry collected in @posts.
#
# When a crawled-site directory is given as ARGV[2], the crawled HTML page of
# each post is looked up under "<ARGV[2]>/blog/" and the text of its
# span.tag-element nodes is written as the post's tags.
@posts.each_with_index do |post, index|
  # An entry without a DATE header still holds the "" placeholder. Calling
  # strftime on it would abort the whole run, so warn and skip the entry
  # before any date formatting happens.
  unless post[:date].is_a?(Date)
    $stderr.puts "Bad date: #{post[:date]} on post #{index}:#{post[:title]}"
    next
  end

  # Build the SquareSpace-style page name: lower-case the title, drop
  # everything except a-z, 0-9, hyphen and space, turn spaces into hyphens,
  # collapse hyphen runs, keep at most 60 characters, drop a trailing hyphen.
  # Example URLs this is meant to reproduce:
  # http://www.escortmissions.com/blog/2012/7/7/new-work-in-progress-million-words-multiplayer-crossword-gam.html
  # http://www.escortmissions.com/blog/2012/6/10/my-top-5-factors-for-ios-contracting-success.html
  sqsp_title = post[:title].downcase.gsub(/[^a-z0-9\- ]/, '').tr(' ', '-').squeeze('-')[0..59].sub(/-$/, '')
  sqsp_slug = "#{post[:date].strftime('%Y/%-m/%-d')}/#{sqsp_title}.html"

  crawled_file_name = nil
  crawled_found = false
  if ARGV.length == 3
    crawled_file_name = "#{ARGV[2]}/blog/#{sqsp_slug}"
    unless File.exist?(crawled_file_name)
      ## try the previous day - I have a tendency to post around midnight
      previous_day = post[:date] - 1
      previous_slug = "#{previous_day.strftime('%Y/%-m/%-d')}/#{sqsp_title}.html"
      previous_day_crawled_file_name = "#{ARGV[2]}/blog/#{previous_slug}"
      if File.exist?(previous_day_crawled_file_name)
        sqsp_slug = previous_slug
        crawled_file_name = previous_day_crawled_file_name
      end
    end
    crawled_found = File.exist?(crawled_file_name)
    $stderr.puts "Warning, Couldn't find file #{crawled_file_name}" unless crawled_found
  end

  filename = "#{post[:date].strftime('%Y-%m-%d-')}#{sqsp_title}.markdown"
  File.open(File.join(ARGV[1], filename), 'w') do |file|
    file.puts("---")
    file.puts("layout: post")
    # A bare colon inside the title would break the YAML front matter.
    file.puts("title: #{post[:title].gsub(':', '&#58;')}")
    file.puts("date: #{post[:date].strftime('%Y-%m-%d %H:%M')}")
    file.puts("permalink: /blog/#{sqsp_slug}")
    file.puts("comments: #{post[:allow_comments]}")
    file.puts("categories: #{post[:categories]}") unless post[:categories].empty?
    if crawled_found
      # Block-form File.open closes the handle once the page is parsed, and
      # unlike Kernel#open it never treats a leading "|" as a command.
      # NOTE(review): Nokogiri is not required by this file directly;
      # presumably html2markdown loads it - confirm.
      crawled_page = File.open(crawled_file_name) { |io| Nokogiri::HTML(io) }
      tags = crawled_page.css("span[class='tag-element']").map { |tag| tag.text.strip }
      file.puts("tags: #{tags}") unless tags.empty?
    end
    file.puts("---")
    file.puts(post[:markdown])
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment