Created
July 8, 2013 05:30
-
-
Save carlbrown/5946410 to your computer and use it in GitHub Desktop.
Script I wrote and used to migrate my SquareSpace blog to Octopress.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
require 'rubygems'
require 'fileutils'
require 'pathname'
require 'pp'
require 'date'
require 'uri'
require 'html2markdown'

# Migrates a Squarespace blog export into Octopress post files.
#
# usage: ruby squarespace_to_octopress.rb <squarespace_export.txt> <octopress _posts directory> [crawled site root directory]
#
# * squarespace_export.txt is a file from JournalPage -> Configuration -> Export Data in Squarespace.
# * octopress _posts directory is your octopress/source/_posts directory.
# * crawled site root directory is an optional argument that specifies a directory on local disk
#   that is a crawled mirror of your site - used for extracting tags.

# Two positional arguments are mandatory and the third is optional; anything
# else prints the usage line and exits with status 2.
unless [2, 3].include?(ARGV.length)
  $stderr.puts "Usage: #{$0} <squarespace_export.txt> <octopress _posts directory> [ crawled site root directory ]"
  exit 2
end
# Every post record collected from the export, in the order they were pushed.
@posts = []

# Starts a fresh post record in @current and rewinds the parser flags.
#
# Called once before the first line of the export and again after every
# post separator, so each post gets its own hash (and its own :categories
# array) rather than sharing state with the previous one.
def reset_state
  @current = {
    :date => "",
    :title => "",
    :body => "",
    :markdown => "",
    :author => "",
    :allow_comments => false,
    :published => false,
    :categories => []
  }
  @inBody = false
  @inHeader = true # The file starts with the first header
  @inComments = false
end
# Layout of the timestamp in a DATE: header line (12-hour clock with AM/PM).
EXPORT_DATE_FORMAT = '%m/%d/%Y %I:%M:%S %p'

# Returns the text after "KEY: " on an already-stripped header line.
# When the "KEY: " prefix is absent the line is returned unchanged.
def header_value(stripped_line, key)
  stripped_line.sub(/^#{Regexp.escape(key)}: /, '')
end

# Parses the export file named by ARGV[0] and appends one record per post to
# @posts.
#
# As handled here: a line of eight dashes ends a post, a line of five dashes
# ends the current section. Each post starts with a header section of
# "KEY: value" lines; later sections are introduced by "BODY:" or "COMMENT:".
# Posts that never received a TITLE are dropped.
File.open(ARGV[0]) do |f|
  reset_state
  f.each_line do |line|
    stripped = line.strip

    if stripped == "--------"
      # Post separator: keep the finished post, then start a new one.
      @posts.push @current if @current[:title] != ""
      reset_state
    elsif @inBody
      if stripped == "-----"
        # End of BODY: convert the accumulated HTML to markdown in one go.
        page = HTMLPage.new :contents => @current[:body]
        @current[:markdown] = page.markdown
        @inBody = false
      else
        @current[:body] += line
      end
    elsif @inHeader
      if stripped == "-----"
        @inHeader = false
      else
        # The key is matched against the raw line, so indented keys are ignored.
        case line
        when /^AUTHOR:/
          @current[:author] = header_value(stripped, 'AUTHOR')
        when /^TITLE:/
          @current[:title] = header_value(stripped, 'TITLE')
        when /^DATE:/
          raw_date = header_value(stripped, 'DATE')
          begin
            # DateTime (still a Date) keeps the time of day, which the post
            # front matter later prints as %H:%M; Date.strptime would drop it.
            @current[:date] = DateTime.strptime(raw_date, EXPORT_DATE_FORMAT)
          rescue ArgumentError
            # Leave :date at its "" default so one bad header does not abort
            # the whole migration.
            $stderr.puts "Warning: Couldn't parse date #{raw_date.inspect}"
          end
        when /^CATEGORY:/
          @current[:categories].push header_value(stripped, 'CATEGORY')
        when /^ALLOW COMMENTS:/
          @current[:allow_comments] = (header_value(stripped, 'ALLOW COMMENTS') == "1")
        when /^STATUS:/
          @current[:published] = (header_value(stripped, 'STATUS') == "Publish")
        end
      end
    elsif @inComments
      if stripped == "-----"
        @inComments = false
      else
        # Comments are not migrated; every line of one produces this warning.
        $stderr.puts "Warning: Skipping unsupported comment"
      end
    elsif stripped == "BODY:"
      @inBody = true
    elsif stripped == "COMMENT:"
      @inComments = true
    end
  end
  # The last post is only captured here if no separator followed it.
  @posts.push @current if @current[:title] != ""
end
# Builds the title part of a Squarespace post URL from the post title:
# lowercase, only a-z, 0-9, '-' and spaces kept, spaces turned into dashes,
# runs of dashes collapsed, cut to 60 characters, one trailing dash removed.
# Examples of the URLs being reproduced:
#   http://www.escortmissions.com/blog/2012/7/7/new-work-in-progress-million-words-multiplayer-crossword-gam.html
#   http://www.escortmissions.com/blog/2012/6/10/my-top-5-factors-for-ios-contracting-success.html
def squarespace_slug_title(title)
  cleaned = title.downcase.gsub(/[^a-z0-9\- ]/, '')
  dashed = cleaned.gsub(/ /, '-').gsub(/--*/, '-')
  dashed[0..59].sub(/-$/, '')
end

# Renders a date as the unpadded year/month/day path used in Squarespace URLs.
def squarespace_day_path(date)
  date.strftime('%Y/%-m/%-d')
end

# Wraps text in a YAML double-quoted scalar, escaping backslashes and double
# quotes, so titles containing ':' or other YAML syntax stay valid front matter.
def yaml_double_quoted(text)
  escaped = text.gsub(/["\\]/) { |ch| "\\#{ch}" }
  "\"#{escaped}\""
end

# Reads the tag names out of a crawled post page. The file is opened in
# block form so the handle is closed as soon as the page has been parsed.
def crawled_tags(path)
  page = File.open(path) { |io| Nokogiri::HTML(io) }
  page.css("span[class='tag-element']").map { |tag| tag.text.strip }
end

# Writes one Octopress markdown file per parsed post into ARGV[1].
# When a crawled mirror of the site is given as ARGV[2], the matching crawled
# page supplies the post's tags and, if needed, corrects the permalink date.
@posts.each_with_index do |post, index|
  sqsp_title = squarespace_slug_title(post[:title])

  # The file name and the permalink are both derived from the date, so a post
  # without a usable one cannot be written; report it and move on.
  unless post[:date].is_a?(Date)
    $stderr.puts "Bad date: #{post[:date]} on post #{index}:#{sqsp_title}.markdown"
    next
  end

  sqsp_slug = "#{squarespace_day_path(post[:date])}/#{sqsp_title}.html"
  tags = []

  if ARGV.length == 3
    crawled_file_name = "#{ARGV[2]}/blog/#{sqsp_slug}"
    unless File.exist?(crawled_file_name)
      ## try the previous day - I have a tendency to post around midnight
      previous_slug = "#{squarespace_day_path(post[:date] - 1)}/#{sqsp_title}.html"
      previous_day_crawled_file_name = "#{ARGV[2]}/blog/#{previous_slug}"
      if File.exist?(previous_day_crawled_file_name)
        sqsp_slug = previous_slug
        crawled_file_name = previous_day_crawled_file_name
      end
    end

    if File.exist?(crawled_file_name)
      # Read the tags before the output file is opened, so a failure here
      # cannot leave a half-written post behind.
      tags = crawled_tags(crawled_file_name)
    else
      $stderr.puts "Warning, Couldn't find file #{crawled_file_name}"
    end
  end

  filename = "#{post[:date].strftime('%Y-%m-%d-')}#{sqsp_title}.markdown"
  File.open("#{ARGV[1]}/#{filename}", 'w') do |file|
    file.puts("---")
    file.puts("layout: post")
    file.puts("title: #{yaml_double_quoted(post[:title])}")
    file.puts("date: #{post[:date].strftime('%Y-%m-%d %H:%M')}")
    file.puts("permalink: /blog/#{sqsp_slug}")
    file.puts("comments: #{post[:allow_comments]}")
    # Array#to_s prints ["a", "b"], which YAML reads as a flow sequence.
    file.puts("categories: #{post[:categories]}") unless post[:categories].empty?
    file.puts("tags: #{tags}") unless tags.empty?
    file.puts("---")
    file.puts(post[:markdown])
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment