Skip to content

Instantly share code, notes, and snippets.

@atomicules
Created March 22, 2011 10:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atomicules/881023 to your computer and use it in GitHub Desktop.
Save atomicules/881023 to your computer and use it in GitHub Desktop.
Some Ruby scripts I used to import/convert posts from various sources to Jekyll.
# Script to convert a directory of Nanoblogger posts to Jekyll
#
# Nanoblogger is a command line, static blogging app, not that
# dissimilar to Jekyll: http://nanoblogger.sourceforge.net/
#
# It's been years since I've used it, but the script below
# worked for me in converting the files to Jekyll.
# Converts one Nanoblogger post file into a Jekyll post, in place.
#
# Everything up to and including the "BODY:" line, and everything from the
# "END-----" line onwards, is dropped and replaced by Jekyll YAML front matter.
# The file is then renamed by replacing everything from the first "T" in its
# name with "-<Title-With-Dashes>.html".
#
# path     - path of the Nanoblogger .txt file to convert.
# category - value written to the "categories:" front matter entry.
#            !Important note to self. I had already split the archive of posts
#            into subfolders based on category, so the script is run on each
#            subfolder with the category set manually (default "code").
#
# Returns the new path, or nil when the file has no "BODY:" line (such a file
# is left untouched instead of being emptied).
def convert_nanoblogger_post(path, category = "code")
  # Chomp every line so the marker lines match whether or not the last line of
  # the file ends in a newline, and so body lines do not end up with a doubled
  # newline when they are written back out.
  lines = File.readlines(path).map { |line| line.chomp }

  body_start = lines.index("BODY:")
  if body_start.nil?
    warn "#{path}: no BODY: line found, leaving file untouched"
    return nil
  end

  # Need to read the Nanoblogger front matter to find the title
  title_line = lines[0...body_start].find { |line| line =~ /\ATITLE:/ }
  title = title_line ? title_line[6..-1].to_s.strip : ''

  # Keep only what sits between the BODY: and END----- markers. A missing end
  # marker just means the body runs to the end of the file.
  body = lines[(body_start + 1)..-1]
  body_end = body.index("END-----")
  body = body[0...body_end] if body_end

  # Add in Jekyll Yaml Front Matter
  front_matter = ["---", "layout: page", "type: text", "title: #{title}", "categories: #{category}", "---"]

  # Replace data in file
  File.open(path, "w") do |file|
    (front_matter + body).each { |line| file << line << "\n" }
  end

  # Then rename file based on title
  slug = "-" + title.gsub(/\s/, "-") # replace spaces, any other dodgy characters can manually fix
  dir, name = File.split(path)
  # Block form so that backslashes in the title are not read as back-references
  new_name = name.sub(/T.*/) { slug + ".html" }
  new_name = new_name.gsub(/[\\\/:*?"<>|]/, "_") # Windows safe filenames
  new_path = File.join(dir, new_name)
  File.rename(path, new_path)
  new_path
end

Dir['*.txt'].each { |f| convert_nanoblogger_post(f) }
# Script to extract posts from a Rails database dump to Jekyll
#
# Hence it's quite specific to the setup I had and probably of
# little use to anyone else.
# Read the whole dump into memory; the post rows are picked out by line number below.
sql = IO.readlines('newsite_production_dump 11.07.2009.sql')
#temp = sql.select{|v| v =~ /INSERT INTO `posts`/}[0]
#sql.index(temp) # Next line is "VALUES", then data starts until?
# Might as well just manually identify relevant line numbers
sql[61..88].each do |row| # The line numbers in the sql dump that contain the posts.
  # Splitting on ',' (quote, comma, quote) separates the pieces of one row.
  fields = row.split(/','/)
  # Title: everything after the first single quote of the first piece. Backslash
  # escapes are stripped the same way as for the body, so an escaped quote does
  # not leak into the front matter or the file name.
  title = fields[0][/'(.*)/, 1].gsub(/\\/, '')
  # Body: the next two pieces joined, with the escaped line breaks (a literal
  # backslash-r backslash-n in the dump) and any remaining backslashes removed.
  body = (fields[1] + fields[2]).gsub(/\\r\\n/, '').gsub(/\\/, '')
  date = fields[3].split(' ')[0] # Lazy, but works: keep only the part before the first space
  # Spaces become dashes; characters Windows does not allow in file names
  # (question marks, colons, slashes, ...) become underscores.
  filename = (date + "-" + title.gsub(/\s/, "-") + ".html").gsub(/[\\\/:*?"<>|]/, "_")
  # write somewhere
  File.open(filename, "w") do |file|
    file << "---\n"
    file << "layout: page\n"
    file << "type: text\n"
    file << "title: #{title}\n"
    file << "categories: \n" # Will have to fill these in manually, later
    file << "---\n"
    file << body
  end
end
# Script to download Tumblr posts to Jekyll Format
#
# Note this is somewhat specific to how I intend to use Jekyll, i.e. basically trying to have different
# post "types" in the same way Tumblr does and have special meta data in the Yaml front matter (See
# https://github.com/i5m/i5m.github.com), but the script could be easily adapted to suit.
require 'rubygems' # 1.8.7 just because I happened to already have gems installed
require 'open-uri'
require 'nokogiri'
require 'fileutils'
# Tumblr blog to export: the first command-line argument when one is given,
# otherwise the default below.
tumblrdomain = ARGV[0] || "i5m.co.uk" # Or "i5m.tumblr.com", etc
# Formats +value+ for use as a single-line YAML value.
#
# nil becomes an empty string. Text that is safe as a plain YAML scalar is
# returned unchanged; text that would break the front matter (a leading
# indicator character, ": ", " #", line breaks, tabs, leading or trailing
# whitespace) is returned double-quoted with backslashes, quotes and line
# breaks escaped.
def yaml_scalar(value)
  text = value.to_s
  unsafe = text =~ /\A[\s\-?:,\[\]{}\#&*!|>'"%@`]/ ||
           text =~ /:(\s|\z)/ ||
           text =~ /\s\#/ ||
           text =~ /[\r\n\t]/ ||
           text =~ /\s\z/
  return text unless unsafe

  escaped = text.gsub(/[\\"\r\n\t]/) do |char|
    case char
    when "\r" then '\r'
    when "\n" then '\n'
    when "\t" then '\t'
    else "\\" + char
    end
  end
  '"' + escaped + '"'
end

# Writes one post as a Jekyll file named "<date>-<filetitle>.<format>" in the
# current directory.
#
# tumblrid   - Tumblr post id; only used as the file title when +filetitle+ is
#              nil or empty (it is not written into the front matter).
# date       - date string used as the file name prefix.
# filetitle  - slug used in the file name.
# format     - file extension, without the dot.
# type       - post type, written as "type:" and used to pick the extra meta data.
# categories - list written under "categories:", one "- item" line each.
# body       - text written after the front matter; may be nil.
# title      - written as "title:"; may be nil.
# meta1      - photo url / quote text / link url / video source, depending on type.
# meta2      - click-through url for photo posts; ignored for other types.
#
# Can only have one optional argument in Ruby 1.8 so callers pass nils instead.
def writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
  # Without a slug, fall back to the post id so the name can still be built
  # and posts from the same day do not overwrite each other.
  name = filetitle.to_s.empty? ? tumblrid.to_s : filetitle
  File.open(date+"-"+name+"."+format, "w") do |file|
    file << "---\n"
    file << "layout: page\n"
    file << "type: #{type}\n"
    file << "title: #{yaml_scalar(title)}\n" # Jekyll will use file title if this is blank.
    # Certain special meta data dependent on file type
    case type
    when "photo"
      file << "photo: #{yaml_scalar(meta1)}\n"
      file << "click-through: #{yaml_scalar(meta2)}\n"
    when "quote"
      file << "quote: #{yaml_scalar(meta1)}\n"
    when "link"
      file << "link: #{yaml_scalar(meta1)}\n"
    when "video"
      file << "video: #{yaml_scalar(meta1)}\n"
    end
    file << "categories: \n"
    categories.each do |cat|
      file << "- #{yaml_scalar(cat)}\n"
    end
    file << "---\n"
    file << body.to_s
    # The tumblr id is not stored in the file itself.
    # You might want to include it as YAML front matter though.
  end
end
# Opens +url+ through open-uri and returns the resulting IO. Kernel#open no
# longer accepts URLs from Ruby 3.0 on, so use URI.open where open-uri provides
# it and only fall back to plain open on older Rubies.
fetch = lambda { |url| URI.respond_to?(:open) ? URI.open(url) : open(url) }

# Returns the text of the first node under +node+ matching +path+, or nil when
# there is no such node (captions, titles and the like are not always present).
text_at = lambda do |node, path|
  match = node.xpath(path)[0]
  match && match.content
end

# Have to do initial api page load to find total number of posts
doc = Nokogiri::HTML(fetch.call('http://'+tumblrdomain+'/api/read'))
# Find total number of posts
posts_node = doc.xpath('//posts')[0]
raise "No <posts> element in the API response from #{tumblrdomain}" if posts_node.nil?
total = posts_node['total']

# Posts are requested 50 at a time
(total.to_i/50.0).ceil.times do |i|
  start = 50*i # Since i is 0 to start with
  doc = Nokogiri::HTML(fetch.call('http://'+tumblrdomain+'/api/read?&filter=none&num=50&start='+start.to_s))
  doc.xpath('//post').each do |post|
    tumblrid = post['id']
    date = post['date-gmt'].split(' ')[0]
    filetitle = post['slug']
    format = post['format']
    type = post['type']
    type = "text" if type == "regular" # Tumblr reports text posts as "regular"
    categories = post.xpath('.//tag').map { |tag| tag.content }
    title = nil # For posts without
    meta1 = nil
    meta2 = nil
    body = nil # Sometimes just a video or photo and no description.
    case type
    when "text"
      body = text_at.call(post, './/regular-body')
      title = text_at.call(post, './/regular-title') # There doesn't have to be a title
    when "photo"
      # For the most part will still have to manually fix these, i.e Change from Tumblr hosted media to Flickr, etc
      body = text_at.call(post, './/photo-caption') # There isn't always a description
      meta1 = text_at.call(post, './/photo-url') # photo url
      meta2 = text_at.call(post, './/photo-link-url') # click through
      # Download the photo into images/, named after the last segment of its url
      if meta1
        FileUtils.mkdir_p 'images'
        image = fetch.call(meta1)
        begin
          File.open("images/"+meta1.split("/")[-1], 'wb') { |f| f.write(image.read) }
        ensure
          image.close
        end
      end
      # Will still have to fix links, since even relative images still going to want to rename, etc.
    when "quote"
      body = text_at.call(post, './/quote-source')
      meta1 = text_at.call(post, './/quote-text')
    when "link"
      body = text_at.call(post, './/link-description')
      meta1 = text_at.call(post, './/link-url')
      title = text_at.call(post, './/link-text') # There doesn't have to be a title
    when "chat"
      # I only have one Chat post on Tumblr. I have no idea how to implement a Chat style post in
      # Jekyll / Liquid so I won't have any of these any more.
    when "audio"
      # I only have one or two audio posts. Again, no idea how to implement on Jekyll (I guess will be
      # very similar to video post?) so for now just pull any text, no links.
      body = text_at.call(post, './/audio-caption')
    when "video"
      # Will likely have to fix lots of these as well, I only ever linked to externally hosted videos so
      # no need to download files
      body = text_at.call(post, './/video-caption')
      meta1 = text_at.call(post, './/video-source')
    end
    writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment