# atomicules/nanoblogger2jekyll.rb

## nanoblogger2jekyll.rb
# Script to convert a directory of Nanoblogger posts to Jekyll
#
# Nanoblogger is a command line, static blogging app, not that
# dissimilar to Jekyll: http://nanoblogger.sourceforge.net/
#
# It's been years since I've used it though, but the below script
# worked for me in converting the files to Jekyll.

# Returns the title of a Nanoblogger post: the text following "TITLE:" on the
# first line that starts with it, stripped of surrounding whitespace.
# Returns '' when no line starts with "TITLE:".
def nanoblogger_title(lines)
	header = lines.find { |l| l =~ /\ATITLE:/ }
	header ? header[6..-1].strip : ''
end

# Returns the lines strictly between the "BODY:" marker and the next
# "END-----" marker, with line terminators removed, or nil when either
# marker is missing.
#
# Lines are chomped before comparing, so "\n", "\r\n" and a final line
# without any line break are all recognised the same way.
def nanoblogger_body(lines)
	text = lines.map { |l| l.chomp }
	body_at = text.index("BODY:")
	return nil if body_at.nil?
	rest = text[(body_at + 1)..-1]
	end_at = rest.index("END-----")
	return nil if end_at.nil?
	rest[0, end_at]
end

# Converts the lines of one Nanoblogger post into Jekyll form.
#
# lines    - Array of Strings as returned by IO.readlines.
# category - value written to the "categories:" front matter entry.
#
# Returns [title, output_lines], where output_lines is the Jekyll YAML front
# matter followed by the post body (no line terminators), or nil when the
# BODY:/END----- markers cannot be found.
def nanoblogger_to_jekyll(lines, category = "code")
	body = nanoblogger_body(lines)
	return nil if body.nil?
	title = nanoblogger_title(lines)
	front_matter = ["---", "layout: page", "type: text", "title: #{title}", "categories: #{category}", "---"]
	[title, front_matter + body]
end

# Builds the Jekyll file name for Nanoblogger file +f+: everything from the
# first "T" in the name onwards is replaced by "-<title>.html", with
# whitespace in the title turned into "-" and characters that Windows does
# not allow in file names turned into "_".
def jekyll_filename(f, title)
	slug = "-" + title.gsub(/\s/, "-")
	# Block form so that backslashes in the title are not read as back-references
	f.sub(/T.*/) { slug + ".html" }.gsub(/[\\\/:*?"<>|]/, "_")
end

# !Important note to self. I had already split the archive of posts into subfolders based
# on category. So this script would be run on each subfolder and the category manually set
# (it is the default of the category argument of nanoblogger_to_jekyll above).
Dir['*.txt'].each do |f|
	title, converted = nanoblogger_to_jekyll(IO.readlines(f))
	if converted.nil?
		# Never overwrite a file that could not be parsed
		warn "#{f}: no BODY:/END----- markers found, leaving file untouched"
		next
	end
	# Replace data in file
	File.open(f, "w") do |file|
		converted.each { |l| file << l << "\n" }
	end
	# Then rename file based on title
	File.rename(f, jekyll_filename(f, title))
end

## railssql2jekyll.rb
# Script to extract posts from a Rails database dump to Jekyll
#
# Hence it's quite specific to the setup I had and probably of
# little use to anyone else.

sql = IO.readlines('newsite_production_dump 11.07.2009.sql')
#temp  = sql.select{|v| v =~ /INSERT INTO `posts`/}[0]
#sql.index(temp) # Next line is "VALUES", then data starts until?
# Might as well just manually identify relevant line numbers

# sql[61..88] are the (0-based) lines of the dump that contain the posts,
# identified by hand. A dump with fewer lines yields nil there, hence `|| []`.
(sql[61..88] || []).each do |post|
	# Fields of a row are separated by ',' (quote, comma, quote)
	bit = post.split(/','/)
	# Title: everything after the first single quotation mark of the first field
	quoted = bit[0] && bit[0][/'.*/]
	if quoted.nil? || bit.length < 4
		warn "Skipping line that does not look like a post row: #{post[0, 60].inspect}"
		next
	end
	title = quoted[1..-1]
	# Body is the second and third fields joined, with literal \r\n sequences
	# and any remaining backslashes (SQL escaping) removed
	body = (bit[1] + bit[2]).gsub(/\\r\\n/, '').gsub(/\\/, '')
	date = bit[3].split(' ')[0] # Lazy, but works
	# Whitespace in the title becomes "-"; characters Windows does not allow in
	# file names (question marks, slashes, ...) become "_", as in the
	# Nanoblogger script.
	filename = "#{date}-#{title.gsub(/\s/, '-')}.html".gsub(/[\\\/:*?"<>|]/, "_")
	File.open(filename, "w") do |file|
		file << "---\n"
		file << "layout: page\n"
		file << "type: text\n"
		file << "title: #{title}\n"
		file << "categories: \n" # Will have to fill these in manually, later
		file << "---\n"
		file << body
	end
end

## tumblr2jekyll.rb
# Script to download Tumblr posts to Jekyll Format
#
# Note this is somewhat specific to how I intend to use Jekyll, i.e. basically trying to have different
# post "types" in the same way Tumblr does and have special meta data in the Yaml front matter (See
# https://github.com/i5m/i5m.github.com), but the script could be easily adapted to suit.

require 'rubygems' # 1.8.7 just because I happened to already have gems installed
require 'open-uri'
require 'nokogiri'
require 'fileutils'

tumblrdomain = "i5m.co.uk" # Or "i5m.tumblr.com", etc

# Writes one post as a Jekyll file named "<date>-<filetitle>.<format>" in the
# current directory: YAML front matter followed by the body.
#
# tumblrid   - Tumblr post id; used as the file title when filetitle is nil
#              or empty, so that untitled posts from the same day do not
#              overwrite each other. It is not written into the file.
# date       - date String that prefixes the file name.
# filetitle  - slug used in the file name.
# format     - file extension.
# type       - post type, written as "type:"; also selects the extra entries.
# categories - Array written as a YAML list under "categories:".
# body       - text written after the front matter; nil writes nothing.
# title      - written as "title:"; Jekyll will use the file title if blank.
# meta1      - type-specific value: written as "photo:", "quote:", "link:"
#              or "video:" for those types, ignored for any other type.
# meta2      - written as "click-through:" for photo posts only.
#
# Can only have one optional argument in Ruby 1.8 so callers pass nils instead.
# Values are written as-is, without YAML quoting or escaping.
def writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
	slug = filetitle.to_s.empty? ? tumblrid.to_s : filetitle
	front = ["---", "layout: page", "type: #{type}", "title: #{title}"]
	# Certain special meta data dependent on post type
	case type
	when "photo"
		front << "photo: #{meta1}" << "click-through: #{meta2}"
	when "quote", "link", "video"
		front << "#{type}: #{meta1}"
	end
	front << "categories: "
	categories.each { |cat| front << "- #{cat}" }
	front << "---"
	File.open("#{date}-#{slug}.#{format}", "w") do |file|
		file << front.join("\n") << "\n" << body
	end
end


# Returns the text content of the first node below +node+ that matches the
# XPath +path+, or nil when nothing matches (titles, captions etc. are not
# present on every post).
def tumblr_text(node, path)
	found = node.xpath(path)[0]
	found && found.content
end

# Downloads +url+ into the local images/ directory (created on demand). The
# file is named after the last "/"-separated segment of the URL.
def tumblr_save_image(url)
	# Fetch first, so a failed download does not leave an empty file behind
	data = URI.parse(url).open { |remote| remote.read }
	FileUtils.mkdir_p 'images'
	File.open("images/" + url.split("/")[-1], 'wb') { |img| img.write(data) }
end

# Have to do initial api page load to find total number of posts.
# URI#open (from open-uri) is used rather than Kernel#open, which no longer
# accepts URLs on Ruby 3.
api = 'http://' + tumblrdomain + '/api/read'
doc = Nokogiri::HTML(URI.parse(api).open)
# Find total number of posts
total = doc.xpath('//posts')[0]['total']
# Then request the posts 50 at a time
(total.to_i/50.0).ceil.times do |i|
	page = Nokogiri::HTML(URI.parse(api + '?&filter=none&num=50&start=' + (50*i).to_s).open)
	page.xpath('//post').each do |post|
		tumblrid = post['id']
		date = post['date-gmt'].split(' ')[0]
		filetitle = post['slug']
		format = post['format']
		type = post['type']
		type = "text" if type == "regular" #Damn Tumblr!
		categories = post.xpath('.//tag').map { |tag| tag.content }
		title = nil # For posts without
		meta1 = nil
		meta2 = nil
		body = nil # Sometimes just a video or photo and no description.
		case type
		when "text"
			body = tumblr_text(post, './/regular-body')
			title = tumblr_text(post, './/regular-title') # There doesn't have to be a title
		when "photo"
			# For the most part will still have to manually fix these, i.e Change from Tumblr hosted media to Flickr, etc
			body = tumblr_text(post, './/photo-caption') # There isn't always a description
			meta1 = tumblr_text(post, './/photo-url') # photo url
			meta2 = tumblr_text(post, './/photo-link-url') # click through
			# Keep a local copy of the photo.
			# Will still have to fix links, since even relative images still going to want to rename, etc.
			tumblr_save_image(meta1) if meta1
		when "quote"
			body = tumblr_text(post, './/quote-source')
			meta1 = tumblr_text(post, './/quote-text')
		when "link"
			body = tumblr_text(post, './/link-description')
			meta1 = tumblr_text(post, './/link-url')
			title = tumblr_text(post, './/link-text') # There doesn't have to be a title
		when "chat"
			# I only have one Chat post on Tumblr. I have no idea how to implement a Chat style post in
			# Jekyll / Liquid so I won't have any of these any more.
		when "audio"
			# I only have one or two audio posts. Again, no idea how to implement on Jekyll (I guess will be
			# very similar to video post?) so for now just pull any text, no links.
			body = tumblr_text(post, './/audio-caption')
		when "video"
			# Will likely have to fix lots of these as well, I only ever linked to externally hosted videos so
			# no need to download files
			body = tumblr_text(post, './/video-caption')
			meta1 = tumblr_text(post, './/video-source')
		end
		writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
	end
end
	# Script to convert a directory of Nanoblogger posts to Jekyll
	#
	# Nanoblogger is a command line, static blogging app, not that
	# dissimilar to Jekyll: http://nanoblogger.sourceforge.net/
	#
	# It's been years since I've used it though, but the below script
	# worked for me in converting the files to Jekyll.

	# Returns the title of a Nanoblogger post: the text following "TITLE:" on the
	# first line that starts with it, stripped of surrounding whitespace.
	# Returns '' when no line starts with "TITLE:".
	def nanoblogger_title(lines)
		header = lines.find { |l| l =~ /\ATITLE:/ }
		header ? header[6..-1].strip : ''
	end

	# Returns the lines strictly between the "BODY:" marker and the next
	# "END-----" marker, with line terminators removed, or nil when either
	# marker is missing.
	#
	# Lines are chomped before comparing, so "\n", "\r\n" and a final line
	# without any line break are all recognised the same way.
	def nanoblogger_body(lines)
		text = lines.map { |l| l.chomp }
		body_at = text.index("BODY:")
		return nil if body_at.nil?
		rest = text[(body_at + 1)..-1]
		end_at = rest.index("END-----")
		return nil if end_at.nil?
		rest[0, end_at]
	end

	# Converts the lines of one Nanoblogger post into Jekyll form.
	#
	# lines    - Array of Strings as returned by IO.readlines.
	# category - value written to the "categories:" front matter entry.
	#
	# Returns [title, output_lines], where output_lines is the Jekyll YAML front
	# matter followed by the post body (no line terminators), or nil when the
	# BODY:/END----- markers cannot be found.
	def nanoblogger_to_jekyll(lines, category = "code")
		body = nanoblogger_body(lines)
		return nil if body.nil?
		title = nanoblogger_title(lines)
		front_matter = ["---", "layout: page", "type: text", "title: #{title}", "categories: #{category}", "---"]
		[title, front_matter + body]
	end

	# Builds the Jekyll file name for Nanoblogger file +f+: everything from the
	# first "T" in the name onwards is replaced by "-<title>.html", with
	# whitespace in the title turned into "-" and characters that Windows does
	# not allow in file names turned into "_".
	def jekyll_filename(f, title)
		slug = "-" + title.gsub(/\s/, "-")
		# Block form so that backslashes in the title are not read as back-references
		f.sub(/T.*/) { slug + ".html" }.gsub(/[\\\/:*?"<>|]/, "_")
	end

	# !Important note to self. I had already split the archive of posts into subfolders based
	# on category. So this script would be run on each subfolder and the category manually set
	# (it is the default of the category argument of nanoblogger_to_jekyll above).
	Dir['*.txt'].each do |f|
		title, converted = nanoblogger_to_jekyll(IO.readlines(f))
		if converted.nil?
			# Never overwrite a file that could not be parsed
			warn "#{f}: no BODY:/END----- markers found, leaving file untouched"
			next
		end
		# Replace data in file
		File.open(f, "w") do |file|
			converted.each { |l| file << l << "\n" }
		end
		# Then rename file based on title
		File.rename(f, jekyll_filename(f, title))
	end
	# Script to extract posts from a Rails database dump to Jekyll
	#
	# Hence it's quite specific to the setup I had and probably of
	# little use to anyone else.

	sql = IO.readlines('newsite_production_dump 11.07.2009.sql')
	#temp = sql.select{|v| v =~ /INSERT INTO `posts`/}[0]
	#sql.index(temp) # Next line is "VALUES", then data starts until?
	# Might as well just manually identify relevant line numbers

	# sql[61..88] are the (0-based) lines of the dump that contain the posts,
	# identified by hand. A dump with fewer lines yields nil there, hence `|| []`.
	(sql[61..88] || []).each do |post|
		# Fields of a row are separated by ',' (quote, comma, quote)
		bit = post.split(/','/)
		# Title: everything after the first single quotation mark of the first field
		quoted = bit[0] && bit[0][/'.*/]
		if quoted.nil? || bit.length < 4
			warn "Skipping line that does not look like a post row: #{post[0, 60].inspect}"
			next
		end
		title = quoted[1..-1]
		# Body is the second and third fields joined, with literal \r\n sequences
		# and any remaining backslashes (SQL escaping) removed
		body = (bit[1] + bit[2]).gsub(/\\r\\n/, '').gsub(/\\/, '')
		date = bit[3].split(' ')[0] # Lazy, but works
		# Whitespace in the title becomes "-"; characters Windows does not allow in
		# file names (question marks, slashes, ...) become "_", as in the
		# Nanoblogger script.
		filename = "#{date}-#{title.gsub(/\s/, '-')}.html".gsub(/[\\\/:*?"<>|]/, "_")
		File.open(filename, "w") do |file|
			file << "---\n"
			file << "layout: page\n"
			file << "type: text\n"
			file << "title: #{title}\n"
			file << "categories: \n" # Will have to fill these in manually, later
			file << "---\n"
			file << body
		end
	end
	# Script to download Tumblr posts to Jekyll Format
	#
	# Note this is somewhat specific to how I intend to use Jekyll, i.e. basically trying to have different
	# post "types" in the same way Tumblr does and have special meta data in the Yaml front matter (See
	# https://github.com/i5m/i5m.github.com), but the script could be easily adapted to suit.

	require 'rubygems' # 1.8.7 just because I happened to already have gems installed
	require 'open-uri'
	require 'nokogiri'
	require 'fileutils'

	tumblrdomain = "i5m.co.uk" # Or "i5m.tumblr.com", etc

	# Writes one post as a Jekyll file named "<date>-<filetitle>.<format>" in the
	# current directory: YAML front matter followed by the body.
	#
	# tumblrid   - Tumblr post id; used as the file title when filetitle is nil
	#              or empty, so that untitled posts from the same day do not
	#              overwrite each other. It is not written into the file.
	# date       - date String that prefixes the file name.
	# filetitle  - slug used in the file name.
	# format     - file extension.
	# type       - post type, written as "type:"; also selects the extra entries.
	# categories - Array written as a YAML list under "categories:".
	# body       - text written after the front matter; nil writes nothing.
	# title      - written as "title:"; Jekyll will use the file title if blank.
	# meta1      - type-specific value: written as "photo:", "quote:", "link:"
	#              or "video:" for those types, ignored for any other type.
	# meta2      - written as "click-through:" for photo posts only.
	#
	# Can only have one optional argument in Ruby 1.8 so callers pass nils instead.
	# Values are written as-is, without YAML quoting or escaping.
	def writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
		slug = filetitle.to_s.empty? ? tumblrid.to_s : filetitle
		front = ["---", "layout: page", "type: #{type}", "title: #{title}"]
		# Certain special meta data dependent on post type
		case type
		when "photo"
			front << "photo: #{meta1}" << "click-through: #{meta2}"
		when "quote", "link", "video"
			front << "#{type}: #{meta1}"
		end
		front << "categories: "
		categories.each { |cat| front << "- #{cat}" }
		front << "---"
		File.open("#{date}-#{slug}.#{format}", "w") do |file|
			file << front.join("\n") << "\n" << body
		end
	end


	# Returns the text content of the first node below +node+ that matches the
	# XPath +path+, or nil when nothing matches (titles, captions etc. are not
	# present on every post).
	def tumblr_text(node, path)
		found = node.xpath(path)[0]
		found && found.content
	end

	# Downloads +url+ into the local images/ directory (created on demand). The
	# file is named after the last "/"-separated segment of the URL.
	def tumblr_save_image(url)
		# Fetch first, so a failed download does not leave an empty file behind
		data = URI.parse(url).open { |remote| remote.read }
		FileUtils.mkdir_p 'images'
		File.open("images/" + url.split("/")[-1], 'wb') { |img| img.write(data) }
	end

	# Have to do initial api page load to find total number of posts.
	# URI#open (from open-uri) is used rather than Kernel#open, which no longer
	# accepts URLs on Ruby 3.
	api = 'http://' + tumblrdomain + '/api/read'
	doc = Nokogiri::HTML(URI.parse(api).open)
	# Find total number of posts
	total = doc.xpath('//posts')[0]['total']
	# Then request the posts 50 at a time
	(total.to_i/50.0).ceil.times do |i|
		page = Nokogiri::HTML(URI.parse(api + '?&filter=none&num=50&start=' + (50*i).to_s).open)
		page.xpath('//post').each do |post|
			tumblrid = post['id']
			date = post['date-gmt'].split(' ')[0]
			filetitle = post['slug']
			format = post['format']
			type = post['type']
			type = "text" if type == "regular" #Damn Tumblr!
			categories = post.xpath('.//tag').map { |tag| tag.content }
			title = nil # For posts without
			meta1 = nil
			meta2 = nil
			body = nil # Sometimes just a video or photo and no description.
			case type
			when "text"
				body = tumblr_text(post, './/regular-body')
				title = tumblr_text(post, './/regular-title') # There doesn't have to be a title
			when "photo"
				# For the most part will still have to manually fix these, i.e Change from Tumblr hosted media to Flickr, etc
				body = tumblr_text(post, './/photo-caption') # There isn't always a description
				meta1 = tumblr_text(post, './/photo-url') # photo url
				meta2 = tumblr_text(post, './/photo-link-url') # click through
				# Keep a local copy of the photo.
				# Will still have to fix links, since even relative images still going to want to rename, etc.
				tumblr_save_image(meta1) if meta1
			when "quote"
				body = tumblr_text(post, './/quote-source')
				meta1 = tumblr_text(post, './/quote-text')
			when "link"
				body = tumblr_text(post, './/link-description')
				meta1 = tumblr_text(post, './/link-url')
				title = tumblr_text(post, './/link-text') # There doesn't have to be a title
			when "chat"
				# I only have one Chat post on Tumblr. I have no idea how to implement a Chat style post in
				# Jekyll / Liquid so I won't have any of these any more.
			when "audio"
				# I only have one or two audio posts. Again, no idea how to implement on Jekyll (I guess will be
				# very similar to video post?) so for now just pull any text, no links.
				body = tumblr_text(post, './/audio-caption')
			when "video"
				# Will likely have to fix lots of these as well, I only ever linked to externally hosted videos so
				# no need to download files
				body = tumblr_text(post, './/video-caption')
				meta1 = tumblr_text(post, './/video-source')
			end
			writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
		end
	end