Created
March 22, 2011 10:19
-
-
Save atomicules/881023 to your computer and use it in GitHub Desktop.
Some Ruby scripts I used to import/convert posts from various sources to Jekyll.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to convert a directory of Nanoblogger posts to Jekyll
#
# Nanoblogger is a command line, static blogging app, not that
# dissimilar to Jekyll: http://nanoblogger.sourceforge.net/
#
# It's been years since I've used it though, but the below script
# worked for me in converting the files to Jekyll.

# Converts one Nanoblogger post in place and renames it for Jekyll.
#
# The Nanoblogger header (everything up to and including the "BODY:" line)
# and footer (the "END-----" line and everything after it) are removed, Jekyll
# YAML front matter is written in their place, and the file is renamed from
# "<date>T<time>.txt" to "<date>-<title>.html".
#
# f        - name of the Nanoblogger post file
# category - value written to the "categories" front matter key
#
# Returns the new file name, or nil when the file has no "BODY:" line (the
# file is then left untouched and a warning is printed).
def convert_nanoblogger_post(f, category = "code")
  # Drop line terminators up front so the markers match whether or not the
  # file ends with a line break, and so each line is written back with
  # exactly one newline.
  lines = IO.readlines(f).map { |l| l.chomp }

  # Need to read file to find title
  title_line = lines.find { |l| l =~ /TITLE/ }
  title = title_line ? title_line[6..-1].to_s.strip : ''

  # Look the markers up explicitly: a nil index would build an endless or
  # beginless range on current Rubies and silently slice away the whole post.
  body_marker = lines.index("BODY:")
  unless body_marker
    warn "Skipping #{f}: no BODY: line found"
    return nil
  end
  lines.slice!(0..body_marker) # Remove Nanoblogger front matter
  end_marker = lines.index("END-----")
  lines.slice!(end_marker..-1) if end_marker # Remove Nanoblogger end matter

  # Add in Jekyll Yaml Front Matter
  # !Important note to self. I had already split the archive of posts into subfolders based
  # on category. So this script would be run on each subfolder and the category set
  # per run.
  lines.unshift("---", "layout: page", "type: text", "title: #{title}", "categories: #{category}", "---")

  # Replace data in file
  File.open(f, "w") do |file|
    lines.each { |l| file << l << "\n" }
  end

  # Then rename file based on title
  slug = "-" + title.gsub(/\s/, "-") # replace spaces, any other dodgy characters can manually fix
  # Block form so backslashes in the title are not read as back-references
  newf = f.gsub(/T.*/) { slug + ".html" }
  newf = newf.gsub(/[\\\/:*?"<>|]/, "_") # Windows safe filenames
  File.rename(f, newf)
  newf
end

Dir['*.txt'].each do |f|
  convert_nanoblogger_post(f)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to extract posts from a Rails database dump to Jekyll
#
# Hence it's quite specific to the setup I had and probably of
# little use to anyone else.
sql = IO.readlines('newsite_production_dump 11.07.2009.sql')
#temp = sql.select{|v| v =~ /INSERT INTO `posts`/}[0]
#sql.index(temp) # Next line is "VALUES", then data starts until?
# Might as well just manually identify relevant line numbers
sql[61..88].each do |post| # The line numbers in the sql dump that contain the posts.
  # Each row is split on the ',' sequence that separates quoted values.
  bit = post.split(/','/)
  title = bit[0].match(/'.*/)[0][1..-1] # Starting from single quotation mark, but then removing that
  # NOTE(review): the body is taken from two adjacent fields; presumably that
  # matches the column layout of this particular dump - confirm against it.
  body = (bit[1] + bit[2]).gsub(/\\r\\n/, '').gsub(/\\/, '') # drop escaped line breaks, then remaining backslashes
  date = bit[3].split(' ')[0] # Lazy, but works
  # Titles can contain characters that are not allowed in file names (question
  # marks on Windows, slashes everywhere), so swap them for underscores.
  filename = "#{date}-#{title.gsub(/\s/, '-')}.html".gsub(/[\\\/:*?"<>|]/, "_")
  # write somewhere
  File.open(filename, "w") do |file|
    file << "---\n"
    file << "layout: page\n"
    file << "type: text\n"
    file << "title: #{title}\n"
    file << "categories: \n" # Will have to fill these in manually, later
    file << "---\n"
    file << body
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to download Tumblr posts to Jekyll Format | |
# | |
# Note this is somewhat specific to how I intend to use Jekyll, i.e. basically trying to have different | |
# post "types" in the same way Tumblr does and have special meta data in the Yaml front matter (See | |
# https://github.com/i5m/i5m.github.com), but the script could be easily adapted to suit.
require 'rubygems' # 1.8.7 just because I happened to already have gems installed | |
require 'open-uri' | |
require 'nokogiri' | |
require 'fileutils' | |
tumblrdomain = "i5m.co.uk" # Or "i5m.tumblr.com", etc | |
# Formats a value for use as a YAML front matter scalar.
#
# value - any object; nil becomes an empty string
#
# Returns the value as text. It is wrapped in double quotes (with backslashes,
# double quotes and line breaks escaped) only when the plain text could be
# misread as YAML: it contains ": " or " #", a line break, leading or trailing
# whitespace, a trailing colon, or starts with a YAML indicator character.
# Anything else, including ordinary URLs, is returned unchanged.
def yaml_scalar(value)
  text = value.to_s
  risky = /\A[\s\-?:,\[\]{}#&*!|>'"%@`]|:\s|\s#|:\z|\s\z|[\r\n]/
  return text unless text =~ risky
  escaped = text.gsub(/[\\"\r\n]/) do |ch|
    case ch
    when "\n" then '\n'
    when "\r" then '\r'
    else "\\" + ch
    end
  end
  '"' + escaped + '"'
end

# Writes one post as "<date>-<filetitle>.<format>" in the current directory,
# with Jekyll YAML front matter followed by the post body.
#
# tumblrid   - Tumblr post id; only used for the file name when filetitle is blank
# date       - date string used as the file name prefix
# filetitle  - slug for the file name (may be nil or empty)
# format     - file extension
# type       - post type, written to the "type" key; also selects the extra keys
# categories - list of category names (nil is treated as empty)
# body       - post body written after the front matter (may be nil)
# title      - post title (may be nil)
# meta1      - photo url / quote text / link url / video source, depending on type
# meta2      - click-through url for photo posts
def writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
  # Can only have one optional argument in Ruby 1.8 so will pass nils instead
  # A post without a slug would otherwise raise (nil) or be written as
  # "<date>-.<format>" (empty), so fall back to the Tumblr id.
  name = filetitle.to_s.empty? ? tumblrid.to_s : filetitle
  File.open("#{date}-#{name}.#{format}", "w") do |file|
    file << "---\n"
    file << "layout: page\n"
    file << "type: #{type}\n"
    file << "title: #{yaml_scalar(title)}\n" # Jekyll will use file title if this is blank.
    # Certain special meta data dependent on file type
    case type
    when "photo"
      file << "photo: #{yaml_scalar(meta1)}\n"
      file << "click-through: #{yaml_scalar(meta2)}\n"
    when "quote"
      file << "quote: #{yaml_scalar(meta1)}\n"
    when "link"
      file << "link: #{yaml_scalar(meta1)}\n"
    when "video"
      file << "video: #{yaml_scalar(meta1)}\n"
    end
    file << "categories: \n"
    (categories || []).each do |cat|
      file << "- #{yaml_scalar(cat)}\n"
    end
    file << "---\n"
    file << body.to_s
    # Ooops, forgot to put tumblr id anywhere. Nevermind.
    # You might want to include this as YAML front matter though
  end
end
# Returns the text content of the first node matching +path+ below +node+,
# or nil when there is no such node. Every field goes through this so that a
# post lacking an element (no title, no caption, ...) is not a crash.
node_text = lambda do |node, path|
  match = node.xpath(path)[0]
  match && match.content
end

# Have to do initial api page load to find total number of posts.
# URI.open is used because plain open() stopped opening URLs in Ruby 3.0;
# the block form closes the connection once the page has been parsed.
doc = URI.open('http://'+tumblrdomain+'/api/read') { |page| Nokogiri::HTML(page) }
# Find total number of posts
total = doc.xpath('//posts')[0]['total']

# Posts are requested 50 at a time
(total.to_i/50.0).ceil.times do |i|
  start = 50*i # Since i is 0 to start with
  url = 'http://'+tumblrdomain+'/api/read?&filter=none&num=50&start='+start.to_s
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }
  doc.xpath('//post').each do |post|
    tumblrid = post['id']
    date = post['date-gmt'].split(' ')[0]
    filetitle = post['slug']
    format = post['format']
    type = post['type']
    type = "text" if type == "regular" #Damn Tumblr!
    categories = []
    post.xpath('.//tag').each { |tag| categories << tag.content }
    title = nil # For posts without
    meta1 = nil
    meta2 = nil
    body = nil # Sometimes just a video or photo and no description.
    case type
    when "text"
      body = node_text.call(post, './/regular-body')
      title = node_text.call(post, './/regular-title')
    when "photo"
      # For the most part will still have to manually fix these, i.e Change from Tumblr hosted media to Flickr, etc
      body = node_text.call(post, './/photo-caption') # There isn't always a description
      meta1 = node_text.call(post, './/photo-url') # photo url
      meta2 = node_text.call(post, './/photo-link-url') # click through
      # Download the photo into ./images, keeping the last segment of its URL as the name
      if meta1
        FileUtils.mkdir_p 'images' # no-op when the directory already exists
        File.open("images/"+meta1.split("/")[-1], 'wb') do |image|
          URI.open(meta1) { |remote| image.write(remote.read) }
        end
      end
      # Will still have to fix links, since even relative images still going to want to rename, etc.
    when "quote"
      body = node_text.call(post, './/quote-source')
      meta1 = node_text.call(post, './/quote-text')
    when "link"
      body = node_text.call(post, './/link-description')
      meta1 = node_text.call(post, './/link-url')
      title = node_text.call(post, './/link-text') # There doesn't have to be a title
    when "chat"
      # I only have one Chat post on Tumblr. I have no idea how to implement a Chat style post in
      # Jekyll / Liquid so I won't have any of these any more.
    when "audio"
      # I only have one or two audio posts. Again, no idea how to implement on Jekyll (I guess will be
      # very similar to video post?) so for now just pull any text, no links.
      body = node_text.call(post, './/audio-caption')
    when "video"
      # Will likely have to fix lots of these as well, I only ever linked to externally hosted videos so
      # no need to download files
      body = node_text.call(post, './/video-caption')
      meta1 = node_text.call(post, './/video-source')
    end
    writepost(tumblrid, date, filetitle, format, type, categories, body, title, meta1, meta2)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment