Skip to content

Instantly share code, notes, and snippets.

@pedrovanzella
Last active December 15, 2015 14:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pedrovanzella/5278076 to your computer and use it in GitHub Desktop.
Save pedrovanzella/5278076 to your computer and use it in GitHub Desktop.
Imports Octopress Markdown posts into the more standard Markdown format preferred by Pelican. Also imports images and rewrites their links accordingly.
# encoding: utf-8
#!/usr/bin/env ruby
require 'fileutils'
#
# import_from_octopress.rb
# Imports Jekyll / Octopress formatted blogposts into pelican.
# Author: Pedro Vanzella <pedro@pedrovanzella.com>
#
#=============== EXAMPLE FROM OCTOPRESS ====================
#
# ---
# layout: post
# title: "Title"
# date: "YYYY-MM-DD HH:mm"
# comments: true
# categories: [A, B, C]
# ---
#
# Text in markdown goes here
#
#=============== END EXAMPLE ===============================
#=============== EXAMPLE FROM PELICAN ======================
#
# Title: Title
# Date: YYYY-MM-DD HH:mm
# Tags: a, b, c
# Category: Single
# Slug: title
# Author: My Name
# Summary: For indexing and feeds
#
# Text in Markdown goes here
#
# =============== END EXAMPLE ==============================
class String
  # Terminal colour helpers.
  #
  # Each generated method returns a copy of the receiver wrapped in the ANSI
  # escape sequence listed below and terminated by the reset sequence.
  {
    red:        "\e[1m\e[31m",
    dark_red:   "\e[31m",
    green:      "\e[1m\e[32m",
    dark_green: "\e[32m",
    yellow:     "\e[1m\e[33m",
    blue:       "\e[1m\e[34m",
    dark_blue:  "\e[34m",
    pur:        "\e[1m\e[35m"
  }.each do |colour_name, escape_sequence|
    define_method(colour_name) { colorize(self, escape_sequence) }
  end

  # Wraps +text+ in +color_code+ and appends the ANSI "reset attributes" code.
  def colorize(text, color_code)
    "#{color_code}#{text}\e[0m"
  end
end
# Prints the command-line synopsis to standard output: a bright red "[-]"
# marker followed by the invocation pattern in dark red.
def usage
  marker   = "[-]".red
  synopsis = " ruby import_from_octopress.rb <source dir> [<destination dir>]".dark_red
  puts "#{marker}#{synopsis}"
end
# The source directory is mandatory: without a first argument, show the
# synopsis and stop before anything is read or written.
unless ARGV.first
  usage
  exit
end
# Name written to the "Author:" header of every converted post.
AUTHOR = "Pedro Vanzella"

# First CLI argument: the Octopress tree that contains the "_posts" directory
# and against which local image paths are resolved.
source_dir = ARGV[0]
# Second CLI argument (optional): where converted posts are written.
# Defaults to a "content" directory next to this script.
destination_dir = ARGV[1] || File.expand_path("../content", __FILE__)

posts_source = source_dir + "/_posts"
images_destination = destination_dir + "/images"

puts "Source: #{posts_source}"
puts "Destination: #{destination_dir}"
puts "Image Destination: #{images_destination}"

unless File.directory?(images_destination)
  puts "[+] Creating image directory: ".green + images_destination
  # mkdir_p also creates the destination directory itself when it is missing;
  # a plain Dir.mkdir raises Errno::ENOENT on a fresh checkout where the
  # destination ("content" by default) does not exist yet.
  FileUtils.mkdir_p(images_destination)
end
# ------------------------------------------------------------------------------
# Helpers used by the conversion loop below
# ------------------------------------------------------------------------------

# Returns the text following "<key>: " on the key's own front-matter line, or
# nil when there is no such line.
#
# headers - the front-matter block as a String
# key     - the front-matter key, e.g. "title"
#
# The match is anchored to the start of a line so that, for instance,
# "subtitle: x" is not mistaken for "title: x" nor "update: x" for "date: x".
def front_matter_value(headers, key)
  headers[/^[ \t]*#{Regexp.escape(key)}: (.*)/, 1]
end

# Returns the list stored under +key+ in the front matter as an Array of
# Strings (possibly empty). Three notations are understood:
#
#   key: [a, b, c]     bracketed, comma separated
#   key: a b c         whitespace separated (stray commas are dropped)
#   key:               followed by "- a" / "- b" lines
#
# When the key is missing or has no inline value, every "- item" line found in
# the front matter is used. The front matter is not parsed as YAML, so such
# lines cannot be attributed to a particular key; categories and tags
# therefore share them.
def front_matter_list(headers, key)
  value = front_matter_value(headers, key)
  items =
    if value.nil? || value.strip.empty?
      headers.scan(/^-\s(.+)$/).flatten
    elsif value.include?("[")
      # Split on the comma itself so "[a,b]" (no space) yields two items.
      value.delete("[]").split(/\s*,\s*/)
    else
      value.split(" ")
    end
  items.map { |item| item.delete(",").strip }.reject(&:empty?)
end

# Copies (local path) or downloads (http/https URL) one image into the
# per-post image directory and returns the Markdown image tag that should
# replace the original reference.
#
# image_name         - path or URL exactly as written in the post
# slug               - slug of the post; names the per-post image directory
# source_dir         - Octopress tree that local paths are resolved against
# images_destination - directory under which per-post image directories live
#
# Raises whatever FileUtils.copy raises when a local image cannot be copied.
def import_image(image_name, slug, source_dir, images_destination)
  puts "Image Name: ".blue + image_name
  basename = File.basename(image_name)
  post_images_dir = images_destination + "/" + slug + "/"
  unless File.directory?(post_images_dir)
    puts "[+] Creating image directory for this post: ".green + post_images_dir
    FileUtils.mkdir_p(post_images_dir)
  end
  destination_image = post_images_dir + basename

  if image_name =~ %r{\Ahttps?://}
    puts "[+] We have a URL here: ".green + image_name
    puts "curl #{image_name} -o #{destination_image}"
    # Argument-list form: no shell is involved, so characters such as "&" or
    # "?" in the URL cannot break (or inject into) the command line.
    unless system("curl", image_name, "-o", destination_image)
      puts "[-] WARNING: Could not download ".red + image_name
    end
  else
    puts "[+] This is a local image".green
    full_image_path = source_dir + "/" + image_name
    puts "[+] Copying image file ".green + full_image_path
    puts "\t To: ".green + destination_image
    FileUtils.copy(full_image_path, destination_image)
  end

  "![#{basename}](/static/images/#{slug}/#{basename})"
end

# ------------------------------------------------------------------------------
# Convert every post found in the Octopress "_posts" directory
# ------------------------------------------------------------------------------
Dir.glob("#{posts_source}/*.{md,markdown}").each do |file|
  puts "Reading ".blue + file.dark_green
  # Block form so the file handle is closed as soon as the content is read.
  content = File.open(file, "r:UTF-8") { |f| f.read }

  # Front matter = everything between the FIRST pair of "---" fences. The lazy
  # quantifier matters: a greedy match would run up to the last "---" line in
  # the post (e.g. a Markdown horizontal rule) and swallow body text.
  headers = content[/^-{3}$.*?^-{3}$/m].to_s

  title = front_matter_value(headers, "title").to_s.delete('"').strip
  date = front_matter_value(headers, "date").to_s.delete("\"'").strip
  categories = front_matter_list(headers, "categories")
  tags = front_matter_list(headers, "tags")

  # File name without its extension (.md or .markdown) and without the
  # leading "YYYY-MM-DD-" date prefix.
  slug = File.basename(file).sub(/\.(?:md|markdown)\z/, "").sub(/\A\d{4}-\d{2}-\d{2}-/, "")

  text = content.sub(headers, "")

  # Octopress codeblock tags -> fenced code blocks. Lazy match so that several
  # code blocks in one post are converted individually instead of being merged
  # into a single block.
  text.gsub!(/\{% codeblock lang:([a-zA-Z0-9]*) %\}(.*?)\{% endcodeblock %\}/m, "```\\1\\2```\n\n")

  # Markdown images: ![alt](path "optional title")
  text.gsub!(/!\[(.*?)\]\((\S*?)\s*(\".*?\")*\)/) do |match|
    image_name = Regexp.last_match(2).to_s
    if image_name.empty?
      match # nothing to import; leave the reference untouched
    else
      import_image(image_name, slug, source_dir, images_destination)
    end
  end

  # Octopress image tags: {% img /path/to/image [anything else] %}
  # The trailing arguments are optional and matched lazily, so a tag without a
  # title is converted too and two tags on one line stay separate.
  # NOTE(review): the first argument is taken as the image, so a tag that
  # starts with CSS class names is not handled - confirm whether any post
  # uses that form.
  text.gsub!(/\{%\s*img\s+(\S+)(?:\s+.*?)?\s*%\}/) do
    import_image(Regexp.last_match(1), slug, source_dir, images_destination)
  end

  puts
  output_file = "#{destination_dir}/#{File.basename(file).sub(/\.markdown\z/, ".md")}"
  puts "Writing ".blue + output_file.dark_green
  File.open(output_file, "w:UTF-8") do |f|
    f.puts "Title: " + title
    f.puts "Date: " + date

    if categories.empty?
      puts "[-] WARNING: Missing category. Using 'General'.".red
      category = "General"
    else
      category = categories.first.capitalize
    end
    f.puts "Category: " + category

    # Categories were used as tags on the old blog, so both lists are merged.
    # Joining the de-duplicated list keeps the line well formed even when the
    # last entry repeats an earlier one, and tags are still written when the
    # post has no category.
    all_tags = (tags + categories).map(&:capitalize).uniq
    f.puts "Tags: " + all_tags.join(", ") unless all_tags.empty?

    f.puts "Slug: " + slug
    f.puts "Author: " + AUTHOR
    f.puts # Blank line separating the metadata from the body
    f.puts text
  end
  puts "==================================================================================".yellow
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment