Last active
December 15, 2015 14:58
-
-
Save pedrovanzella/5278076 to your computer and use it in GitHub Desktop.
Imports Octopress Markdown posts into the more standard Markdown preferred by Pelican. Also imports images and rewrites the links accordingly.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
#!/usr/bin/env ruby | |
require 'fileutils' | |
# | |
# import_from_octopress.rb | |
# Imports Jekyll / Octopress formatted blogposts into pelican. | |
# Author: Pedro Vanzella <pedro@pedrovanzella.com> | |
# | |
#=============== EXAMPLE FROM OCTOPRESS ==================== | |
# | |
# --- | |
# layout: post | |
# title: "Title" | |
# date: "YYYY-MM-DD HH:mm" | |
# comments: true | |
# categories: [A, B, C] | |
# --- | |
# | |
# Text in markdown goes here | |
# | |
#=============== END EXAMPLE =============================== | |
#=============== EXAMPLE FROM PELICAN ====================== | |
# | |
# Title: Title | |
# Date: YYYY-MM-DD HH:mm | |
# Tags: a, b, c | |
# Category: Single | |
# Slug: title | |
# Author: My Name | |
# Summary: For indexing and feeds | |
# | |
# Text in Markdown goes here | |
# | |
# =============== END EXAMPLE ============================== | |
# Terminal color helpers used for this script's progress output.
#
# Each helper returns a new string wrapped in the given ANSI escape sequence
# and terminated by the reset sequence ("\e[0m"); the receiver is not modified.
class String
  # Helper name => ANSI escape prefix. "\e[1m" switches on bold, which is what
  # distinguishes e.g. `red` from `dark_red`.
  {
    :red        => "\e[1m\e[31m",
    :dark_red   => "\e[31m",
    :green      => "\e[1m\e[32m",
    :dark_green => "\e[32m",
    :yellow     => "\e[1m\e[33m",
    :blue       => "\e[1m\e[34m",
    :dark_blue  => "\e[34m",
    :pur        => "\e[1m\e[35m"
  }.each do |helper_name, color_code|
    define_method(helper_name) { colorize(self, color_code) }
  end

  # Wraps +text+ in +color_code+ followed by the ANSI reset sequence.
  #
  # text       - the string to colorize
  # color_code - the ANSI escape sequence to prefix the text with
  #
  # Returns the colorized string.
  def colorize(text, color_code)
    "#{color_code}#{text}\e[0m"
  end
end
# Prints the command-line synopsis to standard output.
#
# Relies on the String#red and String#dark_red color helpers defined in this
# file.
def usage
  puts "[-]".red + " ruby import_from_octopress.rb <source dir> [<destination dir>]".dark_red
end
# The source directory is mandatory. Without it, show the synopsis and stop
# with a non-zero status so calling scripts can tell the import did not run.
if ARGV.empty?
  usage
  exit 1
end
# Name written to the "Author:" header of every converted post.
AUTHOR = "Pedro Vanzella"

# First argument: root of the Octopress source tree (posts are read from its
# "_posts" subdirectory).
source_dir = ARGV[0]
# Second argument (optional): where converted posts are written. Defaults to a
# "content" path resolved relative to this script file.
destination_dir = ARGV[1] || File.expand_path("../content", __FILE__)
posts_source = source_dir + "/_posts"
images_destination = destination_dir + "/images"

puts "Source: #{posts_source}"
puts "Destination: #{destination_dir}"
puts "Image Destination: #{images_destination}"

unless File.directory?(images_destination)
  puts "[+] Creating image directory: ".green + images_destination
  # mkdir_p also creates the destination directory itself when it is missing;
  # a plain Dir.mkdir raises in that case.
  FileUtils.mkdir_p(images_destination)
end
# Returns the front-matter block of +content+ (both "---" delimiter lines
# included), or "" when there is none.
#
# The match is non-greedy so that a "---" horizontal rule further down in the
# post body is not mistaken for the closing delimiter.
def extract_front_matter(content)
  content[/^-{3}$.*?^-{3}$/m].to_s
end

# Returns the text following "<key>:" on its own front-matter line, stripped of
# surrounding whitespace, or "" when the key is missing.
def front_matter_value(headers, key)
  headers[/^#{Regexp.escape(key)}:[ \t]*(.*)$/, 1].to_s.strip
end

# Returns the list stored under +key+ in the front matter as an array of
# strings. Three notations are understood:
#
#   key: [a, b, c]
#   key: a b c          (optionally comma separated)
#   key:
#   - a
#   - b
#
# For the dash notation only the items directly below "<key>:" are taken, so
# categories and tags do not pick up each other's entries. A missing key
# yields [].
def front_matter_list(headers, key)
  name = Regexp.escape(key)
  inline = headers[/^#{name}:[ \t]*(.*)$/, 1].to_s.strip
  items =
    if inline.include?("[")
      inline.delete("[]").split(",")
    elsif inline.empty?
      block = headers[/^#{name}:[ \t]*\r?\n((?:[ \t]*-[ \t]+.*\n)+)/, 1].to_s
      block.scan(/^[ \t]*-[ \t]+(.+)$/).flatten
    else
      inline.split(" ")
    end
  # Drop commas left over from any of the notations, and blank entries.
  items.map { |item| item.delete(",").strip }.reject(&:empty?)
end

# Brings one image referenced by a post into "<images_destination>/<slug>/"
# and returns the Markdown image tag that replaces the original reference.
#
# image_name         - path (relative to source_dir) or http(s) URL of the image
# source_dir         - root of the Octopress source tree
# images_destination - directory below which per-post image folders are created
# slug               - slug of the post the image belongs to
#
# Remote images are downloaded with curl; local ones are copied, and
# FileUtils.copy raises if the source file does not exist.
def import_image(image_name, source_dir, images_destination, slug)
  puts "Image Name: ".blue + image_name
  basename = File.basename(image_name)
  post_images_dir = images_destination + "/" + slug + "/"
  unless File.directory?(post_images_dir)
    puts "[+] Creating image directory for this post: ".green + post_images_dir
    FileUtils.mkdir_p(post_images_dir)
  end
  # Same file name as used in the rewritten link below, so the link always
  # points at the file that was actually written.
  destination_image = post_images_dir + basename

  if image_name =~ %r{\Ahttps?://}
    puts "[+] We have a URL here: ".green + image_name
    puts "curl #{image_name} -o #{destination_image}"
    # Argument-list form: no shell is involved, so "&", "?" or spaces in the
    # URL cannot split or alter the command.
    system("curl", image_name, "-o", destination_image)
  else
    puts "[+] This is a local image".green
    full_image_path = source_dir + "/" + image_name
    puts "[+] Copying image file ".green + full_image_path
    puts "\t To: ".green + destination_image
    FileUtils.copy(full_image_path, destination_image)
  end

  "![#{basename}](/static/images/#{slug}/#{basename})"
end

# Converts a single Octopress post into a Pelican post.
#
# file               - path of the Octopress post (.md or .markdown)
# source_dir         - root of the Octopress source tree (for local images)
# destination_dir    - directory the converted "<name>.md" file is written to
# images_destination - directory below which the post's images are stored
def convert_post(file, source_dir, destination_dir, images_destination)
  puts "Reading ".blue + "#{file}".dark_green
  # Block form closes the file handle as soon as the content is read.
  content = File.open(file, "r:UTF-8") { |f| f.read }

  headers = extract_front_matter(content)
  title = front_matter_value(headers, "title").delete("\"")
  date = front_matter_value(headers, "date").delete("\"'")
  categories = front_matter_list(headers, "categories")
  tags = front_matter_list(headers, "tags")

  # File name without its extension (.md and .markdown alike); the slug is
  # that name minus the leading "YYYY-MM-DD-" date prefix.
  stem = File.basename(file, ".*")
  slug = stem.sub(/\A[0-9]{4}-[0-9]{2}-[0-9]{2}-/, "")

  # Remove only the front matter itself (first occurrence).
  text = content.sub(headers, "")

  # Octopress codeblock tags become fenced code blocks. Non-greedy, so several
  # codeblocks in one post are converted one by one instead of being merged.
  text.gsub!(/\{% codeblock lang:([a-zA-Z0-9]*) %\}(.*?)\{% endcodeblock %\}/m, "```\\1\\2```\n\n")

  # Markdown images: ![alt](path "optional title")
  text.gsub!(/!\[(.*?)\]\((\S*?)\s*(\".*?\")*\)/) do
    import_image($2.to_s, source_dir, images_destination, slug)
  end

  # Octopress image tags: {% img path [further attributes] %}. The attributes
  # after the path are optional, so a bare "{% img /images/a.png %}" matches.
  text.gsub!(/\{%\s*img\s+(\S+)(?:\s+.*?)?\s*%\}/) do
    import_image($1.to_s, source_dir, images_destination, slug)
  end

  puts
  output_path = "#{destination_dir}/#{stem}.md"
  puts "Writing ".blue + "#{output_path}".dark_green
  File.open(output_path, "w:UTF-8") do |f|
    f.puts "Title: " + title
    f.puts "Date: " + date

    if categories.empty?
      puts "[-] WARNING: Missing category. Using 'General'.".red
      category = "General"
    else
      # Pelican takes a single category; the first one wins.
      category = categories.first.capitalize
    end
    f.puts "Category: " + category

    # Categories double as tags. The line is written whenever there is
    # anything to list, and join keeps the separators right after uniq.
    all_tags = (tags + categories).uniq
    f.puts "Tags: " + all_tags.map { |t| t.capitalize }.join(", ") unless all_tags.empty?

    f.puts "Slug: " + slug
    f.puts "Author: " + AUTHOR
    f.puts # Blank line between the headers and the body
    f.puts text
  end
  puts "==================================================================================".yellow
end

# Convert every post found in the Octopress "_posts" directory.
Dir.glob("#{posts_source}/*.{md,markdown}").each do |file|
  convert_post(file, source_dir, destination_dir, images_destination)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment