pedrovanzella/import_from_octopress.rb

## import_from_octopress.rb
# encoding: utf-8
#!/usr/bin/env ruby
require 'fileutils'

#
# import_from_octopress.rb
# Imports Jekyll / Octopress formatted blogposts into pelican.
# Author: Pedro Vanzella <pedro@pedrovanzella.com>
#

#=============== EXAMPLE FROM OCTOPRESS ====================
#
# ---
# layout: post
# title: "Title"
# date: "YYYY-MM-DD HH:mm"
# comments: true
# categories: [A, B, C]
# ---
#
# Text in markdown goes here
#
#=============== END EXAMPLE ===============================

#=============== EXAMPLE FROM PELICAN ======================
#
# Title: Title
# Date: YYYY-MM-DD HH:mm
# Tags: a, b, c
# Category: Single
# Slug: title
# Author: My Name
# Summary: For indexing and feeds
#
# Text in Markdown goes here
#
# =============== END EXAMPLE ==============================

class String
    # ANSI escape prefix behind each colour helper; the entries starting
    # with "\e[1m" additionally switch on the bold attribute.
    palette = {
        :red        => "\e[1m\e[31m",
        :dark_red   => "\e[31m",
        :green      => "\e[1m\e[32m",
        :dark_green => "\e[32m",
        :yellow     => "\e[1m\e[33m",
        :blue       => "\e[1m\e[34m",
        :dark_blue  => "\e[34m",
        :pur        => "\e[1m\e[35m"
    }

    # One zero-argument helper per palette entry; each wraps the receiver.
    palette.each do |helper, prefix|
        define_method(helper) { colorize(self, prefix) }
    end

    # Returns +text+ prefixed with +color_code+ and followed by the ANSI
    # reset sequence "\e[0m".
    def colorize(text, color_code)
        "#{color_code}#{text}\e[0m"
    end
end

# Prints the command-line synopsis: a bold red "[-]" marker followed by
# the invocation pattern in dark red.
def usage
	marker = "[-]".red
	synopsis = " ruby import_from_octopress.rb <source dir> [<destination dir>]".dark_red
	puts marker + synopsis
end

# Without a source directory there is nothing to import: show the synopsis
# and stop.
if ARGV.empty?
	usage
	exit
end

# Name written to the "Author:" line of every converted post.
AUTHOR = "Pedro Vanzella"

# Directory that contains the "_posts" folder; local image paths are also
# resolved relative to it.
source_dir = ARGV[0]
# Directory the converted posts are written to; defaults to "content"
# next to this script.
destination_dir = ARGV[1] || File.expand_path("../content", __FILE__)

posts_source = source_dir + "/_posts"

images_destination = destination_dir + "/images"

puts "Source: #{posts_source}"
puts "Destination: #{destination_dir}"
puts "Image Destination: #{images_destination}"

unless File.directory?(images_destination)
	puts "[+] Creating image directory: ".green + images_destination
	# mkdir_p also creates a missing destination directory; Dir.mkdir would
	# raise Errno::ENOENT when the parent does not exist yet.
	FileUtils.mkdir_p(images_destination)
end

# Returns the front-matter block of +content+ (both "---" fences included),
# or "" when there is none. The match is non-greedy so a later "---"
# horizontal rule in the body is not swallowed into the headers.
def front_matter(content)
	content[/^-{3}[ \t\r]*$.*?^-{3}[ \t\r]*$/m].to_s
end

# Returns the text after "<key>:" on its own header line, stripped, or ""
# when the key is absent. Anchoring at line start keeps "title" from
# matching e.g. "subtitle".
def header_field(headers, key)
	headers[/^#{Regexp.escape(key)}:[ \t]+(.*)$/, 1].to_s.strip
end

# Returns the list stored under +key+ as an array of strings. Understands
# "key: [a, b]", "key: a, b", "key: a b" and the multi-line form
#   key:
#   - a
#   - b
# Only the "- item" lines directly below the key are collected, so two
# multi-line lists in one header do not leak into each other.
def header_list(headers, key)
	name = Regexp.escape(key)
	inline = headers[/^#{name}:[ \t]+(\S.*)$/, 1]
	if inline
		body = inline.delete("[]")
		# Bracketed lists are always comma separated (items may contain
		# spaces); bare lists fall back to whitespace when no comma is present.
		parts = (inline.include?("[") || body.include?(",")) ? body.split(",") : body.split
	else
		block = headers[/^#{name}:[ \t\r]*\n((?:[ \t]*-[ \t]+.*(?:\n|\z))+)/, 1].to_s
		parts = block.scan(/^[ \t]*-[ \t]+(.*)$/).flatten
	end
	parts.map { |part| part.strip }.reject { |part| part.empty? }
end

# Derives the slug from a post path: the file name without its ".md" /
# ".markdown" extension and without the leading "YYYY-MM-DD-" date.
def post_slug(path)
	File.basename(path).sub(/\.(?:md|markdown)\z/, "").sub(/\A\d{4}-\d{2}-\d{2}-/, "")
end

# Rewrites "{% codeblock lang:x %}...{% endcodeblock %}" as fenced code
# blocks. Non-greedy so that several code blocks in one post stay separate.
def convert_codeblocks(text)
	text.gsub(/\{% codeblock lang:([a-zA-Z0-9]*) %\}(.*?)\{% endcodeblock %\}/m, "```\\1\\2```\n\n")
end

# Fetches or copies one image into "<images_destination>/<slug>/" and
# returns the markdown image link that replaces the original reference.
# Remote images (http/https) are downloaded with curl, local ones are
# copied from "<source_dir>/<image_name>". A missing local file is reported
# and skipped instead of aborting the whole import.
def import_post_image(image_name, slug, source_dir, images_destination)
	puts "Image Name: ".blue + image_name
	# Drop any query string / fragment so it does not end up in the file name.
	basename = File.basename(image_name.sub(/[?#].*\z/, ""))

	post_images_dir = images_destination + "/" + slug + "/"
	unless File.directory?(post_images_dir)
		puts "[+] Creating image directory for this post: ".green + post_images_dir
		FileUtils.mkdir_p(post_images_dir)
	end
	destination_image = post_images_dir + basename

	if image_name =~ %r{\Ahttps?://}
		puts "[+] We have a URL here: ".green + image_name
		puts "curl #{image_name} -o #{destination_image}"
		# Argument-list form bypasses the shell, so "&", "?" or spaces in the
		# URL can neither break the command nor inject into it.
		system("curl", image_name, "-o", destination_image)
	else
		full_image_path = source_dir + "/" + image_name
		puts "[+] This is a local image".green
		if File.file?(full_image_path)
			puts "[+] Copying image file ".green + full_image_path
			puts "\t To: ".green + destination_image
			FileUtils.copy(full_image_path, destination_image)
		else
			puts "[-] WARNING: Image not found, skipping copy: ".red + full_image_path
		end
	end

	"![#{basename}](/static/images/#{slug}/#{basename})"
end

# Converts every post under posts_source: parses the front matter, rewrites
# code blocks and image references (importing the images on the way) and
# writes "<destination_dir>/<name>.md" with Pelican-style headers.
Dir.glob("#{posts_source}/*.{md,markdown}").each do |file|
	puts "Reading ".blue + "#{file}".dark_green

	# Block form closes the handle as soon as the content has been read.
	content = File.open(file, "r:UTF-8") { |io| io.read }

	headers = front_matter(content)
	title = header_field(headers, "title").delete("\"")
	date = header_field(headers, "date").delete("\"'")
	categories = header_list(headers, "categories")
	tags = header_list(headers, "tags")
	slug = post_slug(file)

	# Only the first occurrence is the front matter; identical text further
	# down belongs to the post body.
	text = convert_codeblocks(content.sub(headers, ""))

	# Markdown images: ![alt](path "optional title")
	text = text.gsub(/!\[(.*?)\]\((\S*?)\s*(\".*?\")*\)/) do |match|
		image_name = Regexp.last_match(2).to_s
		image_name.empty? ? match : import_post_image(image_name, slug, source_dir, images_destination)
	end

	# Octopress tags: {% img [class ...] /path/or/url [width] [height] [title] %}
	# The image is the first token that looks like a path or URL; leading
	# class names are skipped. Tags carrying only a path are matched as well.
	text = text.gsub(/\{%\s*img\s+(.*?)\s*%\}/) do |match|
		tokens = Regexp.last_match(1).split
		image_name = tokens.find { |token| token =~ %r{\A(?:https?://|/|\S+/)\S+} } || tokens.first.to_s
		image_name.empty? ? match : import_post_image(image_name, slug, source_dir, images_destination)
	end

	puts
	output_file = "#{destination_dir}/#{File.basename(file).sub(/\.markdown\z/, ".md")}"
	puts "Writing ".blue + "#{output_file}".dark_green

	category = categories.first
	if category.nil?
		puts "[-] WARNING: Missing category. Using 'General'.".red
		category = "General"
	else
		category = category.capitalize
	end

	# Categories were used as tags in the source blog, so the Tags line is
	# the de-duplicated union of both lists.
	all_tags = (tags + categories).map { |tag| tag.capitalize }.uniq

	File.open(output_file, "w") do |f|
		f.puts "Title: " + title
		f.puts "Date: " + date
		f.puts "Category: " + category
		f.puts "Tags: " + all_tags.join(", ") unless all_tags.empty?
		f.puts "Slug: " + slug
		f.puts "Author: " + AUTHOR

		f.puts # Blank line

		f.puts text
	end

	puts "==================================================================================".yellow
end
	# encoding: utf-8
	#!/usr/bin/env ruby
	require 'fileutils'

	#
	# import_from_octopress.rb
	# Imports Jekyll / Octopress formatted blogposts into pelican.
	# Author: Pedro Vanzella <pedro@pedrovanzella.com>
	#

	#=============== EXAMPLE FROM OCTOPRESS ====================
	#
	# ---
	# layout: post
	# title: "Title"
	# date: "YYYY-MM-DD HH:mm"
	# comments: true
	# categories: [A, B, C]
	# ---
	#
	# Text in markdown goes here
	#
	#=============== END EXAMPLE ===============================

	#=============== EXAMPLE FROM PELICAN ======================
	#
	# Title: Title
	# Date: YYYY-MM-DD HH:mm
	# Tags: a, b, c
	# Category: Single
	# Slug: title
	# Author: My Name
	# Summary: For indexing and feeds
	#
	# Text in Markdown goes here
	#
	# =============== END EXAMPLE ==============================

	class String
		# ANSI escape prefix behind each colour helper; the entries starting
		# with "\e[1m" additionally switch on the bold attribute.
		palette = {
			:red        => "\e[1m\e[31m",
			:dark_red   => "\e[31m",
			:green      => "\e[1m\e[32m",
			:dark_green => "\e[32m",
			:yellow     => "\e[1m\e[33m",
			:blue       => "\e[1m\e[34m",
			:dark_blue  => "\e[34m",
			:pur        => "\e[1m\e[35m"
		}

		# One zero-argument helper per palette entry; each wraps the receiver.
		palette.each do |helper, prefix|
			define_method(helper) { colorize(self, prefix) }
		end

		# Returns +text+ prefixed with +color_code+ and followed by the ANSI
		# reset sequence "\e[0m".
		def colorize(text, color_code)
			"#{color_code}#{text}\e[0m"
		end
	end

	# Prints the command-line synopsis: a bold red "[-]" marker followed by
	# the invocation pattern in dark red.
	def usage
		marker = "[-]".red
		synopsis = " ruby import_from_octopress.rb <source dir> [<destination dir>]".dark_red
		puts marker + synopsis
	end

	# Without a source directory there is nothing to import: show the synopsis
	# and stop.
	if ARGV.empty?
		usage
		exit
	end

	# Name written to the "Author:" line of every converted post.
	AUTHOR = "Pedro Vanzella"

	# Directory that contains the "_posts" folder; local image paths are also
	# resolved relative to it.
	source_dir = ARGV[0]
	# Directory the converted posts are written to; defaults to "content"
	# next to this script.
	destination_dir = ARGV[1] || File.expand_path("../content", __FILE__)

	posts_source = source_dir + "/_posts"

	images_destination = destination_dir + "/images"

	puts "Source: #{posts_source}"
	puts "Destination: #{destination_dir}"
	puts "Image Destination: #{images_destination}"

	unless File.directory?(images_destination)
		puts "[+] Creating image directory: ".green + images_destination
		# mkdir_p also creates a missing destination directory; Dir.mkdir would
		# raise Errno::ENOENT when the parent does not exist yet.
		FileUtils.mkdir_p(images_destination)
	end

	# Returns the front-matter block of +content+ (both "---" fences included),
	# or "" when there is none. The match is non-greedy so a later "---"
	# horizontal rule in the body is not swallowed into the headers.
	def front_matter(content)
		content[/^-{3}[ \t\r]*$.*?^-{3}[ \t\r]*$/m].to_s
	end

	# Returns the text after "<key>:" on its own header line, stripped, or ""
	# when the key is absent. Anchoring at line start keeps "title" from
	# matching e.g. "subtitle".
	def header_field(headers, key)
		headers[/^#{Regexp.escape(key)}:[ \t]+(.*)$/, 1].to_s.strip
	end

	# Returns the list stored under +key+ as an array of strings. Understands
	# "key: [a, b]", "key: a, b", "key: a b" and the multi-line form
	#   key:
	#   - a
	#   - b
	# Only the "- item" lines directly below the key are collected, so two
	# multi-line lists in one header do not leak into each other.
	def header_list(headers, key)
		name = Regexp.escape(key)
		inline = headers[/^#{name}:[ \t]+(\S.*)$/, 1]
		if inline
			body = inline.delete("[]")
			# Bracketed lists are always comma separated (items may contain
			# spaces); bare lists fall back to whitespace when no comma is present.
			parts = (inline.include?("[") || body.include?(",")) ? body.split(",") : body.split
		else
			block = headers[/^#{name}:[ \t\r]*\n((?:[ \t]*-[ \t]+.*(?:\n|\z))+)/, 1].to_s
			parts = block.scan(/^[ \t]*-[ \t]+(.*)$/).flatten
		end
		parts.map { |part| part.strip }.reject { |part| part.empty? }
	end

	# Derives the slug from a post path: the file name without its ".md" /
	# ".markdown" extension and without the leading "YYYY-MM-DD-" date.
	def post_slug(path)
		File.basename(path).sub(/\.(?:md|markdown)\z/, "").sub(/\A\d{4}-\d{2}-\d{2}-/, "")
	end

	# Rewrites "{% codeblock lang:x %}...{% endcodeblock %}" as fenced code
	# blocks. Non-greedy so that several code blocks in one post stay separate.
	def convert_codeblocks(text)
		text.gsub(/\{% codeblock lang:([a-zA-Z0-9]*) %\}(.*?)\{% endcodeblock %\}/m, "```\\1\\2```\n\n")
	end

	# Fetches or copies one image into "<images_destination>/<slug>/" and
	# returns the markdown image link that replaces the original reference.
	# Remote images (http/https) are downloaded with curl, local ones are
	# copied from "<source_dir>/<image_name>". A missing local file is reported
	# and skipped instead of aborting the whole import.
	def import_post_image(image_name, slug, source_dir, images_destination)
		puts "Image Name: ".blue + image_name
		# Drop any query string / fragment so it does not end up in the file name.
		basename = File.basename(image_name.sub(/[?#].*\z/, ""))

		post_images_dir = images_destination + "/" + slug + "/"
		unless File.directory?(post_images_dir)
			puts "[+] Creating image directory for this post: ".green + post_images_dir
			FileUtils.mkdir_p(post_images_dir)
		end
		destination_image = post_images_dir + basename

		if image_name =~ %r{\Ahttps?://}
			puts "[+] We have a URL here: ".green + image_name
			puts "curl #{image_name} -o #{destination_image}"
			# Argument-list form bypasses the shell, so "&", "?" or spaces in the
			# URL can neither break the command nor inject into it.
			system("curl", image_name, "-o", destination_image)
		else
			full_image_path = source_dir + "/" + image_name
			puts "[+] This is a local image".green
			if File.file?(full_image_path)
				puts "[+] Copying image file ".green + full_image_path
				puts "\t To: ".green + destination_image
				FileUtils.copy(full_image_path, destination_image)
			else
				puts "[-] WARNING: Image not found, skipping copy: ".red + full_image_path
			end
		end

		"![#{basename}](/static/images/#{slug}/#{basename})"
	end

	# Converts every post under posts_source: parses the front matter, rewrites
	# code blocks and image references (importing the images on the way) and
	# writes "<destination_dir>/<name>.md" with Pelican-style headers.
	# This copy of the loop had been mangled by text extraction (escaped block
	# pipes, regex quantifiers dropped); it is restored to valid Ruby here.
	Dir.glob("#{posts_source}/*.{md,markdown}").each do |file|
		puts "Reading ".blue + "#{file}".dark_green

		# Block form closes the handle as soon as the content has been read.
		content = File.open(file, "r:UTF-8") { |io| io.read }

		headers = front_matter(content)
		title = header_field(headers, "title").delete("\"")
		date = header_field(headers, "date").delete("\"'")
		categories = header_list(headers, "categories")
		tags = header_list(headers, "tags")
		slug = post_slug(file)

		# Only the first occurrence is the front matter; identical text further
		# down belongs to the post body.
		text = convert_codeblocks(content.sub(headers, ""))

		# Markdown images: ![alt](path "optional title")
		text = text.gsub(/!\[(.*?)\]\((\S*?)\s*(\".*?\")*\)/) do |match|
			image_name = Regexp.last_match(2).to_s
			image_name.empty? ? match : import_post_image(image_name, slug, source_dir, images_destination)
		end

		# Octopress tags: {% img [class ...] /path/or/url [width] [height] [title] %}
		# The image is the first token that looks like a path or URL; leading
		# class names are skipped. Tags carrying only a path are matched as well.
		text = text.gsub(/\{%\s*img\s+(.*?)\s*%\}/) do |match|
			tokens = Regexp.last_match(1).split
			image_name = tokens.find { |token| token =~ %r{\A(?:https?://|/|\S+/)\S+} } || tokens.first.to_s
			image_name.empty? ? match : import_post_image(image_name, slug, source_dir, images_destination)
		end

		puts
		output_file = "#{destination_dir}/#{File.basename(file).sub(/\.markdown\z/, ".md")}"
		puts "Writing ".blue + "#{output_file}".dark_green

		category = categories.first
		if category.nil?
			puts "[-] WARNING: Missing category. Using 'General'.".red
			category = "General"
		else
			category = category.capitalize
		end

		# Categories were used as tags in the source blog, so the Tags line is
		# the de-duplicated union of both lists.
		all_tags = (tags + categories).map { |tag| tag.capitalize }.uniq

		File.open(output_file, "w") do |f|
			f.puts "Title: " + title
			f.puts "Date: " + date
			f.puts "Category: " + category
			f.puts "Tags: " + all_tags.join(", ") unless all_tags.empty?
			f.puts "Slug: " + slug
			f.puts "Author: " + AUTHOR

			f.puts # Blank line

			f.puts text
		end

		puts "==================================================================================".yellow
	end