Created
February 19, 2015 09:25
-
-
Save adrianshort/e114d3f07ff026e844c5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert Jekyll blog posts to DokuWiki pages
# Adrian Short (https://adrianshort.org/) 15 Feb 2015
require 'date'
require 'fileutils'
require 'pp'
require 'time'
require 'yaml'

require 'pandoc-ruby'
INPUT_DIR = './_posts'.freeze
OUTPUT_BASEDIR = './blog'.freeze

# Leading YAML front matter of a Jekyll post: a `---` line, the YAML itself
# (capture group 1), then a closing `---` or `...` line.
FRONT_MATTER = /\A---\s*\n(.*?\n?)^(?:---|\.\.\.)\s*$\n?/m.freeze

# Parse the YAML front matter of a post.
#
# Only the front matter is handed to the YAML parser (not the Markdown body),
# and Date/Time are explicitly permitted so that `date:` values still load
# under Psych 4, where plain YAML.load rejects them.
#
# @param contents [String] full text of the post
# @return [Hash] the front matter, or {} when the post has none
def load_front_matter(contents)
  yaml = contents[FRONT_MATTER, 1]
  return {} unless yaml

  data = YAML.safe_load(yaml, permitted_classes: [Date, Time], aliases: true)
  data.is_a?(Hash) ? data : {}
end

# Work out the timestamp of a post.
#
# Uses the front matter `date` when present, otherwise the YYYY-MM-DD prefix
# of the filename. The result is always a Time because File.utime does not
# accept Date objects.
#
# @param metadata [Hash] parsed front matter
# @param filename [String] path of the post
# @return [Time, nil] nil when no usable date was found
def post_time(metadata, filename)
  value = metadata['date'] || File.basename(filename)[/\d{4}-\d\d-\d\d/]
  case value
  when Time then value
  when Date then value.to_time
  when String then Time.parse(value)
  end
end

# Derive the DokuWiki page filename from a Jekyll post filename,
# e.g. "2015-02-15-hello-world.md" -> "hello-world.txt".
#
# @param filename [String] path of the post
# @return [String, nil] nil when the name lacks the date-slug pattern
def output_basename(filename)
  slug = File.basename(filename)[/\d{4}-\d\d-\d\d-(.+)\./, 1]
  slug && "#{slug}.txt"
end

# Post-process Pandoc's DokuWiki output.
#
# @param text [String] DokuWiki markup produced by Pandoc
# @return [String] a new string with the fixes applied
def clean_dokuwiki(text)
  text
    # convert <!-- more --> tags
    .gsub(/<HTML>\n<!-- more -->\n<\/HTML>/, '===== =====')
    # fix blockquotes (for the DokuWiki blockquote plugin); the match is
    # non-greedy so that each blockquote in the page is unwrapped on its own
    .gsub(%r{<HTML><blockquote>\n(.+?)</blockquote></HTML>}m,
          "<blockquote>\n\\1</blockquote>\n")
end

# Build the `{{tag>...}}` line from the merged tags and categories.
#
# Tags containing spaces are wrapped in double quotes; the list is
# de-duplicated and sorted case-insensitively.
#
# @param metadata [Hash] parsed front matter
# @return [String] the tag line with trailing newline, or '' when untagged
def tag_line(metadata)
  tags = (Array(metadata['tags']) + Array(metadata['categories'])).map(&:to_s).uniq
  return '' if tags.empty?

  quoted = tags.map { |t| t.include?(' ') ? %("#{t}") : t }
  "{{tag>#{quoted.sort_by(&:downcase).join(' ')}}}\n"
end

# Render one post as a complete DokuWiki page: title heading, converted
# body, then the tag line.
#
# @param contents [String] full text of the post (front matter included;
#   it is passed to Pandoc unchanged, as before)
# @param metadata [Hash] parsed front matter
# @return [String] the DokuWiki page text
def build_page(contents, metadata)
  body = PandocRuby.convert(contents, from: :markdown, to: :dokuwiki)
  clean_dokuwiki("====== #{metadata['title']} ======\n\n#{body}") + tag_line(metadata)
end

# Convert every Markdown post in input_dir to a DokuWiki page under
# output_basedir/<year>/<slug>.txt and stamp each file with the post date.
#
# @param input_dir [String] directory holding the Jekyll posts
# @param output_basedir [String] root directory for the generated pages
# @return [Array] titles of the drafts (published == false) that were skipped
def convert_posts(input_dir = INPUT_DIR, output_basedir = OUTPUT_BASEDIR)
  drafts = []

  Dir.glob(File.join(input_dir, '*.{md,markdown}')).sort.each do |fn|
    # File.read closes the handle even when the post is skipped below
    contents = File.read(fn)
    metadata = load_front_matter(contents)

    # skip drafts; a missing `published` key means the post is published,
    # so the comparison with false is deliberate
    if metadata['published'] == false
      drafts << metadata['title']
      next
    end

    basename = output_basename(fn)
    time = post_time(metadata, fn)
    unless basename && time
      warn "Skipping #{fn}: cannot determine its slug or date"
      next
    end

    out_dir = File.join(output_basedir, time.year.to_s)
    FileUtils.mkdir_p(out_dir)
    out_fn = File.join(out_dir, basename)
    File.write(out_fn, build_page(contents, metadata))

    # Set the modified and last access time for the file
    # Use `cp -p` to preserve these times when copying
    File.utime(time, time, out_fn)
  end

  drafts
end

if __FILE__ == $PROGRAM_NAME
  unpublished_files = convert_posts
  puts 'Drafts skipped:'
  unpublished_files.each { |title| puts title }
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I recommend installing Pandoc straight from their website rather than through your package manager as OS packages are often quite out of date and don't include the DokuWiki writer.