Skip to content

Instantly share code, notes, and snippets.

Last active March 31, 2018 09:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robdmoore/476075a7b81afb7420fc1b0da63bbda9 to your computer and use it in GitHub Desktop.
Save robdmoore/476075a7b81afb7420fc1b0da63bbda9 to your computer and use it in GitHub Desktop.
Wordpress -> Jekyll converter

You can already convert your posts across to Jekyll format with an existing exporter, but the result contains superfluous metadata and WordPress shortcodes are not handled correctly. This program helps with that.

Fair warning: this is deliberately dirty, hacky code. I ran it with the posts in a Git repo and inspected the results file by file, tweaking as I went.

using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Web;
// <package id = "HtmlAgilityPack" version="1.5.5" targetFramework="net461" />
// <package id = "ReverseMarkdown" version="1.5.0" targetFramework="net461" />
namespace JekyllConverter
{
    /// <summary>
    /// Console tool that post-processes WordPress posts exported in Jekyll format.
    /// For every file in the target directory it trims unwanted front-matter fields;
    /// for <c>.html</c> files it additionally converts the body to Markdown (handling
    /// the WordPress <c>[code]</c> and <c>[caption]</c> shortcodes) and writes the
    /// result to a <c>.md</c> file of the same name.
    /// </summary>
    class Program
    {
        // Front-matter block: from a line starting with "---" to the next line starting with "---".
        private static readonly Regex PreambleRegex = new Regex(@"(?s)^---.*?^---", RegexOptions.Multiline);

        // HTML line-break and paragraph tags that are stripped from the inside of code blocks.
        private static readonly Regex CodeHtmlRegex = new Regex("<br\\s*/>|<\\/?p>");

        // [code]...[/code] shortcode; group 3 = optional language, group 4 = the code itself.
        private static readonly Regex CodeBlockRegex = new Regex(@"(?s)\[code(\s+lang(uage)?=""(.+?)"")?\](.*?)\[\/code\]", RegexOptions.Multiline);

        // [caption]...[/caption] shortcode, either with a caption="..." attribute or with
        // the caption text following an <a>...</a> element.
        private static readonly Regex CaptionRegex = new Regex(@"(\[caption.+?caption=""(.+?)"".*?](.+?)\[\/caption\])|(\[caption.*?]\s*(<a.+?</a>)(.+?)\[\/caption\])");

        /// <summary>
        /// Entry point. Processes every file directly inside the target directory.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the directory to process. When omitted, a
        /// hard-coded development path is used.
        /// </param>
        static void Main(string[] args)
        {
            var converter = new ReverseMarkdown.Converter();
            var dir = args.Length > 0 ? args[0] : @"C:\dev\temp\jekylltest\test";

            foreach (var file in Directory.GetFiles(dir))
            {
                var fileContent = File.ReadAllText(file);

                // Split off the front matter by position. string.Replace throws an
                // ArgumentException for an empty search string, so a file without
                // front matter must not go through Replace(preamble, "").
                var preambleMatch = PreambleRegex.Match(fileContent);
                var preamble = preambleMatch.Value;
                var content = preambleMatch.Success
                    ? fileContent.Remove(preambleMatch.Index, preambleMatch.Length)
                    : fileContent;

                var fileToWrite = file;
                if (Path.GetExtension(file) == ".html")
                {
                    content = ConvertHtmlBody(converter, content);
                    fileToWrite = Path.ChangeExtension(file, ".md");
                }

                // NOTE(review): the front-matter clean-up is applied to every file here,
                // not only to .html files — confirm that matches the intended scope.
                File.WriteAllText(fileToWrite, CleanPreamble(preamble) + content);

                if (fileToWrite != file)
                {
                    // NOTE(review): the statement guarded by this check was not visible in
                    // the source this was reconstructed from; removing the superseded
                    // .html file is the presumed intent — confirm before relying on it.
                    File.Delete(file);
                }
            }
        }

        /// <summary>
        /// Converts the HTML body of a post to Markdown.
        /// </summary>
        /// <param name="converter">HTML-to-Markdown converter to use for the body.</param>
        /// <param name="content">Post body with the front matter already removed.</param>
        /// <returns>The Markdown body, with each code shortcode rendered as a fenced block.</returns>
        private static string ConvertHtmlBody(ReverseMarkdown.Converter converter, string content)
        {
            // Swap each code shortcode for a placeholder paragraph so that the converter
            // does not touch the code; the originals are restored afterwards.
            var codeBlocks = CodeBlockRegex.Matches(content).OfType<Match>().ToList();
            foreach (var block in codeBlocks)
            {
                content = content.Replace(block.Value, "<p>|||</p>");
            }

            // Rewrite caption shortcodes as the captioned element followed by an
            // emphasised caption line.
            content = CaptionRegex.Replace(content, "$5$3<br /><em>$2$6</em>");

            var markdown = converter.Convert(content);

            // Placeholders are consumed in document order, which is the order in which
            // the code blocks were captured. A body that contains a literal "|||" of its
            // own will therefore run past the end of the list and throw.
            var index = 0;
            return Regex.Replace(markdown, "\\|\\|\\|", m =>
            {
                var block = codeBlocks[index++];
                var code = HttpUtility.HtmlDecode(CodeHtmlRegex.Replace(block.Groups[4].Value, ""));
                return $"```{block.Groups[3].Value}{code}```";
            });
        }

        /// <summary>
        /// Removes the <c>parent_id</c>, <c>published</c>, <c>password</c>, <c>status</c>
        /// and <c>meta</c> entries from the front matter and collapses the nested
        /// <c>author</c> entry to a single <c>author: display name</c> line.
        /// </summary>
        /// <param name="preamble">Front-matter text; may be empty.</param>
        /// <returns>The cleaned front-matter text.</returns>
        private static string CleanPreamble(string preamble)
        {
            // Single-line entries: drop the whole line plus the whitespace that follows it.
            foreach (var field in new[] { "parent_id", "published", "password", "status" })
            {
                preamble = Regex.Replace(preamble, "^" + field + @":.+?$\s+", "", RegexOptions.Multiline);
            }

            // Multi-line entries run until the next line that starts with a non-whitespace
            // character, i.e. the next top-level key or the closing "---".
            preamble = Regex.Replace(preamble, @"^meta:[\s\S]+?(?=(^\S))+", "", RegexOptions.Multiline);
            return Regex.Replace(preamble, @"^author:[\s\S]+?display_name:\s+(.+?)$[\s\S]+?(?=(^\S))+", "author: $1\n", RegexOptions.Multiline);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment