Skip to content

Instantly share code, notes, and snippets.

@brainwipe
Created October 17, 2018 22:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brainwipe/f806c1e89e0f003ce24578a1dcdf1dd6 to your computer and use it in GitHub Desktop.
Save brainwipe/f806c1e89e0f003ce24578a1dcdf1dd6 to your computer and use it in GitHub Desktop.
Converts a Blogger export XML file to Markdown
// Requires the Html2Markdown NuGet package: https://www.nuget.org/packages/Html2Markdown/
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
using Html2Markdown;
namespace BloggerToMd
{
/// <summary>
/// Converts the entries of a Blogger export (Atom XML) file into individual Markdown files,
/// using Html2Markdown to translate each entry's HTML content.
/// </summary>
public static class BloggerToMarkdown
{
    /// <summary>Maximum number of (sanitised) title characters used in a generated file name.</summary>
    private const int MaxTitleLengthInFileName = 10;

    /// <summary>
    /// Loads the export at <paramref name="bloggerFileName"/> and writes one <c>.md</c> file per
    /// matched entry into the directory that contains the export file.
    /// </summary>
    /// <param name="bloggerFileName">Path of the Blogger export XML file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bloggerFileName"/> is null.</exception>
    public static void Convert(string bloggerFileName)
    {
        if (bloggerFileName == null)
        {
            throw new ArgumentNullException(nameof(bloggerFileName));
        }

        var document = Load(bloggerFileName);
        var manager = BloggerNameSpaceManager(document);

        // Selects every atom:entry whose atom:id contains the text 'post'.
        // NOTE(review): comment entries in a Blogger export may also carry 'post' in their id;
        // verify against a real export and, if so, filter on the entry's "kind" category instead.
        var blogPosts = document.DocumentElement.SelectNodes("atom:entry[contains(atom:id, 'post')]", manager);

        // GetDirectoryName can return null (e.g. for a root path); an empty string makes
        // Path.Combine fall back to a path relative to the current directory instead of throwing.
        var folder = Path.GetDirectoryName(bloggerFileName) ?? string.Empty;

        foreach (XmlNode blogPost in blogPosts)
        {
            SaveToFile(ToBlogPost(blogPost, manager), folder);
        }
    }

    /// <summary>Loads the XML file at <paramref name="bloggerFileName"/> into an <see cref="XmlDocument"/>.</summary>
    private static XmlDocument Load(string bloggerFileName)
    {
        var doc = new XmlDocument();
        doc.Load(bloggerFileName);
        return doc;
    }

    /// <summary>
    /// Builds a namespace manager that maps the prefixes used in the XPath queries of this class
    /// (<c>atom</c>, <c>openSearch</c>, <c>gd</c>, <c>thr</c>, <c>georss</c>) to their namespace URIs.
    /// </summary>
    private static XmlNamespaceManager BloggerNameSpaceManager(XmlDocument bloggerDocument)
    {
        var manager = new XmlNamespaceManager(bloggerDocument.NameTable);
        manager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");
        manager.AddNamespace("gd", "http://schemas.google.com/g/2005");
        manager.AddNamespace("thr", "http://purl.org/syndication/thread/1.0");
        manager.AddNamespace("georss", "http://www.georss.org/georss");
        manager.AddNamespace("atom", "http://www.w3.org/2005/Atom");
        return manager;
    }

    /// <summary>
    /// Extracts title, publication date, tags and Markdown-converted content from one entry node.
    /// </summary>
    /// <exception cref="FormatException">
    /// The entry has no <c>atom:published</c> element, or its text is not a valid date.
    /// </exception>
    private static BlogPost ToBlogPost(XmlNode blogPost, XmlNamespaceManager manager)
    {
        var title = ChildText(blogPost, "atom:title", manager);

        // Parse with the invariant culture so the result does not depend on the machine's locale.
        var date = DateTimeOffset.Parse(
            ChildText(blogPost, "atom:published", manager),
            System.Globalization.CultureInfo.InvariantCulture);

        var tags = blogPost
            .SelectNodes("atom:category[@scheme='http://www.blogger.com/atom/ns#']/@term", manager)
            .Cast<XmlNode>()
            .Select(tag => tag.Value)
            .ToArray();

        var content = ChildText(blogPost, "atom:content", manager);
        var markdown = new Converter().Convert(content);

        return new BlogPost(title, date, tags, markdown);
    }

    /// <summary>
    /// Returns the inner text of the first node matching <paramref name="xpath"/> under
    /// <paramref name="parent"/>, or an empty string when no such node exists.
    /// </summary>
    private static string ChildText(XmlNode parent, string xpath, XmlNamespaceManager manager) =>
        parent.SelectSingleNode(xpath, manager)?.InnerText ?? string.Empty;

    /// <summary>
    /// Writes <paramref name="post"/> to <c>{yyyy-MM-dd}-{safe title}.md</c> inside
    /// <paramref name="folder"/>: a small Title/Date header, a <c>---</c> line, then the Markdown.
    /// The post's tags are not written.
    /// </summary>
    private static void SaveToFile(BlogPost post, string folder)
    {
        // Invariant culture keeps the '/' separator and the Gregorian calendar regardless of locale.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var fileDate = post.Date.ToString("yyyy-MM-dd", invariant);
        var headerDate = post.Date.ToString("dd/MM/yy", invariant);

        var fileName = $"{fileDate}-{SafeFileName(post.Title)}.md";
        var sb = new StringBuilder();
        sb.Append($@"Title: {post.Title}
Date: {headerDate}
---
");
        sb.Append(post.Markdown);
        File.WriteAllText(Path.Combine(folder, fileName), sb.ToString());
    }

    /// <summary>
    /// Replaces every character that is invalid in a file name with <c>-</c> and truncates the
    /// result to at most <see cref="MaxTitleLengthInFileName"/> characters.
    /// </summary>
    private static string SafeFileName(string fileName)
    {
        var sanitized = Path.GetInvalidFileNameChars()
            .Aggregate(fileName, (current, c) => current.Replace(c, '-'));

        // Substring(0, n) throws when the string is shorter than n, so only truncate long titles.
        return sanitized.Length <= MaxTitleLengthInFileName
            ? sanitized
            : sanitized.Substring(0, MaxTitleLengthInFileName);
    }

    /// <summary>Immutable holder for the data extracted from one entry.</summary>
    private sealed class BlogPost
    {
        public BlogPost(string title, DateTimeOffset date, string[] tags, string markdown)
        {
            Title = title;
            Date = date;
            Tags = tags;
            Markdown = markdown;
        }

        /// <summary>Text of the entry's <c>atom:title</c> element.</summary>
        public string Title { get; }

        /// <summary>Parsed value of the entry's <c>atom:published</c> element.</summary>
        public DateTimeOffset Date { get; }

        /// <summary>Values of the <c>term</c> attributes of the entry's Blogger-scheme categories.</summary>
        public string[] Tags { get; }

        /// <summary>The entry's content after conversion to Markdown.</summary>
        public string Markdown { get; }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment