Skip to content

Instantly share code, notes, and snippets.

@synhershko
Created August 6, 2013 20:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save synhershko/6168484 to your computer and use it in GitHub Desktop.
Save synhershko/6168484 to your computer and use it in GitHub Desktop.
Code to import WordPress dump / backup file to NSemble. Create a new Console app, add the following files, and add these as links: BlogPost.cs, Constants.cs, DynamicContent.cs, NSembleUserAuthentication.cs, PostComments.cs, User.cs
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using NSemble.Core.Models;
namespace NSemble.Core.Extensions
{
/// <summary>
/// Helpers for turning titles into URL slugs and for re-indenting the contents of
/// WordPress-style <c>[code lang=...]</c> sections.
/// </summary>
public static class DynamicContentHelpers
{
    // Matches "[code lang=xyz] ... [/code]"; group 1 is the language, group 2 the code.
    static readonly Regex CodeBlockFinder = new Regex(@"\[code lang=(.+?)\s*\](.*?)\[/code\]", RegexOptions.Compiled | RegexOptions.Singleline);

    // Matches the run of whitespace at the very start of a string.
    static readonly Regex FirstLineSpacesFinder = new Regex(@"^(\s|\t)+", RegexOptions.Compiled);

    // Quote-like characters and ampersands that are dropped from slugs entirely.
    static readonly Regex QuoteAndAmpersandFinder = new Regex("[’'“”\"&]{1,}", RegexOptions.Compiled);

    // One or more whitespace characters, collapsed into a single dash in slugs.
    static readonly Regex WhitespaceRunFinder = new Regex(@"\s{1,}", RegexOptions.Compiled);

    // "\r\n" must come first so a Windows line ending is not split into two lines.
    static readonly string[] LineSeparators = { "\r\n", "\n", "\r" };

    /// <summary>
    /// Converts a title into a lowercase, dash-separated slug.
    /// </summary>
    /// <param name="title">The title to convert. May be null or empty.</param>
    /// <returns>
    /// The slug, or an empty string when <paramref name="title"/> is null or empty.
    /// </returns>
    public static string TitleToSlug(string title)
    {
        if (string.IsNullOrEmpty(title))
        {
            return string.Empty;
        }

        // 1 - Strip diacritical marks
        title = RemoveDiacritics(title);
        // 2 - Lowercase the string for canonicalization
        title = title.ToLowerInvariant();
        // 3 - Replace all the non-word characters with dashes
        title = ReplaceNonWordWithDashes(title);
        // 4 - Trim leading/trailing spaces and dashes
        return title.Trim(' ', '-');
    }

    // http://blogs.msdn.com/michkap/archive/2007/05/14/2629747.aspx
    /// <summary>
    /// Removes non-spacing marks (accents and similar) from the decomposed form of the value.
    /// </summary>
    /// <param name="value">The string to normalize.</param>
    /// <returns>The value without non-spacing marks, normalized back to form C.</returns>
    /// <seealso href="http://stackoverflow.com/questions/249087/how-do-i-remove-diacritics-accents-from-a-string-in-net"/>
    private static string RemoveDiacritics(string value)
    {
        var decomposed = value.Normalize(NormalizationForm.FormD);
        var sb = new StringBuilder(decomposed.Length);
        foreach (var ch in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
            {
                sb.Append(ch);
            }
        }
        return sb.ToString().Normalize(NormalizationForm.FormC);
    }

    /// <summary>
    /// Drops quotes and ampersands, then turns every run of characters that are not
    /// letters or digits into a single dash.
    /// </summary>
    /// <param name="title">The text to process.</param>
    /// <returns>The text with dashes in place of non-alphanumeric runs.</returns>
    private static string ReplaceNonWordWithDashes(string title)
    {
        // Remove apostrophes, quotes and ampersands without leaving a separator behind
        title = QuoteAndAmpersandFinder.Replace(title, "");

        // Replace every non-alphanumeric character by a space
        var builder = new StringBuilder(title.Length);
        foreach (var ch in title)
        {
            builder.Append(char.IsLetterOrDigit(ch) ? ch : ' ');
        }

        // Replace each run of whitespace by a single dash
        return WhitespaceRunFinder.Replace(builder.ToString(), "-");
    }

    /// <summary>
    /// HTML-decodes the code and returns it re-indented, surrounded by blank-line separators.
    /// </summary>
    /// <param name="lang">The language of the code; not used in the generated output.</param>
    /// <param name="code">The HTML-encoded code text.</param>
    /// <returns>The re-indented code with a newline before and after it.</returns>
    private static string GenerateCodeBlock(string lang, string code)
    {
        code = HttpUtility.HtmlDecode(code);
        return string.Format("{0}{1}{0}", Environment.NewLine, ConvertMarkdownCodeStatment(code));
    }

    /// <summary>
    /// Removes the first line's indentation from every line and prefixes each line
    /// with the code-block indent.
    /// </summary>
    /// <param name="code">The code text; lines may end in CRLF, LF or CR.</param>
    /// <returns>The re-indented lines joined with <see cref="Environment.NewLine"/>.</returns>
    private static string ConvertMarkdownCodeStatment(string code)
    {
        var lines = code.Split(LineSeparators, StringSplitOptions.None);
        var indentLength = GetFirstLineSpaces(lines.FirstOrDefault()).Length;

        // NOTE(review): Markdown indented code blocks need four leading spaces; confirm
        // that the single-space prefix below is what the consumer of this text expects.
        var formattedLines = lines.Select(l => string.Format(" {0}", l.Substring(CountLeadingWhitespace(l, indentLength))));
        return string.Join(Environment.NewLine, formattedLines);
    }

    /// <summary>
    /// Counts the whitespace characters at the start of the line, up to the given limit,
    /// so that de-indenting never removes non-whitespace characters.
    /// </summary>
    private static int CountLeadingWhitespace(string line, int limit)
    {
        var count = 0;
        while (count < limit && count < line.Length && char.IsWhiteSpace(line[count]))
        {
            count++;
        }
        return count;
    }

    /// <summary>
    /// Returns the leading whitespace of the given line.
    /// </summary>
    /// <param name="firstLine">The line to inspect. May be null.</param>
    /// <returns>The leading whitespace, or an empty string when there is none.</returns>
    private static string GetFirstLineSpaces(string firstLine)
    {
        if (firstLine == null)
        {
            return string.Empty;
        }

        var match = FirstLineSpacesFinder.Match(firstLine);
        return match.Success ? firstLine.Substring(0, match.Length) : string.Empty;
    }
}
}
/// <summary>
/// Console entry point: reads a WordPress export file and stores its posts, their
/// comments (including replies), a redirects table, one admin user and the area
/// configuration in the "NSemble" database of the document store at localhost:8080.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var reader = new WordPressExportReader();
    var posts = reader.Read(@"C:\wordpress.2013-08-02.xml");
    var redirectsTable = new RedirectsTable();

    using (var store = new DocumentStore { Url = "http://localhost:8080", DefaultDatabase = "NSemble" }.Initialize())
    using (var session = store.OpenSession())
    {
        // The single user that becomes the author of every imported post.
        var user = new User { UserName = "me@mydomain.com", FirstName = "Foo", LastName = "Bar", Claims = new List<string> { "admin" } };
        NSembleUserAuthentication.SetUserPassword(user, "password");
        var authorId = "users/" + user.UserName;
        session.Store(user, authorId);

        foreach (var post in posts)
        {
            // Pages are not imported as blog posts.
            if (post.IsPage)
            {
                continue;
            }

            var blogPost = new BlogPost
            {
                AllowComments = true,
                // NOTE(review): counts top-level comments only; confirm whether replies should be included.
                CommentsCount = post.Comments.Count,
                Content = post.Content,
                ContentType = DynamicContentType.Markdown,
                Title = post.Title,
                Tags = post.Tags,
                PublishedAt = post.PublishedAt,
                LastEditedAt = null,
                PrivateViewingKey = Guid.NewGuid().ToString(),
                CurrentState = post.IsDraft ? BlogPost.State.Draft : BlogPost.State.Public,
                AuthorId = authorId,
            };

            var comments = new PostComments();
            foreach (var c in post.Comments)
            {
                comments.Comments.Add(ConvertComment(c));
            }

            // Posts without a title or without content are hidden instead of published.
            if (string.IsNullOrWhiteSpace(blogPost.Title) || string.IsNullOrWhiteSpace(blogPost.Content))
            {
                blogPost.CurrentState = BlogPost.State.Private;
            }

            // The post is stored first because its Id is used for the comments document and the redirect.
            session.Store(blogPost);
            session.Store(comments, blogPost.Id + "/comments");

            if (blogPost.CurrentState == BlogPost.State.Public)
            {
                // Redirect /blog/{year}/{month}/{slug} to /blog/{year}/{month}/{id}-{slug},
                // where {id} is the part of the document Id after the first '/'.
                redirectsTable.theTable.Add(
                    string.Format("/blog/{0}/{1}/{2}", post.PublishedAt.Year, post.PublishedAt.Month.ToString("D2"), post.Slug),
                    new RedirectsTable.RedirectCommand
                    {
                        HttpStatusCode = HttpStatusCode.MovedPermanently,
                        NewRoute = string.Format("/blog/{0}/{1}/{3}-{2}", post.PublishedAt.Year, post.PublishedAt.Month.ToString("D2"), post.Slug, blogPost.Id.Substring(blogPost.Id.IndexOf('/') + 1)),
                    });
            }
        }

        redirectsTable.theTable.Add("/", new RedirectsTable.RedirectCommand { HttpStatusCode = HttpStatusCode.SeeOther, NewRoute = "/blog" });
        session.Store(redirectsTable, Constants.RedirectsTableDocumentId);

        session.Store(new Dictionary<string, AreaConfigs>
        {
            {"/blog", new AreaConfigs { AreaName = "MyBlog", ModuleName = "Blog" }},
            {"/", new AreaConfigs { AreaName = "MyContent", ModuleName = "ContentPages" }},
            {"/auth", new AreaConfigs { AreaName = "Auth", ModuleName = "Membership" }}
        }, Constants.AreasDocumentName);

        session.SaveChanges();
    }
}

/// <summary>
/// Converts an imported WordPress comment, together with any replies attached to it,
/// into an approved <c>PostComments.Comment</c>.
/// </summary>
/// <param name="source">The comment produced by the export reader.</param>
/// <returns>The converted comment; its Replies list is never null.</returns>
private static PostComments.Comment ConvertComment(WordPressPost.Comment source)
{
    var converted = new PostComments.Comment
    {
        Approved = true,
        Author = source.AuthorName,
        Content = source.Content,
        CreatedAt = source.PostedAt,
        Email = source.AuthorEmail,
        UserHostAddress = source.AuthorIP,
        Website = source.AuthorUrl,
        Replies = new List<PostComments.Comment>(),
    };

    // The Replies list of the source comment may be null, so guard before walking it.
    if (source.Replies != null)
    {
        foreach (var reply in source.Replies)
        {
            converted.Replies.Add(ConvertComment(reply));
        }
    }

    return converted;
}
using System;
using System.Collections.Generic;
using System.Xml;
namespace WordPressExportReader
{
/// <summary>
/// Reads the items of a WordPress export (WXR) file into <see cref="WordPressPost"/> objects.
/// </summary>
public class WordPressExportReader
{
    // Used when the document root does not declare the "wp" prefix itself.
    private const string DefaultWordPressNamespace = "http://wordpress.org/export/1.1/";

    /// <summary>
    /// Loads the export file and returns one entry per <c>rss/channel/item</c> element,
    /// skipping items whose status is "trash".
    /// </summary>
    /// <param name="exportPath">Path of the export XML file.</param>
    /// <returns>The posts and pages found in the file, with their approved comments.</returns>
    /// <exception cref="FormatException">An item or comment carries no parsable date.</exception>
    public List<WordPressPost> Read(string exportPath)
    {
        var doc = new XmlDocument();
        doc.Load(exportPath);

        var manager = new XmlNamespaceManager(doc.NameTable);
        // Use the namespace the file itself declares so exports of other WXR versions also match.
        manager.AddNamespace("wp", ResolveWordPressNamespace(doc));
        manager.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");
        manager.AddNamespace("wfw", "http://wellformedweb.org/CommentAPI/");
        manager.AddNamespace("content", "http://purl.org/rss/1.0/modules/content/");

        var results = new List<WordPressPost>();
        var iterator = doc.CreateNavigator().Select("rss/channel/item");
        while (iterator.MoveNext())
        {
            var item = iterator.Current;

            var status = GetValue(item, "wp:status", manager);
            if (status == "trash")
            {
                continue;
            }

            var p = new WordPressPost
            {
                IsDraft = IsUnpublishedStatus(status),
                IsPage = "page".Equals(GetValue(item, "wp:post_type", manager)),
                Title = GetValue(item, "title", manager),
                Content = GetValue(item, "content:encoded", manager),
                Slug = GetValue(item, "wp:post_name", manager),
                PublishedAt = ReadPostDate(item, manager),
                OriginalUrl = GetValue(item, "link", manager),
                Tags = new List<string>(),
            };

            var tagsIterator = item.Select("category");
            while (tagsIterator.MoveNext())
            {
                p.Tags.Add(tagsIterator.Current.Value);
            }

            p.Comments = ReadComments(item, manager);
            results.Add(p);
        }
        return results;
    }

    /// <summary>Returns the namespace bound to the "wp" prefix on the document root, or the 1.1 default.</summary>
    private static string ResolveWordPressNamespace(XmlDocument doc)
    {
        var root = doc.DocumentElement;
        var declared = root != null ? root.GetNamespaceOfPrefix("wp") : null;
        return string.IsNullOrEmpty(declared) ? DefaultWordPressNamespace : declared;
    }

    /// <summary>Returns true for statuses that must not be imported as published.</summary>
    private static bool IsUnpublishedStatus(string status)
    {
        switch (status)
        {
            case "draft":
            case "auto-draft":
            case "pending":
            case "future":
            case "private":
                return true;
            default:
                // "publish", "inherit" and any unrecognized status are imported as published.
                return false;
        }
    }

    /// <summary>Returns the value of the first node matching the XPath, or an empty string when there is none.</summary>
    private static string GetValue(System.Xml.XPath.XPathNavigator node, string xpath, XmlNamespaceManager manager)
    {
        var match = node.SelectSingleNode(xpath, manager);
        return match == null ? string.Empty : match.Value;
    }

    /// <summary>Reads an integer element using the invariant culture; missing or invalid values yield 0.</summary>
    private static int GetInt(System.Xml.XPath.XPathNavigator node, string xpath, XmlNamespaceManager manager)
    {
        int value;
        return int.TryParse(GetValue(node, xpath, manager), System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture, out value) ? value : 0;
    }

    /// <summary>Tries to parse the element at the XPath as a date, independent of the current culture.</summary>
    private static bool TryReadDate(System.Xml.XPath.XPathNavigator node, string xpath, XmlNamespaceManager manager,
        System.Globalization.DateTimeStyles styles, out DateTimeOffset value)
    {
        return DateTimeOffset.TryParse(GetValue(node, xpath, manager),
            System.Globalization.CultureInfo.InvariantCulture, styles, out value);
    }

    /// <summary>
    /// Reads the publication date of an item: <c>pubDate</c> first, then the GMT post date,
    /// then the local post date, since unpublished items may carry a placeholder <c>pubDate</c>.
    /// </summary>
    private static DateTimeOffset ReadPostDate(System.Xml.XPath.XPathNavigator item, XmlNamespaceManager manager)
    {
        DateTimeOffset value;
        if (TryReadDate(item, "pubDate", manager, System.Globalization.DateTimeStyles.None, out value)
            || TryReadDate(item, "wp:post_date_gmt", manager, System.Globalization.DateTimeStyles.AssumeUniversal, out value)
            || TryReadDate(item, "wp:post_date", manager, System.Globalization.DateTimeStyles.AssumeLocal, out value))
        {
            return value;
        }
        throw new FormatException("No parsable date found for item '" + GetValue(item, "title", manager) + "'.");
    }

    /// <summary>Reads the date of a comment, preferring the GMT value and treating it as UTC.</summary>
    private static DateTimeOffset ReadCommentDate(System.Xml.XPath.XPathNavigator comment, XmlNamespaceManager manager)
    {
        DateTimeOffset value;
        if (TryReadDate(comment, "wp:comment_date_gmt", manager, System.Globalization.DateTimeStyles.AssumeUniversal, out value)
            || TryReadDate(comment, "wp:comment_date", manager, System.Globalization.DateTimeStyles.AssumeLocal, out value))
        {
            return value;
        }
        throw new FormatException("No parsable date found for comment " + GetValue(comment, "wp:comment_id", manager) + ".");
    }

    /// <summary>
    /// Reads the approved comments of an item as a tree. A comment is attached to its parent
    /// when that parent is an approved comment appearing earlier in the item; otherwise it is
    /// kept as a top-level comment rather than dropped.
    /// </summary>
    private static List<WordPressPost.Comment> ReadComments(System.Xml.XPath.XPathNavigator item, XmlNamespaceManager manager)
    {
        var topLevel = new List<WordPressPost.Comment>();
        var byId = new Dictionary<int, WordPressPost.Comment>();

        var commentsIterator = item.Select("wp:comment", manager);
        while (commentsIterator.MoveNext())
        {
            var node = commentsIterator.Current;
            if (!"1".Equals(GetValue(node, "wp:comment_approved", manager)))
            {
                continue;
            }

            var comment = new WordPressPost.Comment
            {
                Id = GetInt(node, "wp:comment_id", manager),
                AuthorName = GetValue(node, "wp:comment_author", manager),
                AuthorEmail = GetValue(node, "wp:comment_author_email", manager),
                AuthorIP = GetValue(node, "wp:comment_author_IP", manager),
                PostedAt = ReadCommentDate(node, manager),
                AuthorUrl = GetValue(node, "wp:comment_author_url", manager),
                Content = GetValue(node, "wp:comment_content", manager),
                Replies = new List<WordPressPost.Comment>(),
            };

            var commentType = GetValue(node, "wp:comment_type", manager);
            if (!string.IsNullOrWhiteSpace(commentType))
            {
                comment.Type = commentType;
            }

            // Only earlier comments can be parents, which also rules out reference cycles.
            var parentId = GetInt(node, "wp:comment_parent", manager);
            WordPressPost.Comment parent;
            if (parentId != 0 && byId.TryGetValue(parentId, out parent))
            {
                parent.Replies.Add(comment);
            }
            else
            {
                topLevel.Add(comment);
            }

            if (comment.Id != 0)
            {
                byId[comment.Id] = comment;
            }
        }
        return topLevel;
    }
}
}
using System;
using System.Collections.Generic;
using System.Text;
namespace WordPressExportReader
{
/// <summary>
/// One item of a WordPress export, together with its comments.
/// </summary>
public class WordPressPost
{
    /// <summary>
    /// A comment on a post; replies to it are held in <see cref="Replies"/>.
    /// </summary>
    public class Comment
    {
        public int Id { get; set; }
        public string AuthorName { get; set; }
        public string AuthorEmail { get; set; }
        public string AuthorUrl { get; set; }
        public string AuthorIP { get; set; }
        public string Content { get; set; }
        public string Type { get; set; }
        public DateTimeOffset PostedAt { get; set; }
        public List<Comment> Replies { get; set; }
    }

    public string Title { get; set; }
    public string Content { get; set; }
    public string Slug { get; set; }
    public string OriginalUrl { get; set; }
    public List<string> Tags { get; set; }
    public bool IsPage { get; set; }
    public bool IsDraft { get; set; }
    public DateTimeOffset PublishedAt { get; set; }

    // Starts out as an empty list so a freshly created post can be printed and filled right away.
    public List<Comment> Comments = new List<Comment>();

    /// <summary>
    /// Builds a one-line summary: optional "[!!Draft!!]" and "[Page]" markers, then the
    /// slug, the title and the number of comments.
    /// </summary>
    /// <returns>The summary text.</returns>
    public override string ToString()
    {
        var draftMarker = IsDraft ? "[!!Draft!!]" : string.Empty;
        var pageMarker = IsPage ? "[Page]" : string.Empty;
        var commentSummary = string.Format(" ({0} comments)", Comments.Count);

        // A null slug or title contributes nothing to the concatenation.
        return string.Concat(draftMarker, pageMarker, " ", Slug)
             + string.Concat(" ", Title, commentSummary);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment