Last active
December 19, 2019 13:43
-
-
Save SurinderBhomra/13fa30412be71aa50cc838198f7fa580 to your computer and use it in GitHub Desktop.
Console App - Export Kentico Blog Posts To Markdown Files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CMS.DataEngine; | |
using CMS.DocumentEngine; | |
using CMS.Helpers; | |
using CMS.MediaLibrary; | |
using Export.BlogPosts.Models; | |
using ReverseMarkdown; | |
using System; | |
using System.Collections.Generic; | |
using System.Configuration; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
namespace Export.BlogPosts | |
{ | |
class Program | |
{ | |
public const string SiteName = "SurinderBhomra"; | |
public const string MarkdownFilesOutputPath = @"C:\Temp\BlogPosts\"; | |
public const string NewMediaBaseFolder = "/media"; | |
public const string CloudImageServiceUrl = "https://xxxx.cloudimg.io"; | |
/// <summary>
/// Entry point: initialises the CMS application, loads the blog posts and writes one
/// markdown file per post, printing a status line for each.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    CMSApplication.Init();

    List<BlogPost> exportQueue = GetBlogPosts();

    // Nothing to export: leave immediately without waiting for a key press.
    if (!exportQueue.Any())
    {
        return;
    }

    foreach (BlogPost post in exportQueue)
    {
        string outcome = CreateMDFile(post) ? "EXPORTED" : "FAILED";

        Console.WriteLine($"{post.PostDate:yyyy-MM-dd} - {post.Title} - {outcome}");
    }

    // Block until Enter is pressed so the status lines stay visible.
    Console.ReadLine();
}
/// <summary>
/// Loads the published "SurinderBhomra.BlogPost" documents found under "/Blog"
/// (en-GB culture, combined with the default culture), ordered by BlogPostDate
/// descending, and maps each one to a <see cref="BlogPost"/>.
/// </summary>
/// <returns>The mapped posts; an empty list when the query yields no data.</returns>
private static List<BlogPost> GetBlogPosts()
{
    List<BlogPost> exported = new List<BlogPost>();

    InfoDataSet<TreeNode> documents = DocumentHelper.GetDocuments()
        .OnSite(SiteName)
        .Types("SurinderBhomra.BlogPost")
        .Path("/Blog", PathTypeEnum.Children)
        .Culture("en-GB")
        .CombineWithDefaultCulture()
        .NestingLevel(-1)
        .Published()
        .OrderBy("BlogPostDate DESC")
        .TypedResult;

    if (DataHelper.DataSourceIsEmpty(documents))
    {
        return exported;
    }

    foreach (TreeNode node in documents)
    {
        BlogPost post = new BlogPost();

        post.Guid = node.NodeGUID.ToString();
        post.Title = node.GetStringValue("BlogPostTitle", string.Empty);
        post.Summary = node.GetStringValue("BlogPostSummary", string.Empty);

        // The body is stored as rich text; convert it to markdown up front.
        post.Body = RichTextToMarkdown(node.GetStringValue("BlogPostBody", string.Empty));
        post.PostDate = node.GetDateTimeValue("BlogPostDate", DateTime.MinValue);
        post.Slug = node.NodeAlias;

        // The Disqus identifier is the node GUID as well.
        post.DisqusId = node.NodeGUID.ToString();

        post.Categories = node.Categories.DisplayNames
            .Select(c => c.Value.ToString())
            .ToList();

        // Tags arrive as one comma-separated string: drop the double quotes, split,
        // trim spaces and discard empty entries.
        post.Tags = node.DocumentTags
            .Replace("\"", string.Empty)
            .Split(',')
            .Select(t => t.Trim(' '))
            .Where(t => !string.IsNullOrEmpty(t))
            .ToList();

        post.SocialImage = GetMediaFilePath(node.GetStringValue("ShareImageUrl", string.Empty));
        post.TeaserImage = GetMediaFilePath(node.GetStringValue("BlogPostTeaser", string.Empty));

        exported.Add(post);
    }

    return exported;
}
/// <summary>
/// Builds the full markdown document for a blog post: a front matter block delimited
/// by "---" lines, followed by a blank line and the post body.
/// </summary>
/// <param name="bp">The blog post to serialise.</param>
/// <returns>The markdown text, front matter included.</returns>
private static string GenerateMDContent(BlogPost bp)
{
    string newLine = Environment.NewLine;
    StringBuilder mdBuilder = new StringBuilder();

    mdBuilder.Append($"---{newLine}");
    mdBuilder.Append($"title: \"{EscapeFrontMatterValue(bp.Title)}\"{newLine}");
    mdBuilder.Append($"summary: \"{EscapeFrontMatterValue(HTMLHelper.HTMLDecode(bp.Summary))}\"{newLine}");

    // Invariant culture keeps the ':' separators and the calendar stable regardless of
    // the machine's regional settings.
    // NOTE(review): 'Z' is written literally and PostDate is not converted to UTC here -
    // confirm the stored value really is UTC.
    string postDate = bp.PostDate.ToString("yyyy-MM-ddTHH:mm:ssZ", System.Globalization.CultureInfo.InvariantCulture);
    mdBuilder.Append($"date: \"{postDate}\"{newLine}");

    mdBuilder.Append($"draft: {bp.IsDraft.ToString().ToLowerInvariant()}{newLine}");
    mdBuilder.Append($"slug: \"/{bp.Slug}\"{newLine}");
    mdBuilder.Append($"disqusId: \"{bp.DisqusId}\"{newLine}");
    mdBuilder.Append($"teaserImage: \"{bp.TeaserImage}\"{newLine}");
    mdBuilder.Append($"socialImage: \"{bp.SocialImage}\"{newLine}");

    // Categories and tags are only written when there is at least one value.
    if (bp.Categories?.Count > 0)
    {
        mdBuilder.Append($"categories: [{ToQuotedList(bp.Categories)}]{newLine}");
    }

    if (bp.Tags?.Count > 0)
    {
        mdBuilder.Append($"tags: [{ToQuotedList(bp.Tags)}]{newLine}");
    }

    mdBuilder.Append($"---{newLine}{newLine}");

    // Add blog post body content.
    mdBuilder.Append(bp.Body);

    return mdBuilder.ToString();
}

/// <summary>
/// Escapes a value so it can be placed between double quotes in the front matter.
/// </summary>
/// <param name="value">Raw value; null is treated as an empty string.</param>
/// <returns>The value with backslashes and double quotes backslash-escaped.</returns>
private static string EscapeFrontMatterValue(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    // Backslashes first, otherwise the backslashes added for the quotes would be doubled.
    return value.Replace("\\", "\\\\").Replace("\"", "\\\"");
}

/// <summary>
/// Renders values as a comma-separated list of double-quoted, escaped items.
/// </summary>
/// <param name="values">Items to render.</param>
/// <returns>For example: "First","Second".</returns>
private static string ToQuotedList(IEnumerable<string> values)
{
    // string.Join is used instead of CommaDelimitedStringCollection because the latter
    // rejects any item that itself contains a comma.
    return string.Join(",", values.Select(v => $"\"{EscapeFrontMatterValue(v)}\""));
}
/// <summary>
/// Writes the markdown for a blog post to "{PostDate:yyyy-MM-dd}---{Slug}.md" inside
/// <see cref="MarkdownFilesOutputPath"/>, creating that directory when it is missing.
/// </summary>
/// <param name="bp">The blog post to export.</param>
/// <returns>
/// True when the file exists after writing; false when there is no content to write
/// or the file could not be written.
/// </returns>
private static bool CreateMDFile(BlogPost bp)
{
    string markdownContents = GenerateMDContent(bp);

    if (string.IsNullOrEmpty(markdownContents))
    {
        return false;
    }

    string fileName = $"{bp.PostDate:yyyy-MM-dd}---{bp.Slug}.md";

    try
    {
        string fullPath = Path.Combine(MarkdownFilesOutputPath, fileName);

        // WriteAllText does not create missing folders, so make sure the target exists.
        Directory.CreateDirectory(MarkdownFilesOutputPath);
        File.WriteAllText(fullPath, markdownContents);

        return File.Exists(fullPath);
    }
    catch (Exception ex) when (ex is IOException
                               || ex is UnauthorizedAccessException
                               || ex is ArgumentException
                               || ex is NotSupportedException)
    {
        // Report the failure through the return value so one bad post does not abort
        // the whole export; the reason goes to stderr.
        Console.Error.WriteLine($"Could not write '{fileName}': {ex.Message}");
        return false;
    }
}
/// <summary>
/// Maps a media URL to its path under <see cref="NewMediaBaseFolder"/>.
/// For "getmedia" URLs the GUID found in the URL is used to look up the media file and
/// its file path is returned; any other URL (or a failed lookup) has the
/// "/SurinderBhomra/media/Surinder" prefix swapped for the new base folder.
/// </summary>
/// <param name="filePath">The media URL or path; null is treated as empty.</param>
/// <returns>The rewritten path, or an empty string for null/empty input.</returns>
private static string GetMediaFilePath(string filePath)
{
    if (string.IsNullOrEmpty(filePath))
    {
        return string.Empty;
    }

    // Case-insensitive so that it agrees with the IgnoreCase regexes used by the callers.
    if (filePath.IndexOf("getmedia", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        // Get GUID from file path.
        Match guidMatch = Regex.Match(filePath, @"(\{){0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}");
        Guid fileGuid;

        // The braces are optional independently in the pattern, so strip them before
        // parsing: a GUID with only one brace would otherwise fail to parse.
        if (guidMatch.Success && Guid.TryParse(guidMatch.Value.Trim('{', '}'), out fileGuid))
        {
            MediaFileInfo mediaFile = MediaFileInfoProvider.GetMediaFileInfo(fileGuid, SiteName);

            if (mediaFile != null)
            {
                return $"{NewMediaBaseFolder}/{mediaFile.FilePath}";
            }
        }
    }

    // Return the file path and remove the base file path.
    return filePath.Replace("/SurinderBhomra/media/Surinder", NewMediaBaseFolder);
}
/// <summary>
/// Matches "/getmedia/{guid}/{file name}.{extension}" with an optional "?width=N"
/// suffix. Group 4 holds the width digits (empty when absent).
/// </summary>
private static readonly Regex MediaUrlRegex = new Regex(
    @"\/getmedia\/(\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}\}{0,1})\/([\w,\s-]+\.[A-Za-z0-9]+)(\?width=([0-9]*))?",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Converts a rich text (HTML) value to markdown, first rewriting every "/getmedia/"
/// URL to a CloudImage URL that points at the file's new media path.
/// </summary>
/// <param name="richText">The rich text value; may be null or empty.</param>
/// <returns>The markdown text, or an empty string for null/empty input.</returns>
public static string RichTextToMarkdown(string richText)
{
    if (string.IsNullOrEmpty(richText))
    {
        return string.Empty;
    }

    // Clean up tildes.
    richText = richText.Replace("~/", "/");

    // Rewrite all media URLs in ONE pass. Each URL is visited exactly once, so a
    // rewritten URL that still contains "/getmedia/" (media lookup failed) or a URL that
    // occurs several times in the text can never be wrapped a second time.
    richText = MediaUrlRegex.Replace(richText, match =>
    {
        string width = match.Groups[4].Value;
        string mediaPath = GetMediaFilePath(ClearQueryStrings(match.Value));

        // Resize only when a width was actually supplied; an empty "?width=" falls back
        // to the generic transformation instead of producing "/width//n/".
        string transformation = width.Length > 0 ? $"width/{width}/n" : "cdno/n/n";

        // Construct media URL required by image hosting company - CloudImage.
        return $"{CloudImageServiceUrl}/{transformation}/https://www.surinderbhomra.com{mediaPath}";
    });

    Config config = new Config
    {
        UnknownTags = Config.UnknownTagsOption.PassThrough, // Include the unknown tag completely in the result (default as well)
        GithubFlavored = true, // generate GitHub flavoured markdown, supported for BR, PRE and table tags
        RemoveComments = true, // will ignore all comments
        SmartHrefHandling = true // remove markdown output for links where appropriate
    };

    Converter markdownConverter = new Converter(config);

    // Undo the backslash escaping the converter leaves on "[!" and "]".
    return markdownConverter.Convert(richText).Replace(@"[!\", @"[!").Replace(@"\]", @"]");
}
/// <summary>
/// Strips the query string (everything from the first '?') from a media URL and
/// removes every '~' character from what remains.
/// </summary>
/// <param name="mediaUrl">The media URL; may be null.</param>
/// <returns>The cleaned URL, or an empty string when <paramref name="mediaUrl"/> is null.</returns>
private static string ClearQueryStrings(string mediaUrl)
{
    if (mediaUrl == null)
    {
        return string.Empty;
    }

    int queryStart = mediaUrl.IndexOf('?');
    string withoutQuery = queryStart >= 0 ? mediaUrl.Substring(0, queryStart) : mediaUrl;

    return withoutQuery.Replace("~", string.Empty);
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment