Skip to content

Instantly share code, notes, and snippets.

@SurinderBhomra
Last active December 19, 2019 13:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SurinderBhomra/13fa30412be71aa50cc838198f7fa580 to your computer and use it in GitHub Desktop.
Save SurinderBhomra/13fa30412be71aa50cc838198f7fa580 to your computer and use it in GitHub Desktop.
Console App - Export Kentico Blog Posts To Markdown Files
using CMS.DataEngine;
using CMS.DocumentEngine;
using CMS.Helpers;
using CMS.MediaLibrary;
using Export.BlogPosts.Models;
using ReverseMarkdown;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace Export.BlogPosts
{
class Program
{
/// <summary>Site name passed to the Kentico document query and to the media file lookup.</summary>
public const string SiteName = "SurinderBhomra";
/// <summary>Folder the generated .md files are written to.</summary>
public const string MarkdownFilesOutputPath = @"C:\Temp\BlogPosts\";
/// <summary>Root-relative folder used as the prefix of exported media file paths.</summary>
public const string NewMediaBaseFolder = "/media";
/// <summary>
/// Base URL prepended to media URLs rewritten in the post body.
/// NOTE(review): "xxxx" looks like a placeholder token - confirm the real value before running.
/// </summary>
public const string CloudImageServiceUrl = "https://xxxx.cloudimg.io";
/// <summary>
/// Entry point: initialises the Kentico API, loads the blog posts, writes one
/// markdown file per post and prints the outcome of each export to the console.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    CMSApplication.Init();

    List<BlogPost> exportQueue = GetBlogPosts();

    // Nothing to export: leave immediately, without waiting for a key press.
    if (exportQueue.Count == 0)
    {
        return;
    }

    foreach (BlogPost post in exportQueue)
    {
        string outcome = CreateMDFile(post) ? "EXPORTED" : "FAILED";
        Console.WriteLine($"{post.PostDate:yyyy-MM-dd} - {post.Title} - {outcome}");
    }

    // Keep the console window open so the results can be read.
    Console.ReadLine();
}
/// <summary>
/// Queries Kentico for the published "SurinderBhomra.BlogPost" pages below "/Blog"
/// and maps each page onto a <see cref="BlogPost"/>.
/// </summary>
/// <returns>
/// The mapped posts in query order (BlogPostDate descending); an empty list when
/// the query returns no data.
/// </returns>
private static List<BlogPost> GetBlogPosts()
{
    List<BlogPost> exportedPosts = new List<BlogPost>();

    InfoDataSet<TreeNode> blogPages = DocumentHelper.GetDocuments()
        .OnSite(SiteName)
        .Types("SurinderBhomra.BlogPost")
        .Path("/Blog", PathTypeEnum.Children)
        .Culture("en-GB")
        .CombineWithDefaultCulture()
        .NestingLevel(-1)
        .Published()
        .OrderBy("BlogPostDate DESC")
        .TypedResult;

    if (DataHelper.DataSourceIsEmpty(blogPages))
    {
        return exportedPosts;
    }

    foreach (TreeNode page in blogPages)
    {
        // The node GUID doubles as the post identifier and the Disqus thread id.
        string nodeGuid = page.NodeGUID.ToString();

        string title = page.GetStringValue("BlogPostTitle", string.Empty);
        string summary = page.GetStringValue("BlogPostSummary", string.Empty);
        string bodyMarkdown = RichTextToMarkdown(page.GetStringValue("BlogPostBody", string.Empty));
        DateTime postedOn = page.GetDateTimeValue("BlogPostDate", DateTime.MinValue);

        List<string> categoryNames = page.Categories.DisplayNames
            .Select(category => category.Value.ToString())
            .ToList();

        // Tags are stored as one comma separated string: drop the quotes, split,
        // trim the spaces around each tag and discard empty entries.
        string rawTags = page.DocumentTags.Replace("\"", string.Empty);
        List<string> tagNames = rawTags
            .Split(',')
            .Select(tag => tag.Trim(' '))
            .Where(tag => !string.IsNullOrEmpty(tag))
            .ToList();

        string socialImagePath = GetMediaFilePath(page.GetStringValue("ShareImageUrl", string.Empty));
        string teaserImagePath = GetMediaFilePath(page.GetStringValue("BlogPostTeaser", string.Empty));

        BlogPost mapped = new BlogPost
        {
            Guid = nodeGuid,
            Title = title,
            Summary = summary,
            Body = bodyMarkdown,
            PostDate = postedOn,
            Slug = page.NodeAlias,
            DisqusId = nodeGuid,
            Categories = categoryNames,
            Tags = tagNames,
            SocialImage = socialImagePath,
            TeaserImage = teaserImagePath
        };

        exportedPosts.Add(mapped);
    }

    return exportedPosts;
}
/// <summary>
/// Builds the complete text of a markdown file for a blog post: a front matter
/// block delimited by "---" lines (title, summary, date, draft, slug, disqusId,
/// teaserImage, socialImage and, when present, categories and tags), a blank
/// line, and then the post body.
/// </summary>
/// <param name="bp">Blog post to serialise.</param>
/// <returns>The front matter followed by the post body.</returns>
private static string GenerateMDContent(BlogPost bp)
{
    StringBuilder mdBuilder = new StringBuilder();

    // Invariant culture keeps the Gregorian calendar and ':' separators regardless
    // of the machine's regional settings.
    // NOTE(review): 'Z' is emitted as a literal; the value is not converted to UTC.
    string postDate = bp.PostDate.ToString("yyyy-MM-ddTHH:mm:ssZ", CultureInfo.InvariantCulture);
    string draftFlag = bp.IsDraft.ToString().ToLowerInvariant();
    string summary = EscapeYamlString(HTMLHelper.HTMLDecode(bp.Summary));

    mdBuilder.AppendLine("---");
    mdBuilder.AppendLine($"title: \"{EscapeYamlString(bp.Title)}\"");
    mdBuilder.AppendLine($"summary: \"{summary}\"");
    mdBuilder.AppendLine($"date: \"{postDate}\"");
    mdBuilder.AppendLine($"draft: {draftFlag}");
    mdBuilder.AppendLine($"slug: \"/{EscapeYamlString(bp.Slug)}\"");
    mdBuilder.AppendLine($"disqusId: \"{EscapeYamlString(bp.DisqusId)}\"");
    mdBuilder.AppendLine($"teaserImage: \"{EscapeYamlString(bp.TeaserImage)}\"");
    mdBuilder.AppendLine($"socialImage: \"{EscapeYamlString(bp.SocialImage)}\"");

    AppendYamlList(mdBuilder, "categories", bp.Categories);
    AppendYamlList(mdBuilder, "tags", bp.Tags);

    mdBuilder.AppendLine("---");
    mdBuilder.AppendLine();

    // Add blog post body content.
    mdBuilder.Append(bp.Body);

    return mdBuilder.ToString();
}

/// <summary>
/// Appends a <c>key: ["a","b"]</c> line to the front matter; writes nothing when
/// the list is null or empty.
/// </summary>
/// <param name="builder">Builder receiving the line.</param>
/// <param name="key">Front matter key.</param>
/// <param name="values">Values to quote, escape and join.</param>
private static void AppendYamlList(StringBuilder builder, string key, IEnumerable<string> values)
{
    if (values == null)
    {
        return;
    }

    // string.Join is used instead of CommaDelimitedStringCollection, whose Add
    // method throws when a value itself contains a comma.
    List<string> quoted = values.Select(value => $"\"{EscapeYamlString(value)}\"").ToList();
    if (quoted.Count == 0)
    {
        return;
    }

    builder.AppendLine($"{key}: [{string.Join(",", quoted)}]");
}

/// <summary>
/// Escapes a value for use inside a double-quoted YAML scalar.
/// </summary>
/// <param name="value">Raw value; null is treated as empty.</param>
/// <returns>The value with backslashes and double quotes escaped.</returns>
private static string EscapeYamlString(string value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    // Backslashes first, so the backslashes added for the quotes are not doubled.
    return value.Replace("\\", "\\\\").Replace("\"", "\\\"");
}
/// <summary>
/// Writes the markdown file for a blog post into <see cref="MarkdownFilesOutputPath"/>.
/// The file is named "{yyyy-MM-dd}---{slug}.md".
/// </summary>
/// <param name="bp">Blog post to export.</param>
/// <returns>
/// True when the file exists after writing; false when the post is null, no
/// content was generated, or the file could not be written.
/// </returns>
private static bool CreateMDFile(BlogPost bp)
{
    if (bp == null)
    {
        return false;
    }

    string markdownContents = GenerateMDContent(bp);
    if (string.IsNullOrEmpty(markdownContents))
    {
        return false;
    }

    // Invariant culture so the file name does not depend on the machine's calendar settings.
    string datePrefix = bp.PostDate.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);
    string fileName = $"{datePrefix}---{bp.Slug}.md";

    try
    {
        // No-op when the folder already exists; avoids DirectoryNotFoundException on a first run.
        Directory.CreateDirectory(MarkdownFilesOutputPath);

        string fullPath = Path.Combine(MarkdownFilesOutputPath, fileName);
        File.WriteAllText(fullPath, markdownContents);

        return File.Exists(fullPath);
    }
    catch (Exception ex) when (ex is IOException
                               || ex is UnauthorizedAccessException
                               || ex is ArgumentException
                               || ex is NotSupportedException)
    {
        // Report the failure through the return value so one bad post does not
        // abort the export of the remaining posts.
        Console.Error.WriteLine($"Could not write '{fileName}': {ex.Message}");
        return false;
    }
}
/// <summary>
/// Converts a media URL into a path below <see cref="NewMediaBaseFolder"/>.
/// When the URL contains "getmedia" and a GUID, the media file is looked up by
/// that GUID and its file path is used; otherwise the
/// "/SurinderBhomra/media/Surinder" prefix is replaced with the new base folder.
/// </summary>
/// <param name="filePath">Media URL or path; null is treated as empty.</param>
/// <returns>The rewritten path, or an empty string for null/empty input.</returns>
private static string GetMediaFilePath(string filePath)
{
    if (string.IsNullOrEmpty(filePath))
    {
        return string.Empty;
    }

    // Case-insensitive, to agree with the IgnoreCase regexes used when scanning post bodies.
    if (filePath.IndexOf("getmedia", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        // Get GUID from file path. Surrounding braces are deliberately not captured,
        // so an unbalanced "{guid" cannot make the parse fail.
        Match guidMatch = Regex.Match(filePath, @"[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}");

        Guid fileGuid;
        if (guidMatch.Success && Guid.TryParse(guidMatch.Value, out fileGuid))
        {
            MediaFileInfo mediaFile = MediaFileInfoProvider.GetMediaFileInfo(fileGuid, SiteName);
            if (mediaFile != null)
            {
                return $"{NewMediaBaseFolder}/{mediaFile.FilePath}";
            }
        }
    }

    // Return the file path and remove the base file path.
    return filePath.Replace("/SurinderBhomra/media/Surinder", NewMediaBaseFolder);
}
// Matches a "/getmedia/{guid}/{file}.{ext}" URL, optionally followed by a
// "?width=N" query string whose digits are captured in the "width" group.
// Extensions of 2-5 characters are accepted so ".jpeg", ".webp" or ".aspx"
// are consumed whole instead of leaving a stray character behind.
private static readonly Regex GetMediaUrlRegex = new Regex(
    @"\/getmedia\/\{?[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}\}?\/[\w,\s-]+\.[A-Za-z0-9]{2,5}(?:\?width=(?<width>[0-9]*))?",
    RegexOptions.IgnoreCase);

/// <summary>
/// Converts rich text (HTML) to markdown. Before conversion, "~/" is replaced
/// with "/" and every "/getmedia/..." URL is rewritten to a CloudImage URL:
/// a "width/{N}/n" URL when a "?width=N" query is present, otherwise a
/// "cdno/n/n" URL.
/// </summary>
/// <param name="richText">HTML content of the post body.</param>
/// <returns>The markdown text, or an empty string for null/empty input.</returns>
public static string RichTextToMarkdown(string richText)
{
    if (string.IsNullOrEmpty(richText))
    {
        return string.Empty;
    }

    const string siteBaseUrl = "https://www.surinderbhomra.com";

    // Clean up tildes.
    richText = richText.Replace("~/", "/");

    // Rewrite every media URL in a single pass. Each match is replaced exactly
    // once, so a URL that is a prefix of another (?width=2 vs ?width=200) cannot
    // be corrupted, and an already rewritten URL is never matched a second time.
    richText = GetMediaUrlRegex.Replace(richText, match =>
    {
        string mediaPath = GetMediaFilePath(ClearQueryStrings(match.Value));
        string width = match.Groups["width"].Value;

        // Construct media URL required by image hosting company - CloudImage.
        // An empty width ("?width=") falls back to the generic form.
        string operation = width.Length > 0 ? $"width/{width}/n" : "cdno/n/n";

        return $"{CloudImageServiceUrl}/{operation}/{siteBaseUrl}{mediaPath}";
    });

    Config config = new Config
    {
        UnknownTags = Config.UnknownTagsOption.PassThrough, // Include the unknown tag completely in the result (default as well)
        GithubFlavored = true, // generate GitHub flavoured markdown, supported for BR, PRE and table tags
        RemoveComments = true, // will ignore all comments
        SmartHrefHandling = true // remove markdown output for links where appropriate
    };

    Converter markdownConverter = new Converter(config);

    // Remove the backslash that follows "[!" and the one that precedes "]" in the
    // converted text.
    return markdownConverter.Convert(richText).Replace(@"[!\", @"[!").Replace(@"\]", @"]");
}
/// <summary>
/// Strips the query string (everything from the first '?') and any '~'
/// characters from a media URL.
/// </summary>
/// <param name="mediaUrl">Media URL; may be null.</param>
/// <returns>The cleaned URL, or an empty string when <paramref name="mediaUrl"/> is null.</returns>
private static string ClearQueryStrings(string mediaUrl)
{
    if (mediaUrl == null)
    {
        return string.Empty;
    }

    int queryStart = mediaUrl.IndexOf('?');
    string withoutQuery = queryStart >= 0 ? mediaUrl.Substring(0, queryStart) : mediaUrl;

    return withoutQuery.Replace("~", string.Empty);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment