Created October 24, 2021 06:40
Migrating Blogger to VuePress
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;
using System.Text.RegularExpressions;
using ServiceStack;
namespace BloggerMigration
class Program
private static string mdTemplate = @"---
title: {0}
date: {1}
// Entry point: walks every fetched blog post, localizes its images, converts the
// HTML body to Markdown, swaps embedded script tags for other content, and writes
// one .md file per post.
// NOTE(review): several statement continuations (right-hand sides, lambda bodies,
// trailing arguments) are not visible in this copy of the file; comments below
// describe only what the visible lines establish.
static void Main(string[] args)
// First page of posts; the expression producing it is not visible here.
var initialResponse =
var posts = initialResponse.Items;
// Keep requesting pages for as long as the response carries a continuation token.
// NOTE(review): presumably each page's Items are appended to `posts` — confirm.
while (initialResponse.NextPageToken != null)
initialResponse =
// De-duplicate: keep the first post seen for each Id.
var uniquePosts = posts.GroupBy(x => x.Id)
.Select(x => x.First())
var converter = new ReverseMarkdown.Converter();
foreach (var postItem in uniquePosts)
// Per-post image folder name: last URL segment with the ".html" suffix removed.
var imgDirName = postItem.Url.Split("/").Last().Replace(".html", "");
// NOTE(review): Contains("") is true for every string, so as written this filter
// keeps all image links; a host substring looks to be missing — confirm.
var images = GetImageLinks(postItem.Content).Where(x => x.Contains("")).ToList();
var imgDirPath = ($"..\\..\\..\\images\\archive\\{imgDirName}\\").MapAbsolutePath();
// NOTE(review): no directory creation is visible before the file writes below.
foreach (var img in images)
// Segment 7 of the URL is treated as the size/scale segment and segment 8 as the
// file name. NOTE(review): URLs with fewer segments will throw on these indexers.
var scale = img.Split("/")[7];
// Download the image with the scale segment swapped for "s10000".
var bytes = img.Replace($"/{scale}/", "/s10000/").GetBytesFromUrl();
var imgName = img.Split("/")[8];
File.WriteAllBytes((Path.Join(imgDirPath, imgName)).MapAbsolutePath(), bytes);
// Point the post at the local copy instead of the remote URL.
postItem.Content = postItem.Content.Replace(img, $"/images/archive/{imgDirName}/{imgName}");
// Rewrite anchor hrefs to the same local image paths used above.
// NOTE(review): same empty Contains("") filter and fixed segment index as for images.
var anchorLinks = GetAnchorLinks(postItem.Content)
.Where(x => x.Contains(""))
foreach (var link in anchorLinks)
var imgName = link.Split("/")[8];
postItem.Content = postItem.Content.Replace(link, $"/images/archive/{imgDirName}/{imgName}");
// HTML -> Markdown (ReverseMarkdown converter created above).
postItem.Content = converter.Convert(postItem.Content);
// Replace Gist embed scripts with code fence.
var scripts = GetScriptUrls(postItem.Content)
.Where(x => x.Contains(""))
foreach (var script in scripts)
// Drop the last three characters of the script URL (presumably a ".js" suffix —
// confirm), then take the final path segment as the gist id.
var url = script.Substring(0, script.Length - 3);
var gistId = url.Split("/").Last();
// Fetch the gist's JSON, sending a browser-like User-Agent header.
// NOTE(review): the base URL literal is empty in this copy of the file.
var gistJson = ("" + gistId).GetJsonFromUrl(request =>
request.Headers.Add("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0");
var gistDoc = JsonDocument.Parse(gistJson);
GistFile gistFile = new GistFile();
// Each entry under "files" is deserialized into gistFile.
// NOTE(review): if this assignment is the whole loop body, only the last file survives.
foreach (var file in gistDoc.RootElement.GetProperty("files").EnumerateObject())
gistFile = file.Value.ToString().FromJson<GistFile>();
// Swap the embed tag for replacement text (second argument not visible here).
postItem.Content = postItem.Content.Replace($"<script src=\"{script}\"></script>",
// Write modified MD
// Build a "tags:" list from the post labels, or an empty string when there are none.
var tags = postItem.Labels != null ? "tags:\n- " + postItem.Labels.Join("\n- ") : "";
// Fill the template with the title and the publish date as yyyy-MM-dd
// (remaining format arguments are not visible here).
var mdOutput = mdTemplate.Fmt(postItem.Title, postItem.Published.ToString("yyyy-MM-dd"),
// Output file name: last URL segment with ".html" replaced by ".md".
var fileName = postItem.Url.Split("/").Last().Replace(".html", ".md");
File.WriteAllText(("..\\..\\..\\_archive\\" + fileName).MapAbsolutePath(), mdOutput);
/// <summary>
/// Lazily yields the value of the <c>src</c> attribute of every script tag
/// found in the supplied HTML, in the order the tags appear.
/// </summary>
/// <param name="inputHTML">HTML text to scan.</param>
/// <returns>One string per matching tag; empty when nothing matches.</returns>
static IEnumerable<String> GetScriptUrls(String inputHTML)
{
    // The closing quote is tied to the opening one through \k<Q>, so a URL that
    // contains the other quote character (e.g. an apostrophe inside a
    // double-quoted value) is captured whole instead of being cut short.
    const string pattern = @"<script\b[^\<\>]+?\bsrc\s*=\s*(?<Q>[""'])(?<L>.+?)\k<Q>[^\<\>]*?\>";
    foreach (Match match in Regex.Matches(inputHTML, pattern, RegexOptions.IgnoreCase))
    {
        yield return match.Groups["L"].Value;
    }
}
/// <summary>
/// Lazily yields the value of the <c>src</c> attribute of every img tag
/// found in the supplied HTML, in the order the tags appear.
/// </summary>
/// <param name="inputHTML">HTML text to scan.</param>
/// <returns>One string per matching tag; empty when nothing matches.</returns>
static IEnumerable<String> GetImageLinks(String inputHTML)
{
    // \k<Q> forces the closing quote to be the same character as the opening
    // one; without it a value such as "it's.png" would be captured as "it".
    const string pattern = @"<img\b[^\<\>]+?\bsrc\s*=\s*(?<Q>[""'])(?<L>.+?)\k<Q>[^\<\>]*?\>";
    foreach (Match match in Regex.Matches(inputHTML, pattern, RegexOptions.IgnoreCase))
    {
        yield return match.Groups["L"].Value;
    }
}
/// <summary>
/// Lazily yields the value of the <c>href</c> attribute of every anchor tag
/// found in the supplied HTML, in the order the tags appear.
/// </summary>
/// <param name="inputHTML">HTML text to scan.</param>
/// <returns>One string per matching tag; empty when nothing matches.</returns>
static IEnumerable<String> GetAnchorLinks(String inputHTML)
{
    // \k<Q> forces the closing quote to be the same character as the opening
    // one, so an href containing the other quote character is kept intact.
    const string pattern = @"<a\b[^\<\>]+?\bhref\s*=\s*(?<Q>[""'])(?<L>.+?)\k<Q>[^\<\>]*?\>";
    foreach (Match match in Regex.Matches(inputHTML, pattern, RegexOptions.IgnoreCase))
    {
        yield return match.Groups["L"].Value;
    }
}
/// <summary>
/// One page of posts: the posts themselves plus the token used to decide
/// whether another page should be requested.
/// </summary>
public class BloggerPostsResponse
{
    /// <summary>Continuation token; paging stops once this is null.</summary>
    public string NextPageToken { get; set; }

    /// <summary>Posts contained in this page.</summary>
    public List<PostItem> Items { get; set; }
}
/// <summary>
/// A single blog post as consumed by the migration.
/// </summary>
public class PostItem
{
    /// <summary>Identifier used to de-duplicate posts.</summary>
    public string Id { get; set; }

    /// <summary>Publish timestamp; written to the output as yyyy-MM-dd.</summary>
    public DateTime Published { get; set; }

    /// <summary>Post URL; its last segment names the output file and image folder.</summary>
    public string Url { get; set; }

    /// <summary>Post title.</summary>
    public string Title { get; set; }

    /// <summary>Post body; starts as HTML and is rewritten in place during migration.</summary>
    public string Content { get; set; }

    /// <summary>Labels attached to the post; may be null when the post has none.</summary>
    public List<string> Labels { get; set; }
}
/// <summary>
/// One file entry of a gist, deserialized from an element of the "files"
/// object in the gist JSON.
/// </summary>
/// <remarks>
/// Member names are intentionally lowercase / snake_case; presumably they
/// mirror the JSON keys so that deserialization binds them — do not rename
/// without checking the serializer configuration.
/// </remarks>
public class GistFile
{
    /// <summary>File name of the gist file.</summary>
    public string filename { get; set; }

    /// <summary>Type string reported for the file.</summary>
    public string type { get; set; }

    /// <summary>Text content of the file.</summary>
    public string content { get; set; }

    /// <summary>URL of the raw file.</summary>
    public string raw_url { get; set; }
}
