Skip to content

Instantly share code, notes, and snippets.

@bricelam
Created February 9, 2023 21:45
Show Gist options
  • Save bricelam/24efc068b18b893baeeb0bb4e3f1007f to your computer and use it in GitHub Desktop.
Save bricelam/24efc068b18b893baeeb0bb4e3f1007f to your computer and use it in GitHub Desktop.
Copies comments from a Disqus export to GitHub Discussions for giscus
using System.Diagnostics;
using System.Text.RegularExpressions;
using System.Xml.Linq;
using Octokit.GraphQL;
using Octokit.GraphQL.Model;
// Load the Disqus XML export and index its <thread> and <post> elements by dsq:id.
var export = XElement.Load("disqus-export.xml");
var defaultNs = export.GetDefaultNamespace();
var dsqNs = export.GetNamespaceOfPrefix("dsq")!;

// Reads the numeric dsq:id attribute carried by the given element.
long ReadDsqId(XElement node) => long.Parse(node.Attribute(dsqNs + "id")!.Value);

var threads = new Dictionary<long, Thread>();
var posts = new Dictionary<long, Post>();

foreach (var element in export.Elements())
{
    switch (element.Name.LocalName)
    {
        case "thread":
        {
            var threadId = ReadDsqId(element);
            var link = new Uri(element.Element(defaultNs + "link")!.Value);
            threads.Add(threadId, new Thread { Link = link });
            break;
        }

        case "post":
        {
            // Only posts flagged neither deleted nor spam are migrated.
            var isLive = element.Element(defaultNs + "isDeleted")!.Value == "false"
                && element.Element(defaultNs + "isSpam")!.Value == "false";
            if (!isLive)
            {
                continue;
            }

            var postId = ReadDsqId(element);
            var authorElement = element.Element(defaultNs + "author")!;

            // <parent> is optional; it is absent on posts that are not replies.
            var parentIdText = element.Element(defaultNs + "parent")?.Attribute(dsqNs + "id")!.Value;

            posts.Add(
                postId,
                new Post
                {
                    Id = postId,
                    Message = element.Element(defaultNs + "message")!.Value,
                    CreatedAt = DateTime.Parse(element.Element(defaultNs + "createdAt")!.Value).ToUniversalTime(),
                    Author = new Author
                    {
                        Name = authorElement.Element(defaultNs + "name")!.Value,
                        Username = authorElement.Element(defaultNs + "username")?.Value
                    },
                    ThreadId = ReadDsqId(element.Element(defaultNs + "thread")!),
                    ParentId = parentIdText is { } text
                        ? long.Parse(text)
                        : null
                });
            break;
        }
    }
}
// GitHub GraphQL connection used for every query and mutation in this script.
// TODO: Use an API key with "public_repo"
var connection = new Connection(new ProductHeaderValue("Disqus"), "TODO");

// TODO: Update to your repo and user
var repository = new Query().Repository("bricelam.github.io", "bricelam");

// Node ID of the repository; needed when creating discussions.
var repositoryIdQuery = repository.Select(r => r.Id);
var repositoryId = await connection.Run(repositoryIdQuery);

// Find the discussion category named "General".
var categoriesQuery = repository
    .DiscussionCategories()
    .AllPages()
    .Select(
        c => new
        {
            c.Id,
            c.Name
        });
var discussionCategories = await connection.Run(categoriesQuery);
var generalCategory = discussionCategories.First(c => c.Name == "General");
var categoryId = generalCategory.Id;

// Snapshot of the discussions already present in that category (id, number, title).
var discussionsQuery = repository
    .Discussions(categoryId: categoryId)
    .AllPages()
    .Select(
        d => new
        {
            d.Id,
            d.Number,
            d.Title
        });
var discussions = await connection.Run(discussionsQuery);
// Copies every surviving Disqus post, oldest first, into the GitHub discussion that
// corresponds to the page the post was written on, creating the discussion if needed.

// One HttpClient for the whole run: creating a client per request leaks sockets.
using var httpClient = new HttpClient();

// Discussions created during this run, keyed by title. `discussions` is a snapshot taken
// before the loop, so without this a second Disqus thread that resolves to the same page
// title would create a duplicate discussion.
var createdDiscussions = new Dictionary<string, (ID Id, int Number)>();

// Pause after each GitHub mutation.
// TODO: Fine-tune these
var mutationDelay = TimeSpan.FromSeconds(10);

foreach (var post in posts.Values
    .OrderBy(p => p.CreatedAt)
    // TODO: If an error is encountered, uncomment and paste the last id tried below
    //.SkipWhile(p => p.Id != 1000000000L)
    )
{
    // Printed before any work so the last id on the console is the one to resume from.
    Console.WriteLine(post.Id);

    var thread = threads[post.ThreadId];
    if (thread.NotFound)
    {
        continue;
    }

    if (!thread.DiscussionId.HasValue)
    {
        string html;
        try
        {
            html = await httpClient.GetStringAsync(thread.Link);

            // Follow a single client-side (meta refresh) redirect if the page is only a stub.
            var redirectMatch = Regex.Match(html, @"<meta http-equiv=""refresh"" content=""0; url=(.+)"">");
            if (redirectMatch.Success)
            {
                // The attribute value is HTML-encoded and may be relative to the page.
                var target = new Uri(
                    thread.Link,
                    System.Net.WebUtility.HtmlDecode(redirectMatch.Groups[1].Value));
                html = await httpClient.GetStringAsync(target);
            }
        }
        // Only a genuine 404 means the page is gone. Any other failure propagates so the
        // run stops and can be resumed, instead of silently dropping the thread's comments.
        catch (HttpRequestException ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            thread.NotFound = true;
            Console.WriteLine($"404: {thread.Link}");
            continue;
        }

        // Attribute values in the page source are HTML-encoded; decode them so the title
        // compares equal to (and is stored as) plain text.
        var title = System.Net.WebUtility.HtmlDecode(
            Regex.Match(html, @"<meta property=""og:title"" content=""(.+)"" />").Groups[1].Value);
        if (string.IsNullOrEmpty(title))
        {
            throw new InvalidOperationException($"No og:title found at {thread.Link}");
        }

        var existing = discussions.FirstOrDefault(d => d.Title == title);
        if (existing is not null)
        {
            thread.DiscussionId = existing.Id;
            thread.DiscussionNumber = existing.Number;
        }
        else if (createdDiscussions.TryGetValue(title, out var created))
        {
            thread.DiscussionId = created.Id;
            thread.DiscussionNumber = created.Number;
        }
        else
        {
            var description = System.Net.WebUtility.HtmlDecode(
                Regex.Match(html, @"<meta property=""og:description"" content=""(.+)"" />").Groups[1].Value);

            var newDiscussion = await connection.Run(
                new Mutation()
                    .CreateDiscussion(
                        new CreateDiscussionInput
                        {
                            RepositoryId = repositoryId,
                            Title = title,
                            Body = $"""
                                # {title}
                                {description}
                                {thread.Link}
                                """,
                            CategoryId = categoryId
                        })
                    .Select(
                        d => new
                        {
                            d.Discussion.Id,
                            d.Discussion.Number
                        }));

            thread.DiscussionId = newDiscussion.Id;
            thread.DiscussionNumber = newDiscussion.Number;
            createdDiscussions[title] = (newDiscussion.Id, newDiscussion.Number);

            await Task.Delay(mutationDelay);
        }
    }

    Debug.Assert(thread.DiscussionId.HasValue);
    Debug.Assert(thread.DiscussionNumber.HasValue);

    // Walk up to the top-most ancestor that is still present and reply to that one
    // (presumably because discussion replies nest only one level deep - confirm).
    // TODO: Handle deleted parents better
    Post? parent = null;
    var current = post;
    while (current.ParentId is long parentId
        && posts.TryGetValue(parentId, out var ancestor))
    {
        current = ancestor;
        parent = ancestor;
    }

    // When continuing after an error, we may need to find the corresponding comment again
    if (parent is not null
        && parent.CommentId is null)
    {
        var parentHeader = FormatHeader(parent);
        var comments = await connection.Run(
            repository
                .Discussion(thread.DiscussionNumber.Value)
                .Comments()
                .AllPages()
                .Select(
                    c => new
                    {
                        c.Id,
                        c.Body
                    }));

        // The header is generated text, so compare ordinally rather than by culture.
        parent.CommentId = comments.First(c => c.Body.StartsWith(parentHeader, StringComparison.Ordinal)).Id;
    }

    var header = FormatHeader(post);
    post.CommentId = await connection.Run(
        new Mutation()
            .AddDiscussionComment(
                new AddDiscussionCommentInput
                {
                    DiscussionId = thread.DiscussionId.Value,
                    ReplyToId = parent?.CommentId,
                    // TODO: Update the URL below
                    Body = $"""
                        {header}
                        {post.Message}
                        _Copied from original comment on [Disqus](https://disqus.com/home/forum/TODO)_
                        """
                })
            .Select(c => c.Comment.Id));

    await Task.Delay(mutationDelay);
}

// Renders an author as a Markdown link to their Disqus profile, or as a plain name
// when the export has no username for them.
static string FormatAuthor(Author person)
    => !string.IsNullOrEmpty(person.Username)
        ? $"[{person.Name}](https://disqus.com/by/{person.Username})"
        : person.Name;

// First line of every copied comment. The resume path finds already-posted comments by
// this exact prefix, so posting and matching must share this one implementation.
static string FormatHeader(Post comment)
    => $"_From **{FormatAuthor(comment.Author)}** on {comment.CreatedAt:MMMM d, yyyy H:mm}_";
/// <summary>
/// A Disqus thread (one commented page) and the GitHub discussion it maps to.
/// </summary>
class Thread
{
    /// <summary>URL of the page the thread belongs to, as given in the export.</summary>
    public required Uri Link { get; set; }

    /// <summary>ID of the matching GitHub discussion; <c>null</c> until found or created.</summary>
    public ID? DiscussionId { get; set; }

    /// <summary>Number of the matching GitHub discussion; <c>null</c> until found or created.</summary>
    public int? DiscussionNumber { get; set; }

    /// <summary>
    /// Set when fetching <see cref="Link"/> failed with an HTTP request error
    /// (expected to be a 404); posts on such threads are skipped.
    /// </summary>
    public bool NotFound { get; set; }
}
/// <summary>
/// A single Disqus comment read from the export.
/// </summary>
class Post
{
    /// <summary>The post's <c>dsq:id</c>.</summary>
    public required long Id { get; set; }

    /// <summary>Content of the post's <c>message</c> element, copied verbatim.</summary>
    public required string Message { get; set; }

    /// <summary>Creation time, converted to UTC when the export is read.</summary>
    public required DateTime CreatedAt { get; set; }

    /// <summary>Who wrote the post.</summary>
    public required Author Author { get; set; }

    /// <summary><c>dsq:id</c> of the thread the post belongs to.</summary>
    public required long ThreadId { get; set; }

    /// <summary><c>dsq:id</c> of the post being replied to; <c>null</c> when the post has no parent.</summary>
    public long? ParentId { get; set; }

    /// <summary>ID of the GitHub discussion comment created for this post; <c>null</c> until known.</summary>
    public ID? CommentId { get; set; }
}
/// <summary>
/// The author of a Disqus post.
/// </summary>
class Author
{
    /// <summary>Display name from the export's <c>name</c> element.</summary>
    public required string Name { get; set; }

    /// <summary>Disqus username; <c>null</c> when the export has no <c>username</c> element.</summary>
    public string? Username { get; set; }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment