Created
February 9, 2023 21:45
-
-
Save bricelam/24efc068b18b893baeeb0bb4e3f1007f to your computer and use it in GitHub Desktop.
Copies comments from a Disqus export to GitHub Discussions for giscus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Diagnostics; | |
using System.Text.RegularExpressions; | |
using System.Xml.Linq; | |
using Octokit.GraphQL; | |
using Octokit.GraphQL.Model; | |
// Load the Disqus XML export and index its threads and posts by their Disqus ids.
var disqus = XElement.Load("disqus-export.xml");
var ns = disqus.GetDefaultNamespace();
var dsq = disqus.GetNamespaceOfPrefix("dsq")!;

// The export is machine-readable data, so ids and timestamps are parsed with the
// invariant culture instead of whatever culture the current machine runs under.
var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

var threads = new Dictionary<long, Thread>();
var posts = new Dictionary<long, Post>();
foreach (var element in disqus.Elements())
{
    if (element.Name.LocalName == "thread")
    {
        threads.Add(
            long.Parse(element.Attribute(dsq + "id")!.Value, invariantCulture),
            new Thread
            {
                Link = new Uri(element.Element(ns + "link")!.Value)
            });
    }
    else if (element.Name.LocalName == "post")
    {
        // Posts flagged as deleted or spam are not migrated.
        if (element.Element(ns + "isDeleted")!.Value != "false"
            || element.Element(ns + "isSpam")!.Value != "false")
        {
            continue;
        }

        var id = long.Parse(element.Attribute(dsq + "id")!.Value, invariantCulture);
        var author = element.Element(ns + "author")!;

        // The <parent> element is optional; when it is absent the post gets no ParentId.
        var parent = element.Element(ns + "parent")?.Attribute(dsq + "id")!.Value;

        posts.Add(
            id,
            new Post
            {
                Id = id,
                Message = element.Element(ns + "message")!.Value,
                CreatedAt = DateTime.Parse(element.Element(ns + "createdAt")!.Value, invariantCulture).ToUniversalTime(),
                Author = new Author
                {
                    Name = author.Element(ns + "name")!.Value,
                    // <username> is optional, hence the null-conditional access.
                    Username = author.Element(ns + "username")?.Value
                },
                ThreadId = long.Parse(element.Element(ns + "thread")!.Attribute(dsq + "id")!.Value, invariantCulture),
                ParentId = parent is null
                    ? null
                    : long.Parse(parent, invariantCulture)
            });
    }
}
// Connect to the GitHub GraphQL API.
// TODO: Use an API key with "public_repo"
var productHeader = new ProductHeaderValue("Disqus");
var connection = new Connection(productHeader, "TODO");

// Root query for the repository whose discussions receive the comments.
// TODO: Update to your repo and user
var repository = new Query().Repository("bricelam.github.io", "bricelam");

// The repository id is required later when creating discussions.
var repositoryIdQuery = repository.Select(r => r.Id);
var repositoryId = await connection.Run(repositoryIdQuery);

// Fetch every discussion category and pick the one named "General".
var categoriesQuery = repository
    .DiscussionCategories()
    .AllPages()
    .Select(c => new { c.Id, c.Name });
var categories = await connection.Run(categoriesQuery);
var generalCategory = categories.First(c => c.Name == "General");
var categoryId = generalCategory.Id;

// Fetch the discussions that already exist in that category so they can be matched by title.
var discussionsQuery = repository
    .Discussions(categoryId: categoryId)
    .AllPages()
    .Select(d => new { d.Id, d.Number, d.Title });
var discussions = await connection.Run(discussionsQuery);
// Copy every post, oldest first, into the GitHub discussion that corresponds to its thread.

// One HttpClient is shared by all page fetches; creating a new client per request
// leaves sockets behind and can exhaust them on long runs.
using var httpClient = new HttpClient();

// Pause applied after each GitHub mutation.
// TODO: Fine-tune this
var mutationDelay = TimeSpan.FromSeconds(10);

foreach (var post in posts.Values
    .OrderBy(p => p.CreatedAt)
    // TODO: If an error is encountered, uncomment and paste the last id tried below
    //.SkipWhile(p => p.Id != 1000000000L)
    )
{
    // Print the id being processed so a failed run can be resumed from it.
    Console.WriteLine(post.Id);

    var thread = threads[post.ThreadId];
    if (thread.NotFound)
    {
        continue;
    }

    if (!thread.DiscussionId.HasValue)
    {
        // First post seen for this thread: fetch the page to learn the title used for its discussion.
        string html;
        try
        {
            html = await httpClient.GetStringAsync(thread.Link);

            // Follow an immediate <meta http-equiv="refresh"> redirect, if the page is one.
            var redirectMatch = Regex.Match(html, @"<meta http-equiv=""refresh"" content=""0; url=(.+)"">");
            if (redirectMatch.Success)
            {
                html = await httpClient.GetStringAsync(redirectMatch.Groups[1].Value);
            }
        }
        catch (HttpRequestException ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            // Only a 404 marks the thread as gone. Any other HTTP failure propagates and stops
            // the run, instead of silently dropping every comment of the thread.
            thread.NotFound = true;
            Console.WriteLine($"404: {thread.Link}");
            continue;
        }

        // NOTE(review): the captured attribute values are used verbatim; if the page HTML-encodes
        // them (e.g. &amp;), they may need decoding before being compared or posted - confirm.
        var title = Regex.Match(html, @"<meta property=""og:title"" content=""(.+)"" />").Groups[1].Value;

        // Reuse an existing discussion with the same title when there is one.
        var discussion = discussions.FirstOrDefault(d => d.Title == title);
        thread.DiscussionId = discussion?.Id;
        thread.DiscussionNumber = discussion?.Number;

        if (!thread.DiscussionId.HasValue)
        {
            var description = Regex.Match(html, @"<meta property=""og:description"" content=""(.+)"" />").Groups[1].Value;
            var discussion1 = await connection.Run(
                new Mutation()
                    .CreateDiscussion(
                        new CreateDiscussionInput
                        {
                            RepositoryId = repositoryId,
                            Title = title,
                            Body = $"""
                                # {title}
                                {description}
                                {thread.Link}
                                """,
                            CategoryId = categoryId
                        })
                    .Select(
                        d => new
                        {
                            d.Discussion.Id,
                            d.Discussion.Number
                        }));
            thread.DiscussionId = discussion1.Id;
            thread.DiscussionNumber = discussion1.Number;

            // Non-blocking pause; Thread.Sleep would block the thread inside async code.
            await Task.Delay(mutationDelay);
        }
    }

    Debug.Assert(thread.DiscussionId.HasValue);
    Debug.Assert(thread.DiscussionNumber.HasValue);

    var username = !string.IsNullOrEmpty(post.Author.Username)
        ? $"[{post.Author.Name}](https://disqus.com/by/{post.Author.Username})"
        : post.Author.Name;

    // Walk up the reply chain as far as the known posts allow; the comment is added as a
    // reply to the last ancestor reached.
    // TODO: Handle deleted parents better
    Post? parent = null;
    var current = post;
    while (current.ParentId.HasValue
        && posts.TryGetValue(current.ParentId.Value, out var ancestor))
    {
        current = ancestor;
        parent = ancestor;
    }

    // When continuing after an error, we may need to find the corresponding comment again
    if (parent is not null
        && parent.CommentId is null)
    {
        var username1 = !string.IsNullOrEmpty(parent.Author.Username)
            ? $"[{parent.Author.Name}](https://disqus.com/by/{parent.Author.Username})"
            : parent.Author.Name;
        var comments = await connection.Run(
            repository
                .Discussion(thread.DiscussionNumber.Value)
                .Comments()
                .AllPages()
                .Select(
                    c => new
                    {
                        c.Id,
                        c.Body
                    }));

        // The header line written below identifies the copied comment; compare it ordinally
        // so the match does not depend on culture-specific comparison rules.
        var parentHeader = $"_From **{username1}** on {parent.CreatedAt:MMMM d, yyyy H:mm}_";
        parent.CommentId = comments.First(c => c.Body.StartsWith(parentHeader, StringComparison.Ordinal)).Id;
    }

    post.CommentId = await connection.Run(
        new Mutation()
            .AddDiscussionComment(
                new AddDiscussionCommentInput
                {
                    DiscussionId = thread.DiscussionId.Value,
                    ReplyToId = parent?.CommentId,
                    // TODO: Update the URL below
                    Body = $"""
                        _From **{username}** on {post.CreatedAt:MMMM d, yyyy H:mm}_
                        {post.Message}
                        _Copied from original comment on [Disqus](https://disqus.com/home/forum/TODO)_
                        """
                })
            .Select(c => c.Comment.Id));

    await Task.Delay(mutationDelay);
}
/// <summary>
/// A thread from the Disqus export together with the GitHub discussion its posts are copied to.
/// </summary>
internal class Thread
{
    /// <summary>The thread's link, taken from the export's <c>link</c> element.</summary>
    public required Uri Link { get; set; }

    /// <summary>
    /// Set by the migration loop when requesting <see cref="Link"/> fails; posts of such threads are skipped.
    /// </summary>
    public bool NotFound { get; set; }

    /// <summary>Id of the matching GitHub discussion; <c>null</c> until it has been found or created.</summary>
    public ID? DiscussionId { get; set; }

    /// <summary>Number of the matching GitHub discussion; <c>null</c> until it has been found or created.</summary>
    public int? DiscussionNumber { get; set; }
}
/// <summary>
/// A single comment from the Disqus export, plus the id of the GitHub comment created for it.
/// </summary>
internal class Post
{
    /// <summary>The post's Disqus id.</summary>
    public required long Id { get; set; }

    /// <summary>Disqus id of the thread the post belongs to.</summary>
    public required long ThreadId { get; set; }

    /// <summary>Disqus id of the post this one replies to; <c>null</c> when the export lists no parent.</summary>
    public long? ParentId { get; set; }

    /// <summary>Creation time, converted to UTC when the export is read.</summary>
    public required DateTime CreatedAt { get; set; }

    /// <summary>Who wrote the post.</summary>
    public required Author Author { get; set; }

    /// <summary>Content of the export's <c>message</c> element.</summary>
    public required string Message { get; set; }

    /// <summary>Id of the GitHub discussion comment created for this post; <c>null</c> until it is known.</summary>
    public ID? CommentId { get; set; }
}
/// <summary>
/// The author of a Disqus post.
/// </summary>
internal class Author
{
    /// <summary>
    /// Disqus username, used to link to the author's Disqus profile; <c>null</c> when the export has none.
    /// </summary>
    public string? Username { get; set; }

    /// <summary>Display name of the author.</summary>
    public required string Name { get; set; }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment