Skip to content

Instantly share code, notes, and snippets.

@m-radzikowski
Last active April 7, 2024 20:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save m-radzikowski/00d92b412ae3f6f944b2eed116b9aed8 to your computer and use it in GitHub Desktop.
Save m-radzikowski/00d92b412ae3f6f944b2eed116b9aed8 to your computer and use it in GitHub Desktop.
Migrate comments from WordPress wpDiscuz (https://wpdiscuz.com/) to Giscus (https://giscus.app/) powered by GitHub Discussions
import hashlib
import re
from time import sleep
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
from markdownify import markdownify as md
"""
This script migrates comments from WordPress wpDiscuz plugin (https://wpdiscuz.com/)
to Giscus (https://giscus.app/) powered by GitHub Discussions.
It fetches the WordPress page, parses it to extract comments,
and creates new discussions in the GitHub repository using GitHub GraphQL API.
This is a one-time script that should be run only once for each post.
Otherwise, it will create duplicate discussions and comments.
The script is quick and dirty, so it may not handle all edge cases.
Review the parsed comments in dry mode first.
The script assumes the Giscus discussion mapping mode "pathname".
It works with (or without) "strict title matching" option.
To run:
1. Install the required packages: `pip install requests beautifulsoup4 markdownify gql[all]`
2. Set the configuration parameters below.
3. Run the script: `python3 migrate-comments.py`
Note that the GitHub API has rate limits. The script will sleep for 1 second after each comment creation
to avoid hitting the rate limits, but you may still hit them if there are too many comments to migrate.
In that case, you can delete the half-migrated discussion and run the script again for the given post.
See more on the rate limits here: https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api
"""
########## Configuration ##########
# In dry-run mode the script does not create any discussions or comments;
# it only prints the parsed comments to the console.
dryrun = False
# GitHub Personal Access Token with "Discussions" read and write permissions on the comments repository; create: https://github.com/settings/tokens
token = ""
# GitHub repository ID and discussions category ID, matching parameters in the Giscus script
repository_id = "" # "data-repo-id" attribute in the Giscus script
category_id = "" # "data-category-id" attribute in the Giscus script
# List of WordPress post URLs to migrate
posts = [
# "https://example.com/wordpress-to-github-discussions-migration/"
]
########## Configuration End ##########
# Reverses the configured list in place, so the last URL listed is migrated first.
posts.reverse() # start from the oldest post
# GraphQL client for the GitHub API, authenticated with the token above.
# It is created at import time and shared by create_discussion() and create_comment().
transport = AIOHTTPTransport(url="https://api.github.com/graphql", headers={"Authorization": f"Bearer {token}"})
# fetch_schema_from_transport=True asks gql to fetch the GraphQL schema from the API.
client = Client(transport=transport, fetch_schema_from_transport=True)
def main():
    """Migrate every URL in the module-level ``posts`` list, one after another."""
    for post_url in posts:
        migrate_post(post_url)
def migrate_post(url):
    """Migrate the wpDiscuz comments of one WordPress post to a GitHub discussion.

    Downloads the page at ``url``, extracts the top-level comments from the
    ``wpd-thread-list`` container and, if there are any, creates one discussion
    for the post. Every top-level comment becomes a discussion comment, and all
    comments nested below it (at any depth) become replies to that comment.

    In dry-run mode (``dryrun`` is true) the comment texts are printed;
    ``create_discussion`` and ``create_comment`` are still called but return
    without contacting GitHub.

    Args:
        url: Full URL of the WordPress post.

    Raises:
        SystemExit: With exit code 1 if the page does not return HTTP 200.
    """
    response = requests.get(url)
    if response.status_code != 200:
        print('Failed to fetch the page')
        # Raise directly instead of calling exit(), which only exists when the
        # "site" module is loaded.
        raise SystemExit(1)

    soup = BeautifulSoup(response.content, 'html.parser')

    # Fall back to neutral values instead of crashing on pages that lack a
    # heading or an Open Graph description.
    heading = soup.find("h1")
    title = heading.get_text().strip() if heading is not None else url
    meta = soup.find("meta", property="og:description")
    description = meta.get("content", "") if meta is not None else ""

    print(f"# Post: {title}")

    thread = soup.find("div", class_="wpd-thread-list")
    if thread is None:
        # No wpDiscuz thread container on the page: nothing to migrate.
        print("No wpDiscuz comment thread found, skipping")
        return

    comments = get_comments(thread, deep=False)
    if not comments:
        # Do not create an empty discussion for posts without comments.
        return

    discussion_id = create_discussion(url, description)

    for element, text in comments:
        if dryrun:
            print("\n" + text)
        comment_id = create_comment(discussion_id, text, None)

        # deep=True collects every nested comment, so replies to replies are
        # all attached directly to the top-level comment.
        for _reply_element, reply_text in get_comments(element, deep=True):
            if dryrun:
                print("----------\n")
                print(reply_text)
            create_comment(discussion_id, reply_text, comment_id)

        if dryrun:
            print("--------------------\n--------------------")
def get_comments(parent, deep: bool):
    """Extract wpDiscuz comments found below ``parent`` as Markdown texts.

    Args:
        parent: Parsed HTML element to search (must offer ``find_all``).
        deep: Passed to ``find_all`` as ``recursive``. ``False`` looks only at
            direct children of ``parent``; ``True`` searches all descendants.

    Returns:
        A list of ``(element, text)`` tuples in document order. ``element`` is
        the comment's ``div`` and ``text`` is the Markdown to post: an
        attribution line with author and date, the converted comment content
        and, when the vote count is non-zero, a reactions line.
    """
    results = []
    for comment in parent.find_all("div", class_="comment", recursive=deep):
        author = comment.find("div", class_="wpd-comment-author").get_text().strip()
        author = f"**{author}**"

        # Keep only the first three space-separated tokens of the title
        # attribute (the sample output shows a date such as "April 7, 2024").
        date = comment.find("div", class_="wpd-comment-date")["title"]
        date = " ".join(date.split(" ")[0:3])

        content = comment.find("div", class_="wpd-comment-text").prettify().strip()
        content = convert_to_markdown(content)

        # A missing or empty vote element counts as zero votes instead of
        # aborting the whole migration.
        vote = comment.find("div", class_="wpd-vote-result")
        vote_text = vote.get_text().strip() if vote is not None else ""
        upvotes = int(vote_text) if vote_text else 0

        if upvotes != 0:
            emoji = "👍" if upvotes > 0 else "👎"
            reactions = f"_Reactions: {abs(upvotes)} x " + emoji + "_\n"
        else:
            reactions = ""

        text = f"_From {author} on {date} (migrated from WordPress):_" + "\n\n" + content + "\n\n" + reactions
        results.append((comment, text))
    return results
def convert_to_markdown(content):
    """Collapse an HTML fragment onto a single line and convert it to Markdown.

    Args:
        content: HTML source of a comment body.

    Returns:
        The Markdown produced by markdownify, stripped of surrounding
        whitespace. Asterisks, underscores and other Markdown characters in
        the text are left unescaped.
    """
    # Remove the whitespace that follows each line break.
    without_indent = re.sub(r'\n\s+', '\n', content)
    # Put everything on one line, then drop the space after every ">".
    single_line = without_indent.replace("\n", " ").replace("> ", ">")
    converted = md(
        single_line,
        escape_asterisks=False,
        escape_underscores=False,
        escape_misc=False,
    )
    return converted.strip()
def create_discussion(url, description):
    """Create the GitHub discussion that will hold a post's comments.

    The discussion title is the URL path without its leading slash. The body
    contains that path as a heading, the description, the URL and an HTML
    comment with the SHA-1 of the path (presumably what Giscus "strict title
    matching" looks up; verify against the Giscus documentation).

    Args:
        url: Full URL of the WordPress post.
        description: Short description of the post, placed in the body.

    Returns:
        The node ID of the created discussion, or ``None`` in dry-run mode.
    """
    if dryrun:
        return None

    pathname = urlparse(url).path[1:]
    sha1 = hashlib.sha1(pathname.encode("utf-8")).hexdigest()
    body = f"# {pathname}\n\n{description}\n\n{url}\n\n<!-- sha1: {sha1} -->"

    # Pass all values as GraphQL variables instead of interpolating them into
    # the query text: quotes, backslashes or newlines in the title or body can
    # then neither break nor alter the mutation.
    query = gql("""
        mutation ($input: CreateDiscussionInput!) {
            createDiscussion(input: $input) {
                discussion {
                    id
                }
            }
        }
    """)
    variables = {
        "input": {
            "repositoryId": repository_id,
            "categoryId": category_id,
            "body": body,
            "title": pathname,
        },
    }

    result = client.execute(query, variable_values=variables)
    discussion_id = result["createDiscussion"]["discussion"]["id"]
    print(f'Created discussion "{pathname}" with ID: {discussion_id}')
    return discussion_id
def create_comment(discussion_id, body, reply_to_id):
    """Add a comment, or a reply to a comment, to a GitHub discussion.

    Args:
        discussion_id: Node ID of the discussion to comment on.
        body: Markdown text of the comment.
        reply_to_id: Node ID of the comment to reply to, or ``None`` to create
            a top-level comment.

    Returns:
        The node ID of the created comment, or ``None`` in dry-run mode.
    """
    if dryrun:
        return None

    # Pass all values as GraphQL variables instead of interpolating them into
    # the query text, so arbitrary comment content cannot break or alter the
    # mutation. A reply_to_id of None is sent as JSON null.
    query = gql("""
        mutation ($input: AddDiscussionCommentInput!) {
            addDiscussionComment(input: $input) {
                comment {
                    id
                }
            }
        }
    """)
    variables = {
        "input": {
            "discussionId": discussion_id,
            "body": body,
            "replyToId": reply_to_id,
        },
    }

    result = client.execute(query, variable_values=variables)
    comment_id = result["addDiscussionComment"]["comment"]["id"]
    print(f"Created comment with ID: {comment_id}")

    sleep(1)  # sleep for 1 second to avoid hitting rate limits
    return comment_id
def escape_body(body):
    """Escape text so it can be placed between double quotes in a GraphQL query.

    Double quotes, backslashes, newlines, carriage returns and other control
    characters are replaced by backslash escape sequences. Non-ASCII
    characters are kept as they are.

    Args:
        body: The text to escape (a ``str``).

    Returns:
        The escaped text, without surrounding quotes.
    """
    # Local import so this function does not depend on the file's import block.
    import json

    # JSON string escapes (\", \\, \n, \r, \t, \b, \f, \uXXXX) are all valid
    # GraphQL string escapes, so reuse the JSON encoder and strip the quotes
    # it adds around the value.
    return json.dumps(body, ensure_ascii=False)[1:-1]
# Run the migration only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
@m-radzikowski
Copy link
Author

Sample output comment:

From John on April 7, 2024 (migrated from WordPress):

Much comments, very wow. With formatting.

and code blocks too

Reactions: 3 x 👍

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment