Skip to content

Instantly share code, notes, and snippets.

@chesterbr
Created February 14, 2024 00:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chesterbr/6368adb7530f6d582046a5d93a4d4a49 to your computer and use it in GitHub Desktop.
Save chesterbr/6368adb7530f6d582046a5d93a4d4a49 to your computer and use it in GitHub Desktop.
A script that imports Disqus posts into a format usable by Staticman
// import_disqus.mjs
// =================
//
// Converts Disqus XML export to a format compatible with Staticman (https://staticman.net/)
//
// Requires Node.js and the following packages:
//
// npm install xml2js crypto-js strip-indent
//
// (c) 2024 Carlos Duarte Do Nascimento (https://chester.me)
// Released under the MIT License (https://opensource.org/licenses/MIT)
import fs from 'fs';
import { Parser } from 'xml2js';
import path from 'path';
import stripIndent from 'strip-indent';
import CryptoJS from 'crypto-js';
// Parser used to turn the Disqus XML export into a plain JavaScript object.
const parser = new Parser();

// Reads data.xml from the current directory and writes one YAML file per
// comment to comments/<post slug>/entry<timestamp>.yml.
fs.readFile('data.xml', (readErr, data) => {
  // Fail with the real I/O error instead of handing `undefined` to the parser.
  if (readErr) {
    throw readErr;
  }

  parser.parseString(data, (parseErr, result) => {
    // Same for malformed XML: report the parser's own error.
    if (parseErr) {
      throw parseErr;
    }

    // Yes, Disqus calls a post a "thread" and a comment a "post" 🤦;
    // let's first untangle this mess. Fall back to empty lists when either
    // element is absent from the export.
    const posts = result?.disqus?.thread ?? [];
    const comments = result?.disqus?.post ?? [];

    // All we need from posts is the URL and slug, so let's build a
    // dictionary to quickly retrieve them from the post (thread) ID
    const postInfo = new Map();
    for (const post of posts) {
      const url = post.link[0];
      // Slug = last path segment of the URL, without a trailing "/" or ".html"
      const slug = url.replace(/\/$/, "").split('/').slice(-1)[0].replace(/\.html$/, "");
      postInfo.set(post['$']['dsq:id'], { slug, url });
    }

    // Now we can create one file for each comment
    for (const comment of comments) {
      if (comment.isDeleted[0] === 'true' || comment.isSpam[0] === 'true') {
        continue;
      }

      const disqusCommentId = comment['$']['dsq:id'];
      const disqusUnixTimestamp = new Date(comment.createdAt[0]).getTime() / 1000;
      const threadId = comment.thread[0]['$']['dsq:id'];
      const post = postInfo.get(threadId);
      if (!post) {
        // Without the thread there is no slug/URL to file the comment under.
        console.warn(`Skipping comment ${disqusCommentId}: thread ${threadId} not found in export`);
        continue;
      }

      const username = comment.author[0].username ? comment.author[0].username[0] : "";
      const parentId = comment.parent ? comment.parent[0]['$']['dsq:id'] : '';
      // The site owner's username maps to a fixed hash; everyone else gets the
      // MD5 of a synthetic "<username>@chester.me" address.
      const email = username === "chesterbr"
        ? "3a49ee98333d753103cf708e40d36984"
        : CryptoJS.MD5(username+"@chester.me").toString();

      // Assemble the YAML line by line so the result does not depend on how
      // this source file is indented (comment messages may span several lines).
      const yml = [
        `_id: ${disqusCommentId}`,
        `_parent: ${post.url}`,
        `replying_to_uid: '${parentId}'`,
        `message: ${singleQuote(comment.message[0])}`,
        `name: ${singleQuote(comment.author[0].name[0])}`,
        `email: '${email}'`,
        `hp: ''`,
        `date: ${disqusUnixTimestamp}`,
        '', // trailing newline
      ].join('\n')
        .replace(/[\x00-\x09\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, ""); // 🔪 control characters (keeps \n and \r)

      // Use comment ID as milliseconds in filename for uniqueness
      const dirname = "comments/" + post.slug;
      const filename = `entry${disqusUnixTimestamp * 1000 + Number(disqusCommentId) % 1000}.yml`;

      // `recursive: true` makes this a no-op when the directory already exists
      fs.mkdirSync(dirname, { recursive: true });
      fs.writeFileSync(path.join(dirname, filename), yml);
    }
  });
});
/**
 * Wraps a string in single quotes, doubling every single quote inside it.
 *
 * Used to embed comment text and author names as quoted values in the
 * generated YAML.
 *
 * @param {string} str - Text to quote.
 * @returns {string} The text with each `'` replaced by `''`, enclosed in `'`.
 */
function singleQuote(str) {
  const escaped = str.split("'").join("''");
  return "'" + escaped + "'";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment