Skip to content

Instantly share code, notes, and snippets.

@CyberShadow
Last active November 5, 2018 05:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CyberShadow/656e9dd8b171cd0bf1d27cbea90c73db to your computer and use it in GitHub Desktop.
Save CyberShadow/656e9dd8b171cd0bf1d27cbea90c73db to your computer and use it in GitHub Desktop.
Convert WordPress comments to HashOver-next
/wp2hashover
#!/usr/bin/env dub
/+ dub.sdl:
name "wp2hashover"
dependency "ae" version="==0.0.2155"
+/
import std.algorithm.iteration;
import std.algorithm.sorting;
import std.conv;
import std.exception;
import std.file;
import std.path;
import std.regex;
import std.stdio;
import std.string;
import ae.utils.digest;
import ae.utils.funopt;
import ae.utils.main;
import ae.utils.regex;
import ae.utils.time.common;
import ae.utils.time.format;
import ae.utils.time.parse;
import ae.utils.xmllite;
/**
 * Converts the comments in a WordPress XML export into per-thread XML
 * comment files under `<outputHashoverCommentsDirectory>/threads/<slug>/`.
 *
 * The export is read fully into memory and parsed, then walked as
 * rss -> channel -> item. For every item that has at least one
 * `wp:comment`, each comment becomes its own small `<comment>` XML document
 * and is written to a file whose name encodes its position in the reply
 * tree ("1.xml", "1-1.xml", "2.xml", ...).
 *
 * Params:
 *   inputWordpressXMLFile = path of the WordPress export file to read
 *   outputHashoverCommentsDirectory = base directory; a "threads" subdirectory
 *       is created beneath it as needed
 *
 * Throws: an exception (via `enforce`) if an item with comments has no
 *   `<link>`, or if a comment names a parent ID that is never reached while
 *   walking the reply tree from parent 0.
 */
void wp2hashover(string inputWordpressXMLFile, string outputHashoverCommentsDirectory)
{
auto wp = inputWordpressXMLFile.readText.xmlParse;
// Each `match!` takes (predicate, handler) pairs and runs the handler of the
// first predicate that accepts the node; `{}` handlers deliberately do nothing.
wp.children.each!(n => n.match!(
isNode!(XmlNodeType.Meta, "xml"), {},
isNode!(XmlNodeType.Comment), {},
isNode!"rss", n => n.children.each!(n => n.match!(
isNode!"channel", n => n.children.each!(n => n.match!(
isNode!"item", (n)
{
// State collected for this one item.
string link;
struct Comment { int id; XmlDocument xml; }
// Comments grouped by the ID of their parent comment.
Comment[][int] comments;
n.children.each!(n => n.match!(
isNode!"link", n => link = n.text,
isNode!"wp:comment", (XmlNode n)
{
// Start from an empty <comment/> document and append one child element per field.
auto cDoc = xmlParse(`<?xml version="1.0" encoding="UTF-8"?><comment/>`);
// Appends <name>value</name> to the <comment> root of cDoc.
void add(string name, string value)
{
auto t = new XmlNode(XmlNodeType.Text, value);
auto n = new XmlNode(XmlNodeType.Node, name);
n.children ~= t;
auto c = cDoc["comment"];
c.children ~= n;
}
// Both stay 0 (int.init) if the corresponding element is absent.
int id, parent;
n.children.each!(n => n.match!(
isNode!"wp:comment_id", (n) {
id = n.text.to!int;
add("legacy_id", "comment-" ~ n.text);
},
isNode!"wp:comment_author", n => add("name", n.text),
isNode!"wp:comment_author_email", (n) {
add("email_raw", n.text); // usual "email" field is encrypted; emit this for reference only
// Lower-cased MD5 digest string of the lower-cased address.
add("email_hash", getDigestString!MD5(n.text.toLower).toLower);
},
isNode!"wp:comment_author_url", n => add("website", n.text),
isNode!"wp:comment_author_IP", n => add("ipaddr", n.text),
// Re-format the `Y-m-d H:i:s` timestamp as ISO 8601.
isNode!"wp:comment_date", n => add("date", n.text.parseTime!`Y-m-d H:i:s`.formatTime!(TimeFormats.ISO8601)),
// The GMT variant is recognised but intentionally dropped.
isNode!"wp:comment_date_gmt", {},
isNode!"wp:comment_content", n => add("body", n.text),
// NOTE(review): only "0", "1" and "trash" are in this table; any other
// status text (e.g. "spam") is a missing key and the lookup fails at runtime.
isNode!"wp:comment_approved", n => add("status", ["0" : "pending", "1" : "approved", "trash" : "deleted"][n.text]),
isNode!"wp:comment_parent", n => parent = n.text.to!int,
));
comments[parent] ~= Comment(id, cDoc);
},
));
// Items without any comments produce no output at all.
if (comments)
{
enforce(link, "No <link> found for item");
// Derive the thread directory name from the link: drop the first three
// "/"-separated pieces, rejoin the rest, collapse each run of the listed
// punctuation/space characters into a single "-", and trim "-" from both ends.
// NOTE(review): assumes an absolute URL (scheme, empty piece, host) — a link
// with fewer than three "/"-separated pieces makes the slice go out of range.
auto slug = link
.split("/")
[3..$]
.join('/')
.replaceAll(re!`[-<>:"/\|?&!*.=_+ ]+`, `-`)
.strip("-")
;
auto outDir = buildPath(outputHashoverCommentsDirectory, "threads", slug);
mkdirRecurse(outDir);
// Parent IDs actually visited by saveComments, used for the orphan check below.
bool[int] sawParent;
// Writes the children of `parent` in ascending comment-ID order, naming each
// file prefix ~ (1-based position), then recurses so replies get names such
// as "1-1.xml", "1-2.xml".
void saveComments(int parent, string prefix)
{
sawParent[parent] = true;
foreach (i, comment; comments.get(parent, null).dup.sort!((a, b) => a.id < b.id).release)
{
auto slug = prefix ~ (i+1).text;
comment.xml.toPrettyString.toFile(outDir.buildPath(slug ~ ".xml"));
saveComments(comment.id, slug ~ "-");
}
}
// Parent 0 is the root: comments without a parent element, or with parent 0.
saveComments(0, null);
// Any parent key never reached from the root means a comment refers to a
// parent that is not part of this item's reply tree.
foreach (id, children; comments)
enforce(id in sawParent, "Unknown parent comment ID: " ~ id.text);
}
}
)),
)),
));
}
/// Predicate for `match`: accepts `n` only when it is a regular element node
/// (`XmlNodeType.Node`) and its tag name equals the template argument `tag`.
bool isNode(string tag)(XmlNode n)
{
	// Reject anything that is not an element before looking at the tag.
	if (n.type != XmlNodeType.Node)
		return false;
	return n.tag == tag;
}
/// Predicate for `match`: accepts `n` when its node type equals the template
/// argument `t`, regardless of tag name.
bool isNode(XmlNodeType t)(XmlNode n)
{
	const bool sameType = (t == n.type);
	return sameType;
}
/// Predicate for `match`: accepts `n` only when its node type equals `t`
/// AND its tag name equals `tag` (e.g. `isNode!(XmlNodeType.Meta, "xml")`).
///
/// The tag comparison matters: without it this overload accepts every node
/// of type `t` and the `tag` template argument has no effect.
bool isNode(XmlNodeType t, string tag)(XmlNode n)
{
	return n.type == t && n.tag == tag;
}
/// Dispatches `node` to the first handler whose predicate accepts it.
///
/// `Dgs` is a flat list of (predicate, handler) pairs: even positions hold
/// predicates callable as `pred(node)`, odd positions hold the handler for
/// the predicate just before them. A handler may take the node as its single
/// argument or take no arguments at all; the right call form is picked at
/// compile time. Only the first matching pair runs. If no predicate matches,
/// the node is silently ignored.
void match(Dgs...)(XmlNode node)
{
	static assert(Dgs.length % 2 == 0);
	foreach (idx, _; Dgs)
	{
		static if (idx % 2 != 0)
		{
			// Odd slots are handlers; they are reached through the preceding predicate.
		}
		else
		{
			if (Dgs[idx](node))
			{
				// Prefer the zero-argument call form when the handler supports it.
				static if (!is(typeof(Dgs[idx + 1]())))
					Dgs[idx + 1](node);
				else
					Dgs[idx + 1]();
				return;
			}
		}
	}
	// throw new Exception("Don't know what to do with node: " ~ node.toString);
}
// Compile-only check: forces `match` to be instantiated with both handler
// shapes (a zero-argument `{}` and a one-argument lambda). The body is
// guarded by `if (false)` so nothing runs against the null node.
unittest
{
	if (false) // test instantiation
	{
		XmlNode node;
		node.match!(
			isNode!(XmlNodeType.Meta, "xml"), {},
			isNode!"test", (x) {}
		);
	}
}
// Program entry point: mixes in ae's `main` template around `funopt!wp2hashover`.
// NOTE(review): funopt presumably builds the command-line interface from
// wp2hashover's parameter list — confirm against ae.utils.funopt.
mixin main!(funopt!wp2hashover);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment