Convert WordPress comments to HashOver-next
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/wp2hashover |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env dub
/+ dub.sdl:
	name "wp2hashover"
	dependency "ae" version="==0.0.2155"
+/
import std.algorithm.iteration; | |
import std.algorithm.sorting; | |
import std.conv; | |
import std.exception; | |
import std.file; | |
import std.path; | |
import std.regex; | |
import std.stdio; | |
import std.string; | |
import ae.utils.digest; | |
import ae.utils.funopt; | |
import ae.utils.main; | |
import ae.utils.regex; | |
import ae.utils.time.common; | |
import ae.utils.time.format; | |
import ae.utils.time.parse; | |
import ae.utils.xmllite; | |
/**
 * Converts the comments found in a WordPress XML export into HashOver-next
 * per-comment XML files.
 *
 * For every <item> that contains at least one <wp:comment>, the directory
 * "<outputHashoverCommentsDirectory>/threads/<slug>" is created, where the
 * slug is built from the item's <link> with the first three "/"-separated
 * segments dropped and runs of special characters collapsed to "-".
 * Top-level comments are written as "1.xml", "2.xml", ...; replies are
 * written as "<parent>-1.xml", "<parent>-2.xml", ... Siblings are numbered
 * in ascending WordPress comment ID order.
 *
 * Params:
 *   inputWordpressXMLFile = path of the WordPress XML export to read
 *   outputHashoverCommentsDirectory = directory under which "threads/<slug>"
 *       directories are created
 *
 * Throws: Exception when an item with comments has no usable <link>, when a
 *   comment carries an unrecognised <wp:comment_approved> value, or when a
 *   comment's parent ID is never reached while walking down from ID 0.
 */
void wp2hashover(string inputWordpressXMLFile, string outputHashoverCommentsDirectory)
{
	// Maps a <wp:comment_approved> value to the status string written out.
	static string hashoverStatus(string wpStatus)
	{
		switch (wpStatus)
		{
			case "0":
				return "pending";
			case "1":
				return "approved";
			case "trash":
				return "deleted";
			default:
				// Name the offending value instead of dying with an
				// anonymous RangeError from an associative array lookup.
				throw new Exception("Unsupported wp:comment_approved value: " ~ wpStatus);
		}
	}

	auto wp = inputWordpressXMLFile.readText.xmlParse;
	wp.children.each!(n => n.match!(
		isNode!(XmlNodeType.Meta, "xml"), {},
		isNode!(XmlNodeType.Comment), {},
		isNode!"rss", n => n.children.each!(n => n.match!(
			isNode!"channel", n => n.children.each!(n => n.match!(
				isNode!"item", (n)
				{
					string link;
					struct Comment { int id; XmlDocument xml; }
					// Comments grouped by parent comment ID; 0 holds the top level.
					Comment[][int] comments;

					n.children.each!(n => n.match!(
						isNode!"link", n => link = n.text,
						isNode!"wp:comment", (XmlNode n)
						{
							auto cDoc = xmlParse(`<?xml version="1.0" encoding="UTF-8"?><comment/>`);

							// Appends <name>value</name> to the <comment> root element.
							void add(string name, string value)
							{
								auto valueNode = new XmlNode(XmlNodeType.Text, value);
								auto fieldNode = new XmlNode(XmlNodeType.Node, name);
								fieldNode.children ~= valueNode;
								auto root = cDoc["comment"];
								root.children ~= fieldNode;
							}

							int id, parent;
							n.children.each!(n => n.match!(
								isNode!"wp:comment_id", (n) {
									id = n.text.to!int;
									add("legacy_id", "comment-" ~ n.text);
								},
								isNode!"wp:comment_author", n => add("name", n.text),
								isNode!"wp:comment_author_email", (n) {
									add("email_raw", n.text); // usual "email" field is encrypted; emit this for reference only
									add("email_hash", getDigestString!MD5(n.text.toLower).toLower);
								},
								isNode!"wp:comment_author_url", n => add("website", n.text),
								isNode!"wp:comment_author_IP", n => add("ipaddr", n.text),
								isNode!"wp:comment_date", n => add("date", n.text.parseTime!`Y-m-d H:i:s`.formatTime!(TimeFormats.ISO8601)),
								isNode!"wp:comment_date_gmt", {},
								isNode!"wp:comment_content", n => add("body", n.text),
								isNode!"wp:comment_approved", n => add("status", hashoverStatus(n.text)),
								isNode!"wp:comment_parent", n => parent = n.text.to!int,
							));
							comments[parent] ~= Comment(id, cDoc);
						},
					));

					if (comments.length)
					{
						// Reject empty links too, not only a null string.
						enforce(link.length, "No <link> found for item");

						auto linkParts = link.split("/");
						// The slice below drops the first three segments, so
						// a shorter link cannot be sliced safely.
						enforce(linkParts.length >= 3, "Cannot derive a thread directory from <link>: " ~ link);

						auto slug = linkParts[3 .. $]
							.join('/')
							.replaceAll(re!`[-<>:"/\|?&!*.=_+ ]+`, `-`)
							.strip("-")
						;
						auto outDir = buildPath(outputHashoverCommentsDirectory, "threads", slug);
						mkdirRecurse(outDir);

						bool[int] sawParent;

						// Writes the children of `parent` as <prefix><n>.xml
						// and recurses into each child's own replies.
						void saveComments(int parent, string prefix)
						{
							sawParent[parent] = true;
							foreach (i, comment; comments.get(parent, null).dup.sort!((a, b) => a.id < b.id).release)
							{
								auto fileStem = prefix ~ (i+1).text;
								comment.xml.toPrettyString.toFile(outDir.buildPath(fileStem ~ ".xml"));
								saveComments(comment.id, fileStem ~ "-");
							}
						}
						saveComments(0, null);

						// Any group whose parent was never visited would have
						// been dropped silently; treat that as an error.
						foreach (id, children; comments)
							enforce(id in sawParent, "Unknown parent comment ID: " ~ id.text);
					}
				}
			)),
		)),
	));
}
/// Predicate: true when `n` is an element node (XmlNodeType.Node) whose tag
/// equals the compile-time string `tag`.
bool isNode(string tag)(XmlNode n)
{
	if (n.type != XmlNodeType.Node)
		return false;
	return n.tag == tag;
}
/// Predicate: true when `n` has the compile-time node type `t`; the tag is
/// not examined.
bool isNode(XmlNodeType t)(XmlNode n)
{
	const actualType = n.type;
	return actualType == t;
}
/// Predicate: true when `n` has node type `t` AND its tag equals `tag`.
///
/// The tag comparison is what separates this overload from the type-only
/// one; without it, every node of type `t` would match whatever its tag.
bool isNode(XmlNodeType t, string tag)(XmlNode n)
{
	return n.type == t && n.tag == tag;
}
/**
 * Dispatches `node` to the first handler whose predicate accepts it.
 *
 * `Dgs` is an even-length list of alternating (predicate, handler) entries.
 * Predicates are tried in order; for the first one returning true, the
 * paired handler is invoked and the function returns. A handler that can be
 * called with no arguments is called that way; otherwise it receives `node`.
 *
 * A node accepted by no predicate is ignored silently.
 */
void match(Dgs...)(XmlNode node)
{
	static assert(Dgs.length % 2 == 0);

	static foreach (pairIndex; 0 .. Dgs.length / 2)
	{{
		alias predicate = Dgs[pairIndex * 2];
		alias handler = Dgs[pairIndex * 2 + 1];

		if (predicate(node))
		{
			static if (is(typeof(handler())))
				handler();
			else
				handler(node);
			return;
		}
	}}

	// Unmatched nodes fall through on purpose; the strict variant is disabled:
	// throw new Exception("Don't know what to do with node: " ~ node.toString);
}
unittest
{
	// Never executed: the dead branch only makes the compiler instantiate
	// `match` with a no-argument handler and a one-argument handler.
	if (false)
	{
		XmlNode node;
		match!(
			isNode!(XmlNodeType.Meta, "xml"), {},
			isNode!"test", (unused) {}
		)(node);
	}
}
// Program entry point: the main mixin is instantiated with funopt applied to wp2hashover.
mixin main!(funopt!(wp2hashover));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment