Skip to content

Instantly share code, notes, and snippets.

@benscholler
Created November 9, 2011 23:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save benscholler/1353559 to your computer and use it in GitHub Desktop.
Save benscholler/1353559 to your computer and use it in GitHub Desktop.
Wordpress Exporter via Node
// General exporter of WP exports into JSON objects. From that, we can build models to save to the DB.
// Usage from the shell:
// cat file_of_wp_export.xml | node parse_articles.js >> json_output_file.json
/* Module Includes */
var xml2js = require('xml2js'), // Parser that reads in XML and converts it to JSON
YUI = require('yui3').YUI, // The YUI library
_ = require('underscore'); // Utility belt of JS functions
// Module-level state. `file` is the buffer that stdin chunks are appended to.
// `data` and `arg` are never read at module scope in the visible code: every
// use of those names below is a function parameter that shadows them.
var data = '';
var file = '';
var arg = '';
/**
 * Looks up a WordPress post-meta value by its meta key.
 *
 * The `wp:postmeta` data may arrive as a list of entries or, when an item has
 * only one such element, as a single bare entry object (NOTE(review): this is
 * how the '@'/'#'-style xml2js output used elsewhere in this file appears to
 * behave; confirm against the installed xml2js version). Both shapes are
 * accepted here.
 *
 * @param {Array|Object|undefined|null} arg - The item's `wp:postmeta` data.
 * @param {string} key - The `wp:meta_key` to search for.
 * @returns {*} The `wp:meta_value` of the last entry whose `wp:meta_key`
 *     equals `key`, or '' when nothing matches.
 */
function postMeta(arg, key) {
    var entries;
    var result = '';

    if (arg === undefined || arg === null) {
        return result;
    }

    // A lone entry is wrapped so it is inspected as an entry, not iterated
    // field by field.
    entries = Array.isArray(arg) ? arg : [arg];

    entries.forEach(function (entry) {
        // Skip anything that is not an entry object (holes, strings, null).
        if (entry === null || typeof entry !== 'object') {
            return;
        }
        // No early exit: a later duplicate key overrides an earlier one.
        if (entry['wp:meta_key'] === key) {
            result = entry['wp:meta_value'];
        }
    });

    return result;
}
// Read the export from stdin as UTF-8 text, appending every chunk that
// arrives to the module-level `file` buffer.
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', function (piece) {
    file = file + piece.toString();
});
// Once stdin is exhausted, parse the buffered XML and print the selected
// schema's view of it to stdout as a JSON string.
process.stdin.on('end', function () {
    var parser = new xml2js.Parser();

    // True for any non-null object (including arrays).
    function isObject(value) {
        return value !== null && typeof value === 'object';
    }

    // Own-property check that does not rely on the value's own hasOwnProperty.
    function hasOwn(obj, prop) {
        return Object.prototype.hasOwnProperty.call(obj, prop);
    }

    // Normalizes a located value to an array: nothing -> [], array -> itself,
    // anything else -> a one-element array.
    // NOTE(review): the '@'/'#'-style parser output this file relies on appears
    // to hand back a bare object (not a one-element array) when an element
    // occurs only once; confirm against the installed xml2js version.
    function asList(value) {
        if (value === undefined || value === null) {
            return [];
        }
        return Array.isArray(value) ? value : [value];
    }

    // Builds the JSON string of an item's comments in the shape needed for importing.
    function collectComments(arg) {
        var comments = [];
        asList(arg).forEach(function (entry) {
            if (!isObject(entry)) {
                return;
            }
            comments.push({
                wp_comment_id: entry['wp:comment_id'],
                name: entry['wp:comment_author'],
                email: entry['wp:comment_author_email'],
                comment: entry['wp:comment_content'],
                status: entry['wp:comment_approved'],
                created: entry['wp:comment_date']
            });
        });
        return JSON.stringify(comments);
    }

    // Builds the JSON string of an item's <category> elements whose `domain`
    // attribute equals `domain`. The nicename (or, when it is absent, the
    // element text) is stored under `slugKey`; the element text is stored
    // under `nameKey` only when a nicename exists.
    function collectTerms(arg, domain, slugKey, nameKey) {
        var terms = [];
        asList(arg).forEach(function (entry) {
            var attrs;
            var term;
            // Elements without attributes carry no '@' key and are skipped.
            if (!isObject(entry) || !hasOwn(entry, '@')) {
                return;
            }
            attrs = entry['@'];
            if (!isObject(attrs) || attrs.domain !== domain) {
                return;
            }
            term = {};
            if (hasOwn(attrs, 'nicename')) {
                term[slugKey] = attrs.nicename;
                term[nameKey] = entry['#'];
            } else {
                term[slugKey] = entry['#'];
            }
            terms.push(term);
        });
        return JSON.stringify(terms);
    }

    parser.parseString(file, function (err, content) {
        // Report a parse failure instead of applying a schema to missing content.
        if (err) {
            console.error('Unable to parse WordPress export: ' + (err.message || err));
            process.exit(1);
            return;
        }

        YUI().use("dataschema-json", function (Y) {
            // Articles Layout
            var article_schema = {
                resultListLocator: "channel.item",
                resultFields: [
                    {key: "title"},
                    {key: "wp_post_id", locator: "wp:post_id"},
                    {key: "post_type", locator: "wp:post_type"},
                    {key: "created", locator: "pubDate"},
                    {key: "sub_title", locator: "wp:postmeta", parser: function (arg) {
                        return postMeta(arg, '_aioseop_description');
                    }},
                    {key: "blurb", locator: "excerpt:encoded"},
                    {key: "body", locator: "content:encoded"},
                    {key: "date_active", locator: "wp:post_date"},
                    {key: "slug", locator: "wp:post_name"},
                    {key: "author", locator: "dc:creator"},
                    // Only 'publish' counts as active; 'draft', 'pending', 'future',
                    // 'private' and any other status map to 0.
                    {key: "flag_active", locator: "wp:status", parser: function (arg) {
                        return arg === 'publish' ? 1 : 0;
                    }},
                    {key: "meta_keywords", locator: "wp:postmeta", parser: function (arg) {
                        return postMeta(arg, '_aioseop_keywords');
                    }},
                    {key: "meta_description", locator: "wp:postmeta", parser: function (arg) {
                        return postMeta(arg, '_aioseop_description');
                    }},
                    // If trying to write the JSON version of articles out, Comments appears as [Object].
                    // Having to do a custom parser to set up the data as needed for importing.
                    {key: "Comments", locator: "wp:comment", parser: collectComments},
                    // Same as comments here
                    {key: "Channels", locator: "category", parser: function (arg) {
                        return collectTerms(arg, 'category', 'url_name', 'channel_name');
                    }},
                    // Same as comments here
                    // This may change based on WP version. If WP < 3, use this. If not, domain should be "tag".
                    {key: "Tags", locator: "category", parser: function (arg) {
                        return collectTerms(arg, 'post_tag', 'slug', 'name');
                    }}
                ]
            };

            // Pictures are in the same items as articles, just defined by having an attachment URL
            var attachment_schema = {
                resultListLocator: "channel.item",
                resultFields: [
                    {key: "attachment_parent", locator: "wp:post_parent"},
                    {key: "attachment", locator: "wp:attachment_url"}
                ]
            };

            // Lives at the top of the WP XML file
            var category_schema = {
                resultListLocator: "channel.wp:category",
                resultFields: [
                    {key: "url_name", locator: "wp:category_nicename"},
                    {key: "channel_name", locator: "wp:cat_name"},
                    // When mapping these off in shell, you'll have to do recursive runs to find all parents until no parents are left.
                    // Note that parent name is based off of wp:cat_name
                    {key: "parent_category", locator: "wp:category_parent"}
                ]
            };

            // Lives at the top of the WP XML file
            var tag_schema = {
                resultListLocator: "channel.wp:tag",
                resultFields: [
                    {key: "name", locator: "wp:tag_name"},
                    {key: "slug", locator: "wp:tag_slug"}
                ]
            };

            // Lives within all the articles, no clear definition
            // In WP > 3, all authors are defined at the top of the file.
            // However, dc:creator is still used within the post definition, which uses the nicename of an author instead of an ID or set of IDs. Strange.
            var author_schema = {
                resultListLocator: "channel.item",
                resultFields: [
                    {key: "name", locator: "dc:creator"},
                    {key: "wp_post_id", locator: "wp:post_id"}
                ]
            };

            var old_url_schema = {
                resultListLocator: "channel.item",
                resultFields: [
                    {key: "title"},
                    {key: "wp_post_id", locator: "wp:post_id"},
                    {key: "slug", locator: "wp:post_name"},
                    {key: "old_url", locator: "wp:old_url"}
                ]
            };

            // Depending on what you want to output, change the schema here. You could also merge some of the schemas together if you want just one big JSON file with everything.
            // I think it's better to keep schemas modular in case you need to import something before something else.
            // The other schemas above are deliberately kept as ready-to-use alternatives.
            var data_out = Y.DataSchema.JSON.apply(article_schema, content);
            console.log(JSON.stringify(data_out));
        });
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment