-
-
Save paulredmond/1372011 to your computer and use it in GitHub Desktop.
WordPress Exporter via Node
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// General-purpose exporter that converts a WordPress XML export into JSON objects. From those, we can build models to save to the DB.
// Usage from the shell:
// cat file_of_wp_export.xml | node parse_articles.js >> json_output_file.json
/* Module Includes */
var xml2js = require('xml2js'), // Parser that reads in XML and converts it to JSON | |
YUI = require('yui3').YUI, // The YUI library | |
_ = require('underscore'); // Utility belt of JS functions | |
// Module-level state.
// `file` accumulates the raw XML text read from stdin and is later handed to the XML parser.
// `data` and `arg` are kept for compatibility; in the code shown here every other use of
// those names is a shadowing function parameter, so these two are never read.
var data = '', file = '', arg = '';
/**
 * Look up a post-meta value by its meta key.
 *
 * @param {Array|Object|undefined} arg - The parsed `wp:postmeta` entries of one post. May be an
 *     array of entries, a single entry object (when the post has exactly one meta element and
 *     the XML parser did not wrap it in an array), or undefined/null when the post has none.
 * @param {string} key - The `wp:meta_key` to search for.
 * @returns {*} The `wp:meta_value` of the LAST entry whose key matches, or '' when none matches.
 */
function postMeta(arg, key) {
    var result = '',
        entries,
        entry,
        i;

    if (arg === undefined || arg === null) {
        return result;
    }

    // A lone entry arrives as a bare object; wrap it so it is examined as an entry
    // instead of being iterated property by property.
    entries = Array.isArray(arg) ? arg : [arg];

    for (i = 0; i < entries.length; i += 1) {
        entry = entries[i];
        // Skip holes / non-object entries rather than throwing on property access.
        if (entry !== null && typeof entry === 'object' && entry['wp:meta_key'] === key) {
            // No early exit: later duplicates deliberately override earlier ones.
            result = entry['wp:meta_value'];
        }
    }

    return result;
}
// Read the whole WordPress export from stdin into `file`.
// resume() starts the stream flowing; setEncoding('utf8') makes every chunk a decoded
// string, so chunks can be appended directly without a toString() call.
process.stdin.resume();
process.stdin.setEncoding('utf8');
process.stdin.on('data', function (chunk) {
    file += chunk;
});
/**
 * Normalise a parsed XML child collection to an array.
 *
 * NOTE(review): the '@' / '#' keys read further down suggest the XML parser is not wrapping
 * lone child elements in arrays, in which case a single child arrives as a bare object --
 * confirm against the xml2js version in use.
 *
 * @param {*} value - undefined/null, a single node, or an array of nodes.
 * @returns {Array} [] for undefined/null, the array itself, or the single node wrapped in an array.
 */
function toList(value) {
    if (value === undefined || value === null) {
        return [];
    }
    return Array.isArray(value) ? value : [value];
}

/**
 * Build the JSON string stored under the "Comments" key of an article.
 *
 * @param {Array|Object|undefined} arg - Parsed `wp:comment` node(s) of one post.
 * @returns {string} JSON array of {wp_comment_id, name, email, comment, status, created}.
 */
function parseComments(arg) {
    var comments = [];
    toList(arg).forEach(function (node) {
        // Skip holes and non-object entries; they cannot carry comment fields.
        if (node === null || typeof node !== 'object') {
            return;
        }
        comments.push({
            wp_comment_id: node['wp:comment_id'],
            name: node['wp:comment_author'],
            email: node['wp:comment_author_email'],
            comment: node['wp:comment_content'],
            status: node['wp:comment_approved'],
            created: node['wp:comment_date']
        });
    });
    return JSON.stringify(comments);
}

/**
 * Build the JSON string for the `category` nodes of one post that belong to a given domain.
 * Shared by the "Channels" (domain "category") and "Tags" (domain "post_tag") fields.
 *
 * Each node is expected to hold its attributes under '@' and its text under '#'.
 * With a `nicename` attribute the output entry is {slugKey: nicename, nameKey: text};
 * without one it is {slugKey: text}.
 *
 * @param {Array|Object|undefined} arg - Parsed `category` node(s) of one post.
 * @param {string} domain - Value the node's `domain` attribute must equal.
 * @param {string} slugKey - Output property name for the slug.
 * @param {string} nameKey - Output property name for the display name.
 * @returns {string} JSON array of the matching entries.
 */
function parseTerms(arg, domain, slugKey, nameKey) {
    var terms = [];
    toList(arg).forEach(function (node) {
        var attrs, term;
        // Nodes without attributes (plain strings, holes) have no domain to match on.
        if (node === null || typeof node !== 'object' ||
                !Object.prototype.hasOwnProperty.call(node, '@')) {
            return;
        }
        attrs = node['@'];
        if (!attrs || attrs.domain !== domain) {
            return;
        }
        term = {};
        if (Object.prototype.hasOwnProperty.call(attrs, 'nicename')) {
            term[slugKey] = attrs.nicename;
            term[nameKey] = node['#'];
        } else {
            term[slugKey] = node['#'];
        }
        terms.push(term);
    });
    return JSON.stringify(terms);
}

// Once stdin is exhausted, parse the accumulated XML, apply one of the schemas below
// and print the result to stdout as JSON.
process.stdin.on('end', function () {
    var parser = new xml2js.Parser();
    parser.parseString(file, function (err, content) {
        if (err) {
            // Do not emit JSON built from a failed parse; report and signal failure instead.
            console.error('Failed to parse WordPress export XML: ' + (err.message || err));
            process.exitCode = 1;
            return;
        }
        YUI().use("dataschema-json", function (Y) {
            var
                // Articles Layout
                article_schema = {
                    resultListLocator: "channel.item",
                    resultFields: [
                        {key: "title"},
                        {key: "wp_post_id", locator: "wp:post_id"},
                        {key: "post_type", locator: "wp:post_type"},
                        {key: "created", locator: "pubDate"},
                        // sub_title and meta_description (below) both read '_aioseop_description'.
                        {key: "sub_title", locator: "wp:postmeta", parser: function (arg) { return postMeta(arg, '_aioseop_description'); }},
                        {key: "blurb", locator: "excerpt:encoded"},
                        {key: "body", locator: "content:encoded"},
                        {key: "date_active", locator: "wp:post_date"},
                        {key: "slug", locator: "wp:post_name"},
                        {key: "author", locator: "dc:creator"},
                        // Only 'publish' counts as active; draft, pending, future, private
                        // and any other status map to 0.
                        {key: "flag_active", locator: "wp:status", parser: function (arg) {
                            return arg === 'publish' ? 1 : 0;
                        }},
                        {key: "meta_keywords", locator: "wp:postmeta", parser: function (arg) { return postMeta(arg, '_aioseop_keywords'); }},
                        {key: "meta_description", locator: "wp:postmeta", parser: function (arg) { return postMeta(arg, '_aioseop_description'); }},
                        // If trying to write the JSON version of articles out, Comments appears as [Object].
                        // A custom parser sets the data up as needed for importing.
                        {key: "Comments", locator: "wp:comment", parser: parseComments},
                        // Same as comments here
                        {key: "Channels", locator: "category", parser: function (arg) {
                            return parseTerms(arg, 'category', 'url_name', 'channel_name');
                        }},
                        // Same as comments here.
                        // The domain may change based on WP version. If WP < 3, use this. If not, domain should be "tag".
                        {key: "Tags", locator: "category", parser: function (arg) {
                            return parseTerms(arg, 'post_tag', 'slug', 'name');
                        }}
                    ]
                },
                // Pictures are in the same items as articles, just defined by having an attachment URL
                attachment_schema = {
                    resultListLocator: "channel.item",
                    resultFields: [
                        {key: "attachment_parent", locator: "wp:post_parent"},
                        {key: "attachment", locator: "wp:attachment_url"}
                    ]
                },
                // Lives at the top of the WP XML file
                category_schema = {
                    resultListLocator: "channel.wp:category",
                    resultFields: [
                        {key: "url_name", locator: "wp:category_nicename"},
                        {key: "channel_name", locator: "wp:cat_name"},
                        // When mapping these off in shell, you'll have to do recursive runs to find all parents until no parents are left.
                        // Note that parent name is based off of wp:cat_name
                        {key: "parent_category", locator: "wp:category_parent"}
                    ]
                },
                // Lives at the top of the WP XML file
                tag_schema = {
                    resultListLocator: "channel.wp:tag",
                    resultFields: [
                        {key: "name", locator: "wp:tag_name"},
                        {key: "slug", locator: "wp:tag_slug"}
                    ]
                },
                // Lives within all the articles, no clear definition
                // In WP > 3, all authors are defined at the top of the file.
                // However, dc:creator is still used within the post definition, which uses the nicename of an author instead of an ID or set of IDs. Strange.
                author_schema = {
                    resultListLocator: "channel.item",
                    resultFields: [
                        {key: "name", locator: "dc:creator"},
                        {key: "wp_post_id", locator: "wp:post_id"}
                    ]
                },
                // Declared locally so the result does not leak into the global scope.
                data_out;

            // Depending on what you want to output, change the schema here. You could also merge some of the schemas together if you want just one big JSON file with everything.
            // Keeping schemas modular helps when something has to be imported before something else.
            data_out = Y.DataSchema.JSON.apply(tag_schema, content);
            console.log(JSON.stringify(data_out));
        });
    });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment