Skip to content

Instantly share code, notes, and snippets.

@moonthug
Created January 22, 2013 15:40
Show Gist options
  • Save moonthug/4595624 to your computer and use it in GitHub Desktop.
Save moonthug/4595624 to your computer and use it in GitHub Desktop.
Extract users and posts from a WordPress data export XML file and insert them into PyroCMS
<?php
////////////////////////////////////////////////////////////////////////////////
//
// Wordpress -> PyroCMS
//
// One-off import script: reads a WordPress export XML file and inserts its
// authors, tags, categories and posts into the tables of one PyroCMS site.
//
//
// Config
$filename = "<WP EXPORT FILE>";
$db_schema = "<SCHEMA>";
$db_user = "root";
$db_pass = "root";
$db_host = "localhost";
$site_prefix = "<SITE PREFIX>"; // i.e. "default"
// Every imported user receives this same password hash / salt pair.
$db_users_default_pass = "<ENCODED_PASS>"; // i.e. "bd0fc5f02742c4e2a94f7fc6baa0d9844b69544a" = m00m00
$db_users_default_salt = "<SALT>"; // i.e. "ef7ab2" = ^ salt
////////////////////////////////////////////////////////////////////////////////
//
// Setup
//
//
// DB
include_once "ezsql_core.php";
include_once "ezsql.php";
$db = new ezSQL_mysql($db_user, $db_pass, $db_schema, $db_host);
//
// File IO
// Read the whole export into memory. Stop here when it cannot be read or is
// empty, otherwise the import below would silently do nothing.
$data = file_get_contents($filename);
if($data === FALSE || $data === "")
{
    echo "Unable to read export file: {$filename}\n";
    exit(1);
}
//
// XML
// Namespace URIs used by the export for its wp:, dc: and content: elements.
define("NS_WP", "http://wordpress.org/export/1.2/");
define("NS_DC", "http://purl.org/dc/elements/1.1/");
define("NS_CONTENT", "http://purl.org/rss/1.0/modules/content/");
$doc = new DOMDocument();
if(!$doc->loadXML($data))
{
    echo "Export file is not well-formed XML: {$filename}\n";
    exit(1);
}
//
// Variables
$author_map = array();   // author login => id of the user row created for it
$tag_map = array();      // tag nicename => id of the keyword row created for it
$category_map = array(); // category nicename => id of the blog category row created for it
$titles = array();       // post titles seen so far, used to keep titles unique
////////////////////////////////////////////////////////////////////////////////
//
// Authors
//
// Creates one user row and one profile row for every author element in the
// export, and records the new user id in $author_map (keyed by the author's
// login exactly as it appears in the export) so posts can be attributed later.
//
$author_list = $doc->getElementsByTagName("author");
foreach($author_list as $author)
{
    // Raw values exactly as they appear in the export
    $author_login = $author->getElementsByTagNameNS(NS_WP, "author_login")->item(0)->nodeValue;
    $author_email = $author->getElementsByTagNameNS(NS_WP, "author_email")->item(0)->nodeValue;
    $author_username = $author->getElementsByTagNameNS(NS_WP, "author_display_name")->item(0)->nodeValue;
    $author_first_name = $author->getElementsByTagNameNS(NS_WP, "author_first_name")->item(0)->nodeValue;
    $author_last_name = $author->getElementsByTagNameNS(NS_WP, "author_last_name")->item(0)->nodeValue;
    $created_on = time();
    //
    // SQL-safe copies. Without escaping, a quote in any value (e.g. O'Brien)
    // breaks the INSERT statements below.
    // NOTE(review): ezSQL's own escape() is used rather than
    // mysql_real_escape_string() because no query has necessarily been run
    // yet at this point, so a mysql link may not be open - confirm against
    // the bundled ezSQL version.
    $sql_login = $db->escape($author_login);
    $sql_email = $db->escape($author_email);
    $sql_username = $db->escape($author_username);
    $sql_first_name = $db->escape($author_first_name);
    $sql_last_name = $db->escape($author_last_name);
    //
    // Create User
    $db->query("INSERT INTO `{$db_schema}`.`{$site_prefix}_users` (email, password, salt, group_id, ip_address, active, activation_code, created_on, last_login, username, forgotten_password_code, remember_code) " .
        "VALUES('{$sql_email}', '{$db_users_default_pass}', '{$db_users_default_salt}', 2, '127.0.0.1', 1, NULL, {$created_on}, 0, '{$sql_login}', NULL, NULL);");
    $user_id = mysql_insert_id();
    if(!$user_id)
    {
        // No id was generated, so the user row was not created. Skip the
        // profile and leave the author unmapped; posts by this author then
        // fall back to the default author instead of pointing at user 0.
        echo "Warning: could not create user for author '{$author_login}'\n";
        continue;
    }
    //
    // Create Profile
    $created_date = date("Y-m-d H:i:s");
    $db->query("INSERT INTO `{$db_schema}`.`{$site_prefix}_profiles` (`created`,`updated`,`created_by`,`ordering_count`,`user_id`,`display_name`,`first_name`,`last_name`,`company`,`lang`,`bio`,`dob`) " .
        "VALUES('{$created_date}', NULL, '1', '1', '{$user_id}', '{$sql_username}', '{$sql_first_name}', '{$sql_last_name}', NULL, 'en', NULL, '0');");
    // Keyed by the raw login so it matches the creator value stored on posts
    $author_map[$author_login] = $user_id;
}
////////////////////////////////////////////////////////////////////////////////
//
// Posts
//
// Inserts every non-trashed <item> of the export into the blog table. Tags
// (category elements with domain "post_tag") become keyword rows, every other
// category element becomes a blog category row; both are created on first
// use and remembered in $tag_map / $category_map.
//
// NOTE(review): mysql_real_escape_string() works on the most recently opened
// mysql link; this section assumes one is open by now - confirm.
//
$item_list = $doc->getElementsByTagName("item");
foreach($item_list as $item)
{
    //
    // Status first, so trashed items are skipped before any other work
    $post_status = $item->getElementsByTagNameNS(NS_WP, "status")->item(0)->nodeValue;
    // Ignore Trash ?
    if($post_status === "trash")
        continue;
    //
    // Handle Post
    // The charset is passed explicitly: DOM hands back UTF-8, and older PHP
    // versions default htmlentities() to ISO-8859-1, which mangles
    // multi-byte characters.
    // NOTE(review): title and body are stored entity-encoded, as before -
    // confirm that is what the target site expects.
    $post_title = mysql_real_escape_string(htmlentities($item->getElementsByTagName("title")->item(0)->nodeValue, ENT_COMPAT, "UTF-8"));
    $post_title = !empty($post_title) ? $post_title : "Untitled";
    $post_content = mysql_real_escape_string(htmlentities($item->getElementsByTagNameNS(NS_CONTENT, "encoded")->item(0)->nodeValue, ENT_COMPAT, "UTF-8"));
    $post_date = strtotime($item->getElementsByTagNameNS(NS_WP, "post_date")->item(0)->nodeValue);
    if($post_date === FALSE)
    {
        // An unparsable date would otherwise leave a hole in the INSERT
        $post_date = time();
    }
    //
    // Map the post's creator login to the imported user, default to user 1
    $post_creator = $item->getElementsByTagNameNS(NS_DC, "creator")->item(0)->nodeValue;
    $post_creator = isset($author_map[$post_creator]) ? $author_map[$post_creator] : 1;
    //
    // Published posts keep their original slug; everything else, and
    // published posts without one, gets a generated slug
    $post_slug = "";
    if($post_status === "publish")
    {
        $post_slug = mysql_real_escape_string($item->getElementsByTagNameNS(NS_WP, "post_name")->item(0)->nodeValue);
    }
    if($post_slug === "")
    {
        $post_slug = uniqid("post_");
    }
    //
    // Handle Tags/Categories
    $post_categories = array();
    $post_tags = array();
    foreach($item->getElementsByTagName("category") as $category)
    {
        $tag_domain = $category->getAttribute("domain");
        $tag_slug = $category->getAttribute("nicename");
        $tag_value = $category->nodeValue;
        if($tag_domain === "post_tag")
        {
            if(!isset($tag_map[$tag_slug]))
            {
                // First time this tag is seen: create its keyword row
                $sql_value = mysql_real_escape_string($tag_value);
                $db->query("INSERT INTO `$db_schema`.`{$site_prefix}_keywords` (`name`) VALUES ('{$sql_value}');");
                $tag_map[$tag_slug] = mysql_insert_id();
            }
            $post_tags[] = array(
                "id" => $tag_map[$tag_slug],
                "slug" => $tag_slug
            );
        }
        else
        {
            if(!isset($category_map[$tag_slug]))
            {
                // First time this category is seen: create its row
                $sql_slug = mysql_real_escape_string($tag_slug);
                $sql_value = mysql_real_escape_string($tag_value);
                $db->query("INSERT INTO `$db_schema`.`{$site_prefix}_blog_categories` (`slug`, `title`) VALUES ('{$sql_slug}', '{$sql_value}');");
                $category_map[$tag_slug] = mysql_insert_id();
            }
            $post_categories[] = $category_map[$tag_slug];
        }
    }
    //
    // Handle applied keywords (Tags)
    // All tags of one post share a generated hash, which is what the blog
    // row stores in its `keywords` column.
    $post_tags_hash = "";
    if(count($post_tags) > 0)
    {
        $post_tags_hash = uniqid("tags_");
        $query = "INSERT INTO `$db_schema`.`{$site_prefix}_keywords_applied` (`hash`, `keyword_id`) VALUES ";
        foreach($post_tags as $i => $tag)
        {
            if($i > 0) $query .= ",";
            $query .= "('{$post_tags_hash}', " . (int) $tag["id"] . ")";
        }
        $query .= ";";
        $db->query($query);
    }
    //
    // Check unique titles
    // Titles are compared trimmed and lower-cased. A repeated title gets
    // "_<n>" appended, where n counts the earlier uses of that title.
    $clean_title = trim(strtolower($post_title));
    foreach($titles as $i => $known_title)
    {
        if($clean_title === $known_title["value"])
        {
            $post_title .= "_" . $known_title["count"];
            $titles[$i]["count"]++;
            break;
        }
    }
    // The final title is always recorded, including a suffixed one, so a
    // later post whose title literally equals a generated name is also
    // disambiguated.
    $titles[] = array("name" => $post_title, "value" => trim(strtolower($post_title)), "count" => 1);
    //
    // Defaults
    $post_category = isset($post_categories[0]) ? $post_categories[0] : 1;
    $post_status = $post_status === "publish" ? "live" : "draft";
    //
    // Insert
    $db->query("INSERT INTO `{$db_schema}`.`{$site_prefix}_blog` (`title`,`slug`,`category_id`,`attachment`,`intro`,`body`,`parsed`,`keywords`,`author_id`,`created_on`,`updated_on`,`comments_enabled`,`status`,`type`,`preview_hash`) ".
        "VALUES('{$post_title}', '{$post_slug}', $post_category, '', '{$post_content}','{$post_content}', '', '{$post_tags_hash}', $post_creator, $post_date, $post_date, 1, '{$post_status}', 'wysiwyg-advanced', '');");
}
echo "Done!";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment