Skip to content

Instantly share code, notes, and snippets.

@chrismeller
Last active December 13, 2015 16:39
Show Gist options
  • Save chrismeller/4941933 to your computer and use it in GitHub Desktop.
Save chrismeller/4941933 to your computer and use it in GitHub Desktop.
Crappy WXR Parser using PHP DOM
<?php
// Run in UTC and surface every notice/warning while parsing.
date_default_timezone_set('UTC');
error_reporting(-1);
ini_set('display_errors', true);

// Path of the WXR export to parse.
$file = '/Users/chris/Downloads/wordpress-wxr-example.xml';

// file_get_contents() only raises a warning and returns false when the file
// is missing or unreadable; stop here rather than parse an empty document.
$contents = file_get_contents( $file );
if ( $contents === false ) {
    throw new RuntimeException( 'Unable to read WXR file: ' . $file );
}

$dom = new DOMDocument( '1.0', 'utf-8' );

// LIBXML_NOCDATA merges CDATA sections into ordinary text nodes.
// loadXML() returns false on malformed XML, which would otherwise leave an
// empty document for every later XPath query to fail against.
if ( ! $dom->loadXML( $contents, LIBXML_NOCDATA ) ) {
    throw new RuntimeException( 'Unable to parse WXR file as XML: ' . $file );
}

// All later lookups go through this XPath evaluator.
$xpath = new DOMXPath( $dom );
// Locate the <channel> element. With no context node given, DOMXPath
// evaluates the relative path against the document's root element.
$channels = $xpath->query( './channel' );
if ( $channels === false || $channels->length === 0 ) {
    throw new RuntimeException( 'No <channel> element found in the document.' );
}
$channel = $channels->item(0);

// Text of the first direct child of <channel> matching $query, or null when
// the element is absent or the query fails (query() returns false, e.g. for a
// namespace prefix the document never declares). Avoids reading ->nodeValue
// on the null that item(0) returns for an empty result.
$channel_value = function ( $query ) use ( $xpath, $channel ) {
    $nodes = $xpath->query( $query, $channel );
    if ( $nodes === false || $nodes->length === 0 ) {
        return null;
    }
    return $nodes->item(0)->nodeValue;
};

// Standard RSS channel fields.
$title = $channel_value( './title' );
$link = $channel_value( './link' );
$description = $channel_value( './description' );
$pubdate = $channel_value( './pubDate' );
$generator = $channel_value( './generator' );
$language = $channel_value( './language' );

// Fields from the wp: namespace.
$base_site_url = $channel_value( './wp:base_site_url' );
$base_blog_url = $channel_value( './wp:base_blog_url' );
$wxr_version = $channel_value( './wp:wxr_version' );
// Collect every channel-level <wp:category> into $cats as
// array( 'nicename' => ..., 'parent' => ..., 'name' => ... ).
$categories = $xpath->query( './wp:category', $channel );
$cats = array();

// Output key => XPath (relative to one <wp:category>) it is read from.
$category_fields = array(
    'nicename' => './wp:category_nicename',
    'parent' => './wp:category_parent',
    'name' => './wp:cat_name',
);

// query() returns false on failure (e.g. undeclared namespace prefix), which
// foreach cannot iterate.
if ( $categories !== false ) {
    foreach ( $categories as $category ) {
        $entry = array();
        foreach ( $category_fields as $field => $query ) {
            $nodes = $xpath->query( $query, $category );
            // A missing child element yields null instead of dereferencing
            // the null returned by item(0) on an empty result.
            $entry[ $field ] = ( $nodes !== false && $nodes->length > 0 )
                ? $nodes->item(0)->nodeValue
                : null;
        }
        $cats[] = $entry;
    }
}
// Collect every channel-level <wp:tag> into $ts as
// array( 'slug' => ..., 'name' => ... ).
$tags = $xpath->query( './wp:tag', $channel );
$ts = array();

// Output key => XPath (relative to one <wp:tag>) it is read from.
$tag_fields = array(
    'slug' => './wp:tag_slug',
    'name' => './wp:tag_name',
);

// query() returns false on failure (e.g. undeclared namespace prefix), which
// foreach cannot iterate.
if ( $tags !== false ) {
    foreach ( $tags as $tag ) {
        $entry = array();
        foreach ( $tag_fields as $field => $query ) {
            $nodes = $xpath->query( $query, $tag );
            // A missing child element yields null instead of dereferencing
            // the null returned by item(0) on an empty result.
            $entry[ $field ] = ( $nodes !== false && $nodes->length > 0 )
                ? $nodes->item(0)->nodeValue
                : null;
        }
        $ts[] = $entry;
    }
}
// Parse every <item> under the channel into one associative array and
// collect them in $is. Any element missing from an item is reported as null
// rather than triggering a property read / method call on null.

// First node matching $query relative to $context, or null when the query
// fails (query() returns false, e.g. for an undeclared namespace prefix) or
// matches nothing.
$first_node = function ( $query, $context ) use ( $xpath ) {
    $nodes = $xpath->query( $query, $context );
    if ( $nodes === false || $nodes->length === 0 ) {
        return null;
    }
    return $nodes->item(0);
};

// Text content of the first match, or null when there is none.
$first_value = function ( $query, $context ) use ( $first_node ) {
    $node = $first_node( $query, $context );
    return ( $node === null ) ? null : $node->nodeValue;
};

// Every match as something foreach can walk; a failed query yields no nodes.
$all_nodes = function ( $query, $context ) use ( $xpath ) {
    $nodes = $xpath->query( $query, $context );
    return ( $nodes === false ) ? array() : $nodes;
};

// Output key => XPath (relative to one <item>). Order here is the key order
// of each resulting item array.
$item_fields = array(
    'title' => './title',
    'link' => './link',
    'pubdate' => './pubDate',
    'creator' => './dc:creator',
    'description' => './description',
    'content_encoded' => './content:encoded',
    'post_id' => './wp:post_id',
    'post_date' => './wp:post_date',
    'post_date_gmt' => './wp:post_date_gmt',
    'comment_status' => './wp:comment_status',
    'ping_status' => './wp:ping_status',
    'post_name' => './wp:post_name',
    'status' => './wp:status',
    'post_parent' => './wp:post_parent',
    'menu_order' => './wp:menu_order',
    'post_type' => './wp:post_type',
    'post_password' => './wp:post_password',
    'excerpt_encoded' => './excerpt:encoded',
    'is_sticky' => './wp:is_sticky',
);

// Output key => XPath (relative to one <wp:comment>).
$comment_fields = array(
    'id' => './wp:comment_id',
    'author' => './wp:comment_author',
    'author_email' => './wp:comment_author_email',
    'author_url' => './wp:comment_author_url',
    'author_ip' => './wp:comment_author_IP',
    'date' => './wp:comment_date',
    'date_gmt' => './wp:comment_date_gmt',
    'content' => './wp:comment_content',
    'approved' => './wp:comment_approved',
    'type' => './wp:comment_type',
    'parent' => './wp:comment_parent',
    'user_id' => './wp:comment_user_id',
);

$items = $all_nodes( './item', $channel );
$is = array();

foreach ( $items as $item ) {
    // Scalar fields.
    $i = array();
    foreach ( $item_fields as $field => $query ) {
        $i[ $field ] = $first_value( $query, $item );
    }

    // <guid> carries both a value and an isPermaLink attribute; both are
    // null when the item has no <guid> at all.
    $guid = $first_node( './guid', $item );
    $i['guid_is_permalink'] = ( $guid === null ) ? null : $guid->getAttribute( 'isPermaLink' );
    $i['guid'] = ( $guid === null ) ? null : $guid->nodeValue;

    // Per-item <category> elements, with their domain/nicename attributes.
    $i['categories'] = array();
    foreach ( $all_nodes( './category', $item ) as $category ) {
        $i['categories'][] = array(
            'name' => $category->nodeValue,
            'domain' => $category->getAttribute( 'domain' ),
            'nicename' => $category->getAttribute( 'nicename' ),
        );
    }

    // Comments attached to the item.
    $i['comments'] = array();
    foreach ( $all_nodes( './wp:comment', $item ) as $comment ) {
        $c = array();
        foreach ( $comment_fields as $field => $query ) {
            $c[ $field ] = $first_value( $query, $comment );
        }
        $i['comments'][] = $c;
    }

    // Post meta as key => value. A repeated key keeps only its last value.
    $i['meta'] = array();
    foreach ( $all_nodes( './wp:postmeta', $item ) as $metar ) {
        $key = $first_value( './wp:meta_key', $metar );
        if ( $key === null ) {
            // No key to file the value under; skip instead of storing it
            // under the empty-string key that a null index would become.
            continue;
        }
        $i['meta'][ $key ] = $first_value( './wp:meta_value', $metar );
    }

    $is[] = $i;
}
// Dump all parsed items for inspection, then print the channel as
// "title: link".
var_dump( $is );
echo "{$title}: {$link}";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment