Skip to content

Instantly share code, notes, and snippets.

@fcingolani
Created June 18, 2013 20:34
Show Gist options
  • Save fcingolani/5809103 to your computer and use it in GitHub Desktop.
Save fcingolani/5809103 to your computer and use it in GitHub Desktop.
Generate a WXR file from shell. Intended for BIG WordPress installations.
<?php
error_reporting(E_ALL ^ E_NOTICE);
define( 'WXR_VERSION', '1.2' );
/**
* Echoes a complete WXR (WordPress eXtended RSS) export document.
*
* The function builds a SQL WHERE clause from $args, snapshots the matching
* post IDs, collects the categories, tags and custom terms to list, and then
* prints the XML. No HTTP headers are sent from this function; everything is
* written straight to the output stream.
*
* @since 2.1.0
*
* @param array $args Filters defining what should be included in the export.
*                    Recognised keys (all optional, see $defaults below):
*                    'content', 'author', 'category', 'start_date',
*                    'end_date', 'status'.
*/
function my_export_wp( $args = array() ) {
global $wpdb, $post;
// Every filter defaults to "off": all content, no author/category/date/status restriction.
$defaults = array( 'content' => 'all', 'author' => false, 'category' => false,
'start_date' => false, 'end_date' => false, 'status' => false,
);
$args = wp_parse_args( $args, $defaults );
do_action( 'export_wp' );
// Post-type clause: one registered type when requested, otherwise every type flagged can_export.
if ( 'all' != $args['content'] && post_type_exists( $args['content'] ) ) {
$ptype = get_post_type_object( $args['content'] );
// A requested type that is not exportable is replaced by 'post'.
if ( ! $ptype->can_export )
$args['content'] = 'post';
$where = $wpdb->prepare( "{$wpdb->posts}.post_type = %s", $args['content'] );
} else {
$post_types = get_post_types( array( 'can_export' => true ) );
// One %s placeholder per exportable post type for the IN (...) list.
$esses = array_fill( 0, count($post_types), '%s' );
$where = $wpdb->prepare( "{$wpdb->posts}.post_type IN (" . implode( ',', $esses ) . ')', $post_types );
}
// The status filter is honoured only for 'post' and 'page' content; in every other case
// all statuses except 'auto-draft' are exported.
if ( $args['status'] && ( 'post' == $args['content'] || 'page' == $args['content'] ) )
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_status = %s", $args['status'] );
else
$where .= " AND {$wpdb->posts}.post_status != 'auto-draft'";
$join = '';
// Category filtering applies to 'post' content only and needs a join on the term relationships table.
// $term stays set after this block and is inspected again further down.
if ( $args['category'] && 'post' == $args['content'] ) {
if ( $term = term_exists( $args['category'], 'category' ) ) {
$join = "INNER JOIN {$wpdb->term_relationships} ON ({$wpdb->posts}.ID = {$wpdb->term_relationships}.object_id)";
$where .= $wpdb->prepare( " AND {$wpdb->term_relationships}.term_taxonomy_id = %d", $term['term_taxonomy_id'] );
}
}
if ( $args['author'] )
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_author = %d", $args['author'] );
// start_date is an inclusive lower bound.
if ( $args['start_date'] )
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_date >= %s", date( 'Y-m-d', strtotime($args['start_date']) ) );
// end_date is an exclusive upper bound placed one month after the given date.
if ( $args['end_date'] )
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_date < %s", date( 'Y-m-d', strtotime('+1 month', strtotime($args['end_date'])) ) );
// grab a snapshot of post IDs, just in case it changes during the export
$post_ids = $wpdb->get_col( "SELECT ID FROM {$wpdb->posts} $join WHERE $where" );
// get the requested terms ready, empty unless posts filtered by category or all content
$cats = $tags = $terms = array();
if ( isset( $term ) && $term ) {
// Filtered by a single category: list only that category.
$cat = get_term( $term['term_id'], 'category' );
$cats = array( $cat->term_id => $cat );
unset( $term, $cat );
} else if ( 'all' == $args['content'] ) {
$categories = (array) get_categories( array( 'get' => 'all' ) );
$tags = (array) get_tags( array( 'get' => 'all' ) );
$custom_taxonomies = get_taxonomies( array( '_builtin' => false ) );
$custom_terms = (array) get_terms( $custom_taxonomies, array( 'get' => 'all' ) );
// put categories in order with no child going before its parent
// NOTE(review): a category whose parent never appears in $cats is pushed back onto the
// queue on every pass, so this loop would not terminate for such an orphan — confirm
// the data cannot contain one.
while ( $cat = array_shift( $categories ) ) {
if ( $cat->parent == 0 || isset( $cats[$cat->parent] ) )
$cats[$cat->term_id] = $cat;
else
$categories[] = $cat;
}
// put terms in order with no child going before its parent
// NOTE(review): same re-queue pattern as above; an orphaned term would loop forever.
while ( $t = array_shift( $custom_terms ) ) {
if ( $t->parent == 0 || isset( $terms[$t->parent] ) )
$terms[$t->term_id] = $t;
else
$custom_terms[] = $t;
}
unset( $categories, $custom_taxonomies, $custom_terms );
}
/**
* Enclose a string in an XML CDATA section.
*
* Input that does not pass seems_utf8() is first run through utf8_encode().
* Any "]]>" inside the string is split across two CDATA sections so that it
* cannot terminate the section early.
*
* @since 2.1.0
*
* @param string $str Text to enclose.
* @return string The text wrapped in "<![CDATA[" ... "]]>".
*/
function wxr_cdata( $str ) {
	if ( ! seems_utf8( $str ) ) {
		$str = utf8_encode( $str );
	}

	// Break up any embedded CDATA terminator before wrapping.
	$escaped = str_replace( ']]>', ']]]]><![CDATA[>', $str );

	return '<![CDATA[' . $escaped . ']]>';
}
/**
* Return the base URL written into the export header.
*
* On a multisite install this is network_home_url(); on a single-site
* install it is the 'url' value from get_bloginfo_rss().
*
* @since 2.5.0
*
* @return string Site URL.
*/
function wxr_site_url() {
	return is_multisite() ? network_home_url() : get_bloginfo_rss( 'url' );
}
/**
* Echo a <wp:cat_name> element for a category.
*
* Nothing is printed when the category's name is empty.
*
* @since 2.1.0
*
* @param object $category Category object; its ->name property is read.
*/
function wxr_cat_name( $category ) {
	if ( ! empty( $category->name ) ) {
		$name = wxr_cdata( $category->name );
		echo '<wp:cat_name>' . $name . '</wp:cat_name>';
	}
}
/**
* Echo a <wp:category_description> element for a category.
*
* Nothing is printed when the category's description is empty.
*
* @since 2.1.0
*
* @param object $category Category object; its ->description property is read.
*/
function wxr_category_description( $category ) {
	if ( ! empty( $category->description ) ) {
		$description = wxr_cdata( $category->description );
		echo '<wp:category_description>' . $description . '</wp:category_description>';
	}
}
/**
* Echo a <wp:tag_name> element for a tag.
*
* Nothing is printed when the tag's name is empty.
*
* @since 2.3.0
*
* @param object $tag Tag object; its ->name property is read.
*/
function wxr_tag_name( $tag ) {
	if ( ! empty( $tag->name ) ) {
		$name = wxr_cdata( $tag->name );
		echo '<wp:tag_name>' . $name . '</wp:tag_name>';
	}
}
/**
* Echo a <wp:tag_description> element for a tag.
*
* Nothing is printed when the tag's description is empty.
*
* @since 2.3.0
*
* @param object $tag Tag object; its ->description property is read.
*/
function wxr_tag_description( $tag ) {
	if ( ! empty( $tag->description ) ) {
		$description = wxr_cdata( $tag->description );
		echo '<wp:tag_description>' . $description . '</wp:tag_description>';
	}
}
/**
* Echo a <wp:term_name> element for a term.
*
* Nothing is printed when the term's name is empty.
*
* @since 2.9.0
*
* @param object $term Term object; its ->name property is read.
*/
function wxr_term_name( $term ) {
	if ( ! empty( $term->name ) ) {
		$name = wxr_cdata( $term->name );
		echo '<wp:term_name>' . $name . '</wp:term_name>';
	}
}
/**
* Echo a <wp:term_description> element for a term.
*
* Nothing is printed when the term's description is empty.
*
* @since 2.9.0
*
* @param object $term Term object; its ->description property is read.
*/
function wxr_term_description( $term ) {
	if ( ! empty( $term->description ) ) {
		$description = wxr_cdata( $term->description );
		echo '<wp:term_description>' . $description . '</wp:term_description>';
	}
}
/**
* Output list of authors with posts
*
* Queries the distinct post_author IDs of every post whose status is not
* 'auto-draft', loads each user with get_userdata(), and echoes one
* <wp:author> element per user found.
*
* All free-text fields, including the login and e-mail address, are wrapped
* in CDATA so that characters such as "&" or "<" cannot break the XML. The
* author ID is cast to an integer for the same reason.
*
* @since 3.1.0
*/
function wxr_authors_list() {
	global $wpdb;

	$authors = array();
	$results = $wpdb->get_results( "SELECT DISTINCT post_author FROM $wpdb->posts WHERE post_status != 'auto-draft'" );
	foreach ( (array) $results as $result ) {
		$authors[] = get_userdata( $result->post_author );
	}

	// array_filter() removes falsy entries (presumably post_author IDs with no matching user).
	$authors = array_filter( $authors );

	foreach ( $authors as $author ) {
		echo "\t<wp:author>";
		echo '<wp:author_id>' . (int) $author->ID . '</wp:author_id>';
		echo '<wp:author_login>' . wxr_cdata( $author->user_login ) . '</wp:author_login>';
		echo '<wp:author_email>' . wxr_cdata( $author->user_email ) . '</wp:author_email>';
		echo '<wp:author_display_name>' . wxr_cdata( $author->display_name ) . '</wp:author_display_name>';
		echo '<wp:author_first_name>' . wxr_cdata( $author->user_firstname ) . '</wp:author_first_name>';
		echo '<wp:author_last_name>' . wxr_cdata( $author->user_lastname ) . '</wp:author_last_name>';
		echo "</wp:author>\n";
	}
}
/**
* Output all navigation menu terms
*
* Echoes one <wp:term> element (taxonomy "nav_menu") for every menu returned
* by wp_get_nav_menus(). Prints nothing when that call returns an empty
* value or something other than an array.
*
* @since 3.1.0
*/
function wxr_nav_menu_terms() {
	$menus = wp_get_nav_menus();
	if ( ! ( ! empty( $menus ) && is_array( $menus ) ) ) {
		return;
	}

	foreach ( $menus as $menu ) {
		$opening  = "\t<wp:term><wp:term_id>" . $menu->term_id . '</wp:term_id>';
		$opening .= '<wp:term_taxonomy>nav_menu</wp:term_taxonomy>';
		$opening .= '<wp:term_slug>' . $menu->slug . '</wp:term_slug>';
		echo $opening;
		wxr_term_name( $menu );
		echo "</wp:term>\n";
	}
}
/**
* Output list of taxonomy terms, in XML tag format, associated with a post
*
* Operates on the current post as resolved by get_post() with no argument,
* and echoes one <category> element per term attached to that post across
* all taxonomies registered for its post type. Prints nothing when there is
* no current post or the post type has no taxonomies.
*
* @since 2.3.0
*/
function wxr_post_taxonomy() {
	// There is no local $post in this scope, so reading a property from it
	// (as the previous code did) only worked by passing null through to
	// get_post() and raised a diagnostic on the way. Ask for the current
	// post directly instead.
	$post = get_post();
	if ( ! $post ) {
		return;
	}

	$taxonomies = get_object_taxonomies( $post->post_type );
	if ( empty( $taxonomies ) ) {
		return;
	}

	$terms = wp_get_object_terms( $post->ID, $taxonomies );
	foreach ( (array) $terms as $term ) {
		echo "\t\t<category domain=\"{$term->taxonomy}\" nicename=\"{$term->slug}\">" . wxr_cdata( $term->name ) . "</category>\n";
	}
}
/**
* Callback for the 'wxr_export_skip_postmeta' filter.
*
* Forces the "skip" flag to true for the '_edit_lock' meta key and passes the
* incoming flag through unchanged for every other key.
*
* @param mixed  $return_me Current skip decision supplied by the filter chain.
* @param string $meta_key  Meta key of the row being considered.
* @return mixed True for '_edit_lock', otherwise $return_me as received.
*/
function wxr_filter_postmeta( $return_me, $meta_key ) {
	return ( '_edit_lock' == $meta_key ) ? true : $return_me;
}
// Register the callback that marks '_edit_lock' meta rows as skipped (two arguments, priority 10).
add_filter( 'wxr_export_skip_postmeta', 'wxr_filter_postmeta', 10, 2 );
// XML prolog; the encoding attribute is taken from the blog's 'charset' setting.
echo '<?xml version="1.0" encoding="' . get_bloginfo('charset') . "\" ?>\n";
the_generator( 'export' ); ?>
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/<?php echo WXR_VERSION; ?>/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/<?php echo WXR_VERSION; ?>/"
>
<channel>
<title><?php bloginfo_rss( 'name' ); ?></title>
<link><?php bloginfo_rss( 'url' ); ?></link>
<description><?php bloginfo_rss( 'description' ); ?></description>
<pubDate><?php echo date( 'D, d M Y H:i:s +0000' ); ?></pubDate>
<language><?php bloginfo_rss( 'language' ); ?></language>
<wp:wxr_version><?php echo WXR_VERSION; ?></wp:wxr_version>
<wp:base_site_url><?php echo wxr_site_url(); ?></wp:base_site_url>
<wp:base_blog_url><?php bloginfo_rss( 'url' ); ?></wp:base_blog_url>
<?php wxr_authors_list(); ?>
<?php foreach ( $cats as $c ) : ?>
<wp:category><wp:term_id><?php echo $c->term_id ?></wp:term_id><wp:category_nicename><?php echo $c->slug; ?></wp:category_nicename><wp:category_parent><?php echo $c->parent ? $cats[$c->parent]->slug : ''; ?></wp:category_parent><?php wxr_cat_name( $c ); ?><?php wxr_category_description( $c ); ?></wp:category>
<?php endforeach; ?>
<?php foreach ( $tags as $t ) : ?>
<wp:tag><wp:term_id><?php echo $t->term_id ?></wp:term_id><wp:tag_slug><?php echo $t->slug; ?></wp:tag_slug><?php wxr_tag_name( $t ); ?><?php wxr_tag_description( $t ); ?></wp:tag>
<?php endforeach; ?>
<?php foreach ( $terms as $t ) : ?>
<wp:term><wp:term_id><?php echo $t->term_id ?></wp:term_id><wp:term_taxonomy><?php echo $t->taxonomy; ?></wp:term_taxonomy><wp:term_slug><?php echo $t->slug; ?></wp:term_slug><wp:term_parent><?php echo $t->parent ? $terms[$t->parent]->slug : ''; ?></wp:term_parent><?php wxr_term_name( $t ); ?><?php wxr_term_description( $t ); ?></wp:term>
<?php endforeach; ?>
<?php if ( 'all' == $args['content'] ) wxr_nav_menu_terms(); ?>
<?php do_action( 'rss2_head' ); ?>
<?php if ( $post_ids ) {
global $wp_query;
$wp_query->in_the_loop = true; // Fake being in the loop.
// fetch 20 posts at a time rather than loading the entire table into memory
// array_splice() removes each batch from $post_ids, so the loop ends once the snapshot is used up.
while ( $next_posts = array_splice( $post_ids, 0, 20 ) ) {
$where = 'WHERE ID IN (' . join( ',', $next_posts ) . ')';
$posts = $wpdb->get_results( "SELECT * FROM {$wpdb->posts} $where" );
// Begin Loop
// $post is the global declared at the top of this function, so each row becomes the global post.
foreach ( $posts as $post ) {
setup_postdata( $post );
$is_sticky = is_sticky( $post->ID ) ? 1 : 0;
// NOTE(review): in the <guid> line below the_guid() is called inside esc_url() whose result
// is discarded — presumably the_guid() prints the value itself; confirm.
?>
<item>
<title><?php echo apply_filters( 'the_title_rss', $post->post_title ); ?></title>
<link><?php the_permalink_rss() ?></link>
<pubDate><?php echo mysql2date( 'D, d M Y H:i:s +0000', get_post_time( 'Y-m-d H:i:s', true ), false ); ?></pubDate>
<dc:creator><?php echo get_the_author_meta( 'login' ); ?></dc:creator>
<guid isPermaLink="false"><?php esc_url( the_guid() ); ?></guid>
<description></description>
<content:encoded><?php echo wxr_cdata( apply_filters( 'the_content_export', $post->post_content ) ); ?></content:encoded>
<excerpt:encoded><?php echo wxr_cdata( apply_filters( 'the_excerpt_export', $post->post_excerpt ) ); ?></excerpt:encoded>
<wp:post_id><?php echo $post->ID; ?></wp:post_id>
<wp:post_date><?php echo $post->post_date; ?></wp:post_date>
<wp:post_date_gmt><?php echo $post->post_date_gmt; ?></wp:post_date_gmt>
<wp:comment_status><?php echo $post->comment_status; ?></wp:comment_status>
<wp:ping_status><?php echo $post->ping_status; ?></wp:ping_status>
<wp:post_name><?php echo $post->post_name; ?></wp:post_name>
<wp:status><?php echo $post->post_status; ?></wp:status>
<wp:post_parent><?php echo $post->post_parent; ?></wp:post_parent>
<wp:menu_order><?php echo $post->menu_order; ?></wp:menu_order>
<wp:post_type><?php echo $post->post_type; ?></wp:post_type>
<wp:post_password><?php echo $post->post_password; ?></wp:post_password>
<wp:is_sticky><?php echo $is_sticky; ?></wp:is_sticky>
<?php if ( $post->post_type == 'attachment' ) : ?>
<wp:attachment_url><?php echo wp_get_attachment_url( $post->ID ); ?></wp:attachment_url>
<?php endif; ?>
<?php wxr_post_taxonomy(); ?>
<?php $postmeta = $wpdb->get_results( $wpdb->prepare( "SELECT * FROM $wpdb->postmeta WHERE post_id = %d", $post->ID ) );
// Emit every meta row except those for which the 'wxr_export_skip_postmeta' filter returns a truthy value.
foreach ( $postmeta as $meta ) :
if ( apply_filters( 'wxr_export_skip_postmeta', false, $meta->meta_key, $meta ) )
continue;
?>
<wp:postmeta>
<wp:meta_key><?php echo $meta->meta_key; ?></wp:meta_key>
<wp:meta_value><?php echo wxr_cdata( $meta->meta_value ); ?></wp:meta_value>
</wp:postmeta>
<?php endforeach; ?>
</item>
<?php
}
}
} ?>
</channel>
</rss>
<?php
// End of my_export_wp().
}
// ---------------------------------------------------------------------------
// Command-line entry point.
//
// Usage: php <script> --host=<hostname> [--blogid=<id>] <wordpress-dir>
//
// Both long options are declared with an optional value ("::"), so they must
// be given in the --name=value form. The WordPress directory is taken from
// the last command-line argument. The export is written to standard output.
// ---------------------------------------------------------------------------

/**
* Report a fatal CLI error and stop.
*
* The message goes to STDERR, not STDOUT, because STDOUT carries the export
* document and is typically redirected into a file. The process exits with
* status 1 so callers can detect the failure.
*
* @param string $message Human-readable error text (without trailing newline).
*/
function wxr_cli_abort( $message ) {
	fwrite( STDERR, $message . PHP_EOL );
	exit( 1 );
}

$options = getopt('', array(
	'blogid::',
	'host::',
));
$wp_dir = array_pop($argv);
$wp_load_path = $wp_dir.'/wp-load.php';
$wp_export_path = $wp_dir.'/wp-admin/includes/export.php';

// Read the options defensively: a missing option must not raise an
// "undefined array key" diagnostic into the output.
$host = isset( $options['host'] ) ? $options['host'] : '';
// Kept under its own name: WordPress assigns the global $blog_id while it
// boots, which would overwrite a value stored in a variable of that name
// before wp-load.php is included.
$requested_blog_id = isset( $options['blogid'] ) ? (int) $options['blogid'] : 0;

if(!is_dir($wp_dir))
	wxr_cli_abort('Directory does not exist: '.$wp_dir);
if(!file_exists($wp_load_path))
	wxr_cli_abort('wp-load.php not found');
// Only the presence of export.php is verified; the file itself is not included.
if(!file_exists($wp_export_path))
	wxr_cli_abort('wp-admin/includes/export.php not found');
if(!$host)
	wxr_cli_abort('Host required.');

// WordPress reads the host name from the request environment, so provide one
// before bootstrapping.
$_SERVER['HTTP_HOST'] = $host;
require_once $wp_load_path;

if(is_multisite()){
	if(!$requested_blog_id)
		wxr_cli_abort('Blog ID required.');
	switch_to_blog($requested_blog_id);
}
my_export_wp();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment