Created
June 18, 2013 20:34
-
-
Save fcingolani/5809103 to your computer and use it in GitHub Desktop.
Generate a WXR file from shell. Intended for BIG WordPress installations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
error_reporting(E_ALL ^ E_NOTICE); | |
define( 'WXR_VERSION', '1.2' ); | |
/** | |
* Generates the WXR export and echoes it to standard output (no download headers are sent)
* | |
* @since 2.1.0 | |
* | |
* @param array $args Filters defining what should be included in the export | |
*/ | |
function my_export_wp( $args = array() ) { | |
global $wpdb, $post; | |
$defaults = array( 'content' => 'all', 'author' => false, 'category' => false, | |
'start_date' => false, 'end_date' => false, 'status' => false, | |
); | |
$args = wp_parse_args( $args, $defaults ); | |
do_action( 'export_wp' ); | |
if ( 'all' != $args['content'] && post_type_exists( $args['content'] ) ) { | |
$ptype = get_post_type_object( $args['content'] ); | |
if ( ! $ptype->can_export ) | |
$args['content'] = 'post'; | |
$where = $wpdb->prepare( "{$wpdb->posts}.post_type = %s", $args['content'] ); | |
} else { | |
$post_types = get_post_types( array( 'can_export' => true ) ); | |
$esses = array_fill( 0, count($post_types), '%s' ); | |
$where = $wpdb->prepare( "{$wpdb->posts}.post_type IN (" . implode( ',', $esses ) . ')', $post_types ); | |
} | |
if ( $args['status'] && ( 'post' == $args['content'] || 'page' == $args['content'] ) ) | |
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_status = %s", $args['status'] ); | |
else | |
$where .= " AND {$wpdb->posts}.post_status != 'auto-draft'"; | |
$join = ''; | |
if ( $args['category'] && 'post' == $args['content'] ) { | |
if ( $term = term_exists( $args['category'], 'category' ) ) { | |
$join = "INNER JOIN {$wpdb->term_relationships} ON ({$wpdb->posts}.ID = {$wpdb->term_relationships}.object_id)"; | |
$where .= $wpdb->prepare( " AND {$wpdb->term_relationships}.term_taxonomy_id = %d", $term['term_taxonomy_id'] ); | |
} | |
} | |
if ( $args['author'] ) | |
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_author = %d", $args['author'] ); | |
if ( $args['start_date'] ) | |
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_date >= %s", date( 'Y-m-d', strtotime($args['start_date']) ) ); | |
if ( $args['end_date'] ) | |
$where .= $wpdb->prepare( " AND {$wpdb->posts}.post_date < %s", date( 'Y-m-d', strtotime('+1 month', strtotime($args['end_date'])) ) ); | |
// grab a snapshot of post IDs, just in case it changes during the export | |
$post_ids = $wpdb->get_col( "SELECT ID FROM {$wpdb->posts} $join WHERE $where" ); | |
// get the requested terms ready, empty unless posts filtered by category or all content | |
$cats = $tags = $terms = array(); | |
if ( isset( $term ) && $term ) { | |
$cat = get_term( $term['term_id'], 'category' ); | |
$cats = array( $cat->term_id => $cat ); | |
unset( $term, $cat ); | |
} else if ( 'all' == $args['content'] ) { | |
$categories = (array) get_categories( array( 'get' => 'all' ) ); | |
$tags = (array) get_tags( array( 'get' => 'all' ) ); | |
$custom_taxonomies = get_taxonomies( array( '_builtin' => false ) ); | |
$custom_terms = (array) get_terms( $custom_taxonomies, array( 'get' => 'all' ) ); | |
// put categories in order with no child going before its parent | |
while ( $cat = array_shift( $categories ) ) { | |
if ( $cat->parent == 0 || isset( $cats[$cat->parent] ) ) | |
$cats[$cat->term_id] = $cat; | |
else | |
$categories[] = $cat; | |
} | |
// put terms in order with no child going before its parent | |
while ( $t = array_shift( $custom_terms ) ) { | |
if ( $t->parent == 0 || isset( $terms[$t->parent] ) ) | |
$terms[$t->term_id] = $t; | |
else | |
$custom_terms[] = $t; | |
} | |
unset( $categories, $custom_taxonomies, $custom_terms ); | |
} | |
/**
 * Wrap a string in an XML CDATA section.
 *
 * A string that does not pass seems_utf8() is run through utf8_encode()
 * first. Every "]]>" inside the string is split across two adjacent CDATA
 * sections so that it cannot close the section prematurely.
 *
 * @since 2.1.0
 *
 * @param string $str String to wrap in XML CDATA tag.
 * @return string The CDATA-wrapped string.
 */
function wxr_cdata( $str ) {
	if ( ! seems_utf8( $str ) ) {
		$str = utf8_encode( $str );
	}

	// "]]>" would end the CDATA section; break it into "]]" + ">" in separate sections.
	$escaped = str_replace( ']]>', ']]]]><![CDATA[>', $str );

	return '<![CDATA[' . $escaped . ']]>';
}
/**
 * Return the URL of the site.
 *
 * On a multisite install this is network_home_url(); otherwise it is the
 * blog URL as returned by get_bloginfo_rss( 'url' ).
 *
 * @since 2.5.0
 *
 * @return string Site URL.
 */
function wxr_site_url() {
	return is_multisite() ? network_home_url() : get_bloginfo_rss( 'url' );
}
/**
 * Echo a <wp:cat_name> XML element for a category.
 *
 * Prints nothing when the category's name is empty.
 *
 * @since 2.1.0
 *
 * @param object $category Category object; its `name` property is read.
 */
function wxr_cat_name( $category ) {
	if ( ! empty( $category->name ) ) {
		echo '<wp:cat_name>' . wxr_cdata( $category->name ) . '</wp:cat_name>';
	}
}
/**
 * Echo a <wp:category_description> XML element for a category.
 *
 * Prints nothing when the category's description is empty.
 *
 * @since 2.1.0
 *
 * @param object $category Category object; its `description` property is read.
 */
function wxr_category_description( $category ) {
	if ( ! empty( $category->description ) ) {
		$wrapped = wxr_cdata( $category->description );
		echo '<wp:category_description>' . $wrapped . '</wp:category_description>';
	}
}
/**
 * Echo a <wp:tag_name> XML element for a tag.
 *
 * Prints nothing when the tag's name is empty.
 *
 * @since 2.3.0
 *
 * @param object $tag Tag object; its `name` property is read.
 */
function wxr_tag_name( $tag ) {
	if ( ! empty( $tag->name ) ) {
		echo '<wp:tag_name>' . wxr_cdata( $tag->name ) . '</wp:tag_name>';
	}
}
/**
 * Echo a <wp:tag_description> XML element for a tag.
 *
 * Prints nothing when the tag's description is empty.
 *
 * @since 2.3.0
 *
 * @param object $tag Tag object; its `description` property is read.
 */
function wxr_tag_description( $tag ) {
	if ( ! empty( $tag->description ) ) {
		$wrapped = wxr_cdata( $tag->description );
		echo '<wp:tag_description>' . $wrapped . '</wp:tag_description>';
	}
}
/**
 * Echo a <wp:term_name> XML element for a term.
 *
 * Prints nothing when the term's name is empty.
 *
 * @since 2.9.0
 *
 * @param object $term Term object; its `name` property is read.
 */
function wxr_term_name( $term ) {
	if ( ! empty( $term->name ) ) {
		echo '<wp:term_name>' . wxr_cdata( $term->name ) . '</wp:term_name>';
	}
}
/**
 * Echo a <wp:term_description> XML element for a term.
 *
 * Prints nothing when the term's description is empty.
 *
 * @since 2.9.0
 *
 * @param object $term Term object; its `description` property is read.
 */
function wxr_term_description( $term ) {
	if ( ! empty( $term->description ) ) {
		$wrapped = wxr_cdata( $term->description );
		echo '<wp:term_description>' . $wrapped . '</wp:term_description>';
	}
}
/**
 * Output the list of authors that have posts.
 *
 * Selects the distinct post_author IDs of every post whose status is not
 * 'auto-draft', loads each one with get_userdata(), and echoes a
 * <wp:author> element for every user that could be loaded.
 *
 * The login and e-mail address are wrapped in CDATA (like the name fields)
 * so that characters such as "&" or "<" cannot break the XML document.
 *
 * @since 3.1.0
 */
function wxr_authors_list() {
	global $wpdb;

	$results = $wpdb->get_results( "SELECT DISTINCT post_author FROM $wpdb->posts WHERE post_status != 'auto-draft'" );

	foreach ( (array) $results as $result ) {
		$author = get_userdata( $result->post_author );

		// Skip IDs for which get_userdata() returned a falsy value.
		if ( ! $author ) {
			continue;
		}

		echo "\t<wp:author>";
		echo '<wp:author_id>' . intval( $author->ID ) . '</wp:author_id>';
		echo '<wp:author_login>' . wxr_cdata( $author->user_login ) . '</wp:author_login>';
		echo '<wp:author_email>' . wxr_cdata( $author->user_email ) . '</wp:author_email>';
		echo '<wp:author_display_name>' . wxr_cdata( $author->display_name ) . '</wp:author_display_name>';
		echo '<wp:author_first_name>' . wxr_cdata( $author->user_firstname ) . '</wp:author_first_name>';
		echo '<wp:author_last_name>' . wxr_cdata( $author->user_lastname ) . '</wp:author_last_name>';
		echo "</wp:author>\n";
	}
}
/**
 * Output all navigation menu terms.
 *
 * Echoes one <wp:term> element with taxonomy "nav_menu" for each menu
 * returned by wp_get_nav_menus(). Nothing is printed when that call yields
 * an empty or non-array value.
 *
 * @since 3.1.0
 */
function wxr_nav_menu_terms() {
	$nav_menus = wp_get_nav_menus();

	if ( is_array( $nav_menus ) && ! empty( $nav_menus ) ) {
		foreach ( $nav_menus as $menu ) {
			echo "\t<wp:term><wp:term_id>{$menu->term_id}</wp:term_id><wp:term_taxonomy>nav_menu</wp:term_taxonomy><wp:term_slug>{$menu->slug}</wp:term_slug>";
			wxr_term_name( $menu );
			echo "</wp:term>\n";
		}
	}
}
/**
 * Output the taxonomy terms associated with the current post as
 * <category> XML elements.
 *
 * Prints nothing when the post's type has no taxonomies.
 *
 * @since 2.3.0
 */
function wxr_post_taxonomy() {
	// Fetch the current (global) post. No local $post exists in this scope,
	// so reading a property from it would only produce a warning in the output.
	$post = get_post();

	$taxonomies = get_object_taxonomies( $post->post_type );
	if ( empty( $taxonomies ) ) {
		return;
	}

	$terms = wp_get_object_terms( $post->ID, $taxonomies );

	foreach ( (array) $terms as $term ) {
		echo "\t\t<category domain=\"{$term->taxonomy}\" nicename=\"{$term->slug}\">" . wxr_cdata( $term->name ) . "</category>\n";
	}
}
/**
 * Filter callback for 'wxr_export_skip_postmeta'.
 *
 * Forces the skip flag to true for the '_edit_lock' meta key and passes
 * the incoming flag through unchanged for every other key.
 *
 * @param mixed  $return_me Current skip flag.
 * @param string $meta_key  Meta key being considered.
 * @return mixed True for '_edit_lock', otherwise $return_me as given.
 */
function wxr_filter_postmeta( $return_me, $meta_key ) {
	return ( '_edit_lock' == $meta_key ) ? true : $return_me;
}
add_filter( 'wxr_export_skip_postmeta', 'wxr_filter_postmeta', 10, 2 ); | |
echo '<?xml version="1.0" encoding="' . get_bloginfo('charset') . "\" ?>\n"; | |
the_generator( 'export' ); ?> | |
<rss version="2.0" | |
xmlns:excerpt="http://wordpress.org/export/<?php echo WXR_VERSION; ?>/excerpt/" | |
xmlns:content="http://purl.org/rss/1.0/modules/content/" | |
xmlns:wfw="http://wellformedweb.org/CommentAPI/" | |
xmlns:dc="http://purl.org/dc/elements/1.1/" | |
xmlns:wp="http://wordpress.org/export/<?php echo WXR_VERSION; ?>/" | |
> | |
<channel> | |
<title><?php bloginfo_rss( 'name' ); ?></title> | |
<link><?php bloginfo_rss( 'url' ); ?></link> | |
<description><?php bloginfo_rss( 'description' ); ?></description> | |
<pubDate><?php echo date( 'D, d M Y H:i:s +0000' ); ?></pubDate> | |
<language><?php bloginfo_rss( 'language' ); ?></language> | |
<wp:wxr_version><?php echo WXR_VERSION; ?></wp:wxr_version> | |
<wp:base_site_url><?php echo wxr_site_url(); ?></wp:base_site_url> | |
<wp:base_blog_url><?php bloginfo_rss( 'url' ); ?></wp:base_blog_url> | |
<?php wxr_authors_list(); ?> | |
<?php foreach ( $cats as $c ) : ?> | |
<wp:category><wp:term_id><?php echo $c->term_id ?></wp:term_id><wp:category_nicename><?php echo $c->slug; ?></wp:category_nicename><wp:category_parent><?php echo $c->parent ? $cats[$c->parent]->slug : ''; ?></wp:category_parent><?php wxr_cat_name( $c ); ?><?php wxr_category_description( $c ); ?></wp:category> | |
<?php endforeach; ?> | |
<?php foreach ( $tags as $t ) : ?> | |
<wp:tag><wp:term_id><?php echo $t->term_id ?></wp:term_id><wp:tag_slug><?php echo $t->slug; ?></wp:tag_slug><?php wxr_tag_name( $t ); ?><?php wxr_tag_description( $t ); ?></wp:tag> | |
<?php endforeach; ?> | |
<?php foreach ( $terms as $t ) : ?> | |
<wp:term><wp:term_id><?php echo $t->term_id ?></wp:term_id><wp:term_taxonomy><?php echo $t->taxonomy; ?></wp:term_taxonomy><wp:term_slug><?php echo $t->slug; ?></wp:term_slug><wp:term_parent><?php echo $t->parent ? $terms[$t->parent]->slug : ''; ?></wp:term_parent><?php wxr_term_name( $t ); ?><?php wxr_term_description( $t ); ?></wp:term> | |
<?php endforeach; ?> | |
<?php if ( 'all' == $args['content'] ) wxr_nav_menu_terms(); ?> | |
<?php do_action( 'rss2_head' ); ?> | |
<?php if ( $post_ids ) { | |
global $wp_query; | |
$wp_query->in_the_loop = true; // Fake being in the loop. | |
// fetch 20 posts at a time rather than loading the entire table into memory | |
while ( $next_posts = array_splice( $post_ids, 0, 20 ) ) { | |
$where = 'WHERE ID IN (' . join( ',', $next_posts ) . ')'; | |
$posts = $wpdb->get_results( "SELECT * FROM {$wpdb->posts} $where" ); | |
// Begin Loop | |
foreach ( $posts as $post ) { | |
setup_postdata( $post ); | |
$is_sticky = is_sticky( $post->ID ) ? 1 : 0; | |
?> | |
<item> | |
<title><?php echo apply_filters( 'the_title_rss', $post->post_title ); ?></title> | |
<link><?php the_permalink_rss() ?></link> | |
<pubDate><?php echo mysql2date( 'D, d M Y H:i:s +0000', get_post_time( 'Y-m-d H:i:s', true ), false ); ?></pubDate> | |
<dc:creator><?php echo get_the_author_meta( 'login' ); ?></dc:creator> | |
<guid isPermaLink="false"><?php esc_url( the_guid() ); ?></guid> | |
<description></description> | |
<content:encoded><?php echo wxr_cdata( apply_filters( 'the_content_export', $post->post_content ) ); ?></content:encoded> | |
<excerpt:encoded><?php echo wxr_cdata( apply_filters( 'the_excerpt_export', $post->post_excerpt ) ); ?></excerpt:encoded> | |
<wp:post_id><?php echo $post->ID; ?></wp:post_id> | |
<wp:post_date><?php echo $post->post_date; ?></wp:post_date> | |
<wp:post_date_gmt><?php echo $post->post_date_gmt; ?></wp:post_date_gmt> | |
<wp:comment_status><?php echo $post->comment_status; ?></wp:comment_status> | |
<wp:ping_status><?php echo $post->ping_status; ?></wp:ping_status> | |
<wp:post_name><?php echo $post->post_name; ?></wp:post_name> | |
<wp:status><?php echo $post->post_status; ?></wp:status> | |
<wp:post_parent><?php echo $post->post_parent; ?></wp:post_parent> | |
<wp:menu_order><?php echo $post->menu_order; ?></wp:menu_order> | |
<wp:post_type><?php echo $post->post_type; ?></wp:post_type> | |
<wp:post_password><?php echo $post->post_password; ?></wp:post_password> | |
<wp:is_sticky><?php echo $is_sticky; ?></wp:is_sticky> | |
<?php if ( $post->post_type == 'attachment' ) : ?> | |
<wp:attachment_url><?php echo wp_get_attachment_url( $post->ID ); ?></wp:attachment_url> | |
<?php endif; ?> | |
<?php wxr_post_taxonomy(); ?> | |
<?php $postmeta = $wpdb->get_results( $wpdb->prepare( "SELECT * FROM $wpdb->postmeta WHERE post_id = %d", $post->ID ) ); | |
foreach ( $postmeta as $meta ) : | |
if ( apply_filters( 'wxr_export_skip_postmeta', false, $meta->meta_key, $meta ) ) | |
continue; | |
?> | |
<wp:postmeta> | |
<wp:meta_key><?php echo $meta->meta_key; ?></wp:meta_key> | |
<wp:meta_value><?php echo wxr_cdata( $meta->meta_value ); ?></wp:meta_value> | |
</wp:postmeta> | |
<?php endforeach; ?> | |
</item> | |
<?php | |
} | |
} | |
} ?> | |
</channel> | |
</rss> | |
<?php | |
} | |
/**
 * Print an error message to STDERR and stop with a non-zero exit status.
 *
 * Errors are kept off STDOUT because STDOUT carries the generated XML.
 *
 * @param string $message Reason for the failure.
 */
function my_export_abort( $message ) {
	fwrite( STDERR, $message . PHP_EOL );
	exit( 1 );
}

// Usage: php <this-script> --host=<hostname> [--blogid=<id>] <wordpress-dir>
$options = getopt( '', array(
	'blogid::',
	'host::',
) );

// getopt() may not return an array on failure; treat that as "no options".
if ( ! is_array( $options ) ) {
	$options = array();
}

// The WordPress directory is taken from the last command-line argument.
$wp_dir         = array_pop( $argv );
$wp_load_path   = $wp_dir . '/wp-load.php';
$wp_export_path = $wp_dir . '/wp-admin/includes/export.php';

// Missing options become false instead of triggering an undefined-index error.
$host    = isset( $options['host'] ) ? $options['host'] : false;
$blog_id = isset( $options['blogid'] ) ? $options['blogid'] : false;

if ( ! is_dir( $wp_dir ) ) {
	my_export_abort( 'Directory does not exist: ' . $wp_dir );
}

if ( ! file_exists( $wp_load_path ) ) {
	my_export_abort( 'wp-load.php not found' );
}

if ( ! file_exists( $wp_export_path ) ) {
	my_export_abort( 'wp-admin/includes/export.php not found' );
}

if ( ! $host ) {
	my_export_abort( 'Host required.' );
}

// Set the host before WordPress is loaded.
$_SERVER['HTTP_HOST'] = $host;

require_once $wp_load_path;

if ( is_multisite() ) {
	if ( ! $blog_id ) {
		my_export_abort( 'Blog ID required.' );
	}

	switch_to_blog( $blog_id );
}

my_export_wp();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment