Skip to content

Instantly share code, notes, and snippets.

@athurg
Last active December 19, 2015 12:39
Show Gist options
  • Save athurg/5956420 to your computer and use it in GitHub Desktop.
Save athurg/5956420 to your computer and use it in GitHub Desktop.
用于解析Wordpress导出的WXR格式的工具。可输出为PHP数组或者SQL语句。
<?php
/**
 * Decoder for WordPress eXtended RSS (WXR) export files.
 *
 * Loads an export document with SimpleXML and exposes its published posts and
 * its tags either as PHP arrays or as ready-to-run SQL INSERT statements.
 */
class WXRDecoder
{
    /** @var SimpleXMLElement Root element of the loaded export document. */
    protected $_parser;

    /** @var array Map of namespace prefix => namespace URI declared in the document. */
    protected $_ns;

    /** @var array List of post objects (id, title, date, tags, content) from the last parse_posts() call. */
    public $posts = array();

    /** @var array Sorted list of unique tag names from the last parse_tags() call. */
    public $tags = array();

    /** @var string Table name used by parse_posts_sql(). */
    public $post_table = 'tbl_posts';

    /** @var string Table name used by parse_tags_sql(). */
    public $tag_table = 'tbl_tags';

    /**
     * Load the export file.
     *
     * @param string $filename Path or URL of the WXR document (passed to
     *                         SimpleXMLElement with data_is_url = true).
     */
    public function __construct($filename = 'wordpress.xml')
    {
        $this->_parser = new SimpleXMLElement($filename, LIBXML_NSCLEAN | LIBXML_NOCDATA, true);
        $this->_ns = $this->_parser->getNamespaces(true);
    }

    /**
     * Set the table name used in the generated post INSERT statements.
     *
     * @param string $table
     */
    public function set_post_table($table)
    {
        $this->post_table = $table;
    }

    /**
     * Set the table name used in the generated tag INSERT statements.
     *
     * @param string $table
     */
    public function set_tag_table($table)
    {
        $this->tag_table = $table;
    }

    /**
     * Collect every published item of the export.
     *
     * Each call starts from an empty list, so calling it repeatedly (directly
     * or through parse_posts_sql()) never produces duplicates. Categories and
     * tags of an item are merged into one comma-separated "tags" field.
     *
     * @return array List of objects with string fields id, title, date, tags
     *               and content, ordered by numeric post id.
     */
    public function parse_posts()
    {
        $by_id = array();

        foreach ($this->_parser->channel->item as $item) {
            // Skip anything that is not published (drafts, trashed posts, ...).
            if ($this->_ns_text($item, 'wp', 'status') !== 'publish') {
                continue;
            }

            $post_id = $this->_ns_text($item, 'wp', 'post_id');

            // Categories and tags are both exported as <category>; merge them.
            $labels = array();
            foreach ($item->category as $meta) {
                $label = (string) $meta;
                $labels[] = ($label === 'Uncategorized') ? '未分类' : $label;
            }

            // Cast everything to plain strings so the result does not keep
            // references to live XML nodes.
            $by_id[$post_id] = (object) array(
                'id'      => $post_id,
                'title'   => (string) $item->title,
                'date'    => $this->_ns_text($item, 'wp', 'post_date'),
                'tags'    => implode(',', array_unique($labels)),
                'content' => $this->_ns_text($item, 'content', 'encoded'),
            );
        }

        // Order by post id numerically, then expose a plain 0-based list.
        ksort($by_id, SORT_NUMERIC);
        $this->posts = array_values($by_id);

        return $this->posts;
    }

    /**
     * Collect the unique tag names declared at channel level.
     *
     * @return array Sorted list of tag name strings.
     */
    public function parse_tags()
    {
        $names = array();

        if (isset($this->_ns['wp'])) {
            $entries = $this->_parser->channel->children($this->_ns['wp'])->tag;
            foreach ($entries as $entry) {
                $names[] = (string) $entry->children($this->_ns['wp'])->tag_name;
            }
        }

        $this->tags = array_unique($names);
        sort($this->tags);

        return $this->tags;
    }

    /**
     * Re-parse the posts and render one INSERT statement per post.
     *
     * Values are escaped for use inside single-quoted MySQL string literals.
     *
     * @return string Newline-terminated SQL statements ('' when there are no posts).
     */
    public function parse_posts_sql()
    {
        $this->parse_posts();

        $table = $this->_quote_identifier($this->post_table);
        $sql = '';
        foreach ($this->posts as $p) {
            $values = array(
                $this->_escape($p->id),
                $this->_escape($p->title),
                $this->_escape($p->date),
                $this->_escape($p->tags),
                $this->_escape($p->content),
            );
            $sql .= "INSERT INTO {$table} (`id`,`title`,`date`,`tags`,`content`)";
            $sql .= " VALUES ('" . implode("','", $values) . "');\n";
        }

        return $sql;
    }

    /**
     * Re-parse the tags and render one INSERT statement per tag.
     *
     * Values are escaped for use inside single-quoted MySQL string literals.
     *
     * @return string Newline-terminated SQL statements ('' when there are no tags).
     */
    public function parse_tags_sql()
    {
        $this->parse_tags();

        $table = $this->_quote_identifier($this->tag_table);
        $sql = '';
        foreach ($this->tags as $name) {
            $sql .= "INSERT INTO {$table} (`name`) VALUES ('" . $this->_escape($name) . "');\n";
        }

        return $sql;
    }

    /**
     * Read the text of a namespaced child element.
     *
     * @param SimpleXMLElement $node   Parent element.
     * @param string           $prefix Namespace prefix as declared in the document.
     * @param string           $name   Local name of the child element.
     *
     * @return string Child text, or '' when the namespace is not declared or
     *                the child does not exist.
     */
    protected function _ns_text($node, $prefix, $name)
    {
        if (!isset($this->_ns[$prefix])) {
            return '';
        }

        return (string) $node->children($this->_ns[$prefix])->{$name};
    }

    /**
     * Escape a value for a single-quoted MySQL string literal.
     *
     * Uses backslash escapes, so it assumes the server does not run with
     * NO_BACKSLASH_ESCAPES enabled.
     *
     * @param mixed $value
     *
     * @return string
     */
    protected function _escape($value)
    {
        return strtr((string) $value, array(
            "\\"   => "\\\\",
            "\0"   => "\\0",
            "\n"   => "\\n",
            "\r"   => "\\r",
            "'"    => "\\'",
            '"'    => '\\"',
            "\x1a" => "\\Z",
        ));
    }

    /**
     * Wrap an identifier in backticks, doubling any backtick it contains.
     *
     * @param string $name
     *
     * @return string
     */
    protected function _quote_identifier($name)
    {
        return '`' . str_replace('`', '``', (string) $name) . '`';
    }
}
// Example usage:
//   $decoder = new WXRDecoder('gooth.wordpress.2013-07-10.xml');
//   echo $decoder->parse_posts_sql();
//   echo $decoder->parse_tags_sql();
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment