Skip to content

Instantly share code, notes, and snippets.

@nicolasiensen
Created July 6, 2015 10:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicolasiensen/f53053c14bcd1814e6f7 to your computer and use it in GitHub Desktop.
Save nicolasiensen/f53053c14bcd1814e6f7 to your computer and use it in GitHub Desktop.
# require "~/Desktop/wordpress_parser.rb"
# parser = WordpressParser.new("/Users/nicolasiensen/Desktop/quintal.wordpress.2015-03-03.xml")
# posts = parser.posts
# posts.each_with_index {|post, i| puts i; parser.create_post post }
require 'nokogiri'
# Reads a WordPress XML export and loads its posts into the application.
#
# #posts converts every <item> whose wp:post_type is "post" into a plain
# Hash; #create_post persists one such Hash through the Category, Tag,
# Post, User and Comment models, which are defined outside this file.
class WordpressParser
  # Parses the export file into an in-memory XML document.
  #
  # @param file_path [String] path to the WordPress XML export
  def initialize(file_path = "quintal.wordpress.2015-03-03.xml")
    # Block form closes the file handle as soon as parsing has finished.
    @xml = File.open(file_path) { |file| Nokogiri::XML(file) }
  end

  # Persists one parsed post together with its tags and comments.
  #
  # The post gets the category whose slug matches its first category, or
  # Category.first when there is no such category.
  #
  # @param post [Hash] one element of #posts
  # @return [Array<Hash>] the post's comment hashes (post['comments'])
  # @raise [ArgumentError] from Time.parse when a date string is unparsable
  # @raise whatever Post.create!, User.create! or Comment.create! raise
  #   when a record cannot be saved
  def create_post(post)
    category = Category.find_by(slug: post['categories'].first) || Category.first
    # NOTE(review): the tag values are the WordPress "nicename" attributes
    # yet they are matched against Tag#name -- confirm this is intended.
    tags = post['tags'].map { |tag| Tag.find_or_create_by(name: tag) }

    # Bang variant: a post that fails to save must abort here instead of
    # letting its comments be created with a nil post_id.
    new_post = Post.create!(
      title: post['title'],
      published_at: Time.parse(post['published_at']),
      body: post['content'],
      legacy_id: post['post_id'],
      created_at: Time.parse(post['post_date']),
      slug: post['post_name'],
      category_id: category.id,
      tags: tags
    )

    post['comments'].each { |comment| import_comment(new_post, comment) }
  end

  # Builds (once) the list of post hashes found in the export.
  #
  # Each hash has the string keys 'title', 'published_at', 'content',
  # 'post_id', 'post_date', 'post_name', 'categories', 'tags' and
  # 'comments'; every comment is a Hash with symbol keys.
  #
  # @return [Array<Hash>] memoized in @posts
  def posts
    @posts ||= posts_node_set.map { |item| post_attributes(item) }
  end

  # Selects the <item> elements whose wp:post_type text is "post".
  #
  # @return [Nokogiri::XML::NodeSet] memoized in @posts_node_set
  def posts_node_set
    @posts_node_set ||= @xml.xpath("//item[wp:post_type[text() = 'post']]")
  end

  # Strips height="N" and width="N" attributes from the body of every
  # stored post.
  #
  # Relies on String#remove, which is not core Ruby (ActiveSupport is
  # assumed to be loaded by the host application).
  #
  # @return [nil]
  def fix_all_posts_bodies
    # find_each loads the posts in batches instead of all at once.
    Post.find_each do |post|
      next if post.body.nil? # nothing to clean on a post without a body

      cleaned = post.body.remove(/height="\d+"/).remove(/width="\d+"/)
      post.update_attribute :body, cleaned
    end
  end

  private

  # Creates the comment (and its author, when no user has that email) for
  # new_post. Comments with a blank author email or blank body are skipped.
  def import_comment(new_post, comment)
    return unless comment[:author_email].present? && comment[:body].present?

    user = User.find_by(email: comment[:author_email]) || User.create!(
      email: comment[:author_email],
      name: comment[:author_name],
      ip: comment[:author_ip]
    )

    Comment.create!(
      post_id: new_post.id,
      user_id: user.id,
      created_at: Time.parse(comment[:created_at]),
      body: comment[:body],
      approved: comment[:approved] == '1'
    )
  end

  # Converts one <item> node into the Hash described on #posts.
  def post_attributes(item)
    {
      'title' => item.search(".//title").children.text,
      'published_at' => item.search(".//pubDate").children.text,
      'content' => item.search(".//content:encoded").children.text,
      'post_id' => item.search(".//wp:post_id").children.text,
      'post_date' => item.search(".//wp:post_date").children.text,
      'post_name' => item.search(".//wp:post_name").children.text,
      'categories' => nicenames(item, 'category'),
      'tags' => nicenames(item, 'post_tag'),
      'comments' => item.search(".//wp:comment").map { |node| comment_attributes(node) }
    }
  end

  # Collects the "nicename" attribute of every <category> element of the
  # given domain ('category' or 'post_tag') below item.
  def nicenames(item, domain)
    item.search(".//category[@domain = '#{domain}']").map do |node|
      node.attribute("nicename").value
    end
  end

  # Converts one <wp:comment> node into a Hash with symbol keys.
  def comment_attributes(node)
    {
      author_name: node.search(".//wp:comment_author").text,
      author_email: node.search(".//wp:comment_author_email").text,
      author_ip: node.search(".//wp:comment_author_IP").text,
      created_at: node.search(".//wp:comment_date").text,
      body: node.search(".//wp:comment_content").text,
      approved: node.search(".//wp:comment_approved").text
    }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment