Skip to content

Instantly share code, notes, and snippets.

@krilnon

krilnon/kf.rb Secret

Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krilnon/0c62720c35ae4de30601 to your computer and use it in GitHub Desktop.
Save krilnon/0c62720c35ae4de30601 to your computer and use it in GitHub Desktop.
kF->Discourse Import Script
require File.expand_path(File.dirname(__FILE__) + '/base.rb')
require 'csv'
class ImportScripts::KirupaForum < ImportScripts::Base
# Script entry point: instantiates the importer and calls +perform+ on it.
# +perform+ is not defined in this file; presumably it is inherited from
# ImportScripts::Base.
def self.run
  importer = new
  importer.perform
end
# Loads every CSV export into memory and builds the old-ID -> new-ID lookup
# tables, then calls the base class constructor.
#
# Each file listed below is read from /import_data/, parsed with a header row
# (header names are converted to symbols) and stored as an Array of Hashes in
# the instance variable named after its key (@user_groups, @users, @forums,
# @forum_permissions, @threads, @posts, @group_mapping, @forum_mapping).
def initialize
  base = '/import_data/'
  data = {
    user_groups: 'usergroup.csv',
    users: 'user_kf3.csv',
    forums: 'forum.csv',
    forum_permissions: 'forumpermission.csv',
    threads: 'thread_kf3.csv',
    posts: 'post_kf3.csv',
    group_mapping: 'group_mapping.csv',
    forum_mapping: 'forum_mapping.csv'
  }
  csv_opts = { headers: true, header_converters: :symbol }

  data.each do |var, path|
    # The options must be splatted: CSV.parse takes them as keyword arguments,
    # and passing the Hash positionally raises ArgumentError on Ruby 3+.
    rows = CSV.parse(File.read(File.join(base, path)), **csv_opts).map { |r| r.to_hash }
    instance_variable_set("@#{var}", rows)
  end

  # lets you figure out the group ID if you're given the old ID and want to say what the new user's primary group ID is
  # this info is taken from group_mapping.csv. we're condensing from about 9 groups to 5 or so
  @group_from_old = {}
  @group_mapping.each { |e| @group_from_old[e[:old_id]] = e[:new_id] }

  # same idea for forums, taken from forum_mapping.csv
  @forum_from_old = {}
  @forum_mapping.each { |e| @forum_from_old[e[:old_id]] = e[:new_id] }

  super()
end
# Runs the whole import, one stage after another, in a fixed order.
# Returns whatever the last stage (close_topics) returns.
def execute
  %i[import_users group_users preprocess_posts import_topics].each { |stage| send(stage) }

  # import_posts reads @topics, so make sure it points at the thread rows
  @topics = @threads

  # close_topics only closes the threads that already were closed, not all of them
  %i[import_posts postprocess_posts close_topics].map { |stage| send(stage) }.last
end
# Feeds the user rows, ordered by numeric userid, to create_users.
#
# For each row the block yields a Hash of attributes for the new user. Rows
# without a username get a synthetic "userid_<id>" one (the row itself is
# updated), and the primary group is looked up through @group_from_old.
def import_users
  puts "creating users in import_users"

  @users.sort_by! { |row| row[:userid].to_i }

  create_users(@users) do |row|
    puts "username:#{row[:username]}"

    if row[:username].nil?
      row[:username] = "userid_#{row[:userid]}"
    end

    safe_username = UserNameSuggester.fix_username(row[:username])
    new_group_id = @group_from_old[row[:usergroupid]]

    {
      id: row[:userid],
      username: safe_username,
      email: row[:email],
      website: row[:homepage],
      title: row[:usertitle],
      primary_group_id: new_group_id,
      merge: true
    }
  end
end
# Adds every imported user to the group its old usergroup maps to, unless the
# group already has a member with that e-mail address.
#
# Users are matched by lower-cased e-mail. Rows without an e-mail are skipped:
# they cannot be matched, and calling downcase on nil would abort the whole
# pass. A progress percentage is printed for each user that is not yet a
# member of its group.
def group_users
  total = @users.size

  @users.each_with_index do |user, index|
    email = user[:email].to_s.strip
    next if email.empty?
    email = email.downcase

    # add new users to the appropriate group, if they're indeed new.
    group = Group.find(@group_from_old[user[:usergroupid]])
    next if group.users.where(email: email).exists?

    puts((((index + 1).to_f / total) * 100).to_s)
    member = User.where(email: email).first
    group.add(member) if member
  end
end
# extracted from the "bitfield_vbulletin.xml" file
VB_FORUM_PERMISSIONS_CAN_VIEW = 1
VB_FORUM_PERMISSIONS_CAN_VIEW_THREADS = 524288
VB_FORUM_PERMISSIONS_CAN_REPLY_OWN = 32
VB_FORUM_PERMISSIONS_CAN_REPLY_OTHERS = 64
VB_FORUM_PERMISSIONS_CAN_POST_NEW = 16

# Maps a vBulletin forum-permission bitfield to a CategoryGroup permission type.
#
# The most permissive match wins: posting new threads -> :full, replying
# (to own or others' threads) -> :create_post, viewing (forum or threads)
# -> :readonly. Returns nil when none of those bits is set.
def translate_forum_permissions(permissions)
  reply_bits = VB_FORUM_PERMISSIONS_CAN_REPLY_OWN | VB_FORUM_PERMISSIONS_CAN_REPLY_OTHERS
  view_bits = VB_FORUM_PERMISSIONS_CAN_VIEW | VB_FORUM_PERMISSIONS_CAN_VIEW_THREADS

  level =
    if (permissions & VB_FORUM_PERMISSIONS_CAN_POST_NEW) != 0
      :full
    elsif (permissions & reply_bits) != 0
      :create_post
    elsif (permissions & view_bits) != 0
      :readonly
    end

  level && CategoryGroup.permission_types[level]
end
# Converts each post's :pagetext with preprocess_post_raw and stores the
# result under :raw on the same row, reporting progress after every post.
def preprocess_posts
  puts "", "Preprocessing posts..."

  total = @posts.size

  @posts.each.with_index(1) do |row, done|
    row[:raw] = preprocess_post_raw(row[:pagetext])
    print_status(done, total)
  end
end
# Creates one topic per thread row (ordered by numeric threadid) through
# create_posts, then attaches the topics to their mapped categories.
#
# Side effects: points @topics at @threads (sorted in place) and fills
# @closed_topic_ids with the import IDs ("thread#<id>") of threads whose
# :open column is "0", for close_topics to use later.
#
# A thread is skipped when it was already imported or when its first post is
# not present in @posts.
def import_topics
  puts "", "Importing topics..."

  # keep track of closed topics
  @closed_topic_ids = []

  @topics = @threads

  # sort the topics
  @topics.sort_by! { |topic| topic[:threadid].to_i }

  # Index the posts by ID once instead of scanning @posts for every topic.
  # The first row wins when an ID occurs more than once.
  posts_by_id = {}
  @posts.each do |post|
    posts_by_id[post[:postid]] = post unless posts_by_id.key?(post[:postid])
  end

  create_posts(@topics) do |topic|
    id = "thread#" + topic[:threadid]
    title = CGI.unescapeHTML(topic[:title]).strip[0...255]

    # store the list of closed topics
    @closed_topic_ids << id if topic[:open] == "0"

    next if post_id_from_imported_post_id(id)

    post = posts_by_id[topic[:firstpostid]]
    next unless post

    # two old accounts are pinned to fixed user IDs; everyone else is looked up
    user_id =
      case topic[:postuserid]
      when "1" then 1 # kirupa
      when "9456" then 2 # krilnon
      else user_id_from_imported_user_id(topic[:postuserid])
      end
    user_id = Discourse::SYSTEM_USER_ID if user_id.nil?

    t = {
      id: id,
      user_id: user_id,
      title: title,
      category: Category.where(id: @forum_from_old[topic[:forumid]]).first, # kF: all forumids are matched to existing Discourse category
      raw: post[:raw],
      created_at: Time.at(topic[:dateline].to_i),
      visible: topic[:visible].to_i == 1,
      views: topic[:views].to_i,
    }

    t[:pinned_at] = t[:created_at] if topic[:sticky].to_i == 1

    # tag
    # kF not doing tags, since our threads had > 1 tag, and finding tags requires a weird join probably
    #if (tag = @mapped_categories[topic[:old_forumid]][:tag] || "").present?
    #  t[:custom_fields] ||= {}
    #  t[:custom_fields]['tag'] = tag
    #end

    t
  end

  # kF categorize; disabled for replies import
  # NOTE(review): topics are matched by title, so threads sharing a title all
  # resolve to the first such Topic.
  @topics.each do |topic|
    title = CGI.unescapeHTML(topic[:title]).strip[0...255]
    topic_maybe = Topic.where(title: title).first
    f_id = @forum_from_old[topic[:forumid]]
    cat = Category.find(f_id) unless f_id.to_i == 0
    cat.topics.append(topic_maybe) if topic_maybe && cat
  end
end
# Imports every post that is not the first post of a thread (those were
# already created as topics) through create_posts, in ascending post ID order.
#
# A post is skipped when it has no thread ID or when its thread has no
# imported topic. When the post's parent has been imported, the new post is
# marked as a reply to it.
def import_posts
  puts "", "Importing posts..."

  # sort the posts
  # This has to happen before the filtered copy below is taken; sorting
  # afterwards (as the code used to) left the list handed to create_posts in
  # CSV order.
  @posts.sort_by! { |post| post[:postid].to_i }

  # reject all first posts
  first_post_ids = Set.new(@topics.map { |t| t[:firstpostid] })
  posts_to_import = @posts.reject { |post| first_post_ids.include?(post[:postid]) }

  create_posts(posts_to_import) do |post|
    next if post[:threadid].nil?

    topic = topic_lookup_from_imported_post_id("thread#" + post[:threadid])
    next unless topic

    user_id =
      case post[:userid]
      when "1" then 1 # kirupa
      when "9456" then 2 # krilnon
      else user_id_from_imported_user_id(post[:userid])
      end
    user_id = Discourse::SYSTEM_USER_ID if user_id.nil?

    attrs = {
      id: post[:postid],
      user_id: user_id,
      topic_id: topic[:topic_id],
      raw: post[:raw],
      created_at: Time.at(post[:dateline].to_i),
      hidden: post[:visible].to_i == 0,
    }

    # kF not doing edit reasons. requires some crazy joins in SQL probably
    #if (edit_reason = (post[:editreason] || "").gsub("NULL", "")).present?
    #  attrs[:edit_reason] = edit_reason
    #end

    parent = topic_lookup_from_imported_post_id(post[:parentid])
    attrs[:reply_to_post_number] = parent[:post_number] if parent

    attrs
  end
end
def preprocess_post_raw(raw)
return "" if raw.blank?
raw = raw.gsub(/(\\r)?\\n/, "\n")
.gsub("\\t", "\t")
# remove attachments
raw = raw.gsub(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")
# [HTML]...[/HTML]
raw = raw.gsub(/\[html\]/i, "\n```html\n")
.gsub(/\[\/html\]/i, "\n```\n")
# [PHP]...[/PHP]
raw = raw.gsub(/\[php\]/i, "\n```php\n")
.gsub(/\[\/php\]/i, "\n```\n")
# [HIGHLIGHT="..."]
raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }
# [CODE]...[/CODE]
# [HIGHLIGHT]...[/HIGHLIGHT]
raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
.gsub(/\[\/?highlight\]/i, "\n```\n")
# kF
# [ic]...[/ic]
raw = raw.gsub(/\[ic\](.+?)\[\/ic\]/im) { "`#{$1}`" }
# [SAMP]...[/SAMP]
raw = raw.gsub(/\[\/?samp\]/i, "`")
# replace all chevrons with HTML entities
# NOTE: must be done
# - AFTER all the "code" processing
# - BEFORE the "quote" processing
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
.gsub("<", "&lt;")
.gsub("\u2603", "<")
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
.gsub(">", "&gt;")
.gsub("\u2603", ">")
# [URL=...]...[/URL]
raw = raw.gsub(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }
# [URL]...[/URL]
# [MP3]...[/MP3]
raw = raw.gsub(/\[\/?url\]/i, "")
.gsub(/\[\/?mp3\]/i, "")
# [B]...[/B]
raw = raw.gsub(/\[b\]/i, "**")
.gsub(/\[\/b\]/i, "**")
# [I]...[/I]
raw = raw.gsub(/\[i\]/i, "*")
.gsub(/\[\/i\]/i, "*")
# [MENTION]<username>[/MENTION]
raw = raw.gsub(/\[mention\](.+?)\[\/mention\]/i) do
old_username = $1
if @old_username_to_new_usernames.has_key?(old_username)
old_username = @old_username_to_new_usernames[old_username]
end
"@#{old_username}"
end
# [MENTION=<user_id>]<username>[/MENTION]
raw = raw.gsub(/\[mention="?(\d+)"?\](.+?)\[\/mention\]/i) do
user_id, old_username = $1, $2
if user = @users.select { |u| u[:userid] == user_id }.first
old_username = @old_username_to_new_usernames[user[:username]] || user[:username]
end
"@#{old_username}"
end
# [QUOTE]...[/QUOTE]
raw = raw.gsub(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }
# [QUOTE=<username>]...[/QUOTE]
raw = raw.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
old_username, quote = $1, $2
if @old_username_to_new_usernames and @old_username_to_new_usernames.has_key?(old_username)
old_username = @old_username_to_new_usernames[old_username]
end
"\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
end
# [YOUTUBE]<id>[/YOUTUBE]
raw = raw.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" }
# [VIDEO=youtube;<id>]...[/VIDEO]
raw = raw.gsub(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }
raw
end
# Second pass over the posts, run after everything has been imported: rewrites
# each post's :raw with postprocess_post_raw and, when that changed the text,
# saves the new text on the imported Post record.
#
# A failure on one post is printed and the loop moves on to the next post.
# Only StandardError is rescued, so Ctrl-C (Interrupt) and SystemExit still
# stop the run. Progress is reported after every post, failed or not.
def postprocess_posts
  puts "", "Postprocessing posts..."

  max = @posts.size

  @posts.each.with_index(1) do |post, current|
    begin
      new_raw = postprocess_post_raw(post[:raw])
      next if new_raw == post[:raw]

      new_id = post_id_from_imported_post_id(post[:postid])
      record = Post.find_by(id: new_id)
      if record.nil?
        puts "Could not save the post-processed raw of the post ##{new_id} (previous id: ##{post[:postid]})"
        next
      end

      record.raw = new_raw
      record.save
    rescue StandardError => e
      puts "", "-" * 100
      puts e.message
      puts e.backtrace.join("\n")
      puts "-" * 100, ""
    ensure
      print_status(current, max)
    end
  end
end
# Rewrites the BBCode that refers to other posts or threads, which can only be
# resolved once everything has been imported:
#
#   [QUOTE=<username>;<post_id>]...[/QUOTE]  -> Discourse quote with post/topic
#   [THREAD]<id>[/THREAD], [THREAD=<id>]...  -> topic URL / Markdown link
#   [POST]<id>[/POST],     [POST=<id>]...    -> post URL / Markdown link
#
# Thread and post tags whose target cannot be found are left untouched; a
# quote whose post cannot be found becomes a quote without a post reference.
# Returns the rewritten text.
def postprocess_post_raw(raw)
  new_usernames = @old_username_to_new_usernames || {}

  # Replacement for one [THREAD]/[POST] tag: the target's URL (wrapped in a
  # Markdown link when the tag had a label), or the original tag when the
  # target was not imported.
  link_to = lambda do |import_id, label, original|
    lookup = topic_lookup_from_imported_post_id(import_id)
    if lookup.nil?
      original
    elsif label
      "[#{label}](#{lookup[:url]})"
    else
      lookup[:url]
    end
  end

  # [QUOTE=<username>;<post_id>]...[/QUOTE]
  # The username part must not contain "]": otherwise the match could start at
  # an earlier, already converted [quote="name"] tag and run across it.
  raw = raw.gsub(/\[quote=([^;\]]+);(\d+)\](.+?)\[\/quote\]/im) do
    old_username, post_id, quote = $1, $2, $3
    username = new_usernames.fetch(old_username, old_username)
    lookup = topic_lookup_from_imported_post_id(post_id)

    if lookup
      "\n[quote=\"#{username},post:#{lookup[:post_number]},topic:#{lookup[:topic_id]}\"]\n#{quote}\n[/quote]\n"
    else
      "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
    end
  end

  # [THREAD]<thread_id>[/THREAD]
  # ==> http://my.discourse.org/t/slug/<topic_id>
  raw = raw.gsub(/\[thread\](\d+)\[\/thread\]/i) { link_to.call("thread#" + $1, nil, $&) }

  # [THREAD=<thread_id>]...[/THREAD]
  # ==> [...](http://my.discourse.org/t/slug/<topic_id>)
  raw = raw.gsub(/\[thread=(\d+)\](.+?)\[\/thread\]/i) { link_to.call("thread#" + $1, $2, $&) }

  # [POST]<post_id>[/POST]
  # ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
  raw = raw.gsub(/\[post\](\d+)\[\/post\]/i) { link_to.call($1, nil, $&) }

  # [POST=<post_id>]...[/POST]
  # ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>)
  raw = raw.gsub(/\[post=(\d+)\](.+?)\[\/post\]/i) { link_to.call($1, $2, $&) }

  raw
end
# Marks as closed every topic owning a post whose 'import_id' custom field is
# one of the IDs collected in @closed_topic_ids, using a single UPDATE.
def close_topics
  puts ""
  puts "Closing topics..."

  # The SQL lines stay flush-left on purpose: a <<- heredoc keeps leading
  # whitespace, so indenting them would change the statement text.
  close_statement = <<-SQL
WITH closed_topic_ids AS (
SELECT t.id AS topic_id
FROM post_custom_fields pcf
JOIN posts p ON p.id = pcf.post_id
JOIN topics t ON t.id = p.topic_id
WHERE pcf.name = 'import_id'
AND pcf.value IN (?)
)
UPDATE topics
SET closed = true
WHERE id IN (SELECT topic_id FROM closed_topic_ids)
  SQL

  Topic.exec_sql(close_statement, @closed_topic_ids)
end
end
# Start the import as soon as this file is loaded.
ImportScripts::KirupaForum.run
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment