-
-
Save krilnon/0c62720c35ae4de30601 to your computer and use it in GitHub Desktop.
kF->Discourse Import Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require File.expand_path(File.dirname(__FILE__) + '/base.rb') | |
require 'csv' | |
class ImportScripts::KirupaForum < ImportScripts::Base | |
# Entry point used at the bottom of the script: builds an importer and calls
# `perform` on it (`perform` is not defined in this file; presumably it comes
# from ImportScripts::Base).
def self.run
  importer = new
  importer.perform
end
# Reads every CSV export into memory and builds the old-id => new-id lookup
# tables, then runs the parent class's constructor.
#
# Each file is parsed with a header row; header names become Symbol keys and
# every row is stored as a plain Hash, so field values are Strings (or nil).
#
# Instance variables set here: @user_groups, @users, @forums,
# @forum_permissions, @threads, @posts, @group_mapping, @forum_mapping,
# @group_from_old, @forum_from_old.
def initialize
  base = '/import_data/'
  # instance variable name => CSV file (relative to base) that populates it
  data = {
    user_groups: 'usergroup.csv',
    users: 'user_kf3.csv',
    forums: 'forum.csv',
    forum_permissions: 'forumpermission.csv',
    threads: 'thread_kf3.csv',
    posts: 'post_kf3.csv',
    group_mapping: 'group_mapping.csv',
    forum_mapping: 'forum_mapping.csv'
  }
  csv_opts = { headers: true, header_converters: :symbol }
  data.each do |var, path|
    # The options are splatted because CSV.parse takes them as keywords;
    # Ruby 3 no longer converts a positional Hash into keyword arguments.
    rows = CSV.parse(File.read(base + path), **csv_opts).map(&:to_hash)
    instance_variable_set("@#{var}", rows)
  end
  # lets you figure out the group ID if you're given the old ID and want to say what the new user's primary group ID is
  # this info is taken from group_mapping.csv. we're condensing from about 9 groups to 5 or so
  @group_from_old = @group_mapping.each_with_object({}) { |e, map| map[e[:old_id]] = e[:new_id] }
  # same idea for forums: old forum id => new category id (from forum_mapping.csv)
  @forum_from_old = @forum_mapping.each_with_object({}) { |e, map| map[e[:old_id]] = e[:new_id] }
  super()
end
# Runs every import phase, in the order listed.
def execute
  %i[import_users group_users preprocess_posts import_topics].each { |phase| send(phase) }
  @topics = @threads
  %i[import_posts postprocess_posts].each { |phase| send(phase) }
  # this just closes the threads that already were closed, not all of them
  close_topics
end
# Feeds the user rows, ordered by numeric user id, to `create_users`.
# The block turns one CSV row into the attribute Hash for that user; rows
# without a username get a generated "userid_<id>" name first.
def import_users
  puts "creating users in import_users"
  @users.sort_by! { |row| row[:userid].to_i }
  create_users(@users) do |row|
    puts "username:#{row[:username]}"
    row[:username] ||= "userid_#{row[:userid]}"
    attributes = {
      id: row[:userid],
      username: UserNameSuggester.fix_username(row[:username]),
      email: row[:email],
      website: row[:homepage],
      title: row[:usertitle],
      primary_group_id: @group_from_old[row[:usergroupid]],
      merge: true
    }
    attributes
  end
end
# Adds every imported user to the group its old usergroup maps to, unless the
# group already has a member with that e-mail address.
#
# Rows that cannot be processed (no e-mail, or no entry in @group_from_old for
# the old usergroup id) are reported and skipped instead of aborting the run.
# Progress is printed as a percentage each time a membership check leads to
# an add attempt.
def group_users
  total = @users.size
  groups = {} # new group id => Group, so each group is loaded only once
  @users.each_with_index do |user, index|
    # add new users to the appropriate group, if they're indeed new.
    new_group_id = @group_from_old[user[:usergroupid]]
    if new_group_id.nil? || user[:email].nil?
      puts "skipping user #{user[:userid]}: no group mapping or no email"
      next
    end
    email = user[:email].downcase
    # Group.find still raises when the mapping names a group that does not exist.
    group = (groups[new_group_id] ||= Group.find(new_group_id))
    next if group.users.where(email: email).exists?
    puts((((index + 1).to_f / total) * 100).to_s)
    u = User.where(email: email).first
    group.add(u) if u
  end
end
# extracted from the "bitfield_vbulletin.xml" file
VB_FORUM_PERMISSIONS_CAN_VIEW = 1
VB_FORUM_PERMISSIONS_CAN_VIEW_THREADS = 524288
VB_FORUM_PERMISSIONS_CAN_REPLY_OWN = 32
VB_FORUM_PERMISSIONS_CAN_REPLY_OTHERS = 64
VB_FORUM_PERMISSIONS_CAN_POST_NEW = 16

# Maps a forum-permission bitfield onto one of CategoryGroup.permission_types.
#
# permissions - the bitfield as an Integer, or as a numeric String (the form
#               CSV-loaded values take); nil is treated as 0.
#
# Returns the :full type when the "post new" bit is set, otherwise
# :create_post when either reply bit is set, otherwise :readonly when either
# view bit is set, otherwise nil.
def translate_forum_permissions(permissions)
  bits = permissions.to_i
  view_mask = VB_FORUM_PERMISSIONS_CAN_VIEW | VB_FORUM_PERMISSIONS_CAN_VIEW_THREADS
  reply_mask = VB_FORUM_PERMISSIONS_CAN_REPLY_OWN | VB_FORUM_PERMISSIONS_CAN_REPLY_OTHERS
  can_see = (bits & view_mask) > 0
  can_reply = (bits & reply_mask) > 0
  can_create = (bits & VB_FORUM_PERMISSIONS_CAN_POST_NEW) > 0
  return CategoryGroup.permission_types[:full] if can_create
  return CategoryGroup.permission_types[:create_post] if can_reply
  return CategoryGroup.permission_types[:readonly] if can_see
  nil
end
# Stores the pre-processed text of every post under post[:raw]
# (computed from post[:pagetext]) and reports progress after each one.
def preprocess_posts
  puts "", "Preprocessing posts..."
  total = @posts.size
  @posts.each.with_index(1) do |post, done|
    post[:raw] = preprocess_post_raw(post[:pagetext])
    print_status(done, total)
  end
end
# Creates one topic per thread row (using the thread's first post as the
# topic body) and then attaches each topic to its mapped category.
#
# Side effects: sets @topics (the thread rows, sorted by numeric thread id)
# and @closed_topic_ids (import ids of threads whose :open column is "0").
def import_topics
  puts "", "Importing topics..."
  # keep track of closed topics
  @closed_topic_ids = []
  @topics = @threads
  # sort the topics
  @topics.sort_by! { |topic| topic[:threadid].to_i }
  # Index the posts by id once so finding each thread's first post is a hash
  # lookup rather than a scan of every post per thread. The first row wins
  # when an id is duplicated.
  posts_by_id = {}
  @posts.each { |post| posts_by_id[post[:postid]] ||= post }
  create_posts(@topics) do |topic|
    id = "thread#" + topic[:threadid]
    title = CGI.unescapeHTML(topic[:title]).strip[0...255]
    # store the list of closed topics
    @closed_topic_ids << id if topic[:open] == "0"
    # already imported on an earlier run
    next if post_id_from_imported_post_id(id)
    post = posts_by_id[topic[:firstpostid]]
    next unless post
    # two old accounts are pinned to fixed ids; everyone else goes through the import mapping
    u = case topic[:postuserid]
        when "1" then 1
        when "9456" then 2
        else user_id_from_imported_user_id(topic[:postuserid])
        end
    u ||= Discourse::SYSTEM_USER_ID
    t = {
      id: id,
      user_id: u,
      title: title,
      category: Category.where(id: @forum_from_old[topic[:forumid]]).first, # kF: all forumids are matched to existing Discourse category
      raw: post[:raw],
      created_at: Time.at(topic[:dateline].to_i),
      visible: topic[:visible].to_i == 1,
      views: topic[:views].to_i,
    }
    # sticky threads become topics pinned at their creation time
    t[:pinned_at] = t[:created_at] if topic[:sticky].to_i == 1
    # tag
    # kF not doing tags, since our threads had > 1 tag, and finding tags requires a weird join probably
    #if (tag = @mapped_categories[topic[:old_forumid]][:tag] || "").present?
    #  t[:custom_fields] ||= {}
    #  t[:custom_fields]['tag'] = tag
    #end
    t
  end
  # kF categorize; disabled for replies import
  # NOTE(review): topics are matched by title only, so threads sharing a title
  # resolve to the same topic — confirm this is acceptable.
  @topics.each do |topic|
    title = CGI.unescapeHTML(topic[:title]).strip[0...255]
    topic_maybe = Topic.where(title: title).first
    f_id = @forum_from_old[topic[:forumid]]
    cat = Category.find(f_id) unless f_id.to_i == 0
    cat.topics.append(topic_maybe) if topic_maybe && cat
  end
end
# Imports every post that is not the first post of a thread (those were
# already created as topics) as a reply in its topic.
#
# Posts are handed to `create_posts` in ascending numeric post-id order. The
# sort has to run before the filtered list is derived; sorting @posts
# afterwards would leave that list in file order.
def import_posts
  puts "", "Importing posts..."
  # sort the posts
  @posts.sort_by! { |post| post[:postid].to_i }
  # reject all first posts
  first_post_ids = Set.new(@topics.map { |t| t[:firstpostid] })
  posts_to_import = @posts.reject { |post| first_post_ids.include?(post[:postid]) }
  create_posts(posts_to_import) do |post|
    next if post[:threadid].nil?
    # skip replies whose thread has no imported topic
    t = topic_lookup_from_imported_post_id("thread#" + post[:threadid])
    next unless t
    u = case post[:userid]
        when "1" then 1 # kirupa
        when "9456" then 2 # krilnon
        else user_id_from_imported_user_id(post[:userid])
        end
    u ||= Discourse::SYSTEM_USER_ID
    new_post = {
      id: post[:postid],
      user_id: u,
      topic_id: t[:topic_id],
      raw: post[:raw],
      created_at: Time.at(post[:dateline].to_i),
      hidden: post[:visible].to_i == 0,
    }
    # kF not doing edit reasons. requires some crazy joins in SQL probably
    #if (edit_reason = (post[:editreason] || "").gsub("NULL", "")).present?
    #  new_post[:edit_reason] = edit_reason
    #end
    # link the reply to its parent when the parent post is already known
    parent = topic_lookup_from_imported_post_id(post[:parentid])
    new_post[:reply_to_post_number] = parent[:post_number] if parent
    new_post
  end
end
def preprocess_post_raw(raw) | |
return "" if raw.blank? | |
raw = raw.gsub(/(\\r)?\\n/, "\n") | |
.gsub("\\t", "\t") | |
# remove attachments | |
raw = raw.gsub(/\[attach[^\]]*\]\d+\[\/attach\]/i, "") | |
# [HTML]...[/HTML] | |
raw = raw.gsub(/\[html\]/i, "\n```html\n") | |
.gsub(/\[\/html\]/i, "\n```\n") | |
# [PHP]...[/PHP] | |
raw = raw.gsub(/\[php\]/i, "\n```php\n") | |
.gsub(/\[\/php\]/i, "\n```\n") | |
# [HIGHLIGHT="..."] | |
raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" } | |
# [CODE]...[/CODE] | |
# [HIGHLIGHT]...[/HIGHLIGHT] | |
raw = raw.gsub(/\[\/?code\]/i, "\n```\n") | |
.gsub(/\[\/?highlight\]/i, "\n```\n") | |
# kF | |
# [ic]...[/ic] | |
raw = raw.gsub(/\[ic\](.+?)\[\/ic\]/im) { "`#{$1}`" } | |
# [SAMP]...[/SAMP] | |
raw = raw.gsub(/\[\/?samp\]/i, "`") | |
# replace all chevrons with HTML entities | |
# NOTE: must be done | |
# - AFTER all the "code" processing | |
# - BEFORE the "quote" processing | |
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" } | |
.gsub("<", "<") | |
.gsub("\u2603", "<") | |
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" } | |
.gsub(">", ">") | |
.gsub("\u2603", ">") | |
# [URL=...]...[/URL] | |
raw = raw.gsub(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" } | |
# [URL]...[/URL] | |
# [MP3]...[/MP3] | |
raw = raw.gsub(/\[\/?url\]/i, "") | |
.gsub(/\[\/?mp3\]/i, "") | |
# [B]...[/B] | |
raw = raw.gsub(/\[b\]/i, "**") | |
.gsub(/\[\/b\]/i, "**") | |
# [I]...[/I] | |
raw = raw.gsub(/\[i\]/i, "*") | |
.gsub(/\[\/i\]/i, "*") | |
# [MENTION]<username>[/MENTION] | |
raw = raw.gsub(/\[mention\](.+?)\[\/mention\]/i) do | |
old_username = $1 | |
if @old_username_to_new_usernames.has_key?(old_username) | |
old_username = @old_username_to_new_usernames[old_username] | |
end | |
"@#{old_username}" | |
end | |
# [MENTION=<user_id>]<username>[/MENTION] | |
raw = raw.gsub(/\[mention="?(\d+)"?\](.+?)\[\/mention\]/i) do | |
user_id, old_username = $1, $2 | |
if user = @users.select { |u| u[:userid] == user_id }.first | |
old_username = @old_username_to_new_usernames[user[:username]] || user[:username] | |
end | |
"@#{old_username}" | |
end | |
# [QUOTE]...[/QUOTE] | |
raw = raw.gsub(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" } | |
# [QUOTE=<username>]...[/QUOTE] | |
raw = raw.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do | |
old_username, quote = $1, $2 | |
if @old_username_to_new_usernames and @old_username_to_new_usernames.has_key?(old_username) | |
old_username = @old_username_to_new_usernames[old_username] | |
end | |
"\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" | |
end | |
# [YOUTUBE]<id>[/YOUTUBE] | |
raw = raw.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" } | |
# [VIDEO=youtube;<id>]...[/VIDEO] | |
raw = raw.gsub(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" } | |
raw | |
end | |
# Second pass over all posts, run after everything has been imported:
# re-renders each post's text with `postprocess_post_raw` and, when the text
# changed, writes it to the imported Post record.
#
# An ordinary error on one post is printed and the loop moves on. Only
# StandardError is rescued, so Ctrl-C (Interrupt) and exit requests still stop
# the run. Progress is reported for every post, including failed ones.
def postprocess_posts
  puts "", "Postprocessing posts..."
  current = 0
  max = @posts.size
  @posts.each do |post|
    begin
      new_raw = postprocess_post_raw(post[:raw])
      next if new_raw == post[:raw]
      new_id = post_id_from_imported_post_id(post[:postid])
      record = Post.find_by(id: new_id)
      if record.nil?
        puts "Could not save the post-processed raw of the post ##{new_id} (previous id: ##{post[:postid]})"
        next
      end
      record.raw = new_raw
      # `save` returns false instead of raising when the record is rejected
      unless record.save
        puts "Could not save the post-processed raw of the post ##{new_id} (previous id: ##{post[:postid]})"
      end
    rescue StandardError => e
      puts "", "-" * 100
      puts e.message
      puts e.backtrace.join("\n")
      puts "-" * 100, ""
    ensure
      current += 1
      print_status(current, max)
    end
  end
end
# Rewrites the BBCode that refers to other posts/threads, which can only be
# resolved once everything is imported:
# [QUOTE=<user>;<post_id>], [THREAD], [THREAD=<id>], [POST] and [POST=<id>].
#
# raw - the pre-processed post text (String).
#
# Returns the rewritten String. Thread/post tags whose target cannot be
# resolved by `topic_lookup_from_imported_post_id` are left unchanged; quotes
# with an unresolved post id fall back to a plain username quote.
def postprocess_post_raw(raw)
  # optional rename map; this file never assigns it, so nil is treated as empty
  username_map = @old_username_to_new_usernames || {}

  # [QUOTE=<username>;<post_id>]...[/QUOTE]
  # The username may not contain "]", otherwise the match could start at an
  # earlier, already converted [quote="name"] and swallow it.
  raw = raw.gsub(/\[quote=([^;\]]+);(\d+)\](.+?)\[\/quote\]/im) do
    old_username, post_id, quote = $1, $2, $3
    old_username = username_map.fetch(old_username, old_username)
    if (topic_lookup = topic_lookup_from_imported_post_id(post_id))
      post_number = topic_lookup[:post_number]
      topic_id = topic_lookup[:topic_id]
      "\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n"
    else
      "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
    end
  end
  # [THREAD]<thread_id>[/THREAD]
  # ==> http://my.discourse.org/t/slug/<topic_id>
  raw = raw.gsub(/\[thread\](\d+)\[\/thread\]/i) do |tag|
    topic_lookup = topic_lookup_from_imported_post_id("thread#" + $1)
    topic_lookup ? topic_lookup[:url] : tag
  end
  # [THREAD=<thread_id>]...[/THREAD]
  # ==> [...](http://my.discourse.org/t/slug/<topic_id>)
  raw = raw.gsub(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do |tag|
    thread_id, link = $1, $2
    topic_lookup = topic_lookup_from_imported_post_id("thread#" + thread_id)
    topic_lookup ? "[#{link}](#{topic_lookup[:url]})" : tag
  end
  # [POST]<post_id>[/POST]
  # ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
  raw = raw.gsub(/\[post\](\d+)\[\/post\]/i) do |tag|
    topic_lookup = topic_lookup_from_imported_post_id($1)
    topic_lookup ? topic_lookup[:url] : tag
  end
  # [POST=<post_id>]...[/POST]
  # ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>)
  raw = raw.gsub(/\[post=(\d+)\](.+?)\[\/post\]/i) do |tag|
    post_id, link = $1, $2
    topic_lookup = topic_lookup_from_imported_post_id(post_id)
    topic_lookup ? "[#{link}](#{topic_lookup[:url]})" : tag
  end
  raw
end
# Marks as closed every topic whose import id was collected in
# @closed_topic_ids (filled by import_topics for threads with open == "0").
#
# The topics are found through the 'import_id' post custom field of their
# posts. Nothing is executed when there is no id to close.
def close_topics
  puts "", "Closing topics..."
  return if @closed_topic_ids.nil? || @closed_topic_ids.empty?
  sql = <<-SQL
    WITH closed_topic_ids AS (
      SELECT t.id AS topic_id
      FROM post_custom_fields pcf
      JOIN posts p ON p.id = pcf.post_id
      JOIN topics t ON t.id = p.topic_id
      WHERE pcf.name = 'import_id'
      AND pcf.value IN (?)
    )
    UPDATE topics
    SET closed = true
    WHERE id IN (SELECT topic_id FROM closed_topic_ids)
  SQL
  Topic.exec_sql(sql, @closed_topic_ids)
end
end | |
# Kick off the import as soon as the script is loaded.
ImportScripts::KirupaForum.run
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment