Skip to content

Instantly share code, notes, and snippets.

@rahilwazir
Last active March 26, 2020 23:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rahilwazir/bc220385123f50062492be8908b067d6 to your computer and use it in GitHub Desktop.
Save rahilwazir/bc220385123f50062492be8908b067d6 to your computer and use it in GitHub Desktop.
MyBB to Discourse (nzarchitecture.net.nz) with attachment
# frozen_string_literal: true
# Optional dependency: the BBCode converter is only required when the script
# is started with the 'bbcode-to-md' command-line argument.
if ARGV.include?('bbcode-to-md')
# Replace (most) bbcode with markdown before creating posts.
# This will dramatically clean up the final posts in Discourse.
#
# In a temp dir:
#
# git clone https://github.com/nlalonde/ruby-bbcode-to-md.git
# cd ruby-bbcode-to-md
# gem build ruby-bbcode-to-md.gemspec
# gem install ruby-bbcode-to-md-*.gem
require 'ruby-bbcode-to-md'
end
# Load the application environment, then the helper classes instantiated by
# ImportScripts::Base#initialize (LookupContainer and Uploader).
require_relative '../../config/environment'
require_relative 'base/lookup_container'
require_relative 'base/uploader'
# Namespace shared by the importer classes in this file.
module ImportScripts; end
class ImportScripts::Base
# Builds the shared importer state: id lookup tables, the uploader, the
# bookkeeping hashes for temporarily changed site settings, the start-time
# registry used for progress output, and the category colour cursor.
def initialize
  preload_i18n

  @lookup = ImportScripts::LookupContainer.new
  @uploader = ImportScripts::Uploader.new

  # Only set when BBCode conversion was requested on the command line.
  @bbcode_to_md = true if use_bbcode_to_md?

  @old_site_settings = {}
  @site_settings_during_import = {}

  @start_times = { import: Time.now }
  @skip_updates = false
  @next_category_color_index = {}
end
# Performs one throwaway translation lookup and one transliteration call.
# NOTE(review): presumably this forces I18n and the transliterator to load
# their data before the import starts — confirm.
def preload_i18n
I18n.t("test")
ActiveSupport::Inflector.transliterate("test")
end
# Entry point of an import run.
#
# Applies the import-time site settings, runs the subclass's #execute, then
# (unless @skip_updates is set) runs the post-import consistency updates in
# a fixed order and prints the elapsed time. Site settings are always
# restored via #reset_site_settings, even when the import raises.
def perform
  Rails.logger.level = 3 # :error, so that we don't create log files that are many GB
  change_site_settings
  execute
  puts ""

  unless @skip_updates
    %i[
      update_topic_status
      update_bumped_at
      update_last_posted_at
      update_last_seen_at
      update_user_stats
      update_topic_users
      update_post_timings
      update_feature_topic_users
      update_category_featured_topics
      update_topic_count_replies
      reset_topic_counters
    ].each { |step| send(step) }
  end

  elapsed = Time.now - @start_times[:import]
  hours = elapsed / 3600
  minutes = elapsed / 60 % 60
  seconds = elapsed % 60
  puts '', '', format('Done (%02dh %02dmin %02dsec)', hours, minutes, seconds)
ensure
  reset_site_settings
end
# Returns the site settings (name => value) that #change_site_settings
# applies for the duration of the import. Key order matches the order in
# which the settings are applied.
def get_site_settings_for_import
  settings = { email_domains_blacklist: '' }

  # Every minimum-length validation is relaxed to a single character.
  %i[
    min_topic_title_length
    min_post_length
    min_first_post_length
    min_personal_message_post_length
    min_personal_message_title_length
  ].each { |setting_name| settings[setting_name] = 1 }

  settings.merge!(
    allow_duplicate_topic_titles: true,
    disable_emails: 'yes',
    max_attachment_size_kb: 102400,
    max_image_size_kb: 102400,
    authorized_extensions: '*',
    clean_up_inactive_users_after_days: 0,
    clean_up_unused_staged_users_after_days: 0
  )
  settings
end
# Switches the site into "import mode".
#
# Leaves bootstrap mode (adjusting two defaults if they still hold their
# bootstrap values), remembers the current value of every setting returned
# by #get_site_settings_for_import in @old_site_settings before overriding
# it, makes two permanent grace-period changes and disables rate limiting.
def change_site_settings
  if SiteSetting.bootstrap_mode_enabled
    if SiteSetting.default_trust_level == TrustLevel[1]
      SiteSetting.default_trust_level = TrustLevel[0]
    end
    if SiteSetting.default_email_digest_frequency == 1440
      SiteSetting.default_email_digest_frequency = 10080
    end
    SiteSetting.bootstrap_mode_enabled = false
  end

  @site_settings_during_import = get_site_settings_for_import
  @site_settings_during_import.each_pair do |setting_name, import_value|
    @old_site_settings[setting_name] = SiteSetting.get(setting_name)
    SiteSetting.set(setting_name, import_value)
  end

  # Some changes that should not be rolled back after the script is done
  SiteSetting.purge_unactivated_users_grace_period_days = 60
  SiteSetting.purge_deleted_uploads_grace_period_days = 90

  RateLimiter.disable
end
# Restores the settings saved by #change_site_settings and re-enables rate
# limiting. A setting is only restored while it still holds the value the
# importer assigned; values changed by someone else in the meantime are
# left untouched.
def reset_site_settings
  @old_site_settings.each_pair do |setting_name, previous_value|
    next if SiteSetting.get(setting_name) != @site_settings_during_import[setting_name]

    SiteSetting.set(setting_name, previous_value)
  end

  RateLimiter.enable
end
# True when the script was started with the "bbcode-to-md" argument.
def use_bbcode_to_md?
  ARGV.any? { |argument| argument == "bbcode-to-md" }
end
# Abstract hook called by #perform; subclasses must override it.
# Implementation will do most of its work in its execute method.
# It will need to call create_users, create_categories, and create_posts.
#
# Raises NotImplementedError unless overridden.
def execute
raise NotImplementedError
end
# Lookup helpers forwarded to the object stored in @lookup.
delegate(
  :add_category,
  :add_group,
  :add_post,
  :add_topic,
  :add_user,
  :category_id_from_imported_category_id,
  :find_group_by_import_id,
  :find_user_by_import_id,
  :group_id_from_imported_group_id,
  :post_already_imported?,
  :post_id_from_imported_post_id,
  :topic_lookup_from_imported_post_id,
  :user_already_imported?,
  :user_id_from_imported_user_id,
  to: :@lookup
)
# Creates an admin account with a random password, trust level 4 and all
# email tokens marked as confirmed.
#
# opts - optional :email and :username; the defaults below are used when
#        they are missing.
#
# Returns the saved User. Raises if the user cannot be saved.
def create_admin(opts = {})
  User.new.tap do |admin|
    admin.email = opts[:email] || "sam.saffron@gmail.com"
    admin.username = opts[:username] || "sam"
    admin.password = SecureRandom.uuid
    admin.save!
    admin.grant_admin!
    admin.change_trust_level!(TrustLevel[4])
    admin.email_tokens.update_all(confirmed: true)
  end
end
# Hook called by #create_groups with the group returned by #create_group,
# before its validity is checked. Does nothing by default.
def created_group(group)
# override if needed
end
# Imports a collection of groups.
#
# Each element of `results` is yielded to the block, which returns a hash of
# group attributes or nil to skip the element. :id and :name are required;
# :id is the group's id in the source system and is only used for the
# import lookup, never as the Discourse record id.
#
# opts - :total (overall count for progress output, defaults to
#        results.count) and :offset (added to the progress counter).
#
# Returns [created, skipped]; failures are printed but not returned.
def create_groups(results, opts = {})
  created = skipped = failed = 0
  total = opts[:total] || results.count
  progress_offset = opts[:offset] || 0

  results.each do |row|
    attrs = yield(row)

    if attrs.nil? || group_id_from_imported_group_id(attrs[:id])
      skipped += 1
    else
      group = create_group(attrs, attrs[:id])
      created_group(group)

      if group.valid?
        add_group(attrs[:id].to_s, group)
        created += 1
      else
        failed += 1
        puts "Failed to create group id #{attrs[:id]} #{group.name}: #{group.errors.full_messages}"
      end
    end

    print_status(created + skipped + failed + progress_offset, total, get_start_time("groups"))
  end

  [created, skipped]
end
# Finds or builds the Discourse group for one imported group.
#
# opts      - group attributes; :id is ignored, and :name (falling back to
#             :full_name) is passed through UserNameSuggester.
# import_id - the group's id in the source system.
#
# A group that already carries the same import_id is returned untouched.
# Otherwise the existing or new group is tagged with the import id and
# import name, saved (without raising) and returned.
def create_group(opts, import_id)
  attrs = opts.reject { |key, _value| key == :id }
  import_name = attrs[:name].presence || attrs[:full_name]
  attrs[:name] = UserNameSuggester.suggest(import_name)

  group = Group.find_by(name: attrs[:name])
  if group
    return group if group.custom_fields["import_id"].to_s == import_id.to_s
  else
    group = Group.new(attrs)
  end

  group.custom_fields["import_id"] = import_id
  group.custom_fields["import_name"] = import_name
  group.save
  group
end
# Checks whether every id in import_ids already has an "import_id" row in
# the custom-field table of the given record type.
#
# type       - name such as :posts; it is classified and suffixed with
#              "CustomField" to find the model class.
# import_ids - ids from the source system.
#
# Returns false for an empty list, true (after printing a notice) when the
# number of matching rows equals import_ids.length, and nil otherwise.
def all_records_exist?(type, import_ids)
return false if import_ids.empty?
# Work on the raw connection so the ids can be bulk-loaded into a temp table.
connection = ActiveRecord::Base.connection.raw_connection
connection.exec('CREATE TEMP TABLE import_ids(val text PRIMARY KEY)')
# Ids are escaped individually because they are inlined into the INSERT.
import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",")
connection.exec("INSERT INTO import_ids VALUES #{import_id_clause}")
existing = "#{type.to_s.classify}CustomField".constantize
existing = existing.where(name: 'import_id')
.joins('JOIN import_ids ON val = value')
.count
if existing == import_ids.length
puts "Skipping #{import_ids.length} already imported #{type}"
true
end
ensure
# connection is still nil when the early return above was taken.
connection.exec('DROP TABLE import_ids') unless connection.nil?
end
# Hook called by #create_users with the value returned by #create_user,
# before its validity is checked. Does nothing by default.
def created_user(user)
# override if needed
end
# Imports a collection of users.
#
# Each element of `results` is yielded to the block, which returns the
# attribute hash for the User model or nil to skip the element. :id and
# :email are required; :id is the user's id in the source system and is
# only used for the import lookup, never as the Discourse record id.
#
# opts - :total (overall count for progress output, defaults to
#        results.count) and :offset (added to the progress counter).
#
# Returns [created, skipped]; failures are printed but not returned.
def create_users(results, opts = {})
  created = skipped = failed = 0
  total = opts[:total] || results.count
  progress_offset = opts[:offset] || 0

  results.each do |row|
    attrs = yield(row)

    # block returns nil to skip a user
    if attrs.nil?
      skipped += 1
    elsif user_id_from_imported_user_id(attrs[:id])
      skipped += 1
    else
      import_id = attrs[:id]
      user = create_user(attrs, import_id)
      created_user(user)

      if user && user.valid? && user.user_profile && user.user_profile.valid?
        add_user(import_id.to_s, user)
        created += 1
      else
        failed += 1
        puts "Failed to create user id: #{import_id}, username: #{user.try(:username)}, email: #{user.try(:email)}"
        if user.try(:errors)
          puts "user errors: #{user.errors.full_messages}"
          if user.try(:user_profile).try(:errors)
            puts "user_profile errors: #{user.user_profile.errors.full_messages}"
          end
        end
      end
    end

    print_status(created + skipped + failed + progress_offset, total, get_start_time("users"))
  end

  [created, skipped]
end
# Creates (or re-uses) the Discourse user for one imported user.
#
# opts      - attribute hash passed to User.new. NOTE: opts is mutated; the
#             keys :id, :merge, :post_create_action, :bio_raw, :website,
#             :location and :avatar_url are removed and handled here.
# import_id - the user's id in the source system; stored in the "import_id"
#             custom field.
#
# Returns the User. Re-raises any save error that is not a primary_email
# validation error.
def create_user(opts, import_id)
# Untouched copy, only used for the error message below.
original_opts = opts.dup
opts.delete(:id)
merge = opts.delete(:merge)
post_create_action = opts.delete(:post_create_action)
# Re-use a matching account when merging was requested or when it was
# created by an earlier import of the same id.
existing = find_existing_user(opts[:email], opts[:username])
return existing if existing && (merge || existing.custom_fields["import_id"].to_s == import_id.to_s)
# Profile attributes are not User columns; they are applied after the save.
bio_raw = opts.delete(:bio_raw)
website = opts.delete(:website)
location = opts.delete(:location)
avatar_url = opts.delete(:avatar_url)
original_username = opts[:username]
original_name = opts[:name]
original_email = opts[:email] = opts[:email].downcase
# Replace usernames that are malformed or already taken.
if !UsernameValidator.new(opts[:username]).valid_format? || !User.username_available?(opts[:username])
opts[:username] = UserNameSuggester.suggest(opts[:username].presence || opts[:name].presence || opts[:email])
end
# Replace invalid email addresses with a generated placeholder.
unless opts[:email][EmailValidator.email_regex]
opts[:email] = fake_email
puts "Invalid email '#{original_email}' for '#{opts[:username]}'. Using '#{opts[:email]}'"
end
# Keep the original username visible as the name when it had to be changed.
opts[:name] = original_username if original_name.blank? && opts[:username] != original_username
opts[:trust_level] = TrustLevel[1] unless opts[:trust_level]
opts[:active] = opts.fetch(:active, true)
opts[:import_mode] = true
opts[:last_emailed_at] = opts.fetch(:last_emailed_at, Time.now)
u = User.new(opts)
(opts[:custom_fields] || {}).each { |k, v| u.custom_fields[k] = v }
# Record the source values that were changed or that later steps may need.
u.custom_fields["import_id"] = import_id
u.custom_fields["import_username"] = original_username if original_username.present? && original_username != opts[:username]
u.custom_fields["import_avatar_url"] = avatar_url if avatar_url.present?
u.custom_fields["import_pass"] = opts[:password] if opts[:password].present?
u.custom_fields["import_email"] = original_email if original_email != opts[:email]
begin
User.transaction do
u.save!
if bio_raw.present? || website.present? || location.present?
if website.present?
u.user_profile.website = website
# Drop the website again if it makes the profile invalid.
u.user_profile.website = nil unless u.user_profile.valid?
end
# Only the first 3000 characters of the bio are kept.
u.user_profile.bio_raw = bio_raw[0..2999] if bio_raw.present?
u.user_profile.location = location if location.present?
u.user_profile.save!
end
end
if opts[:active] && opts[:password].present?
u.activate
end
rescue => e
# try based on email
if e.try(:record).try(:errors).try(:messages).try(:[], :primary_email).present?
if existing = User.find_by_email(opts[:email].downcase)
existing.custom_fields["import_id"] = import_id
existing.save!
u = existing
end
else
puts "Error on record: #{original_opts.inspect}"
raise e
end
end
# "import_email" is only set when the address was replaced above: such
# users are suspended and all of their email notifications are disabled.
if u.custom_fields['import_email']
u.suspended_at = Time.zone.at(Time.now)
u.suspended_till = 200.years.from_now
u.save!
user_option = u.user_option
user_option.email_digests = false
user_option.email_level = UserOption.email_level_types[:never]
user_option.email_messages_level = UserOption.email_level_types[:never]
user_option.save!
# NOTE(review): the user was already saved with save! above; this second
# save decides whether the suspension is logged.
if u.save
StaffActionLogger.new(Discourse.system_user).log_user_suspend(u, 'Invalid email address on import')
else
Rails.logger.error("Failed to suspend user #{u.username}. #{u.errors.try(:full_messages).try(:inspect)}")
end
end
post_create_action.try(:call, u) if u.persisted?
u # If there was an error creating the user, u.errors has the messages
end
# Looks up an existing account, first by case-insensitive email and, when
# no email matches, by exact username. Returns the User or nil.
def find_existing_user(email, username)
  # Force the use of the index on the 'user_emails' table
  owner_of_email = UserEmail.where("lower(email) = ?", email.downcase).first&.user
  owner_of_email || User.where(username: username).first
end
# Hook called by #create_categories with each category returned by
# #create_category. Does nothing by default.
def created_category(category)
# override if needed
end
# Iterates through a collection to create categories.
#
# Each element of `results` is yielded to the block, which returns a hash of
# attributes for the new category or nil to skip the element. :id and :name
# are required; :id is the category's id in the source system and is only
# used for the import lookup, never as the Discourse record id.
# Optional attributes are position, description, and parent_category_id.
#
# Names are normalised before creation: blank names become "Blank",
# surrounding whitespace is removed and the result is cut to 50 characters.
#
# Returns [created, skipped].
def create_categories(results)
  created = 0
  skipped = 0
  total = results.count

  results.each do |c|
    params = yield(c)

    # block returns nil to skip
    if params.nil? || category_id_from_imported_category_id(params[:id])
      skipped += 1
    else
      # Build a new string instead of calling strip!: the in-place variant
      # raises FrozenError on frozen strings (including the "Blank" literal
      # under frozen_string_literal) and would modify the caller's string.
      name = params[:name]
      name = "Blank" if name.blank?
      params[:name] = name.strip[0..49]

      # make sure categories don't go more than 2 levels deep
      if params[:parent_category_id]
        top = Category.find_by_id(params[:parent_category_id])
        top = top.parent_category while top && !top.parent_category.nil?
        params[:parent_category_id] = top.id if top
      end

      new_category = create_category(params, params[:id])
      created_category(new_category)
      created += 1
    end

    print_status(created + skipped, total, get_start_time("categories"))
  end

  [created, skipped]
end
# Iterates through a collection to create tags.
#
# Each element of `results` is yielded to the block, which returns a hash
# with :id and :name for the tag, or nil to skip the element.
#
# Names are normalised before creation: blank names become "Blank",
# surrounding whitespace is removed and the result is cut to 50 characters.
#
# Returns [created, skipped]. Elements whose tag already exists are counted
# as created, because #create_tag returns the existing record.
def create_tags(results)
  created = 0
  skipped = 0
  total = results.count

  results.each do |c|
    params = yield(c)

    # block returns nil to skip
    if params.nil?
      skipped += 1
    else
      # Build a new string instead of calling strip!: the in-place variant
      # raises FrozenError on frozen strings (including the "Blank" literal
      # under frozen_string_literal) and would modify the caller's string.
      name = params[:name]
      name = "Blank" if name.blank?
      params[:name] = name.strip[0..49]

      create_tag(params, params[:id])
      created += 1
    end

    print_status(created + skipped, total, get_start_time("tags"))
  end

  [created, skipped]
end
# Finds a tag by case-insensitive name or creates it.
#
# opts      - :name of the tag, plus an optional :post_create_action
#             callable that receives a newly created tag. The callback key
#             is removed from opts only when a new tag is created.
# import_id - accepted for symmetry with the other create_* helpers; unused.
#
# Returns the existing or newly created Tag. Raises if saving fails.
def create_tag(opts, import_id)
  wanted_name = opts[:name].downcase.strip
  already_there = Tag.where("LOWER(name) = ?", wanted_name).first
  return already_there if already_there

  after_create = opts.delete(:post_create_action)

  tag = Tag.new(name: opts[:name])
  tag.save!
  after_create.try(:call, tag)
  tag
end
# Finds or creates one category.
#
# opts      - category attributes (:name, :parent_category_id, :user_id or
#             :user, :position, :color, :text_color, :read_restricted,
#             :description) plus an optional :post_create_action callable.
# import_id - the category's id in the source system; stored as the
#             "import_id" custom field and registered in the lookup.
#
# A category with the same parent and case-insensitive name is returned
# as-is. Otherwise the category is created, its description (if any) is
# written into the first post of the category topic, and the callback runs.
#
# Returns the Category. Raises if saving fails.
def create_category(opts, import_id)
  duplicate = Category
    .where(parent_category_id: opts[:parent_category_id])
    .where("LOWER(name) = ?", opts[:name].downcase.strip)
    .first
  return duplicate if duplicate

  after_create = opts.delete(:post_create_action)
  owner_id = opts[:user_id] || opts[:user].try(:id) || Discourse::SYSTEM_USER_ID

  category = Category.new(
    name: opts[:name],
    user_id: owner_id,
    position: opts[:position],
    parent_category_id: opts[:parent_category_id],
    color: opts[:color] || category_color(opts[:parent_category_id]),
    text_color: opts[:text_color] || "FFF",
    read_restricted: opts[:read_restricted] || false,
  )
  category.custom_fields["import_id"] = import_id if import_id
  category.save!

  description = opts[:description]
  if description.present?
    revision_opts = { skip_revision: true, skip_validations: true, bypass_bump: true }
    category.topic.first_post.revise(Discourse.system_user, { raw: description }, revision_opts)
  end

  add_category(import_id, category)
  after_create.try(:call, category)
  category
end
# Returns the next colour for a category under the given parent, cycling
# through the '|'-separated SiteSetting.category_colors list. A separate
# position is kept per parent id in @next_category_color_index.
def category_color(parent_category_id)
  @category_colors ||= SiteSetting.category_colors.split('|')

  current = @next_category_color_index[parent_category_id].presence || 0
  upcoming = current + 1
  upcoming = 0 if upcoming >= @category_colors.count
  @next_category_color_index[parent_category_id] = upcoming

  @category_colors[current]
end
# Hook called by #create_posts for every post that was created and
# registered in the lookup. Does nothing by default.
def created_post(post)
# override if needed
end
# Imports a collection of posts; it can create both topics and replies.
#
# Each element of `results` is yielded to the block, which returns the
# attributes handed to PostCreator (via #create_post) or nil to skip.
# Topics should give attributes title and category.
# Replies should provide topic_id. Use topic_lookup_from_imported_post_id to find the topic.
# The :id attribute is removed from the hash and used as the import id.
#
# opts - :total (overall count for progress output, defaults to
#        results.count) and :offset (added to the progress counter).
#
# Returns [created, skipped]. Errors while creating a single post are
# printed and counted as skipped; they do not abort the import.
def create_posts(results, opts = {})
  created = 0
  skipped = 0
  total = opts[:total] || results.count
  # the post count should be unique enough to differentiate between posts and PMs
  start_time = get_start_time("posts-#{total}")
  progress_offset = opts[:offset] || 0

  results.each do |row|
    attrs = yield(row)

    # block returns nil to skip a post
    if attrs.nil?
      skipped += 1
    else
      import_id = attrs.delete(:id).to_s

      if post_id_from_imported_post_id(import_id)
        skipped += 1
      else
        begin
          outcome = create_post(attrs, import_id)
          case outcome
          when Post
            add_post(import_id, outcome)
            add_topic(outcome)
            created_post(outcome)
            created += 1
          else
            # create_post returned its error messages instead of a post
            skipped += 1
            puts "Error creating post #{import_id}. Skipping."
            p outcome
          end
        rescue Discourse::InvalidAccess => e
          skipped += 1
          puts "InvalidAccess creating post #{import_id}. Topic is closed? #{e.message}"
        rescue => e
          skipped += 1
          puts "Exception while creating post #{import_id}. Skipping."
          puts e.message
          puts e.backtrace.join("\n")
        end
      end
    end

    print_status(created + skipped + progress_offset, total, start_time)
  end

  [created, skipped]
end
# Links topics to tags.
#
# Each element of `results` is yielded to the block, which returns a hash of
# TopicTag attributes (plus :id, which is removed and only used in
# messages) or nil to skip the element.
#
# opts - :total (overall count for progress output, defaults to
#        results.count) and :offset (added to the progress counter).
#
# Returns [created, skipped]. Links that already exist, links that fail to
# save and links that raise are all counted as skipped.
def create_topic_tags(results, opts = {})
  skipped = 0
  created = 0
  total = opts[:total] || results.count
  start_time = get_start_time("topic-tags-#{total}")

  results.each do |r|
    params = yield(r)

    # block returns nil to skip a topic tag
    if params.nil?
      skipped += 1
    else
      import_id = params.delete(:id).to_s

      begin
        if TopicTag.where(params).exists?
          skipped += 1
        else
          topic_tag = TopicTag.create(params)
          # `create` returns the record even when saving failed, so success
          # has to be judged by whether the record was persisted.
          if topic_tag.persisted?
            created += 1
          else
            skipped += 1
            puts "Error creating topic_tag #{import_id}. Skipping."
            puts topic_tag.errors.full_messages.join(", ")
          end
        end
      rescue Discourse::InvalidAccess => e
        skipped += 1
        puts "InvalidAccess creating topic_tag #{import_id}. Topic is closed? #{e.message}"
      rescue => e
        skipped += 1
        puts "Exception while creating topic_tag #{import_id}. Skipping."
        puts e.message
        puts e.backtrace.join("\n")
      end
    end

    print_status(created + skipped + (opts[:offset] || 0), total, start_time)
  end

  [created, skipped]
end
# Guardian for the system user, passed to every PostCreator call.
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)

# Creates one post (or topic) through PostCreator in import mode.
#
# opts      - PostCreator attributes; :user_id is required. An optional
#             :post_create_action callable is removed from the caller's hash
#             and invoked with the created post.
# import_id - stored in the post's "import_id" custom field.
#
# Without :topic_id the post starts a new topic, and the import flags
# (closed, archived, source topic id) are passed along as :meta_data.
# When BBCode conversion is enabled, :raw is converted first; if the
# conversion raises, the original text is kept.
#
# Returns the Post on success, otherwise the creator's error messages.
def create_post(opts, import_id)
  author = User.find(opts[:user_id])
  after_create = opts.delete(:post_create_action)

  opts = opts.merge(skip_validations: true)
  opts[:import_mode] = true
  opts[:custom_fields] ||= {}
  opts[:custom_fields]['import_id'] = import_id

  unless opts[:topic_id]
    topic_flags = {}
    opts[:meta_data] = topic_flags
    topic_flags["import_closed"] = true if opts[:closed]
    topic_flags["import_archived"] = true if opts[:archived]
    topic_flags["import_topic_id"] = opts[:import_topic_id] if opts[:import_topic_id]
  end

  opts[:guardian] = STAFF_GUARDIAN

  if @bbcode_to_md
    opts[:raw] =
      begin
        opts[:raw].bbcode_to_md(false, {}, :disable, :quote)
      rescue StandardError
        opts[:raw]
      end
  end

  creator = PostCreator.new(author, opts)
  post = creator.create
  after_create.try(:call, post) if post

  if post && creator.errors.empty?
    post
  else
    creator.errors.full_messages
  end
end
# Forwards to the uploader's create_upload with the same arguments
# (user id, file path, source filename) and returns its result.
def create_upload(user_id, path, source_filename)
@uploader.create_upload(user_id, path, source_filename)
end
# Imports a collection of bookmarks.
#
# Each element of `results` is yielded to the block, which returns a hash
# with :user_id and :post_id (both ids from the source system) or nil to
# skip the element. Bookmarks whose user or post was not imported are
# skipped with a message.
#
# opts - :total (overall count for progress output, defaults to
#        results.count) and :offset (added to the progress counter).
#
# Returns [created, skipped].
def create_bookmarks(results, opts = {})
  created = 0
  skipped = 0
  total = opts[:total] || results.count

  # only the IDs are needed, so one reusable User and Post is enough
  bookmarker = User.new
  bookmarked = Post.new

  results.each do |row|
    attrs = yield(row)

    if attrs.nil?
      skipped += 1
    else
      bookmarker.id = user_id_from_imported_user_id(attrs[:user_id])
      bookmarked.id = post_id_from_imported_post_id(attrs[:post_id])

      if bookmarker.id.nil? || bookmarked.id.nil?
        skipped += 1
        puts "Skipping bookmark for user id #{attrs[:user_id]} and post id #{attrs[:post_id]}"
      else
        outcome = PostActionCreator.create(bookmarker, bookmarked, :bookmark)
        created += 1 if outcome.success?
        skipped += 1 if outcome.failed?
      end
    end

    print_status(created + skipped + (opts[:offset] || 0), total, get_start_time("bookmarks"))
  end

  [created, skipped]
end
# Closes every open topic whose last post is older than the given number
# of days, acting as the system user.
#
# opts - :days, the inactivity threshold (defaults to 30).
def close_inactive_topics(opts = {})
  num_days = opts[:days] || 30
  puts '', "Closing topics that have been inactive for more than #{num_days} days."

  stale_topics = Topic.where('last_posted_at < ?', num_days.days.ago).where(closed: false)
  total_count = stale_topics.count
  closed_count = 0

  stale_topics.find_each do |stale_topic|
    stale_topic.update_status('closed', true, Discourse.system_user)
    closed_count += 1
    print_status(closed_count, total_count, get_start_time("close_inactive_topics"))
  end
end
# Applies the closed/archived flags recorded during the import.
#
# Topics carrying an 'import_closed' or 'import_archived' custom field with
# value 't' get the matching column set to TRUE; afterwards both marker
# fields are deleted.
def update_topic_status
  puts "", "Updating topic status"

  { "closed" => "import_closed", "archived" => "import_archived" }.each do |column, marker|
    DB.exec <<~SQL
      UPDATE topics AS t
      SET #{column} = TRUE
      WHERE EXISTS(
      SELECT 1
      FROM topic_custom_fields AS f
      WHERE f.topic_id = t.id AND f.name = '#{marker}' AND f.value = 't'
      )
    SQL
  end

  DB.exec <<~SQL
    DELETE FROM topic_custom_fields
    WHERE name IN ('import_closed', 'import_archived')
  SQL
end
# Sets each topic's bumped_at to the creation time of its newest regular
# post; topics without regular posts keep their current value.
def update_bumped_at
  puts "", "Updating bumped_at on topics"

  statement = <<~SQL
    UPDATE topics t
    SET bumped_at = COALESCE((SELECT MAX(created_at) FROM posts WHERE topic_id = t.id AND post_type = #{Post.types[:regular]}), bumped_at)
  SQL
  DB.exec(statement)
end
# Sets users.last_posted_at to the creation time of each user's newest
# post, for rows where the stored value differs.
def update_last_posted_at
  puts "", "Updating last posted at on users"

  statement = <<~SQL
    WITH lpa AS (
    SELECT user_id, MAX(posts.created_at) AS last_posted_at
    FROM posts
    GROUP BY user_id
    )
    UPDATE users
    SET last_posted_at = lpa.last_posted_at
    FROM users u1
    JOIN lpa ON lpa.user_id = u1.id
    WHERE u1.id = users.id
    AND users.last_posted_at <> lpa.last_posted_at
  SQL
  DB.exec(statement)
end
# Rebuilds per-user statistics after the import.
#
# Ensures every real user has a user_stat row and refreshes its topic reply
# count, then updates first_post_created_at, post_count and topic_count in
# bulk for rows where the stored value differs.
def update_user_stats
  puts "", "Updating topic reply counts..."

  processed = 0
  total = User.real.count

  User.real.find_each do |user|
    stat = user.user_stat || user.create_user_stat
    stat.update_topic_reply_count
    stat.save
    processed += 1
    print_status(processed, total, get_start_time("user_stats"))
  end

  puts "", "Updating first_post_created_at..."
  DB.exec(<<~SQL)
    WITH sub AS (
    SELECT user_id, MIN(posts.created_at) AS first_post_created_at
    FROM posts
    GROUP BY user_id
    )
    UPDATE user_stats
    SET first_post_created_at = sub.first_post_created_at
    FROM user_stats u1
    JOIN sub ON sub.user_id = u1.user_id
    WHERE u1.user_id = user_stats.user_id
    AND user_stats.first_post_created_at <> sub.first_post_created_at
  SQL

  puts "", "Updating user post_count..."
  DB.exec(<<~SQL)
    WITH sub AS (
    SELECT user_id, COUNT(*) AS post_count
    FROM posts
    GROUP BY user_id
    )
    UPDATE user_stats
    SET post_count = sub.post_count
    FROM user_stats u1
    JOIN sub ON sub.user_id = u1.user_id
    WHERE u1.user_id = user_stats.user_id
    AND user_stats.post_count <> sub.post_count
  SQL

  puts "", "Updating user topic_count..."
  DB.exec(<<~SQL)
    WITH sub AS (
    SELECT user_id, COUNT(*) AS topic_count
    FROM topics
    GROUP BY user_id
    )
    UPDATE user_stats
    SET topic_count = sub.topic_count
    FROM user_stats u1
    JOIN sub ON sub.user_id = u1.user_id
    WHERE u1.user_id = user_stats.user_id
    AND user_stats.topic_count <> sub.topic_count
  SQL
end
# Fills in users.last_seen_at: users without a value get their created_at,
# then every user with a last_posted_at gets that timestamp instead.
# scripts that are able to import last_seen_at from the source data should override this method
def update_last_seen_at
  puts "", "Updating last seen at on users"

  [
    "UPDATE users SET last_seen_at = created_at WHERE last_seen_at IS NULL",
    "UPDATE users SET last_seen_at = last_posted_at WHERE last_posted_at IS NOT NULL"
  ].each { |statement| DB.exec(statement) }
end
# Inserts a topic_users row for every (user, topic) pair that has posts by
# a user with a positive id, deriving read position and visit times from
# those posts. Existing rows are left alone (ON CONFLICT DO NOTHING).
def update_topic_users
  puts "", "Updating topic users"

  statement = <<~SQL
    INSERT INTO topic_users (user_id, topic_id, posted, last_read_post_number, highest_seen_post_number, first_visited_at, last_visited_at, total_msecs_viewed)
    SELECT user_id, topic_id, 't' , MAX(post_number), MAX(post_number), MIN(created_at), MAX(created_at), COUNT(id) * 5000
    FROM posts
    WHERE user_id > 0
    GROUP BY user_id, topic_id
    ON CONFLICT DO NOTHING
  SQL
  DB.exec(statement)
end
# Inserts a 5000 ms post_timings row for every post written by a user with
# a positive id. Existing rows are left alone (ON CONFLICT DO NOTHING).
def update_post_timings
  puts "", "Updating post timings"

  statement = <<~SQL
    INSERT INTO post_timings (topic_id, post_number, user_id, msecs)
    SELECT topic_id, post_number, user_id, 5000
    FROM posts
    WHERE user_id > 0
    ON CONFLICT DO NOTHING
  SQL
  DB.exec(statement)
end
# Prints a progress heading and delegates to
# TopicFeaturedUsers.ensure_consistency!.
def update_feature_topic_users
puts "", "Updating featured topic users"
TopicFeaturedUsers.ensure_consistency!
end
# Prints a progress heading and delegates to Topic.reset_all_highest!.
def reset_topic_counters
puts "", "Resetting topic counters"
Topic.reset_all_highest!
end
# Recomputes the featured topics of every category, printing progress.
def update_category_featured_topics
  puts "", "Updating featured topics in categories"

  processed = 0
  total = Category.count

  Category.find_each do |category|
    CategoryFeaturedTopic.feature_topics_for(category)
    processed += 1
    print_status(processed, total, get_start_time("category_featured_topics"))
  end
end
# Refreshes the topic reply count on the user_stat of every real user,
# printing progress. Raises if a stat cannot be saved.
def update_topic_count_replies
  puts "", "Updating user topic reply counts"

  processed = 0
  total = User.real.count

  User.real.find_each do |user|
    user.user_stat.update_topic_reply_count
    user.user_stat.save!
    processed += 1
    print_status(processed, total, get_start_time("topic_count_replies"))
  end
end
# Demotes users above trust level 0 who have no posts to trust level 0,
# writing the column directly. Discourse::InvalidAccess raised for a user
# is ignored.
def update_tl0
  puts "", "Setting users with no posts to trust level 0"

  processed = 0
  total = User.count

  User.includes(:user_stat).find_each do |user|
    begin
      needs_demotion = user.trust_level > 0 && user.post_count == 0
      user.update_columns(trust_level: 0) if needs_demotion
    rescue Discourse::InvalidAccess
    end

    processed += 1
    print_status(processed, total, get_start_time("update_tl0"))
  end
end
# Sets created_at of every user who has posts to the creation time of
# their earliest post. Raises if a user cannot be saved.
def update_user_signup_date_based_on_first_post
  puts "", "Setting users' signup date based on the date of their first post"

  processed = 0
  total = User.count

  User.find_each do |user|
    earliest_post = user.posts.order('created_at ASC').first
    if earliest_post
      user.created_at = earliest_post.created_at
      user.save!
    end

    processed += 1
    print_status(processed, total, get_start_time("user_signup"))
  end
end
# Forwards to the uploader's html_for_upload with the same arguments and
# returns its result.
def html_for_upload(upload, display_filename)
@uploader.html_for_upload(upload, display_filename)
end
# Forwards to the uploader's embedded_image_html and returns its result.
def embedded_image_html(upload)
@uploader.embedded_image_html(upload)
end
# Forwards to the uploader's attachment_html with the same arguments and
# returns its result.
def attachment_html(upload, display_filename)
@uploader.attachment_html(upload, display_filename)
end
# Rewrites the current console line with a progress indicator of the form
# "<current> / <max> (<percent>%)", followed by an items-per-minute rate
# when a start time is given.
#
# current    - number of items processed so far.
# max        - total number of items.
# start_time - Time the work started, or nil to omit the rate.
def print_status(current, max, start_time = nil)
  rate = ''
  if start_time.present?
    elapsed_seconds = Time.now - start_time
    rate = format('[%.0f items/min] ', current / elapsed_seconds.to_f * 60)
  end

  percent = current / max.to_f * 100
  print format("\r%9d / %d (%5.1f%%) %s", current, max, percent, rate)
end
# Advances a four-frame text spinner by one step: prints a backspace
# followed by the next frame.
def print_spinner
  @spinner_chars ||= %w[| / - \\]
  @spinner_chars.rotate!
  print "\b#{@spinner_chars.first}"
end
# Returns the start time recorded under `key`, recording the current time
# on first use.
def get_start_time(key)
  @start_times[key] ||= Time.now
end
# Yields 0, batch_size, 2 * batch_size, ... without end; the block is
# expected to `break` once there is nothing left to process.
def batches(batch_size = 1000)
  batch_number = 0
  loop do
    yield batch_number * batch_size
    batch_number += 1
  end
end
# Returns a random placeholder address in the "email.invalid" domain.
def fake_email
  SecureRandom.hex + "@email.invalid"
end
end
# frozen_string_literal: true
require "mysql2"
require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Before running this script, paste these lines into your shell,
# then use arrow keys to edit the values
=begin
export DB_HOST="localhost"
export DB_NAME="mybb"
export DB_PW=""
export DB_USER="root"
export TABLE_PREFIX="mybb_"
export BASE="" #
=end
# Call it like this:
# RAILS_ENV=production ruby script/import_scripts/mybb.rb
class ImportScripts::MyBB < ImportScripts::Base
# MySQL connection settings for the MyBB database. Each value is taken from
# the environment variable of the same name, with the literal as fallback.
DB_HOST ||= ENV['DB_HOST'] || "172.17.0.3"
DB_NAME ||= ENV['DB_NAME'] || "mybb"
DB_PW ||= ENV['DB_PW'] || "root"
DB_USER ||= ENV['DB_USER'] || "root"
# Prefix interpolated in front of every MyBB table name in the queries.
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "mybb_"
# Number of source rows fetched per query batch.
BATCH_SIZE = 1000
# NOTE(review): hard-coded; the BASE environment variable mentioned in the
# usage notes is not read here — confirm whether that is intended.
BASE = ""
QUIET = true
# Directory holding the MyBB attachment files on the Discourse host.
ATTACHMENT_DIR = '/var/www/discourse/public/uploads/MyBBAttachmentUploads/uploads'
# Sets up the base importer, an HTML entity decoder, the username mapping
# table and the MySQL connection to the MyBB database.
def initialize
  super

  @htmlentities = HTMLEntities.new
  @old_username_to_new_usernames = {}

  connection_settings = {
    host: DB_HOST,
    username: DB_USER,
    password: DB_PW,
    database: DB_NAME
  }
  @client = Mysql2::Client.new(connection_settings)
end
# Runs the MyBB import stages in their required order, after restricting
# outgoing email to staff.
def execute
  SiteSetting.disable_emails = "non-staff"

  %i[
    import_users
    import_categories
    import_tags
    import_posts
    import_topic_tags
    import_private_messages
    create_permalinks
    suspend_users
    post_process_posts
    import_attachments
  ].each { |stage| send(stage) }
end
# Imports every MyBB user whose user group is not titled 'Banned', in
# batches of BATCH_SIZE ordered by uid.
#
# For each user the display name is built from the profile fields fid5 and
# fid6, moderator/admin flags are derived from the group title, and after
# the Discourse user is created the profile fields fid1, fid2, fid4 and
# fid7 are copied into the mapped user custom fields (creating the field
# row or updating it when the value differs).
def import_users
  puts '', "creating users"

  total_count = mysql_query("SELECT count(*) count
FROM #{TABLE_PREFIX}users u
JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
WHERE g.title != 'Banned';").first['count']

  # MyBB profile-field column => Discourse user custom field name.
  profile_fields = {
    'fid1' => 'user_field_6',
    'fid2' => 'user_field_5',
    'fid4' => 'user_field_2',
    'fid7' => 'user_field_1'
  }

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(
"SELECT uid id, email email, username, regdate, g.title `group`, uf.fid1, uf.fid2, uf.fid4, uf.fid5, uf.fid6, uf.fid7
FROM #{TABLE_PREFIX}users u
JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
LEFT JOIN #{TABLE_PREFIX}userfields uf ON uf.ufid = u.uid
WHERE g.title != 'Banned'
ORDER BY u.uid ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")

    break if results.size < 1

    # The all_records_exist? shortcut is deliberately not used here.
    #next if all_records_exist? :users, results.map { |u| u["id"].to_i }

    create_users(results, total: total_count, offset: offset) do |user|
      # Missing name parts count as empty; the row itself is not modified.
      full_name = [user['fid5'], user['fid6']].join(' ').strip

      copy_profile_fields = proc do |newuser|
        newuser.name = full_name
        newuser.save

        profile_fields.each do |column, field_name|
          value = user[column]
          next unless value

          field = UserCustomField.where(user_id: newuser.id, name: field_name).first
          if field.nil?
            UserCustomField.create!(user_id: newuser.id, name: field_name, value: value)
          elsif field.value != value
            field.value = value
            field.save!
          end
        end
      end

      {
        id: user['id'],
        email: user['email'],
        name: full_name,
        username: user['username'],
        created_at: Time.zone.at(user['regdate']),
        moderator: user['group'] == 'Super Moderators',
        admin: user['group'] == 'Administrators',
        post_create_action: copy_profile_fields
      }
    end
  end
end
# Imports MyBB forums as categories, parents first (ordered by pid, fid).
# Names and descriptions are HTML-unescaped; a forum with a positive parent
# id is attached to the already imported parent category.
def import_categories
  forums = mysql_query("
SELECT fid id, pid parent_id, left(name, 50) name, description
FROM #{TABLE_PREFIX}forums
ORDER BY pid ASC, fid ASC
")

  create_categories(forums) do |forum|
    attrs = {
      id: forum['id'],
      name: CGI.unescapeHTML(forum['name']),
      description: CGI.unescapeHTML(forum['description'])
    }

    parent_import_id = forum['parent_id']
    if parent_import_id.to_i > 0
      attrs[:parent_category_id] = category_id_from_imported_category_id(parent_import_id)
    end

    attrs
  end
end
# Imports MyBB thread prefixes as tags; the prefix text is passed through
# tag_name to obtain the tag's name.
def import_tags
  prefixes = mysql_query("
SELECT pid id, prefix title
FROM #{TABLE_PREFIX}threadprefixes
")

  create_tags(prefixes) do |prefix|
    { id: prefix['id'], name: tag_name(prefix['title']) }
  end
end
# Imports all MyBB posts in batches of BATCH_SIZE, ordered by post date.
#
# A post whose id equals its thread's firstpost becomes a new topic (with
# category and title); every other post becomes a reply in the topic that
# was created for the thread's first post. Replies whose first post has not
# been imported are skipped with a message. Posts by users that were not
# imported are attributed to user id -1.
def import_posts
puts "", "creating topics and posts"
total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
SELECT p.pid id,
p.tid topic_id,
t.fid category_id,
t.subject title,
t.firstpost first_post_id,
p.uid user_id,
p.message raw,
p.dateline post_time
FROM #{TABLE_PREFIX}posts p,
#{TABLE_PREFIX}threads t
WHERE p.tid = t.tid
ORDER BY p.dateline
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
# An empty batch means every post has been read.
break if results.size < 1
# Skip the whole batch when all of its posts were imported earlier.
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
# If you have imported a phpbb forum to mybb previously there might
# be a problem with #{TABLE_PREFIX}threads.firstpost. If these ids are wrong
# the thread cannot be imported to discourse as the topic post is
# missing. This query retrieves the first_post_id manually. As it
# will decrease the performance it is commented out by default.
# m['first_post_id'] = mysql_query("
# SELECT p.pid id,
# FROM #{TABLE_PREFIX}posts p,
# #{TABLE_PREFIX}threads t
# WHERE p.tid = #{m['topic_id']} AND t.tid = #{m['topic_id']}
# ORDER BY p.dateline
# LIMIT 1
# ").first['id']
mapped[:id] = m['id']
# Fall back to user id -1 when the author was not imported.
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_mybb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['post_time'])
if m['id'] == m['first_post_id']
# First post of the thread: create a topic.
mapped[:category] = category_id_from_imported_category_id(m['category_id'])
mapped[:title] = CGI.unescapeHTML(m['title'])
else
# Reply: attach it to the topic created for the thread's first post.
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
# Returning nil tells create_posts to skip this row.
skip ? nil : mapped
end
end
end
# Tags imported topics with the tag derived from their MyBB thread prefix.
#
# Rows are (thread, post, prefix) combinations. A row is mapped only when a
# Tag with the cleaned prefix name exists and a TopicCustomField "import_id"
# equal to the post id is found; every other row is skipped.
def import_topic_tags
  puts "", "importing topic tags"

  # Count the groups the paged query below returns. With the GROUP BY placed
  # directly on a count(*) query, each result row holds the size of a single
  # group, and there is no row at all when nothing matches.
  total_count = mysql_query("
    SELECT count(*) count FROM (
      SELECT t.tid
      FROM #{TABLE_PREFIX}threads t
      INNER JOIN #{TABLE_PREFIX}threadprefixes tp
      ON t.prefix = tp.pid
      INNER JOIN #{TABLE_PREFIX}posts p
      ON t.tid = p.tid
      GROUP BY t.tid, p.pid
    ) tagged_posts
  ").first["count"]

  batches(BATCH_SIZE) do |offset|
    # ORDER BY keeps the LIMIT/OFFSET pages stable from one query to the next.
    results = mysql_query("
      SELECT t.tid topic_id,
             p.pid post_id,
             tp.prefix tag
      FROM #{TABLE_PREFIX}threads t
      INNER JOIN #{TABLE_PREFIX}threadprefixes tp
      ON t.prefix = tp.pid
      INNER JOIN #{TABLE_PREFIX}posts p
      ON t.tid = p.tid
      GROUP BY t.tid, p.pid
      ORDER BY t.tid, p.pid
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")

    break if results.size < 1

    create_topic_tags(results, total: total_count, offset: offset) do |m|
      tag_id = Tag.find_by_name(tag_name(m['tag'])).try(:id)
      next if tag_id.nil?

      topic_custom_field = TopicCustomField.where(name: "import_id", value: m['post_id']).first
      next if topic_custom_field.nil?

      { id: m['topic_id'], topic_id: topic_custom_field.topic_id, tag_id: tag_id }
    end
  end
end
# Placeholder: private messages are not imported, only a notice is printed.
def import_private_messages
  puts ""
  puts "private messages are not implemented"
end
# Placeholder: banned users are not suspended, only a notice is printed.
def suspend_users
  puts ''
  puts "banned users are not implemented"
end
# Discourse usernames don't allow spaces, so every run of whitespace in a
# MyBB username is replaced by a single underscore.
#
# username - the MyBB username; it is not modified, so frozen strings are fine.
# post_id  - id of the post being processed, only used in the warning message.
#
# Returns a new String with the whitespace replaced.
def convert_username(username, post_id)
  whitespace_runs = 0
  converted = username.gsub(/\s+/) do
    whitespace_runs += 1
    '_'
  end

  # Warn on MyBB bug that places post text in the quote line - http://community.mybb.com/thread-180526.html
  puts "Warning: probably incorrect quote in post #{post_id}" if whitespace_runs > 5

  converted
end
# Take an original post id and return the migrated topic id and post number for it.
#
# quoted_post_id - MyBB id of the post being quoted (converted with to_i).
# post_id        - MyBB id of the quoting post, only used in log messages.
#
# Returns "post:<post_number>, topic:<topic_id>", or "" (after logging) when
# the quoted post was never imported or can no longer be loaded.
def post_id_to_post_num_and_topic(quoted_post_id, post_id)
  migrated_id = post_id_from_imported_post_id(quoted_post_id.to_i)

  unless migrated_id
    puts "Original post #{post_id} quotes nonexistent post #{quoted_post_id}"
    return ""
  end

  begin
    quoted = Post.find(migrated_id)
    "post:#{quoted.post_number}, topic:#{quoted.topic_id}"
  rescue
    puts "Could not find migrated post #{migrated_id} quoted by original post #{post_id} as #{quoted_post_id}"
    ""
  end
end
# Cleans up the raw text of one MyBB post before it is handed to
# preprocess_post_raw.
#
# raw       - the post text as stored by MyBB (left unmodified).
# import_id - MyBB id of the post, passed on for log messages.
#
# Returns whatever preprocess_post_raw returns for the cleaned text.
def process_mybb_post(raw, import_id)
  # convert the quote line
  text = raw.gsub(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) do
    author, quoted_pid = $1, $2
    "[quote=\"#{convert_username(author, import_id)}, #{post_id_to_post_num_and_topic(quoted_pid, import_id)}\"]"
  end

  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  text = text.gsub(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')

  # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
  text = text.gsub(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')

  # Many phpbb bbcode tags have a hash attached to them. Examples:
  #   [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
  #   [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
  text = text.gsub(/:(?:\w{8})\]/, ']')

  # Remove mybb video tags.
  text = text.gsub(/(^\[video=.*?\])|(\[\/video\]$)/, '')

  text = CGI.unescapeHTML(text)

  # phpBB shortens link text like this, which breaks our markdown processing:
  #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
  #
  # Work around it for now:
  text = text.gsub(/\[http(s)?:\/\/(www\.)?/, '[')

  preprocess_post_raw(text)
end
# Creates permalinks so that the old MyBB URLs under BASE (user-<id>.html,
# forum-<id>.html and thread-<id>.html) point at the imported users,
# categories and topics.
#
# Permalink creation is best effort: any error raised while creating a
# single permalink is ignored.
def create_permalinks
  puts '', 'Creating redirects...', ''

  # Pattern/replacement pair that rewrites "<word>-<digits>" followed by "-" or
  # "." and anything else to "<word>-<digits>.html".
  SiteSetting.permalink_normalizations = '/(\\w+)-(\\d+)[-.].*/\\1-\\2.html'

  puts '', 'Users...', ''
  total_users = User.count
  start_time = Time.now
  count = 0
  User.find_each do |u|
    ucf = u.custom_fields
    count += 1
    if ucf && ucf["import_id"] && ucf["import_username"]
      Permalink.create(url: "#{BASE}/user-#{ucf['import_id']}.html", external_url: "/u/#{u.username}") rescue nil
    end
    print_status(count, total_users, start_time)
  end

  puts '', 'Categories...', ''
  total_categories = Category.count
  start_time = Time.now
  count = 0
  Category.find_each do |cat|
    ccf = cat.custom_fields
    count += 1
    id = ccf["import_id"]
    next unless id # category without an import id: nothing to redirect
    puts "forum-#{id}.html --> /c/#{cat.id}" unless QUIET
    Permalink.create(url: "#{BASE}/forum-#{id}.html", category_id: cat.id) rescue nil
    print_status(count, total_categories, start_time)
  end

  puts '', 'Topics...', ''
  total_posts = Post.count
  start_time = Time.now
  count = 0
  puts '', 'Posts...', ''
  batches(BATCH_SIZE) do |offset|
    # Only the first post of each thread is selected. p.pid is a tiebreaker:
    # ordering by dateline alone leaves the order of posts sharing a timestamp
    # undefined, so LIMIT/OFFSET pages could repeat or miss threads.
    results = mysql_query("
      SELECT p.pid id,
             p.tid topic_id
        FROM #{TABLE_PREFIX}posts p,
             #{TABLE_PREFIX}threads t
       WHERE p.tid = t.tid
         AND t.firstpost=p.pid
    ORDER BY p.dateline, p.pid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")

    break if results.size < 1

    results.each do |post|
      count += 1
      topic = topic_lookup_from_imported_post_id(post['id'])
      next unless topic

      id = post['topic_id']
      Permalink.create(url: "#{BASE}/thread-#{id}.html", topic_id: topic[:topic_id]) rescue nil
      puts "#{BASE}/thread-#{id}.html --> http://localhost:3000/t/#{topic[:topic_id]}" unless QUIET
      print_status(count, total_posts, start_time)
    end
  end
end
# Runs postprocess_post_raw over every Post and saves the posts whose raw
# text changed. A PrettyText::JavaScriptError raised for a post is ignored;
# progress is reported after each post either way.
def post_process_posts
  puts "", "Postprocessing posts..."

  processed = 0
  total = Post.count

  Post.find_each do |post|
    begin
      rewritten = postprocess_post_raw(post.raw)
      unless rewritten == post.raw
        post.raw = rewritten
        post.save
      end
    rescue PrettyText::JavaScriptError
      nil
    ensure
      processed += 1
      print_status(processed, total)
    end
  end
end
# Converts forum markup in a raw post into the text handed to Discourse.
#
# The stages run in a fixed order: entity decoding, escaped whitespace, code
# style tags, chevron escaping, links, mentions, formatting tags, lists,
# quotes and finally videos.
#
# raw - the post text; returns "" when it is blank.
#
# Returns the converted text as a new String.
def preprocess_post_raw(raw)
  return "" if raw.blank?

  # maps an old username to its new one when a mapping is known
  renamed = lambda do |name|
    @old_username_to_new_usernames.has_key?(name) ? @old_username_to_new_usernames[name] : name
  end

  # decode HTML entities
  text = @htmlentities.decode(raw)

  # fix whitespaces
  text = text.gsub(/(\\r)?\\n/, "\n")
  text = text.gsub("\\t", "\t")

  # [HTML]...[/HTML]
  # [PHP]...[/PHP]
  [
    [/\[html\]/i, "\n```html\n"],
    [/\[\/html\]/i, "\n```\n"],
    [/\[php\]/i, "\n```php\n"],
    [/\[\/php\]/i, "\n```\n"]
  ].each { |tag, fence| text = text.gsub(tag, fence) }

  # [HIGHLIGHT="..."]
  text = text.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }

  # [CODE]...[/CODE]
  # [HIGHLIGHT]...[/HIGHLIGHT]
  # [SAMP]...[/SAMP]
  [
    [/\[\/?code\]/i, "\n```\n"],
    [/\[\/?highlight\]/i, "\n```\n"],
    [/\[\/?samp\]/i, "`"]
  ].each { |tag, fence| text = text.gsub(tag, fence) }

  # replace all chevrons with HTML entities
  # NOTE: must be done
  #  - AFTER all the "code" processing
  #  - BEFORE the "quote" processing
  # Chevrons between backticks are parked as a snowman so they survive the
  # escaping, then restored.
  { "<" => "&lt;", ">" => "&gt;" }.each do |chevron, entity|
    text = text.gsub(/`([^`]+)`/im) { "`" + $1.gsub(chevron, "\u2603") + "`" }
      .gsub(chevron, entity)
      .gsub("\u2603", chevron)
  end

  # [URL=...]...[/URL]
  text = text.gsub(/\[url="?(.+?)"?\](.+?)\[\/url\]/i) { "<a href=\"#{$1}\">#{$2}</a>" }

  # [URL]...[/URL]
  # [MP3]...[/MP3]
  text = text.gsub(/\[\/?url\]/i, "")
  text = text.gsub(/\[\/?mp3\]/i, "")

  # [MENTION]<username>[/MENTION]
  text = text.gsub(/\[mention\](.+?)\[\/mention\]/i) { "@#{renamed.call($1)}" }

  # [USER=<user_id>]<username>[/USER]
  text = text.gsub(/\[user="?(\d+)"?\](.+?)\[\/user\]/i) { "@#{renamed.call($2)}" }

  # [FONT=blah] and [COLOR=blah]
  # Each pattern runs twice: the second pass unwraps tags that the first pass
  # leaves behind when they are nested.
  text = text.gsub(/\[color=.*?\](.*?)\[\/color\]/im, '\1')
  text = text.gsub(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, '\1')
  text = text.gsub(/\[font=.*?\](.*?)\[\/font\]/im, '\1')
  text = text.gsub(/\[FONT=.*?\](.*?)\[\/FONT\]/im, '\1')

  # [CENTER]...[/CENTER]
  text = text.gsub(/\[CENTER\](.*?)\[\/CENTER\]/im, '\1')

  # fix LIST
  text = text.gsub(/\[LIST\](.*?)\[\/LIST\]/im, '<ul>\1</ul>')
  text = text.gsub(/\[\*\]/im, '<li>')

  # [QUOTE]...[/QUOTE]
  text = text.gsub(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }

  # [QUOTE=<username>]...[/QUOTE]
  text = text.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
    author, quoted = $1, $2
    "\n[quote=\"#{renamed.call(author)}\"]\n#{quoted}\n[/quote]\n"
  end

  # [YOUTUBE]<id>[/YOUTUBE]
  text = text.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" }

  # [VIDEO=youtube;<id>]...[/VIDEO]
  text.gsub(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }
end
# Second pass over a post's raw text: rewrites quotes, [thread] and [post]
# references using topic_lookup_from_imported_post_id, and strips leftover
# attachment, url, hr and size tags.
#
# raw - the stored raw text of a post.
#
# Returns the rewritten text as a new String.
def postprocess_post_raw(raw)
  # [QUOTE=<username>;<post_id>]...[/QUOTE]
  text = raw.gsub(/\[quote=([^;]+);n(\d+)\](.+?)\[\/quote\]/im) do
    author, quoted_id, body = $1, $2, $3
    author = @old_username_to_new_usernames[author] if @old_username_to_new_usernames.has_key?(author)

    target = topic_lookup_from_imported_post_id(quoted_id)
    attribution = target ? "#{author},post:#{target[:post_number]},topic:#{target[:topic_id]}" : author
    "\n[quote=\"#{attribution}\"]\n#{body}\n[/quote]\n"
  end

  # remove attachments
  text = text.gsub(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")

  # [THREAD]<thread_id>[/THREAD]
  # ==> http://my.discourse.org/t/slug/<topic_id>
  text = text.gsub(/\[thread\](\d+)\[\/thread\]/i) do
    untouched = $&
    target = topic_lookup_from_imported_post_id("thread-#{$1}")
    target ? target[:url] : untouched
  end

  # [THREAD=<thread_id>]...[/THREAD]
  # ==> [...](http://my.discourse.org/t/slug/<topic_id>)
  text = text.gsub(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do
    untouched, label = $&, $2
    target = topic_lookup_from_imported_post_id("thread-#{$1}")
    target ? "[#{label}](#{target[:url]})" : untouched
  end

  # [POST]<post_id>[/POST]
  # ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
  text = text.gsub(/\[post\](\d+)\[\/post\]/i) do
    untouched = $&
    target = topic_lookup_from_imported_post_id($1)
    target ? target[:url] : untouched
  end

  # [POST=<post_id>]...[/POST]
  # ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>)
  text = text.gsub(/\[post=(\d+)\](.+?)\[\/post\]/i) do
    untouched, label = $&, $2
    target = topic_lookup_from_imported_post_id($1)
    target ? "[#{label}](#{target[:url]})" : untouched
  end

  # drop whatever attachment, url and hr tags are left, and unwrap [size]
  text
    .gsub(/\[attachment[^\]]*\]/i, "")
    .gsub(/\[url[^\]]*\]/i, "")
    .gsub(/\[hr[^\]]*\]/i, "")
    .gsub(/\[size=.*?\](.*?)\[\/size\]/im, '\1')
end
# Adds MyBB attachments to the posts they belong to.
#
# Every imported post is found through its "import_id" post custom field. The
# attachment rows with that pid are uploaded via find_upload and the markup
# from html_for_upload is appended to the post, unless the post's raw text
# already contains it. A PostUpload record is created if none exists yet.
def import_attachments
  puts '', 'importing attachments...'

  PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id|
    attachments = mysql_query(<<-SQL
      SELECT *
        FROM #{TABLE_PREFIX}attachments
       WHERE pid = #{import_id}
    SQL
    )

    next if attachments.size < 1

    # loaded once per post rather than once per attachment
    post = Post.find(post_id)

    attachments.each do |attachment|
      upload, filename = find_upload(post, attachment)
      next unless upload

      html = html_for_upload(upload, filename)
      next if post.raw[html] # already attached, e.g. by an earlier run

      post.raw << "\n\n" << html
      post.save!
      PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
    end
  end
end
# Creates an upload for one MyBB attachment row.
#
# post       - the post the attachment belongs to; the upload is created for
#              post.user.id.
# attachment - attachment row; 'attachname' is the file name below
#              ATTACHMENT_DIR and 'filename' the original file name. The row
#              is not modified.
#
# Returns [upload, filename], or nil when the file is missing on disk or the
# upload is nil or invalid.
def find_upload(post, attachment)
  path = File.join(ATTACHMENT_DIR, attachment['attachname'])

  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  return unless File.exist?(path)

  real_filename = attachment['filename']
  # a name starting with a dot gets a random hex prefix
  real_filename = "#{SecureRandom.hex}#{real_filename}" if real_filename.start_with?('.')

  upload = create_upload(post.user.id, path, real_filename)

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return
  end

  [upload, real_filename]
end
# Builds the tag name for a MyBB thread prefix: the prefix is cleaned with
# DiscourseTagging.clean_tag and its first ':' (if any) is removed.
#
# Returns the cleaned tag name.
def tag_name(tag)
  cleaned = DiscourseTagging.clean_tag(tag)
  # only the first colon is dropped; later ones are kept
  cleaned[':'] = '' if cleaned.include?(':')
  cleaned
end
# Runs a SQL statement through @client with the cache_rows option set to
# false and returns whatever @client.query returns.
def mysql_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, **query_options)
end
end
# Build the importer and run it as soon as this script is executed.
importer = ImportScripts::MyBB.new
importer.perform
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment