Created
May 31, 2022 22:46
-
-
Save sarumpaet/a767b788d6ab5f10f5dbb40f3ba765e8 to your computer and use it in GitHub Desktop.
consolidate separate actor tables of two MediaWikis with shared user table into one actor table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT); | |
// Consolidate separate actor tables of two MediaWikis | |
// with a shared user table into one actor table. | |
// This addresses bugs https://phabricator.wikimedia.org/T299766 | |
// and https://phabricator.wikimedia.org/T243276 | |
// You will probably want to check if replace_actor_id() covers | |
// all the tables in your wiki, especially if you use many extensions. | |
// Test on a copy of the databases, keep a backup, and keep the output | |
// of this script for later reference. | |
$db_secondary = new mysqli("127.0.0.1", "db_user", "db_password", "db_name"); | |
$db_main = new mysqli("127.0.0.1", "db_user", "db_password", "db_name"); | |
$dry_run = 1; | |
$wait_for_confirmation = 1; | |
// make sure any new actors we create in primary db are higher than those in the secondary db | |
$query = "SELECT actor_id FROM wiki_actor ORDER BY actor_id DESC LIMIT 1"; | |
$result_main = $db_main->query($query)->fetch_assoc()["actor_id"]; | |
$result_sec = $db_secondary->query($query)->fetch_assoc()["actor_id"]; | |
if($result_sec >= $result_main) die("secondard db max actor id must be lower than main db max actor id"); | |
function _replace_actor_id($table_name, $col_name, $old_id, $new_id) { | |
global $db_secondary; | |
$query = "SELECT * FROM " . $table_name . " WHERE " . $col_name . " = " . $old_id; | |
$rows = $db_secondary->query($query); | |
$i=0; | |
foreach($rows as $row) { | |
$i++; | |
$pars = array("table_name"=>$table_name,"col_name"=>$col_name,"value_old"=>$old_id,"value_new"=>$new_id); | |
print("json_update_row: " . json_encode(array("pars"=>$pars,"data"=>$row)) . "\n"); | |
} | |
if($i>0) { echo "found " . $i . " entries in " . $table_name . "\n"; } | |
global $dry_run; | |
$query = "UPDATE " . $table_name . " SET " . $col_name . " = " . $new_id . " WHERE " . $col_name . " = " . $old_id; | |
print("$query\n"); | |
if($dry_run == 1) return; | |
$db_secondary->query($query); | |
print("updated " . $db_secondary->affected_rows . " rows.\n"); | |
} | |
$sec_search_actor_by_id = $db_secondary->prepare("SELECT * FROM wiki_actor WHERE actor_id = ?"); | |
function replace_actor_id($old_id, $new_id) { | |
global $sec_search_actor_by_id; | |
$sec_search_actor_by_id->bind_param("i", $new_id); | |
$sec_search_actor_by_id->execute(); | |
foreach($sec_search_actor_by_id->get_result() as $actor) { | |
print_r($actor); | |
die("Fatal: Trying to replace secondary actor id $old_id by $new_id but the latter is already taken"); | |
} | |
_replace_actor_id("wiki_archive", "ar_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_filearchive", "fa_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_image", "img_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_ipblocks", "ipb_by_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_logging", "log_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_oldimage", "oi_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_recentchanges", "rc_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_revision", "rev_actor", $old_id, $new_id); | |
_replace_actor_id("wiki_revision_actor_temp", "revactor_actor", $old_id, $new_id); | |
} | |
$main_create_actor = $db_main->prepare("INSERT INTO wiki_actor (actor_user, actor_name) VALUES (?, ?)"); | |
function create_main_actor($actor_user, $actor_name, $ext_actor_id) { | |
global $dry_run; | |
if($dry_run == 1) return; | |
global $main_create_actor; | |
$main_create_actor->bind_param("is", $actor_user, $actor_name); | |
$main_create_actor->execute(); | |
$new_id = $main_create_actor->insert_id; | |
print("json_created_actor: " . json_encode(array("actor_user"=>$actor_user,"actor_name"=>$actor_name,"secondary_actor_id"=>$ext_actor_id)) . "\n"); | |
return $new_id; | |
} | |
$main_search_actor = $db_main->prepare("SELECT * FROM wiki_actor WHERE actor_user = ? AND actor_name = ?"); | |
$main_search_actor_by_id = $db_main->prepare("SELECT * FROM wiki_actor WHERE actor_id = ?"); | |
$main_search_ip_actor = $db_main->prepare("SELECT * FROM wiki_actor WHERE actor_user IS NULL AND actor_name = ?"); | |
$main_search_user = $db_main->prepare("SELECT * FROM wiki_user WHERE user_id = ?"); | |
$actor_id_replacements = []; | |
// iterate through all actors in secondary db | |
$rows = $db_secondary->query("SELECT * FROM wiki_actor"); | |
foreach($rows as $sec_actor) { | |
echo "secondary: actor_name = " . $sec_actor['actor_name'] . "\n"; | |
$main_search_actor->bind_param("is", $sec_actor['actor_user'], $sec_actor['actor_name']); | |
$main_search_actor->execute(); | |
$actor_in_main = $main_search_actor->get_result(); | |
$found = 0; | |
foreach($actor_in_main as $actor_main) { | |
$found = 1; | |
// this secondary db actor has a corresponding main db actor entry (= user existed before the introduction of the actor table) | |
// nothing more to be done for this actor :) | |
echo "main: actor_name = " . $actor_main['actor_name'] . "\n"; | |
echo "mapping: secondary actor_id " . $sec_actor['actor_id'] . " => main actor_id " . $actor_main['actor_id'] . "\n"; | |
} | |
if($found == 0) { | |
// this secondary actor has no corresponding main db actor entry; look up its associated main db user | |
$main_search_user->bind_param("i", $sec_actor['actor_user']); | |
$main_search_user->execute(); | |
$users_in_main = $main_search_user->get_result(); | |
foreach($users_in_main as $user_main) { | |
$found = 1; | |
echo "main: user_name = " . $user_main['user_name'] . " (secondary actor_name: " . $sec_actor['actor_name'] . ")\n"; | |
if($user_main['user_name'] != $sec_actor['actor_name']) die("user names do not match"); | |
echo "mapping: secondary actor_id " . $sec_actor['actor_id'] . " => main user_id " . $user_main['user_id'] . "\n"; | |
if($wait_for_confirmation == 1) readline("Press enter..."); | |
$main_search_actor_by_id->bind_param("i", $sec_actor['actor_id']); | |
$main_search_actor_by_id->execute(); | |
$actor_list = $main_search_actor_by_id->get_result(); | |
$taken = 0; | |
foreach($actor_list as $taken_actor_id) { | |
$taken = 1; | |
echo "secondary actor_id taken by another actor in main.\n"; | |
echo "creating new actor in main and changing actor ids in db_secondary\n"; | |
$new_actor_id = create_main_actor($user_main['user_id'], $user_main['user_name'], $sec_actor['actor_id']); | |
replace_actor_id($sec_actor['actor_id'], $new_actor_id); | |
} | |
if($taken == 0) { | |
die("main actor_id still available (this should not happen)"); | |
} | |
} | |
} | |
if($found == 0) { | |
$main_search_ip_actor->bind_param("s", $sec_actor['actor_name']); | |
$main_search_ip_actor->execute(); | |
$actors_in_main = $main_search_ip_actor->get_result(); | |
foreach($actors_in_main as $actor_main) { | |
echo "found a main actor corresponding to secondary IP actor, changing actor ids in secondary db to high ids\n"; | |
if($wait_for_confirmation == 1) readline("Press enter..."); | |
$tmp_new_id = $sec_actor['actor_id'] + 100000; | |
$actor_id_replacements[] = [$tmp_new_id, $actor_main['actor_id']]; | |
replace_actor_id($sec_actor['actor_id'], $tmp_new_id); | |
$found = 1; | |
} | |
if($found == 0) { | |
echo "no main user found for this secondary actor, creating IP actor and changing actor ids in secondary db\n"; | |
if($wait_for_confirmation == 1) readline("Press enter..."); | |
$new_actor_id = create_main_actor(NULL, $sec_actor['actor_name'], $sec_actor['actor_id']); | |
replace_actor_id($sec_actor['actor_id'], $new_actor_id); | |
$found = 1; | |
} | |
} | |
if($found == 1) { | |
print("json_secondary_actor_deleted: " . json_encode($sec_actor) . "\n"); | |
$db_secondary->query("DELETE FROM wiki_actor WHERE actor_id = " . $sec_actor['actor_id'] . ";"); | |
} else { | |
print("*********** could not handle secondary actor " . $sec_actor["actor_id"] . "\n"); | |
} | |
echo "\n"; | |
} | |
echo "\nFinishing up IP actors...\n"; | |
foreach($actor_id_replacements as $actor_id_replacement) { | |
replace_actor_id($actor_id_replacement[0], $actor_id_replacement[1]); | |
} | |
echo "\nDone."; | |
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment