Skip to content

Instantly share code, notes, and snippets.

@sarumpaet
Created May 31, 2022 22:46
Show Gist options
  • Save sarumpaet/a767b788d6ab5f10f5dbb40f3ba765e8 to your computer and use it in GitHub Desktop.
Save sarumpaet/a767b788d6ab5f10f5dbb40f3ba765e8 to your computer and use it in GitHub Desktop.
consolidate separate actor tables of two MediaWikis with shared user table into one actor table
<?php
mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT);
// Consolidate separate actor tables of two MediaWikis
// with a shared user table into one actor table.
// This addresses bugs https://phabricator.wikimedia.org/T299766
// and https://phabricator.wikimedia.org/T243276
// You will probably want to check if replace_actor_id() covers
// all the tables in your wiki, especially if you use many extensions.
// Test on a copy of the databases, keep a backup, and keep the output
// of this script for later reference.
$db_secondary = new mysqli("127.0.0.1", "db_user", "db_password", "db_name");
$db_main = new mysqli("127.0.0.1", "db_user", "db_password", "db_name");
$dry_run = 1;
$wait_for_confirmation = 1;
// make sure any new actors we create in primary db are higher than those in the secondary db
$query = "SELECT actor_id FROM wiki_actor ORDER BY actor_id DESC LIMIT 1";
$result_main = $db_main->query($query)->fetch_assoc()["actor_id"];
$result_sec = $db_secondary->query($query)->fetch_assoc()["actor_id"];
if($result_sec >= $result_main) die("secondard db max actor id must be lower than main db max actor id");
function _replace_actor_id($table_name, $col_name, $old_id, $new_id) {
global $db_secondary;
$query = "SELECT * FROM " . $table_name . " WHERE " . $col_name . " = " . $old_id;
$rows = $db_secondary->query($query);
$i=0;
foreach($rows as $row) {
$i++;
$pars = array("table_name"=>$table_name,"col_name"=>$col_name,"value_old"=>$old_id,"value_new"=>$new_id);
print("json_update_row: " . json_encode(array("pars"=>$pars,"data"=>$row)) . "\n");
}
if($i>0) { echo "found " . $i . " entries in " . $table_name . "\n"; }
global $dry_run;
$query = "UPDATE " . $table_name . " SET " . $col_name . " = " . $new_id . " WHERE " . $col_name . " = " . $old_id;
print("$query\n");
if($dry_run == 1) return;
$db_secondary->query($query);
print("updated " . $db_secondary->affected_rows . " rows.\n");
}
$sec_search_actor_by_id = $db_secondary->prepare("SELECT * FROM wiki_actor WHERE actor_id = ?");
function replace_actor_id($old_id, $new_id) {
global $sec_search_actor_by_id;
$sec_search_actor_by_id->bind_param("i", $new_id);
$sec_search_actor_by_id->execute();
foreach($sec_search_actor_by_id->get_result() as $actor) {
print_r($actor);
die("Fatal: Trying to replace secondary actor id $old_id by $new_id but the latter is already taken");
}
_replace_actor_id("wiki_archive", "ar_actor", $old_id, $new_id);
_replace_actor_id("wiki_filearchive", "fa_actor", $old_id, $new_id);
_replace_actor_id("wiki_image", "img_actor", $old_id, $new_id);
_replace_actor_id("wiki_ipblocks", "ipb_by_actor", $old_id, $new_id);
_replace_actor_id("wiki_logging", "log_actor", $old_id, $new_id);
_replace_actor_id("wiki_oldimage", "oi_actor", $old_id, $new_id);
_replace_actor_id("wiki_recentchanges", "rc_actor", $old_id, $new_id);
_replace_actor_id("wiki_revision", "rev_actor", $old_id, $new_id);
_replace_actor_id("wiki_revision_actor_temp", "revactor_actor", $old_id, $new_id);
}
$main_create_actor = $db_main->prepare("INSERT INTO wiki_actor (actor_user, actor_name) VALUES (?, ?)");
function create_main_actor($actor_user, $actor_name, $ext_actor_id) {
global $dry_run;
if($dry_run == 1) return;
global $main_create_actor;
$main_create_actor->bind_param("is", $actor_user, $actor_name);
$main_create_actor->execute();
$new_id = $main_create_actor->insert_id;
print("json_created_actor: " . json_encode(array("actor_user"=>$actor_user,"actor_name"=>$actor_name,"secondary_actor_id"=>$ext_actor_id)) . "\n");
return $new_id;
}
$main_search_actor = $db_main->prepare("SELECT * FROM wiki_actor WHERE actor_user = ? AND actor_name = ?");
$main_search_actor_by_id = $db_main->prepare("SELECT * FROM wiki_actor WHERE actor_id = ?");
$main_search_ip_actor = $db_main->prepare("SELECT * FROM wiki_actor WHERE actor_user IS NULL AND actor_name = ?");
$main_search_user = $db_main->prepare("SELECT * FROM wiki_user WHERE user_id = ?");
$actor_id_replacements = [];
// iterate through all actors in secondary db
$rows = $db_secondary->query("SELECT * FROM wiki_actor");
foreach($rows as $sec_actor) {
echo "secondary: actor_name = " . $sec_actor['actor_name'] . "\n";
$main_search_actor->bind_param("is", $sec_actor['actor_user'], $sec_actor['actor_name']);
$main_search_actor->execute();
$actor_in_main = $main_search_actor->get_result();
$found = 0;
foreach($actor_in_main as $actor_main) {
$found = 1;
// this secondary db actor has a corresponding main db actor entry (= user existed before the introduction of the actor table)
// nothing more to be done for this actor :)
echo "main: actor_name = " . $actor_main['actor_name'] . "\n";
echo "mapping: secondary actor_id " . $sec_actor['actor_id'] . " => main actor_id " . $actor_main['actor_id'] . "\n";
}
if($found == 0) {
// this secondary actor has no corresponding main db actor entry; look up its associated main db user
$main_search_user->bind_param("i", $sec_actor['actor_user']);
$main_search_user->execute();
$users_in_main = $main_search_user->get_result();
foreach($users_in_main as $user_main) {
$found = 1;
echo "main: user_name = " . $user_main['user_name'] . " (secondary actor_name: " . $sec_actor['actor_name'] . ")\n";
if($user_main['user_name'] != $sec_actor['actor_name']) die("user names do not match");
echo "mapping: secondary actor_id " . $sec_actor['actor_id'] . " => main user_id " . $user_main['user_id'] . "\n";
if($wait_for_confirmation == 1) readline("Press enter...");
$main_search_actor_by_id->bind_param("i", $sec_actor['actor_id']);
$main_search_actor_by_id->execute();
$actor_list = $main_search_actor_by_id->get_result();
$taken = 0;
foreach($actor_list as $taken_actor_id) {
$taken = 1;
echo "secondary actor_id taken by another actor in main.\n";
echo "creating new actor in main and changing actor ids in db_secondary\n";
$new_actor_id = create_main_actor($user_main['user_id'], $user_main['user_name'], $sec_actor['actor_id']);
replace_actor_id($sec_actor['actor_id'], $new_actor_id);
}
if($taken == 0) {
die("main actor_id still available (this should not happen)");
}
}
}
if($found == 0) {
$main_search_ip_actor->bind_param("s", $sec_actor['actor_name']);
$main_search_ip_actor->execute();
$actors_in_main = $main_search_ip_actor->get_result();
foreach($actors_in_main as $actor_main) {
echo "found a main actor corresponding to secondary IP actor, changing actor ids in secondary db to high ids\n";
if($wait_for_confirmation == 1) readline("Press enter...");
$tmp_new_id = $sec_actor['actor_id'] + 100000;
$actor_id_replacements[] = [$tmp_new_id, $actor_main['actor_id']];
replace_actor_id($sec_actor['actor_id'], $tmp_new_id);
$found = 1;
}
if($found == 0) {
echo "no main user found for this secondary actor, creating IP actor and changing actor ids in secondary db\n";
if($wait_for_confirmation == 1) readline("Press enter...");
$new_actor_id = create_main_actor(NULL, $sec_actor['actor_name'], $sec_actor['actor_id']);
replace_actor_id($sec_actor['actor_id'], $new_actor_id);
$found = 1;
}
}
if($found == 1) {
print("json_secondary_actor_deleted: " . json_encode($sec_actor) . "\n");
$db_secondary->query("DELETE FROM wiki_actor WHERE actor_id = " . $sec_actor['actor_id'] . ";");
} else {
print("*********** could not handle secondary actor " . $sec_actor["actor_id"] . "\n");
}
echo "\n";
}
echo "\nFinishing up IP actors...\n";
foreach($actor_id_replacements as $actor_id_replacement) {
replace_actor_id($actor_id_replacement[0], $actor_id_replacement[1]);
}
echo "\nDone.";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment