Created
May 15, 2013 23:19
-
-
Save ebernhardson/5588207 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Maintenance script that populates the event_page_id column of echo_event
 *
 * Rows of echo_event that have an event_page_title but no event_page_id are
 * read in batches, converted into column updates by
 * EchoSuppressionRowUpdateGenerator and written back through
 * EchoBatchRowWriter.
 *
 * @ingroup Maintenance
 */
class updateEchoSchemaForSuppression extends Maintenance {

	public function __construct() {
		parent::__construct();
		// Number of rows fetched and written per batch
		$this->mBatchSize = 500;
	}

	/**
	 * Build the batch reader over echo_event, restricted to the rows that
	 * still have a title but no page id.
	 *
	 * @return EchoBatchRowIterator
	 */
	protected function createReader() {
		$reader = new EchoBatchRowIterator(
			MWEchoDbFactory::getDB( DB_SLAVE ),
			'echo_event',
			'event_id',
			$this->mBatchSize
		);
		$reader->addCondition( "event_page_title IS NOT NULL" );
		$reader->addCondition( "event_page_id IS NULL" );

		return $reader;
	}

	/**
	 * Build the writer that applies the generated updates to echo_event.
	 *
	 * @return EchoBatchRowWriter
	 */
	protected function createWriter() {
		// The cluster name lives in a global; without this declaration the
		// variable below would be an undefined local and null would be passed.
		global $wgEchoCluster;

		return new EchoBatchRowWriter(
			MWEchoDbFactory::getDB( DB_MASTER ),
			$wgEchoCluster,
			'echo_event',
			'event_id'
		);
	}

	/**
	 * Build the generator that turns an echo_event row into column updates.
	 *
	 * @return EchoRowUpdateGenerator
	 */
	protected function createUpdateGenerator() {
		return new EchoSuppressionRowUpdateGenerator;
	}

	/**
	 * Wire reader, writer and generator together and run the update,
	 * forwarding its progress messages to this script's output.
	 */
	public function execute() {
		$update = new EchoSchemaUpdate(
			$this->createReader(),
			$this->createWriter(),
			$this->createUpdateGenerator()
		);

		// PHP 5.3 closures cannot capture $this, so pass the script explicitly
		$self = $this;
		$update->setOutput( function( $text ) use( $self ) {
			$self->__outputInternal( $text );
		} );

		$update->execute();
	}

	/**
	 * Internal use only, exists because 5.3 Closures have no access to protected methods
	 *
	 * @param $text string Message to forward to output()
	 */
	public function __outputInternal( $text ) {
		$this->output( $text );
	}
}
/**
 * Runs a batched row update: every batch produced by the reader is passed row
 * by row through the generator, and the resulting non-empty updates are handed
 * to the writer one batch at a time.
 */
class EchoSchemaUpdate {
	/** @var EchoBatchRowIterator Source of row batches */
	protected $reader;

	/** @var EchoBatchRowWriter Destination for the generated updates */
	protected $writer;

	/** @var EchoRowUpdateGenerator Converts a row into a column update */
	protected $generator;

	/** @var Closure Receives progress messages; a no-op until setOutput() is called */
	protected $output;

	/**
	 * @param $reader EchoBatchRowIterator Iterator yielding batches of rows
	 * @param $writer EchoBatchRowWriter Writer receiving [ id => update ] maps
	 * @param $generator EchoRowUpdateGenerator Generator of per-row updates
	 */
	public function __construct( EchoBatchRowIterator $reader, EchoBatchRowWriter $writer, EchoRowUpdateGenerator $generator ) {
		$this->reader = $reader;
		$this->writer = $writer;
		$this->generator = $generator;
		$this->output = function() {}; // noop
	}

	/**
	 * Send a progress message to the configured output callback.
	 *
	 * @param $text string
	 */
	protected function output( $text ) {
		// Copy to a local first: $this->output( ... ) would call this method
		// rather than invoke the closure stored in the property.
		$output = $this->output;
		$output( $text );
	}

	/**
	 * Replace the output callback.
	 *
	 * @param $output Closure Called with a single string argument
	 */
	public function setOutput( Closure $output ) {
		$this->output = $output;
	}

	/**
	 * Process every batch from the reader and write the generated updates.
	 */
	public function execute() {
		foreach ( $this->reader as $rows ) {
			$updates = array();
			foreach ( $rows as $row ) {
				// generateRowUpdate() is the method declared by EchoRowUpdateGenerator
				list( $id, $update ) = $this->generator->generateRowUpdate( $row );
				// Rows that need no change yield an empty update and are skipped
				if ( $update ) {
					$updates[$id] = $update;
				}
			}

			if ( $updates ) {
				// Report the size of the whole batch, not of the last row's update
				$this->output( "Processing " . count( $updates ) . " rows\n" );
				$this->writer->write( $updates );
			}
		}

		$this->output( "Completed\n" );
	}
}
/**
 * Writes a batch of per-row updates to one table inside a single transaction,
 * then waits for the slaves of the given cluster before returning.
 */
class EchoBatchRowWriter {
	/** @var DatabaseType Connection the updates are issued on */
	protected $db;

	/** @var string Cluster name passed to wfWaitForSlaves() */
	protected $clusterName;

	/** @var string Table to update */
	protected $table;

	/** @var string Column used to select the row to update */
	protected $idField;

	/**
	 * @param $db DatabaseType Connection to write to
	 * @param $clusterName string Cluster name passed to wfWaitForSlaves()
	 * @param $table string Name of the table to update
	 * @param $idField string Name of the column identifying a row
	 */
	public function __construct( DatabaseType $db, $clusterName, $table, $idField ) {
		$this->db = $db;
		$this->clusterName = $clusterName;
		$this->table = $table;
		$this->idField = $idField;
	}

	/**
	 * Apply all updates of one batch.
	 *
	 * @param $updates array Map of row id => associative array of column => new value
	 */
	public function write( array $updates ) {
		$this->db->begin();

		foreach ( $updates as $id => $update ) {
			$this->db->update(
				$this->table,
				$update,
				array( $this->idField => $id ),
				__METHOD__
			);
		}

		$this->db->commit();
		// Called after every committed batch, before the next one is written
		wfWaitForSlaves( false, false, $this->clusterName );
	}
}
/**
 * Iterates over a table in batches ordered by an id column. Each iteration
 * step yields an array of rows; the next batch starts after the highest id
 * seen in the previous one.
 *
 * The #[\ReturnTypeWillChange] lines are plain comments before PHP 8 and
 * silence the Iterator return-type deprecation on PHP 8.1+.
 */
class EchoBatchRowIterator implements Iterator {
	/** @var DatabaseType Connection to read from */
	protected $db;

	/** @var string Table to read */
	protected $table;

	/** @var string Column used for ordering and paging */
	protected $idField;

	/** @var int Maximum number of rows per batch */
	protected $batchSize;

	/** @var array Extra WHERE conditions applied to every batch query */
	protected $conditions = array();

	/** @var mixed Value of the id column in the last row of the current batch */
	protected $maxId = 0;

	/** @var array Rows of the current batch; empty once the table is exhausted */
	protected $current = array();

	/**
	 * @param $db DatabaseType Connection to read from
	 * @param $table string Name of the table to iterate
	 * @param $idField string Name of the column to order and page by
	 * @param $batchSize int Maximum number of rows per batch
	 */
	public function __construct( DatabaseType $db, $table, $idField, $batchSize ) {
		$this->db = $db;
		$this->table = $table;
		$this->idField = $idField;
		$this->batchSize = $batchSize;
	}

	/**
	 * Add a WHERE condition that every batch query must satisfy. May be called
	 * several times; all conditions are kept.
	 *
	 * @param $condition string SQL condition fragment
	 */
	public function addCondition( $condition ) {
		// Append: assigning would drop earlier conditions and turn the
		// property into a string, which next() cannot append to
		$this->conditions[] = $condition;
	}

	/**
	 * @return array Rows of the current batch
	 */
	#[\ReturnTypeWillChange]
	public function current() {
		return $this->current;
	}

	/**
	 * @return mixed Id of the last row in the current batch
	 */
	#[\ReturnTypeWillChange]
	public function key() {
		return $this->maxId;
	}

	/**
	 * Restart from the beginning of the table and load the first batch.
	 */
	#[\ReturnTypeWillChange]
	public function rewind() {
		$this->maxId = 0;
		$this->next();
	}

	/**
	 * @return bool True while the current batch contains rows
	 */
	#[\ReturnTypeWillChange]
	public function valid() {
		return count( $this->current ) > 0;
	}

	/**
	 * Load the batch of rows whose id is greater than the last id seen.
	 */
	#[\ReturnTypeWillChange]
	public function next() {
		$conditions = $this->conditions;
		$conditions[] = "{$this->idField} > {$this->maxId}";

		$res = $this->db->select(
			$this->table,
			array( '*' ),
			$conditions,
			__METHOD__,
			array(
				'LIMIT' => $this->batchSize,
				// Must be a keyed option; paging by maxId relies on this ordering
				'ORDER BY' => "{$this->idField} ASC",
			)
		);

		$rows = iterator_to_array( $res );
		$this->current = $rows;

		// If the batch is empty then self::valid() will return false,
		// so maxId does not need to move
		if ( $rows ) {
			// Take the last row from the fetched array, not the result object
			$lastRow = end( $rows );
			$this->maxId = $lastRow->{$this->idField};
		}
	}
}
/**
 * Contract for objects that turn a single database row into the update that
 * should be applied to it.
 */
interface EchoRowUpdateGenerator {
	/**
	 * Generate the update for one row.
	 *
	 * @param $row object Database row
	 * @return array Two-element list: the row id, then the update for that row
	 */
	public function generateRowUpdate( $row );
}
/**
 * Generates the echo_event column updates for the suppression schema change:
 * page titles are replaced by page ids, both for the event's own page and for
 * the "link from" page stored in the extra data of page-linked events.
 */
class EchoSuppressionRowUpdateGenerator implements EchoRowUpdateGenerator
{
	/**
	 * Given a database row, generate an update array suitable for schema update
	 *
	 * @param $row object Database row from echo_event table
	 * @return [int, array] The row id to update and an associative array of column updates
	 */
	public function generateRowUpdate( $row ) {
		$update = $this->updatePageIdFromTitle( $row );
		if ( $row->event_extra !== null && $row->event_type === 'page-linked' ) {
			$update = $this->updatePageLinkedExtraData( $row, $update );
		}

		return array( $row->event_id, $update );
	}

	/**
	 * Decode a serialized event_extra value.
	 *
	 * NOTE(review): unserialize() is only safe on trusted data; this assumes
	 * event_extra is written exclusively by the application - confirm.
	 *
	 * @param $serialized string|null Serialized extra data
	 * @return array Decoded data, or an empty array when the value is empty or
	 *   does not decode to an array
	 */
	protected function decodeExtra( $serialized ) {
		if ( !$serialized ) {
			return array();
		}
		$extra = unserialize( $serialized );

		return is_array( $extra ) ? $extra : array();
	}

	/**
	 * Build the update that replaces the event's page title with a page id.
	 *
	 * @param $row object Database row from echo_event table
	 * @return array Column updates; empty when no Title could be built
	 */
	protected function updatePageIdFromTitle( $row ) {
		$update = array();
		$title = Title::newFromText( $row->event_page_title, $row->event_page_namespace );
		if ( $title !== null ) {
			$pageId = $title->getArticleId();
			if ( $pageId ) {
				// If the title has a proper id from the database, store it
				$update['event_page_id'] = $pageId;
			} else {
				// For titles that do not refer to a WikiPage stored in the database
				// move the title/namespace into event_extra
				$extra = $this->decodeExtra( $row->event_extra );
				$extra['page_title'] = $row->event_page_title;
				$extra['page_namespace'] = $row->event_page_namespace;
				$update['event_extra'] = serialize( $extra );
			}
		}

		return $update;
	}

	/**
	 * Replace link-from-title/link-from-namespace in the extra data of a
	 * page-linked event with link-from-page-id.
	 *
	 * @param $row object Database row from echo_event table
	 * @param $update array Column updates generated so far for this row
	 * @return array Column updates, with event_extra rewritten when needed
	 */
	protected function updatePageLinkedExtraData( $row, array $update ) {
		// Start from the pending event_extra when an earlier step already
		// rewrote it, so that step's changes are kept rather than overwritten
		// by a blob rebuilt from the stored row.
		$extra = $this->decodeExtra(
			isset( $update['event_extra'] ) ? $update['event_extra'] : $row->event_extra
		);

		if ( isset( $extra['link-from-title'], $extra['link-from-namespace'] ) ) {
			$title = Title::newFromText( $extra['link-from-title'], $extra['link-from-namespace'] );
			unset( $extra['link-from-title'], $extra['link-from-namespace'] );
			// Link from page is always from a content page, if null or no article id it was
			// somehow invalid
			if ( $title !== null && $title->getArticleId() ) {
				$extra['link-from-page-id'] = $title->getArticleId();
			}

			$update['event_extra'] = serialize( $extra );
		}

		return $update;
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment