Skip to content

Instantly share code, notes, and snippets.

@ebernhardson
Created May 15, 2013 23:19
Show Gist options
  • Save ebernhardson/5588207 to your computer and use it in GitHub Desktop.
Save ebernhardson/5588207 to your computer and use it in GitHub Desktop.
/**
 * Maintenance script that populates the event_page_id column of echo_event
 *
 * Rows are read in batches through a DB_SLAVE connection, turned into column
 * updates by EchoSuppressionRowUpdateGenerator and written back through a
 * DB_MASTER connection.
 *
 * @ingroup Maintenance
 */
class updateEchoSchemaForSuppression extends Maintenance {

	public function __construct() {
		parent::__construct();
		$this->mBatchSize = 500;
	}

	/**
	 * Build the batch reader over echo_event rows that still carry a page
	 * title but have no page id yet.
	 *
	 * @return EchoBatchRowIterator
	 */
	protected function createReader() {
		$reader = new EchoBatchRowIterator(
			MWEchoDbFactory::getDB( DB_SLAVE ),
			'echo_event',
			'event_id',
			$this->mBatchSize
		);
		$reader->addCondition( "event_page_title IS NOT NULL" );
		$reader->addCondition( "event_page_id IS NULL" );

		return $reader;
	}

	/**
	 * Build the writer that applies the generated updates to echo_event.
	 *
	 * @return EchoBatchRowWriter
	 */
	protected function createWriter() {
		// The cluster name is global configuration. Without this declaration
		// $wgEchoCluster would be an undefined local and the writer would be
		// handed null instead of the configured cluster.
		global $wgEchoCluster;

		return new EchoBatchRowWriter(
			MWEchoDbFactory::getDB( DB_MASTER ),
			$wgEchoCluster,
			'echo_event',
			'event_id'
		);
	}

	/**
	 * @return EchoRowUpdateGenerator
	 */
	protected function createUpdateGenerator() {
		return new EchoSuppressionRowUpdateGenerator;
	}

	/**
	 * Wire reader, writer and generator together and run the update,
	 * forwarding its progress messages to this script's output.
	 */
	public function execute() {
		$update = new EchoSchemaUpdate(
			$this->createReader(),
			$this->createWriter(),
			$this->createUpdateGenerator()
		);

		// PHP 5.3 closures cannot use $this, so capture the script explicitly
		$self = $this;
		$update->setOutput( function( $text ) use( $self ) {
			$self->__outputInternal( $text );
		} );

		$update->execute();
	}

	/**
	 * Internal use only, exists because 5.3 Closures have no access to protected methods
	 *
	 * @param $text string Message to pass on to Maintenance::output()
	 */
	public function __outputInternal( $text ) {
		$this->output( $text );
	}
}
/**
 * Drives a batched schema update: pulls batches of rows from a reader, asks a
 * generator for the column changes of each row and hands the non-empty
 * changes of every batch to a writer.
 */
class EchoSchemaUpdate {
	/** @var EchoBatchRowIterator Source of row batches */
	protected $reader;

	/** @var EchoBatchRowWriter Sink for the generated updates */
	protected $writer;

	/** @var EchoRowUpdateGenerator Computes the update for a single row */
	protected $generator;

	/** @var Closure Receives progress messages; defaults to a noop */
	protected $output;

	/**
	 * @param $reader EchoBatchRowIterator Iterator yielding arrays of rows
	 * @param $writer EchoBatchRowWriter Writer receiving array( id => update )
	 * @param $generator EchoRowUpdateGenerator Generator of per-row updates
	 */
	public function __construct( EchoBatchRowIterator $reader, EchoBatchRowWriter $writer, EchoRowUpdateGenerator $generator ) {
		$this->reader = $reader;
		$this->writer = $writer;
		$this->generator = $generator;
		$this->output = function() {}; // noop
	}

	/**
	 * Send a progress message to the configured output callback.
	 *
	 * @param $text string
	 */
	protected function output( $text ) {
		// Copy to a local first: $this->output( ... ) would call this method
		$callback = $this->output;
		$callback( $text );
	}

	/**
	 * @param $output Closure Callback accepting a single string argument
	 */
	public function setOutput( Closure $output ) {
		$this->output = $output;
	}

	/**
	 * Run the update over every batch the reader yields.
	 */
	public function execute() {
		foreach ( $this->reader as $rows ) {
			$updates = array();
			foreach ( $rows as $row ) {
				// Method name must match EchoRowUpdateGenerator::generateRowUpdate
				list( $id, $update ) = $this->generator->generateRowUpdate( $row );
				if ( $update ) {
					$updates[$id] = $update;
				}
			}

			if ( $updates ) {
				// Report the size of the batch, not of the last row's update
				$this->output( "Processing " . count( $updates ) . " rows\n" );
				$this->writer->write( $updates );
			}
		}

		$this->output( "Completed\n" );
	}
}
/**
 * Applies a batch of per-row updates to one table inside a single
 * transaction, then waits for the slaves of the given cluster.
 */
class EchoBatchRowWriter {
	/** @var DatabaseType Connection the updates are written through */
	protected $db;

	/** @var string|bool Cluster name handed to wfWaitForSlaves() */
	protected $clusterName;

	/** @var string Name of the table being updated */
	protected $table;

	/** @var string Column used to identify the row to update */
	protected $idField;

	/**
	 * @param $db DatabaseType Connection to write through
	 * @param $clusterName string|bool Cluster passed on to wfWaitForSlaves()
	 * @param $table string Table to update
	 * @param $idField string Column matched against the keys of $updates
	 */
	public function __construct( DatabaseType $db, $clusterName, $table, $idField ) {
		$this->db = $db;
		$this->clusterName = $clusterName;
		$this->table = $table;
		$this->idField = $idField;
	}

	/**
	 * Write one batch of updates.
	 *
	 * @param $updates array Map of row id => associative array of column
	 *  values to set on that row
	 */
	public function write( array $updates ) {
		if ( !$updates ) {
			// Nothing to write: skip the empty transaction and the slave wait
			return;
		}

		$this->db->begin();

		foreach ( $updates as $id => $update ) {
			$this->db->update(
				$this->table,
				$update,
				array( $this->idField => $id ),
				__METHOD__
			);
		}

		$this->db->commit();
		wfWaitForSlaves( false, false, $this->clusterName );
	}
}
/**
 * Iterates over the rows of a table in batches. Each iteration step yields an
 * array of up to $batchSize rows, ordered by the id field; the next batch
 * starts after the largest id seen so far.
 */
class EchoBatchRowIterator implements Iterator {
	/** @var DatabaseType Connection the batches are selected from */
	protected $db;

	/** @var string Table to read */
	protected $table;

	/** @var string Column used for ordering and for paging between batches */
	protected $idField;

	/** @var int Maximum number of rows per batch */
	protected $batchSize;

	/** @var array Extra WHERE conditions applied to every batch query */
	protected $conditions = array();

	/** @var int Value of the id field in the last row of the current batch */
	protected $maxId = 0;

	/** @var array Rows of the most recently fetched batch */
	protected $current = array();

	/**
	 * @param $db DatabaseType Connection to select from
	 * @param $table string Table to read
	 * @param $idField string Column to order and page by
	 * @param $batchSize int Maximum number of rows per batch
	 */
	public function __construct( DatabaseType $db, $table, $idField, $batchSize ) {
		$this->db = $db;
		$this->table = $table;
		$this->idField = $idField;
		$this->batchSize = $batchSize;
	}

	/**
	 * Add a WHERE condition applied to every batch. Conditions accumulate.
	 *
	 * @param $condition string Condition in the form accepted by select()
	 */
	public function addCondition( $condition ) {
		// Append; assigning would replace the array with a string and make
		// the "[] =" in next() fail
		$this->conditions[] = $condition;
	}

	/**
	 * @return array The rows of the current batch
	 */
	// The attribute below is parsed as a plain comment before PHP 8
	#[\ReturnTypeWillChange]
	public function current() {
		return $this->current;
	}

	/**
	 * @return int Value of the id field in the last row of the current batch
	 */
	#[\ReturnTypeWillChange]
	public function key() {
		return $this->maxId;
	}

	/**
	 * Restart from the beginning of the table and load the first batch.
	 */
	#[\ReturnTypeWillChange]
	public function rewind() {
		$this->maxId = 0;
		$this->next();
	}

	/**
	 * @return bool False once a fetch has returned no rows
	 */
	#[\ReturnTypeWillChange]
	public function valid() {
		return count( $this->current ) > 0;
	}

	/**
	 * Fetch the batch following the last id seen.
	 */
	#[\ReturnTypeWillChange]
	public function next() {
		$conditions = $this->conditions;
		$conditions[] = "{$this->idField} > {$this->maxId}";

		$res = $this->db->select(
			$this->table,
			array( '*' ),
			$conditions,
			__METHOD__,
			array(
				'LIMIT' => $this->batchSize,
				// Must be a keyed option; paging by maxId relies on id order
				'ORDER BY' => "{$this->idField} ASC",
			)
		);

		$this->current = iterator_to_array( $res );

		// If $this->current is empty then self::valid() will return false
		// so no need to handle edge case
		if ( $this->current ) {
			// end() needs the row array, not the result object
			$row = end( $this->current );
			$this->maxId = $row->{$this->idField};
		}
	}
}
/**
 * Contract for objects that turn a single database row into the column
 * changes to apply to it.
 */
interface EchoRowUpdateGenerator {
	/**
	 * Compute the update for one row.
	 *
	 * @param $row object Database row
	 * @return array Implementations in this file return array( row id, update array )
	 */
	public function generateRowUpdate( $row );
}
/**
 * Generates the echo_event column updates needed for suppression support:
 * page titles are resolved to page ids, and title/namespace pairs stored in
 * event_extra of page-linked events are replaced by a page id.
 */
class EchoSuppressionRowUpdateGenerator implements EchoRowUpdateGenerator
{
	/**
	 * Given a database row, generate an update array suitable for schema update
	 *
	 * @param $row object Database row from echo_event table
	 * @return [int, array] The row id to update and an associative array of column updates
	 */
	public function generateRowUpdate( $row ) {
		$update = $this->updatePageIdFromTitle( $row );
		if ( $row->event_extra !== null && $row->event_type === 'page-linked' ) {
			$update = $this->updatePageLinkedExtraData( $row, $update );
		}

		return array( $row->event_id, $update );
	}

	/**
	 * Decode a serialized event_extra value, always yielding an array.
	 *
	 * @param $serialized string|null Serialized extra data
	 * @return array Decoded data, or an empty array when the value is empty
	 *  or does not decode to an array
	 */
	protected function unserializeExtra( $serialized ) {
		if ( !$serialized ) {
			return array();
		}
		// Data comes from the wiki's own echo_event table
		$extra = unserialize( $serialized );

		return is_array( $extra ) ? $extra : array();
	}

	/**
	 * Resolve the row's page title to a page id, or, when the title has no
	 * page id, move title and namespace into event_extra.
	 *
	 * @param $row object Database row from echo_event table
	 * @return array Column updates; empty when no Title could be created
	 */
	protected function updatePageIdFromTitle( $row ) {
		$update = array();
		// NOTE(review): the second argument of Title::newFromText is a default
		// namespace, not a forced one - confirm this is the intended lookup
		$title = Title::newFromText( $row->event_page_title, $row->event_page_namespace );
		if ( $title !== null ) {
			$pageId = $title->getArticleId();
			if ( $pageId ) {
				// If the title has a proper id from the database, store it
				$update['event_page_id'] = $pageId;
			} else {
				// For titles that do not refer to a WikiPage stored in the database
				// move the title/namespace into event_extra
				$extra = $this->unserializeExtra( $row->event_extra );
				$extra['page_title'] = $row->event_page_title;
				$extra['page_namespace'] = $row->event_page_namespace;
				$update['event_extra'] = serialize( $extra );
			}
		}

		return $update;
	}

	/**
	 * Replace link-from-title/link-from-namespace in the extra data of a
	 * page-linked event with link-from-page-id.
	 *
	 * @param $row object Database row from echo_event table
	 * @param $update array Column updates generated so far for this row
	 * @return array The column updates, with event_extra rewritten if needed
	 */
	protected function updatePageLinkedExtraData( $row, array $update ) {
		// Start from the pending event_extra when an earlier step already
		// rewrote it, so those changes are not overwritten below
		if ( isset( $update['event_extra'] ) ) {
			$extra = $this->unserializeExtra( $update['event_extra'] );
		} else {
			$extra = $this->unserializeExtra( $row->event_extra );
		}

		if ( isset( $extra['link-from-title'], $extra['link-from-namespace'] ) ) {
			$title = Title::newFromText( $extra['link-from-title'], $extra['link-from-namespace'] );
			unset( $extra['link-from-title'], $extra['link-from-namespace'] );
			// Link from page is always from a content page, if null or no article id it was
			// somehow invalid
			if ( $title !== null && $title->getArticleId() ) {
				$extra['link-from-page-id'] = $title->getArticleId();
			}

			$update['event_extra'] = serialize( $extra );
		}

		return $update;
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment