Skip to content

Instantly share code, notes, and snippets.

@subbuss
Created September 17, 2018 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save subbuss/ffcfed641784f64945ebd7e49fc448cb to your computer and use it in GitHub Desktop.
Save subbuss/ffcfed641784f64945ebd7e49fc448cb to your computer and use it in GitHub Desktop.
<?php
/**
 * Token handler that converts `mw-quote` tokens (runs of 2, 3 or 5
 * apostrophes) into italic (`i`) and bold (`b`) start/end tag tokens.
 *
 * Once the first quote is seen, every following token is buffered into
 * "chunks". Chunks at even indexes hold ordinary tokens; chunks at odd
 * indexes hold exactly one quote token each. When a newline, a `td`/`th`
 * tag or the end of input arrives, the buffered quotes are balanced,
 * rewritten as tags and the whole buffer is returned.
 */
class QuoteTransformer extends TokenHandler {
    /** @var bool True while tokens are being buffered after a quote was seen. */
    public $isActive;

    /** @var array[] Completed chunks; odd indexes hold a single quote token. */
    public $chunks = [];

    /** @var array Tokens collected since the last chunk boundary. */
    public $currentChunk = [];

    /** @var array Most recent still-open start tag token, keyed by tag name ('b' / 'i'). */
    public $last = [];

    /**
     * @param mixed $manager Token transform manager; forwarded to the parent handler.
     * @param mixed $options Handler options; forwarded to the parent handler.
     */
    public function __construct( $manager, $options ) {
        parent::__construct( $manager, $options );
        $this->isActive = false;
    }

    /**
     * Rank used when registering the quote, newline, td/th and end transforms.
     *
     * @return float
     */
    public static function quoteAndNewlineRank() {
        return 2.1;
    }

    /**
     * Rank used when registering the catch-all ("any") transform.
     *
     * @return float
     */
    public static function anyRank() {
        return 2.101;
    }

    /**
     * Register the `mw-quote` tag transform and put the handler into its
     * initial (inactive, empty) state.
     */
    public function init() {
        $this->manager->addTransform(
            function ( $token, $tokenManager, $prevToken ) {
                // The callback must hand the handler's result back to the manager.
                return $this->onQuote( $token, $tokenManager, $prevToken );
            },
            'QuoteTransformer:onQuote',
            self::quoteAndNewlineRank(),
            'tag',
            'mw-quote'
        );
        $this->reset();
    }

    /**
     * Drop all buffered tokens and tag bookkeeping and mark the handler inactive.
     */
    public function reset() {
        $this->chunks = [];
        $this->currentChunk = [];
        $this->last = [];
        $this->isActive = false;
    }

    /**
     * Close the current chunk: append it to the chunk list and start an empty one.
     */
    public function _startNewChunk() {
        $this->chunks[] = $this->currentChunk;
        $this->currentChunk = [];
    }

    /**
     * Handle a `mw-quote` token.
     *
     * On the first quote the buffering transforms (newline, td, th, end, any)
     * are registered and `$prevToken` is stored as the first entry of chunk 0
     * so that processQuotes() can inspect the text preceding the quote; that
     * entry is removed again before the buffer is returned.
     *
     * @param object $token Quote token; `$token->value` is the apostrophe run.
     * @param mixed $tokenManager Unused here; part of the transform callback signature.
     * @param mixed $prevToken Token that preceded the quote, if any.
     * @return array Always an empty array: the quote is buffered, not emitted.
     */
    public function onQuote( $token, $tokenManager, $prevToken ) {
        // The value is a string of apostrophes, so measure it with strlen().
        $qlen = strlen( $token->value );

        $this->manager->env->log(
            "trace/quote",
            $this->manager->pipelineId,
            "QUOTE |",
            function () use ( $token ) {
                return json_encode( $token );
            }
        );

        if ( !$this->isActive ) {
            $processQuotes = function ( $token ) {
                return $this->processQuotes( $token );
            };
            $onAny = function ( $token ) {
                return $this->onAny( $token );
            };

            $rank = self::quoteAndNewlineRank();
            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes', $rank, 'newline' );
            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes', $rank, 'tag', 'td' );
            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes', $rank, 'tag', 'th' );
            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes:end', $rank, 'end' );
            $this->manager->addTransform( $onAny, 'QuoteTransformer:onAny', self::anyRank(), 'any' );

            $this->isActive = true;
            // Keep the real previous token (or '' when there is none) as context.
            $this->currentChunk[] = $prevToken ?? '';
        }

        if ( $qlen === 2 || $qlen === 3 || $qlen === 5 ) {
            // Give the quote a chunk of its own so it always lands on an odd index.
            $this->_startNewChunk();
            $this->currentChunk[] = $token;
            $this->_startNewChunk();
        } else {
            assert( false, "should be transformed by tokenizer" );
        }

        return [];
    }

    /**
     * Buffer any non-quote token while the handler is active.
     *
     * @param mixed $token
     * @return array Always an empty array: the token is buffered, not emitted.
     */
    public function onAny( $token ) {
        $this->manager->env->log(
            "trace/quote",
            $this->manager->pipelineId,
            "ANY |",
            function () use ( $token ) {
                // Parenthesised so the JSON payload is appended in both cases.
                return ( !$this->isActive ? " ---> " : "" ) . json_encode( $token );
            }
        );

        $this->currentChunk[] = $token;
        return [];
    }

    /**
     * Balance the buffered quotes, convert them to tags and flush the buffer.
     *
     * Triggered by newline, td/th tag and end tokens. HTML-syntax td/th tags
     * and calls made while inactive pass the token through unchanged.
     *
     * @param mixed $token The token that triggered processing.
     * @return array Either `[ 'token' => $token ]` (pass-through) or
     *   `[ 'tokens' => [...] ]` holding the flat list of buffered tokens
     *   followed by `$token`.
     */
    public function processQuotes( $token ) {
        $this->manager->env->log(
            "trace/quote",
            $this->manager->pipelineId,
            "NL |",
            function () use ( $token ) {
                return ( !$this->isActive ? " ---> " : "" ) . json_encode( $token );
            }
        );

        // Not every triggering token carries a name or dataAttribs, hence `??`.
        if (
            in_array( $token->name ?? null, [ 'td', 'th' ], true ) &&
            ( $token->dataAttribs->stx ?? null ) === 'html'
        ) {
            return [ "token" => $token ];
        }

        if ( !$this->isActive ) {
            return [ "token" => $token ];
        }

        // Count how many quotes toggle italics / bold (a 5-quote toggles both).
        $numbold = 0;
        $numitalics = 0;
        $numChunks = count( $this->chunks );
        for ( $i = 1; $i < $numChunks; $i += 2 ) {
            assert( count( $this->chunks[$i] ) === 1 );
            $qlen = strlen( $this->chunks[$i][0]->value );
            if ( $qlen === 2 || $qlen === 5 ) {
                $numitalics++;
            }
            if ( $qlen === 3 || $qlen === 5 ) {
                $numbold++;
            }
        }

        // With an odd number of both, one bold is turned into apostrophe + italic.
        // Preference: a bold preceded by a single-letter word, then by a
        // multi-letter word, then by a space.
        if ( $numitalics % 2 === 1 && $numbold % 2 === 1 ) {
            $firstsingleletterword = -1;
            $firstmultiletterword = -1;
            $firstspace = -1;

            for ( $i = 1; $i < $numChunks; $i += 2 ) {
                if ( strlen( $this->chunks[$i][0]->value ) !== 3 ) {
                    continue;
                }

                // Gather up to the last two characters of text preceding the quote.
                $prevChunk = $this->chunks[$i - 1];
                $ctxPrevToken = '';
                for (
                    $j = count( $prevChunk ) - 1;
                    strlen( $ctxPrevToken ) < 2 && $j >= 0;
                    $j--
                ) {
                    if ( is_string( $prevChunk[$j] ) ) {
                        $ctxPrevToken = $prevChunk[$j] . $ctxPrevToken;
                    }
                }

                // Byte-wise inspection is sufficient for the space comparisons
                // below; null stands in for "no such character".
                $ctxLen = strlen( $ctxPrevToken );
                $lastchar = $ctxLen >= 1 ? $ctxPrevToken[$ctxLen - 1] : null;
                $secondtolastchar = $ctxLen >= 2 ? $ctxPrevToken[$ctxLen - 2] : null;

                if ( $lastchar === ' ' && $firstspace === -1 ) {
                    $firstspace = $i;
                } elseif ( $lastchar !== ' ' ) {
                    if ( $secondtolastchar === ' ' && $firstsingleletterword === -1 ) {
                        $firstsingleletterword = $i;
                        // Best possible candidate found; stop searching.
                        break;
                    } elseif ( $firstmultiletterword === -1 ) {
                        $firstmultiletterword = $i;
                    }
                }
            }

            if ( $firstsingleletterword > -1 ) {
                $this->convertBold( $firstsingleletterword );
            } elseif ( $firstmultiletterword > -1 ) {
                $this->convertBold( $firstmultiletterword );
            } elseif ( $firstspace > -1 ) {
                $this->convertBold( $firstspace );
            }
            // Otherwise no candidate was found and nothing is converted.
        }

        $this->convertQuotesToTags();

        // Flush everything, including the triggering token itself.
        $this->currentChunk[] = $token;
        $this->_startNewChunk();
        // Drop the context entry that onQuote() stored at the front of chunk 0.
        array_shift( $this->chunks[0] );
        // Flatten the chunks into a single list of tokens.
        $res = [ "tokens" => array_merge( [], ...$this->chunks ) ];

        $this->manager->env->log(
            "trace/quote",
            $this->manager->pipelineId,
            "----->",
            function () use ( $res ) {
                return json_encode( $res['tokens'] );
            }
        );

        // Go back to the inactive state and unregister the buffering transforms.
        $this->reset();
        $quoteAndNewlineRank = self::quoteAndNewlineRank();
        $anyRank = self::anyRank();
        $this->manager->removeTransform( $quoteAndNewlineRank, 'end' );
        $this->manager->removeTransform( $quoteAndNewlineRank, 'tag', 'td' );
        $this->manager->removeTransform( $quoteAndNewlineRank, 'tag', 'th' );
        $this->manager->removeTransform( $quoteAndNewlineRank, 'newline' );
        $this->manager->removeTransform( $anyRank, 'any' );

        return $res;
    }

    /**
     * Turn the 3-quote (bold) at chunk `$i` into a literal apostrophe followed
     * by a 2-quote (italic).
     *
     * @param int $i Odd chunk index holding a single 3-quote token.
     */
    public function convertBold( $i ) {
        assert(
            $i > 0 &&
            count( $this->chunks[$i] ) === 1 &&
            strlen( $this->chunks[$i][0]->value ) === 3
        );

        // The first apostrophe becomes plain text at the end of the previous chunk.
        $this->chunks[$i - 1][] = "'";

        $oldbold = $this->chunks[$i][0];
        $tsr = $oldbold->dataAttribs->tsr ?? null;
        if ( $tsr ) {
            // The remaining quote starts one character later in the source.
            $tsr = [ $tsr[0] + 1, $tsr[1] ];
        }

        // NOTE(review): dataAttribs is read with `->` everywhere in this class,
        // so it is passed as an object here; confirm against SelfclosingTagTk.
        $newbold = new SelfclosingTagTk( 'mw-quote', [], (object)[ "tsr" => $tsr ] );
        $newbold->value = "''";
        $this->chunks[$i] = [ $newbold ];
    }

    /**
     * Walk all buffered quotes and replace each with the start/end tags implied
     * by the current nesting state, then close anything still open.
     *
     * `$state` is one of '', 'i', 'b', 'bi', 'ib' (letters in opening order) or
     * 'both' for a 5-quote whose opening order is not decided yet.
     */
    public function convertQuotesToTags() {
        $lastboth = -1;
        $state = '';

        $numChunks = count( $this->chunks );
        for ( $i = 1; $i < $numChunks; $i += 2 ) {
            assert( count( $this->chunks[$i] ) === 1 );
            $qlen = strlen( $this->chunks[$i][0]->value );

            if ( $qlen === 2 ) {
                if ( $state === 'i' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ) ] );
                    $state = '';
                } elseif ( $state === 'bi' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ) ] );
                    $state = 'b';
                } elseif ( $state === 'ib' ) {
                    // Mis-nested: close b, close i, reopen b.
                    $this->quoteToTag(
                        $i,
                        [ new EndTagTk( 'b' ), new EndTagTk( 'i' ), new TagTk( 'b' ) ],
                        "bogus two"
                    );
                    $state = 'b';
                } elseif ( $state === 'both' ) {
                    $this->quoteToTag( $lastboth, [ new TagTk( 'b' ), new TagTk( 'i' ) ] );
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ) ] );
                    $state = 'b';
                } else {
                    // $state is '' or 'b' here.
                    $this->quoteToTag( $i, [ new TagTk( 'i' ) ] );
                    $state .= 'i';
                }
            } elseif ( $qlen === 3 ) {
                if ( $state === 'b' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ) ] );
                    $state = '';
                } elseif ( $state === 'ib' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ) ] );
                    $state = 'i';
                } elseif ( $state === 'bi' ) {
                    // Mis-nested: close i, close b, reopen i.
                    $this->quoteToTag(
                        $i,
                        [ new EndTagTk( 'i' ), new EndTagTk( 'b' ), new TagTk( 'i' ) ],
                        "bogus two"
                    );
                    $state = 'i';
                } elseif ( $state === 'both' ) {
                    $this->quoteToTag( $lastboth, [ new TagTk( 'i' ), new TagTk( 'b' ) ] );
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ) ] );
                    $state = 'i';
                } else {
                    // $state is '' or 'i' here.
                    $this->quoteToTag( $i, [ new TagTk( 'b' ) ] );
                    $state .= 'b';
                }
            } elseif ( $qlen === 5 ) {
                if ( $state === 'b' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ), new TagTk( 'i' ) ] );
                    $state = 'i';
                } elseif ( $state === 'i' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ), new TagTk( 'b' ) ] );
                    $state = 'b';
                } elseif ( $state === 'bi' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ), new EndTagTk( 'b' ) ] );
                    $state = '';
                } elseif ( $state === 'ib' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ), new EndTagTk( 'i' ) ] );
                    $state = '';
                } elseif ( $state === 'both' ) {
                    $this->quoteToTag( $lastboth, [ new TagTk( 'i' ), new TagTk( 'b' ) ] );
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ), new EndTagTk( 'i' ) ] );
                    $state = '';
                } else {
                    // Defer the decision until the next quote shows the nesting order.
                    $lastboth = $i;
                    $state = 'both';
                }
            }
        }

        // A trailing undecided 5-quote opens bold, then italic.
        if ( $state === 'both' ) {
            $this->quoteToTag( $lastboth, [ new TagTk( 'b' ), new TagTk( 'i' ) ] );
            $state = 'bi';
        }

        // Auto-close whatever is still open, innermost first.
        if ( $state === 'b' || $state === 'ib' ) {
            $this->currentChunk[] = new EndTagTk( 'b' );
            $this->last['b']->dataAttribs->autoInsertedEnd = true;
        }
        if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
            $this->currentChunk[] = new EndTagTk( 'i' );
            $this->last['i']->dataAttribs->autoInsertedEnd = true;
        }
        if ( $state === 'bi' ) {
            $this->currentChunk[] = new EndTagTk( 'b' );
            $this->last['b']->dataAttribs->autoInsertedEnd = true;
        }
    }

    /**
     * Replace the quote token in chunk `$chunk` with the given tag tokens and,
     * when the quote carries a `tsr` source range, distribute that range over
     * the tags (3 characters per `b`, 2 per `i`).
     *
     * @param int $chunk Odd chunk index holding a single quote token.
     * @param array $tags Tag tokens that replace the quote, in output order.
     * @param mixed $ignoreBogusTwo Truthy for the three-tag mis-nesting fix-up:
     *   tag 0 and tag 2 are then flagged as auto-inserted instead of being
     *   given a source range.
     */
    public function quoteToTag( $chunk, $tags, $ignoreBogusTwo = false ) {
        assert( count( $this->chunks[$chunk] ) === 1 );

        $result = [];
        $oldtag = $this->chunks[$chunk][0];
        $tsr = $oldtag->dataAttribs->tsr ?? null;
        $startpos = $tsr ? $tsr[0] : null;
        $endpos = $tsr ? $tsr[1] : null;

        foreach ( array_values( $tags ) as $i => $tag ) {
            if ( $tsr ) {
                if ( $i === 0 && $ignoreBogusTwo ) {
                    // The matching open tag is being closed without a real quote.
                    $this->last[$tag->name]->dataAttribs->autoInsertedEnd = true;
                } elseif ( $i === 2 && $ignoreBogusTwo ) {
                    $tag->dataAttribs->autoInsertedStart = true;
                } elseif ( $tag->name === 'b' ) {
                    $tag->dataAttribs->tsr = [ $startpos, $startpos + 3 ];
                    $startpos = $tag->dataAttribs->tsr[1];
                } elseif ( $tag->name === 'i' ) {
                    $tag->dataAttribs->tsr = [ $startpos, $startpos + 2 ];
                    $startpos = $tag->dataAttribs->tsr[1];
                } else {
                    assert( false );
                }
            }

            // Remember open tags; an end tag clears the entry for its name.
            $this->last[$tag->name] = ( $tag instanceof EndTagTk ) ? null : $tag;
            $result[] = $tag;
        }

        if ( $tsr ) {
            assert(
                $startpos === $endpos,
                'quote source range mismatch: ' . json_encode( [ $startpos, $endpos ] )
            );
        }

        $this->chunks[$chunk] = $result;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment