Created
September 17, 2018 14:02
-
-
Save subbuss/ffcfed641784f64945ebd7e49fc448cb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Token handler that turns 'mw-quote' tokens (runs of 2, 3 or 5 apostrophes)
 * into opening / closing <i> and <b> tag tokens.
 *
 * Once the first quote token is seen, every following token is buffered
 * until a newline, a td/th tag or the end of input arrives. At that point the
 * buffered quotes are balanced, converted to tags and the whole buffer is
 * returned in one go.
 *
 * The buffer is stored as a list of chunks: even indexes hold ordinary
 * tokens, odd indexes hold exactly one quote token each.
 */
class QuoteTransformer extends TokenHandler {
    /** @var bool True while tokens are being buffered after a first quote. */
    public $isActive;

    /** @var array[] Finished chunks; odd indexes contain a single quote token. */
    public $chunks = [];

    /** @var array Tokens collected since the last chunk boundary. */
    public $currentChunk = [];

    /** @var array Last opening tag token per tag name ('b' / 'i'); null once closed. */
    public $last = [];

    /**
     * @param object $manager Token transform manager the handlers are registered on.
     * @param mixed $options Passed through to the parent handler unchanged.
     */
    public function __construct( $manager, $options ) {
        parent::__construct( $manager, $options );
        $this->isActive = false;
    }

    /**
     * Rank used for the quote, newline, td/th and end handlers.
     *
     * @return float
     */
    public static function quoteAndNewlineRank() {
        return 2.1;
    }

    /**
     * Rank used for the catch-all 'any' handler.
     *
     * @return float
     */
    public static function anyRank() {
        return 2.101;
    }

    /**
     * Register the 'mw-quote' tag handler and clear all buffered state.
     */
    public function init() {
        $this->manager->addTransform(
            function ( $token, $tokenManager, $prevToken ) {
                // The handler result must be handed back to the manager.
                return $this->onQuote( $token, $tokenManager, $prevToken );
            },
            'QuoteTransformer:onQuote',
            self::quoteAndNewlineRank(),
            'tag',
            'mw-quote'
        );
        $this->reset();
    }

    /**
     * Drop all buffered chunks and mark the handler as inactive.
     */
    public function reset() {
        $this->chunks = [];
        $this->currentChunk = [];
        $this->last = [];
        $this->isActive = false;
    }

    /**
     * Close the current chunk: append it to the chunk list and start an empty one.
     */
    public function _startNewChunk() {
        $this->chunks[] = $this->currentChunk;
        $this->currentChunk = [];
    }

    /**
     * Handle one 'mw-quote' token.
     *
     * On the first quote of a run this also registers the newline / td / th /
     * end / any handlers and stores the preceding token as context. The quote
     * itself is always placed in a chunk of its own.
     *
     * @param object $token Quote token; its `value` is the apostrophe string.
     * @param mixed $tokenManager Unused here.
     * @param mixed $prevToken Token seen before the quote, or null.
     * @return array Always empty: the quote is buffered, nothing is emitted yet.
     */
    public function onQuote( $token, $tokenManager, $prevToken ) {
        $qlen = strlen( $token->value );

        $this->manager->env->log(
            "trace/quote",
            $this->manager->pipelineId,
            "QUOTE |",
            function () use ( $token ) {
                return json_encode( $token );
            }
        );

        if ( !$this->isActive ) {
            $processQuotes = function ( $token ) {
                return $this->processQuotes( $token );
            };
            $onAny = function ( $token ) {
                return $this->onAny( $token );
            };
            $rank = self::quoteAndNewlineRank();

            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes', $rank, 'newline' );
            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes', $rank, 'tag', 'td' );
            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes', $rank, 'tag', 'th' );
            $this->manager->addTransform( $processQuotes, 'QuoteTransformer:processQuotes:end', $rank, 'end' );
            $this->manager->addTransform( $onAny, 'QuoteTransformer:onAny', self::anyRank(), 'any' );

            $this->isActive = true;
            // Keep the previous token as look-behind context for the bold
            // heuristic; processQuotes() removes it again before emitting.
            $this->currentChunk[] = $prevToken ?? '';
        }

        if ( $qlen === 2 || $qlen === 3 || $qlen === 5 ) {
            $this->_startNewChunk();
            $this->currentChunk[] = $token;
            $this->_startNewChunk();
        } else {
            assert( false, "should be transformed by tokenizer" );
        }

        return [];
    }

    /**
     * Buffer any token that arrives while a quote run is open.
     *
     * @param mixed $token
     * @return array Always empty: the token is buffered, nothing is emitted yet.
     */
    public function onAny( $token ) {
        $this->traceToken( "ANY |", $token );
        $this->currentChunk[] = $token;
        return [];
    }

    /**
     * Finish a quote run when a newline, td/th tag or end token arrives.
     *
     * Balances the buffered quotes, converts them to tags, unregisters the
     * temporary handlers and returns everything that was buffered followed by
     * the terminating token.
     *
     * @param mixed $token The terminating token.
     * @return array `[ 'token' => $token ]` when nothing was buffered (or for
     *   td/th tokens whose dataAttribs->stx is 'html'), otherwise
     *   `[ 'tokens' => <flat list of tokens> ]`.
     */
    public function processQuotes( $token ) {
        $this->traceToken( "NL |", $token );

        // td/th written in HTML syntax do not end a quote run.
        if (
            in_array( $token->name ?? null, [ 'td', 'th' ], true ) &&
            ( $token->dataAttribs->stx ?? null ) === 'html'
        ) {
            return [ "token" => $token ];
        }

        if ( !$this->isActive ) {
            return [ "token" => $token ];
        }

        // Count italic and bold markers; a 5-quote run counts as both.
        $numbold = 0;
        $numitalics = 0;
        $chunkCount = count( $this->chunks );
        for ( $i = 1; $i < $chunkCount; $i += 2 ) {
            $qlen = $this->quoteLength( $i );
            if ( $qlen === 2 || $qlen === 5 ) {
                $numitalics++;
            }
            if ( $qlen === 3 || $qlen === 5 ) {
                $numbold++;
            }
        }

        // Both counts odd: turn one bold marker into apostrophe + italic.
        if ( $numitalics % 2 === 1 && $numbold % 2 === 1 ) {
            $boldIndex = $this->findBoldToConvert();
            if ( $boldIndex > -1 ) {
                $this->convertBold( $boldIndex );
            }
        }

        $this->convertQuotesToTags();

        // Emit everything that was buffered, ending with the terminating token.
        $this->currentChunk[] = $token;
        $this->_startNewChunk();
        // Drop the look-behind token stored by onQuote().
        array_shift( $this->chunks[0] );
        // Flatten the chunk list into a single list of tokens.
        $res = [ "tokens" => array_merge( [], ...$this->chunks ) ];

        $this->manager->env->log(
            "trace/quote",
            $this->manager->pipelineId,
            "----->",
            function () use ( $res ) {
                return json_encode( $res['tokens'] );
            }
        );

        $this->reset();

        $quoteAndNewlineRank = self::quoteAndNewlineRank();
        $this->manager->removeTransform( $quoteAndNewlineRank, 'end' );
        $this->manager->removeTransform( $quoteAndNewlineRank, 'tag', 'td' );
        $this->manager->removeTransform( $quoteAndNewlineRank, 'tag', 'th' );
        $this->manager->removeTransform( $quoteAndNewlineRank, 'newline' );
        $this->manager->removeTransform( self::anyRank(), 'any' );

        return $res;
    }

    /**
     * Replace the 3-quote token in chunk $i by a 2-quote token, moving the
     * surplus apostrophe to the end of the preceding chunk as plain text.
     *
     * @param int $i Odd chunk index holding a single 3-quote token.
     */
    public function convertBold( $i ) {
        assert(
            $i > 0 &&
            count( $this->chunks[$i] ) === 1 &&
            strlen( $this->chunks[$i][0]->value ) === 3
        );

        $this->chunks[$i - 1][] = "'";

        $oldbold = $this->chunks[$i][0];
        $tsr = $oldbold->dataAttribs->tsr ?? null;
        if ( $tsr ) {
            // The first apostrophe became text, so the range starts one later.
            $tsr = [ $tsr[0] + 1, $tsr[1] ];
        }

        // NOTE(review): dataAttribs is passed as an object because the rest of
        // this class reads and writes it with `->`; confirm against the
        // SelfclosingTagTk constructor.
        $newbold = new SelfclosingTagTk( 'mw-quote', [], (object)[ "tsr" => $tsr ] );
        $newbold->value = "''";
        $this->chunks[$i] = [ $newbold ];
    }

    /**
     * Walk all buffered quote tokens and replace each with the tag tokens
     * implied by the current open/closed state, then auto-close whatever is
     * still open at the end of the run.
     *
     * $state records the open tags in opening order ('', 'i', 'b', 'ib',
     * 'bi'); 'both' means a 5-quote run was seen whose nesting order is
     * decided by the next quote.
     */
    public function convertQuotesToTags() {
        $lastboth = -1;
        $state = '';
        $chunkCount = count( $this->chunks );

        for ( $i = 1; $i < $chunkCount; $i += 2 ) {
            $qlen = $this->quoteLength( $i );

            if ( $qlen === 2 ) {
                if ( $state === 'i' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ) ] );
                    $state = '';
                } elseif ( $state === 'bi' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ) ] );
                    $state = 'b';
                } elseif ( $state === 'ib' ) {
                    // Misnested: close b, close i, reopen b.
                    $this->quoteToTag(
                        $i,
                        [ new EndTagTk( 'b' ), new EndTagTk( 'i' ), new TagTk( 'b' ) ],
                        "bogus two"
                    );
                    $state = 'b';
                } elseif ( $state === 'both' ) {
                    $this->quoteToTag( $lastboth, [ new TagTk( 'b' ), new TagTk( 'i' ) ] );
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ) ] );
                    $state = 'b';
                } else {
                    // $state is '' or 'b'.
                    $this->quoteToTag( $i, [ new TagTk( 'i' ) ] );
                    $state .= 'i';
                }
            } elseif ( $qlen === 3 ) {
                if ( $state === 'b' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ) ] );
                    $state = '';
                } elseif ( $state === 'ib' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ) ] );
                    $state = 'i';
                } elseif ( $state === 'bi' ) {
                    // Misnested: close i, close b, reopen i.
                    $this->quoteToTag(
                        $i,
                        [ new EndTagTk( 'i' ), new EndTagTk( 'b' ), new TagTk( 'i' ) ],
                        "bogus two"
                    );
                    $state = 'i';
                } elseif ( $state === 'both' ) {
                    $this->quoteToTag( $lastboth, [ new TagTk( 'i' ), new TagTk( 'b' ) ] );
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ) ] );
                    $state = 'i';
                } else {
                    // $state is '' or 'i'.
                    $this->quoteToTag( $i, [ new TagTk( 'b' ) ] );
                    $state .= 'b';
                }
            } elseif ( $qlen === 5 ) {
                if ( $state === 'b' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ), new TagTk( 'i' ) ] );
                    $state = 'i';
                } elseif ( $state === 'i' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ), new TagTk( 'b' ) ] );
                    $state = 'b';
                } elseif ( $state === 'bi' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'i' ), new EndTagTk( 'b' ) ] );
                    $state = '';
                } elseif ( $state === 'ib' ) {
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ), new EndTagTk( 'i' ) ] );
                    $state = '';
                } elseif ( $state === 'both' ) {
                    $this->quoteToTag( $lastboth, [ new TagTk( 'i' ), new TagTk( 'b' ) ] );
                    $this->quoteToTag( $i, [ new EndTagTk( 'b' ), new EndTagTk( 'i' ) ] );
                    $state = '';
                } else {
                    // Nesting order is unknown until the next quote shows up.
                    $lastboth = $i;
                    $state = 'both';
                }
            }
        }

        // A 5-quote run that was never followed by another quote opens b then i.
        if ( $state === 'both' ) {
            $this->quoteToTag( $lastboth, [ new TagTk( 'b' ), new TagTk( 'i' ) ] );
            $state = 'bi';
        }

        // Close whatever is still open, innermost tag first.
        if ( $state === 'b' || $state === 'ib' ) {
            $this->autoCloseTag( 'b' );
        }
        if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
            $this->autoCloseTag( 'i' );
        }
        if ( $state === 'bi' ) {
            $this->autoCloseTag( 'b' );
        }
    }

    /**
     * Replace the single quote token in chunk $chunk with the given tags.
     *
     * When the quote token carries a `tsr` range, that range is split across
     * the new tags: 3 positions for each 'b' and 2 for each 'i'.
     *
     * @param int $chunk Index of a chunk holding exactly one quote token.
     * @param array $tags Tag tokens that replace the quote token.
     * @param mixed $ignoreBogusTwo Truthy for the three-tag misnesting fix-up:
     *   the first tag only flags the still-open tag of the same name as
     *   auto-closed and the third is flagged as auto-inserted; neither gets a
     *   tsr range. Defaults to false because most callers omit it.
     */
    public function quoteToTag( $chunk, $tags, $ignoreBogusTwo = false ) {
        assert( count( $this->chunks[$chunk] ) === 1 );

        $result = [];
        $oldtag = $this->chunks[$chunk][0];
        $tsr = $oldtag->dataAttribs->tsr ?? null;
        $startpos = $tsr ? $tsr[0] : null;
        $endpos = $tsr ? $tsr[1] : null;

        foreach ( array_values( $tags ) as $i => $tag ) {
            if ( $tsr ) {
                if ( $i === 0 && $ignoreBogusTwo ) {
                    $this->last[$tag->name]->dataAttribs->autoInsertedEnd = true;
                } elseif ( $i === 2 && $ignoreBogusTwo ) {
                    $tag->dataAttribs->autoInsertedStart = true;
                } elseif ( $tag->name === 'b' ) {
                    $tag->dataAttribs->tsr = [ $startpos, $startpos + 3 ];
                    $startpos = $tag->dataAttribs->tsr[1];
                } elseif ( $tag->name === 'i' ) {
                    $tag->dataAttribs->tsr = [ $startpos, $startpos + 2 ];
                    $startpos = $tag->dataAttribs->tsr[1];
                } else {
                    assert( false, "unexpected tag name in quoteToTag" );
                }
            }

            // Remember opening tags so they can be flagged later; forget them on close.
            $this->last[$tag->name] = ( $tag instanceof EndTagTk ) ? null : $tag;
            $result[] = $tag;
        }

        if ( $tsr ) {
            assert( $startpos === $endpos, "tsr range not fully consumed by the generated tags" );
        }

        $this->chunks[$chunk] = $result;
    }

    /**
     * Emit a trace line for a token, prefixed with an arrow while the handler
     * is inactive.
     *
     * @param string $label Column label for the trace line.
     * @param mixed $token Token to serialize.
     */
    private function traceToken( $label, $token ) {
        $this->manager->env->log(
            "trace/quote",
            $this->manager->pipelineId,
            $label,
            function () use ( $token ) {
                return ( $this->isActive ? "" : " ---> " ) . json_encode( $token );
            }
        );
    }

    /**
     * Return the number of apostrophes of the quote token stored in chunk $i.
     *
     * @param int $i Odd chunk index.
     * @return int
     */
    private function quoteLength( $i ) {
        assert( count( $this->chunks[$i] ) === 1 );
        return strlen( $this->chunks[$i][0]->value );
    }

    /**
     * Pick the 3-quote token that should be turned into apostrophe + italic.
     *
     * Looks at the text right before each 3-quote token. Preference order:
     * first one preceded by a single-letter word, then first one preceded by
     * a longer word, then first one preceded by a space.
     *
     * @return int Chunk index of the chosen token, or -1 if there is none.
     */
    private function findBoldToConvert() {
        $firstsingleletterword = -1;
        $firstmultiletterword = -1;
        $firstspace = -1;
        $chunkCount = count( $this->chunks );

        for ( $i = 1; $i < $chunkCount; $i += 2 ) {
            if ( strlen( $this->chunks[$i][0]->value ) !== 3 ) {
                continue;
            }

            // Gather at least two characters of text preceding the quote,
            // skipping non-string tokens.
            $prevChunk = $this->chunks[$i - 1];
            $ctxPrevToken = '';
            for (
                $j = count( $prevChunk ) - 1;
                mb_strlen( $ctxPrevToken ) < 2 && $j >= 0;
                $j--
            ) {
                if ( is_string( $prevChunk[$j] ) ) {
                    $ctxPrevToken = $prevChunk[$j] . $ctxPrevToken;
                }
            }

            // Character-wise (not byte-wise) so multi-byte letters count once.
            $ctxLen = mb_strlen( $ctxPrevToken );
            $lastchar = $ctxLen >= 1 ? mb_substr( $ctxPrevToken, $ctxLen - 1, 1 ) : '';
            $secondtolastchar = $ctxLen >= 2 ? mb_substr( $ctxPrevToken, $ctxLen - 2, 1 ) : '';

            if ( $lastchar === ' ' ) {
                if ( $firstspace === -1 ) {
                    $firstspace = $i;
                }
            } elseif ( $secondtolastchar === ' ' && $firstsingleletterword === -1 ) {
                $firstsingleletterword = $i;
                // Best possible candidate; no need to look further.
                break;
            } elseif ( $firstmultiletterword === -1 ) {
                $firstmultiletterword = $i;
            }
        }

        if ( $firstsingleletterword > -1 ) {
            return $firstsingleletterword;
        }
        if ( $firstmultiletterword > -1 ) {
            return $firstmultiletterword;
        }
        return $firstspace;
    }

    /**
     * Append an end tag for $name to the current chunk and flag the matching
     * open tag as having an auto-inserted end.
     *
     * @param string $name 'b' or 'i'.
     */
    private function autoCloseTag( $name ) {
        $this->currentChunk[] = new EndTagTk( $name );
        $this->last[$name]->dataAttribs->autoInsertedEnd = true;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment