Created
February 2, 2013 12:29
-
-
Save thomas-mcdonald/4697086 to your computer and use it in GitHub Desktop.
MediaWiki internal link handler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Process [[...]] internal wikilinks in a piece of wikitext.
 *
 * The text is split on every occurrence of "[[" and each resulting chunk is
 * examined in turn. Depending on the target title a chunk is:
 *  - copied back unchanged (invalid form, URL-protocol target, or a target
 *    that Title::newFromText() rejects),
 *  - registered as a language link or category on $this->mOutput and
 *    removed from the text,
 *  - rendered through makeImage() (NS_FILE) or Linker::makeMediaLinkFile()
 *    (NS_MEDIA),
 *  - rendered as a self link, or
 *  - replaced by the string returned from makeKnownLinkHolder() /
 *    LinkHolderArray::makeHolder().
 *
 * Image captions are processed recursively; holders produced by the
 * recursive calls are merged into the array returned here.
 *
 * @param string &$s Wikitext to process; rewritten in place.
 * @return LinkHolderArray Holders created while processing $s.
 */
function replaceInternalLinks2( &$s ) {
	# Every wfProfileOut() below needs a matching wfProfileIn(); these two
	# pair with the '-setup' out before the loop and the final out at the end.
	wfProfileIn( __METHOD__ );

	wfProfileIn( __METHOD__ . '-setup' );
	# The patterns are built once and cached in static variables across calls
	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$e2 = wfMessage( 'linkprefix' )->inContentLanguage()->text();
	}

	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = array();
		# Remember the prefix in front of the very first link; it is consumed
		# on the first loop iteration below
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();
	wfProfileOut( __METHOD__ . '-setup' );

	# Loop for each link
	for ( ; $line !== false && $line !== null ; $a->next(), $line = $a->current() ) {
		if ( $useLinkPrefixExtension ) {
			wfProfileIn( __METHOD__ . '-prefixhandling' );
			# Move a trailing word fragment of the text emitted so far into $prefix
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
			wfProfileOut( __METHOD__ . '-prefixhandling' );
		}

		$might_be_img = false;

		wfProfileIn( __METHOD__ . "-e1" );
		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks parsing,
			# the real problem is with the $e1 regex
			# See bug 1300.
			#
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			#
			if ( $text !== '' && substr( $m[3], 0, 1 ) === ']' && strpos( $text, '[' ) !== false )
			{
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				# Decoding may produce literal angle brackets (%3C / %3E); escape
				# them so the decoded target cannot introduce raw '<' or '>'.
				$m[1] = str_replace( array( '<', '>' ), array( '&lt;', '&gt;' ), rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) { # Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = rawurldecode( $m[1] );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			wfProfileOut( __METHOD__ . "-e1" );
			continue;
		}
		wfProfileOut( __METHOD__ . "-e1" );
		wfProfileIn( __METHOD__ . "-misc" );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
			$s .= $prefix . '[[' . $line ;
			wfProfileOut( __METHOD__ . "-misc" );
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $m[1], $text );
		} else {
			$link = $m[1];
		}

		# $noforce is false when the target starts with ':'; such links skip the
		# interwiki / image / category handling further down
		$noforce = ( substr( $m[1], 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		wfProfileOut( __METHOD__ . "-misc" );
		wfProfileIn( __METHOD__ . "-title" );
		$nt = Title::newFromText( $this->mStripState->unstripNoWiki( $link ) );
		if ( $nt === null ) {
			# Not a usable title; emit the original text unchanged
			$s .= $prefix . '[[' . $line;
			wfProfileOut( __METHOD__ . "-title" );
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		wfProfileOut( __METHOD__ . "-title" );

		if ( $might_be_img ) { # if this is actually an invalid link
			wfProfileIn( __METHOD__ . "-might_be_img" );
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					wfProfileOut( __METHOD__ . "-might_be_img" );
					continue;
				}
			} else {
				# it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				wfProfileOut( __METHOD__ . "-might_be_img" );
				continue;
			}
			wfProfileOut( __METHOD__ . "-might_be_img" );
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			# -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce ) {
			# Interwikis
			wfProfileIn( __METHOD__ . "-interwiki" );
			if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) {
				// XXX: the above check prevents links to sites with identifiers that are not language codes

				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				# The link itself produces no output; keep the trail unless it is
				# empty or consists only of newlines
				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				wfProfileOut( __METHOD__ . "-interwiki" );
				continue;
			}
			wfProfileOut( __METHOD__ . "-interwiki" );

			if ( $ns == NS_FILE ) {
				wfProfileIn( __METHOD__ . "-image" );
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
				} else {
					# wfIsBadImage() flagged the file: drop the image, keep surrounding text
					$s .= $prefix . $trail;
				}
				wfProfileOut( __METHOD__ . "-image" );
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				wfProfileIn( __METHOD__ . "-category" );
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				# As with language links, only keep prefix + trail if something
				# other than newlines remains
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				wfProfileOut( __METHOD__ . "-category" );
				continue;
			}
		}

		# Self-link checking
		if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
			if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array($this->mTitle->getPrefixedText(),$this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ),true) ) ) {
				$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
				continue;
			}
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			wfProfileIn( __METHOD__ . "-media" );
			# Give extensions a chance to select the file revision for us
			$options = array();
			$descQuery = false;
			wfRunHooks( 'BeforeParserFetchFileAndTitle', array( $this, $nt, &$options, &$descQuery ) );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			wfProfileOut( __METHOD__ . "-media" );
			continue;
		}

		wfProfileIn( __METHOD__ . "-always_known" );
		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		#
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, array(), $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, array(), $trail, $prefix );
		}
		wfProfileOut( __METHOD__ . "-always_known" );
	}
	wfProfileOut( __METHOD__ );
	return $holders;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment