# Persistent:
var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
$mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, $mPreprocessor,
- $mExtLinkBracketedRegex, $mDefaultStripList, $mVarCache, $mConf;
+ $mExtLinkBracketedRegex, $mUrlProtocols, $mDefaultStripList, $mVarCache, $mConf;
# Cleared with clearState():
$this->mFunctionHooks = array();
$this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
$this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' );
+ $this->mUrlProtocols = wfUrlProtocols();
$this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
$this->mVarCache = array();
if ( isset( $conf['preprocessorClass'] ) ) {
$this->mPreprocessorClass = $conf['preprocessorClass'];
+ } elseif ( extension_loaded( 'domxml' ) ) {
+ // PECL extension that conflicts with the core DOM extension (bug 13770)
+ wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
+ $this->mPreprocessorClass = 'Preprocessor_Hash';
} elseif ( extension_loaded( 'dom' ) ) {
$this->mPreprocessorClass = 'Preprocessor_DOM';
} else {
*/
function doMagicLinks( $text ) {
wfProfileIn( __METHOD__ );
+ $prots = $this->mUrlProtocols; # regex fragment interpolated into the m[3] branch of the pattern below
+ $urlChar = self::EXT_LINK_URL_CLASS; # URL character class, repeated with + in the m[3] branch
$text = preg_replace_callback(
'!(?: # Start cases
- <a.*?</a> | # Skip link text
- <.*?> | # Skip stuff inside HTML elements
- (?:RFC|PMID)\s+([0-9]+) | # RFC or PMID, capture number as m[1]
- ISBN\s+(\b # ISBN, capture number as m[2]
- (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
- (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
- [0-9Xx] # check digit
- \b)
+ (<a.*?</a>) | # m[1]: Skip link text
+ (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
+ (\\b(?:$prots)$urlChar+) | # m[3]: Free external links" . '
+ (?:RFC|PMID)\s+([0-9]+) | # m[4]: RFC or PMID, capture number
+ ISBN\s+(\b # m[5]: ISBN, capture number
+ (?: 97[89] [\ \-]? )? # optional 13-digit ISBN prefix
+ (?: [0-9] [\ \-]? ){9} # 9 digits with opt. delimiters
+ [0-9Xx] # check digit
+ \b)
)!x', array( &$this, 'magicLinkCallback' ), $text );
wfProfileOut( __METHOD__ );
return $text;
}
function magicLinkCallback( $m ) {
- if ( substr( $m[0], 0, 1 ) === '<' ) {
+ if ( isset( $m[1] ) && strval( $m[1] ) !== '' ) {
+ # Skip anchor: group 1 captured a whole <a>...</a> element, return the match unchanged
+ return $m[0];
+ } elseif ( isset( $m[2] ) && strval( $m[2] ) !== '' ) {
# Skip HTML element
return $m[0];
- } elseif ( substr( $m[0], 0, 4 ) === 'ISBN' ) {
- $isbn = $m[2];
- $num = strtr( $isbn, array(
- '-' => '',
- ' ' => '',
- 'x' => 'X',
- ));
- $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
- $text = '<a href="' .
- $titleObj->escapeLocalUrl() .
- "\" class=\"internal\">ISBN $isbn</a>";
- } else {
+ } elseif ( isset( $m[3] ) && strval( $m[3] ) !== '' ) {
+ # Free external link: group 3 matched, so the whole match is the bare URL text
+ return $this->makeFreeExternalLink( $m[0] );
+ } elseif ( isset( $m[4] ) && strval( $m[4] ) !== '' ) {
+ # RFC or PMID: group 4 holds the number, the keyword is read from the start of the match
if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
$keyword = 'RFC';
$urlmsg = 'rfcurl';
- $id = $m[1];
+ $id = $m[4];
} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
$keyword = 'PMID';
$urlmsg = 'pubmedurl';
- $id = $m[1];
+ $id = $m[4];
} else {
throw new MWException( __METHOD__.': unrecognised match type "' .
substr($m[0], 0, 20 ) . '"' );
}
-
$url = wfMsg( $urlmsg, $id);
$sk = $this->mOptions->getSkin();
$la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
- $text = "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
+ return "<a href=\"{$url}\"{$la}>{$keyword} {$id}</a>";
+ } elseif ( isset( $m[5] ) && strval( $m[5] ) !== '' ) {
+ # ISBN: group 5 holds the number as written; spaces and hyphens are dropped and x is upper-cased for the Booksources title
+ $isbn = $m[5];
+ $num = strtr( $isbn, array(
+ '-' => '',
+ ' ' => '',
+ 'x' => 'X',
+ ));
+ $titleObj = SpecialPage::getTitleFor( 'Booksources', $num );
+ return'<a href="' .
+ $titleObj->escapeLocalUrl() .
+ "\" class=\"internal\">ISBN $isbn</a>";
+ } else { # no capture group was non-empty: leave the matched text untouched
+ return $m[0];
}
- return $text;
}
+	/**
+	 * Turn a free (unbracketed) external URL found in the page text into HTML.
+	 *
+	 * Everything from the first "&lt;" or "&gt;" entity onwards, and any
+	 * trailing punctuation, is split off the URL and re-appended after the
+	 * generated HTML. The remaining URL goes through Sanitizer::cleanUrl() and
+	 * is rendered as an external image when maybeMakeExternalImage() accepts
+	 * it; otherwise it becomes a 'free' external link that is also registered
+	 * in the parser output.
+	 *
+	 * @param string $url User-supplied URL text, exactly as matched in the text
+	 * @return string HTML for the link or image, followed by the split-off trail
+	 * @private
+	 */
+	function makeFreeExternalLink( $url ) {
+		global $wgContLang;
+		wfProfileIn( __METHOD__ );
+
+		$sk = $this->mOptions->getSkin();
+		$trail = '';
+
+		# The characters '<' and '>' (which were escaped by
+		# removeHTMLtags()) should not be included in
+		# URLs, per RFC 2396.
+		$m2 = array();
+		if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
+			$trail = substr( $url, $m2[0][1] ) . $trail;
+			$url = substr( $url, 0, $m2[0][1] );
+		}
+
+		# Move trailing punctuation to $trail
+		# NOTE(review): inside single quotes '\.' is a literal backslash plus a
+		# dot, so a trailing backslash is treated as punctuation as well.
+		# Presumably unintended; kept as is to preserve behaviour.
+		$sep = ',;\.:!?';
+		# If there is no left bracket, then consider right brackets fair game too
+		if ( strpos( $url, '(' ) === false ) {
+			$sep .= ')';
+		}
+
+		$numSepChars = strspn( strrev( $url ), $sep );
+		# The text is already HTML-escaped, so a URL may legitimately end in an
+		# entity such as "&amp;". If the run of trailing punctuation starts with
+		# the ';' that closes an entity, leave that ';' on the URL instead of
+		# moving it to the trail and breaking the entity in two.
+		if ( $numSepChars && substr( $url, -$numSepChars, 1 ) === ';' ) {
+			$upToSemicolon = substr( $url, 0, strlen( $url ) - $numSepChars + 1 );
+			if ( preg_match( '/&([a-z]+|#x[\da-f]+|#\d+);$/i', $upToSemicolon ) ) {
+				$numSepChars--;
+			}
+		}
+		if ( $numSepChars ) {
+			$trail = substr( $url, -$numSepChars ) . $trail;
+			$url = substr( $url, 0, -$numSepChars );
+		}
+
+		$url = Sanitizer::cleanUrl( $url );
+
+		# Is this an external image?
+		$text = $this->maybeMakeExternalImage( $url );
+		if ( $text === false ) {
+			# Not an image, make a link
+			$text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
+			# Register it in the output object...
+			# Replace unnecessary URL escape codes with their equivalent characters
+			$pasteurized = self::replaceUnusualEscapes( $url );
+			$this->mOutput->addExternalLink( $pasteurized );
+		}
+		wfProfileOut( __METHOD__ );
+		return $text . $trail;
+	}
+
+
/**
* Parse headers and return html
*
$sk = $this->mOptions->getSkin();
$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
-
- $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
+ $s = array_shift( $bits );
$i = 0;
while ( $i<count( $bits ) ) {
$url = Sanitizer::cleanUrl( $url );
- # Process the trail (i.e. everything after this link up until start of the next link),
- # replacing any non-bracketed links
- $trail = $this->replaceFreeExternalLinks( $trail );
-
# Use the encoded URL
# This means that users can paste URLs directly into the text
# Funny characters like ö aren't valid in URLs anyway
return $s;
}
- /**
- * Replace anything that looks like a URL with a link
- * @private
- */
- function replaceFreeExternalLinks( $text ) {
- global $wgContLang;
- wfProfileIn( __METHOD__ );
-
- $bits = preg_split( '/(\b(?:' . wfUrlProtocols() . '))/S', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
- $s = array_shift( $bits );
- $i = 0;
-
- $sk = $this->mOptions->getSkin();
-
- while ( $i < count( $bits ) ){
- $protocol = $bits[$i++];
- $remainder = $bits[$i++];
-
- $m = array();
- if ( preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
- # Found some characters after the protocol that look promising
- $url = $protocol . $m[1];
- $trail = $m[2];
-
- # special case: handle urls as url args:
- # http://www.example.com/foo?=http://www.example.com/bar
- if(strlen($trail) == 0 &&
- isset($bits[$i]) &&
- preg_match('/^'. wfUrlProtocols() . '$/S', $bits[$i]) &&
- preg_match( '/^('.self::EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
- {
- # add protocol, arg
- $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
- $i += 2;
- $trail = $m[2];
- }
-
- # The characters '<' and '>' (which were escaped by
- # removeHTMLtags()) should not be included in
- # URLs, per RFC 2396.
- $m2 = array();
- if (preg_match('/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE)) {
- $trail = substr($url, $m2[0][1]) . $trail;
- $url = substr($url, 0, $m2[0][1]);
- }
-
- # Move trailing punctuation to $trail
- $sep = ',;\.:!?';
- # If there is no left bracket, then consider right brackets fair game too
- if ( strpos( $url, '(' ) === false ) {
- $sep .= ')';
- }
-
- $numSepChars = strspn( strrev( $url ), $sep );
- if ( $numSepChars ) {
- $trail = substr( $url, -$numSepChars ) . $trail;
- $url = substr( $url, 0, -$numSepChars );
- }
-
- $url = Sanitizer::cleanUrl( $url );
-
- # Is this an external image?
- $text = $this->maybeMakeExternalImage( $url );
- if ( $text === false ) {
- # Not an image, make a link
- $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
- # Register it in the output object...
- # Replace unnecessary URL escape codes with their equivalent characters
- $pasteurized = self::replaceUnusualEscapes( $url );
- $this->mOutput->addExternalLink( $pasteurized );
- }
- $s .= $text . $trail;
- } else {
- $s .= $protocol . $remainder;
- }
- }
- wfProfileOut( __METHOD__ );
- return $s;
- }
-
/**
* Replace unusual URL escape codes with their equivalent characters
* @param string
/**
* make an image if it's allowed, either through the global
- * option or through the exception
+ * option, through the exception, or through the on-wiki whitelist
* @private
*/
function maybeMakeExternalImage( $url ) {
$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
$imagesexception = !empty($imagesfrom);
$text = false;
+ # $imagesfrom could be either a single string or an array of strings; either way it is treated as URL prefix(es)
+ if( $imagesexception && is_array( $imagesfrom ) ) {
+ $imagematch = false;
+ foreach( $imagesfrom as $match ) {
+ if( strpos( $url, $match ) === 0 ) { # $url starts with this allowed prefix
+ $imagematch = true;
+ break;
+ }
+ }
+ } elseif( $imagesexception ) {
+ $imagematch = (strpos( $url, $imagesfrom ) === 0);
+ } else {
+ $imagematch = false;
+ }
if ( $this->mOptions->getAllowExternalImages()
- || ( $imagesexception && strpos( $url, $imagesfrom ) === 0 ) ) {
+ || ( $imagesexception && $imagematch ) ) {
if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
# Image found
$text = $sk->makeExternalImage( $url );
}
}
+ if( !$text && $this->mOptions->getEnableImageWhitelist()
+ && preg_match( self::EXT_IMAGE_REGEX, $url ) ) { # nothing allowed it so far: try the on-wiki whitelist, image URLs only
+ $whitelist = explode( "\n", wfMsgForContent( 'external_image_whitelist' ) ); # one regex fragment per message line
+ foreach( $whitelist as $entry ) {
+ # Ignore blank entries and comment lines starting with '#'; other entries are used as case-insensitive regex fragments
+ if( strpos( $entry, '#' ) === 0 || $entry === '' )
+ continue;
+ if( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) { # '/' is escaped because it is the delimiter. NOTE(review): a malformed or already-escaped entry makes preg_match() warn and return false - confirm that is acceptable
+ # Image matches a whitelist entry
+ $text = $sk->makeExternalImage( $url );
+ break;
+ }
+ }
+ }
return $text;
}
* Insert a NOPARSE hacky thing into any inline links in a chunk that's
* going to go through further parsing steps before inline URL expansion.
*
- * In particular this is important when using action=render, which causes
- * full URLs to be included.
- *
- * Oh man I hate our multi-layer parser!
+ * Not needed quite as much as it used to be since free links are a bit
+ * more sensible these days. But bracketed links are still an issue.
*
* @param string more-or-less HTML
* @return string less-or-more HTML with NOPARSE bits
if (strlen($url) > 255)
return wfMsg('scarytranscludetoolong');
- return $this->fetchScaryTemplateMaybeFromCache($url);
+ return "<div class=\"mw-iw-transclusion\">\n" . $this->fetchScaryTemplateMaybeFromCache($url) . "</div>\n";
}
function fetchScaryTemplateMaybeFromCache($url) {
$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]]
+ # The third group must be delimited by literal full-width parentheses (U+FF08 / U+FF09), written here
+ # as UTF-8 byte escapes. With plain unescaped ASCII parentheses the group is merely nested, and the
+ # pattern would match every [[page|]] link and split its title at the first character.
+ $p4 = "/\[\[(:?$nc+:|:|)($tc+?)(\xEF\xBC\x88$tc+\xEF\xBC\x89)\\|]]/"; # [[ns:page(context)|]] with full-width parentheses and no space
$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]]
$p2 = "/\[\[\\|($tc+)]]/"; # [[|page]]
# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
+ $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
$t = $this->mTitle->getText();
if ( strpos( $matches[0], '%' ) !== false )
$matches[1] = urldecode( $matches[1] );
- $tp = Title::newFromText( $matches[1], NS_IMAGE );
+ $tp = Title::newFromText( $matches[1]/*, NS_IMAGE*/ );
$nt =& $tp;
if( is_null( $nt ) ) {
# Bogus title. Ignore these so we don't bomb out later.