* changes in an incompatible way, so the parser cache
* can automatically discard old data.
*/
-define( 'MW_PARSER_VERSION', '1.6.0' );
+define( 'MW_PARSER_VERSION', '1.6.1' );
/**
* Variable substitution O(N^2) attack
define( 'HTTP_PROTOCOLS', 'http:\/\/|https:\/\/' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]' );
-# Including space
-define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
+# Including space, but excluding newlines
+define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
-define( 'EXT_LINK_BRACKETED', '/\[(\b(' . wfUrlProtocols() . ')'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
+define( 'EXT_LINK_BRACKETED', '/\[(\b(' . wfUrlProtocols() . ')'.
+ EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
define( 'EXT_IMAGE_REGEX',
'/^('.HTTP_PROTOCOLS.')'. # Protocol
'('.EXT_LINK_URL_CLASS.'+)\\/'. # Hostname and path
'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);
+// State constants for the definition list colon extraction
+define( 'MW_COLON_STATE_TEXT', 0 );
+define( 'MW_COLON_STATE_TAG', 1 );
+define( 'MW_COLON_STATE_TAGSTART', 2 );
+define( 'MW_COLON_STATE_CLOSETAG', 3 );
+define( 'MW_COLON_STATE_TAGSLASH', 4 );
+define( 'MW_COLON_STATE_COMMENT', 5 );
+define( 'MW_COLON_STATE_COMMENTDASH', 6 );
+define( 'MW_COLON_STATE_COMMENTDASHDASH', 7 );
+
/**
* PHP Parser
*
class Parser
{
/**#@+
- * @access private
+ * @private
*/
# Persistent:
- var $mTagHooks;
+ var $mTagHooks, $mFunctionHooks;
# Cleared with clearState():
var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
/**
* Constructor
*
- * @access public
+ * @public
*/
function Parser() {
+ # Both hook tables start out empty; they are listed under "Persistent"
+ # above, unlike the members reset by clearState().
$this->mTagHooks = array();
+ $this->mFunctionHooks = array();
$this->clearState();
+ # <pre> is handled through the generic tag-hook mechanism.
+ # NOTE(review): presumably setHook() stores the callback in mTagHooks,
+ # which strip() consults by tag name -- confirm against setHook().
+ $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
}
/**
* Clear Parser state
*
- * @access private
+ * @private
*/
function clearState() {
$this->mOutput = new ParserOutput;
'titles' => array()
);
$this->mRevisionId = null;
- $this->mUniqPrefix = 'UNIQ' . Parser::getRandomString();
+
+ /**
+ * Prefix for temporary replacement strings for the multipass parser.
+ * \x07 should never appear in input as it's disallowed in XML.
+ * Using it at the front also gives us a little extra robustness
+ * since it shouldn't match when butted up against identifier-like
+ * string constructs.
+ */
+ $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
# Clear these on every parse, bug 4549
$this->mTemplates = array();
$this->mTemplatePath = array();
+ $this->mShowToc = true;
+ $this->mForceTocPosition = false;
+
wfRunHooks( 'ParserClearState', array( &$this ) );
}
/**
* Accessor for mUniqPrefix.
*
- * @access public
+ * @public
*/
function UniqPrefix() {
return $this->mUniqPrefix;
* Convert wikitext to HTML
* Do not call this function recursively.
*
- * @access private
+ * @private
* @param string $text Text we want to parse
* @param Title &$title A title object
* @param array $options
$this->mRevisionId = $revid;
$this->mOutputType = OT_HTML;
- $this->mStripState = NULL;
-
//$text = $this->strip( $text, $this->mStripState );
// VOODOO MAGIC FIX! Sometimes the above segfaults in PHP5.
$x =& $this->mStripState;
'/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1 \\2',
# french spaces, Guillemet-right
'/(\\302\\253) /' => '\\1 ',
- '/<center *>(.*)<\\/center *>/i' => '<div class="center">\\1</div>',
);
$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
/**
* Get a random string
*
- * @access private
+ * @private
* @static
*/
function getRandomString() {
function getOptions() { return $this->mOptions; }
/**
- * Replaces all occurrences of <$tag>content</$tag> in the text
- * with a random marker and returns the new text. the output parameter
- * $content will be an associative array filled with data on the form
- * $unique_marker => content.
+ * Replaces all occurrences of HTML-style comments and the given tags
+ * in the text with a random marker and returns the new text. The output
+ * parameter $matches will be an associative array filled with data in
+ * the form:
+ * 'UNIQ-xxxxx' => array(
+ * 'element',
+ * 'tag content',
+ * array( 'param' => 'x' ),
+ * '<element param="x">tag content</element>' )
*
- * If $content is already set, the additional entries will be appended
- * If $tag is set to STRIP_COMMENTS, the function will extract
- * <!-- HTML comments -->
+ * @param $elements list of element names. Comments are always extracted.
+ * @param $text Source text string.
+ * @param $uniq_prefix
*
- * @access private
+ * @private
* @static
*/
- function extractTagsAndParams($tag, $text, &$content, &$tags, &$params, $uniq_prefix = ''){
- $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
- if ( !$content ) {
- $content = array( );
- }
+ function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
+ # Replaces each HTML comment and each occurrence of the listed elements
+ # in $text with a unique marker and returns the stripped text.
+ # $matches is reset and then filled with
+ # marker => array( element, content, decoded attributes, source text ).
+ $rand = Parser::getRandomString();
$n = 1;
$stripped = '';
+ $matches = array();
- if ( !$tags ) {
- $tags = array( );
- }
-
- if ( !$params ) {
- $params = array( );
- }
-
- if( $tag == STRIP_COMMENTS ) {
- $start = '/<!--()()/';
- $end = '/-->/';
- } else {
- $start = "/<$tag(\\s+[^\\/>]*|\\s*)(\\/?)>/i";
- $end = "/<\\/$tag\\s*>/i";
- }
+ # Element names are supplied by the caller; quote them so that regex
+ # metacharacters (or the '/' delimiter) in a name cannot alter or
+ # break the pattern.
+ $quoted = array();
+ foreach( $elements as $name ) {
+ $quoted[] = preg_quote( $name, '/' );
+ }
+ $taglist = implode( '|', $quoted );
+ $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
while ( '' != $text ) {
$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
$stripped .= $p[0];
- if( count( $p ) < 4 ) {
+ # Fewer than five pieces means nothing matched: the rest is plain text.
+ if( count( $p ) < 5 ) {
break;
}
- $attributes = $p[1];
- $empty = $p[2];
- $inside = $p[3];
+ # A comment match fills the fourth capture group, which adds a sixth
+ # piece; a tag match stops after the three tag groups.
+ if( count( $p ) > 5 ) {
+ // comment
+ $element = $p[4];
+ $attributes = '';
+ $close = '';
+ $inside = $p[5];
+ } else {
+ // tag
+ $element = $p[1];
+ $attributes = $p[2];
+ $close = $p[3];
+ $inside = $p[4];
+ }
- $marker = $rnd . sprintf('%08X', $n++);
+ $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';
$stripped .= $marker;
- $tags[$marker] = "<$tag$attributes$empty>";
- $params[$marker] = Sanitizer::decodeTagAttributes( $attributes );
-
- if ( $empty === '/' ) {
+ if ( $close === '/>' ) {
// Empty element tag, <tag />
- $content[$marker] = null;
+ $content = null;
$text = $inside;
+ $tail = null;
} else {
- $q = preg_split( $end, $inside, 2 );
- $content[$marker] = $q[0];
- if( count( $q ) < 2 ) {
+ if( $element == '!--' ) {
+ $end = '/(-->)/';
+ } else {
+ # Quote the matched name for the same reason as in $start.
+ $end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
+ }
+ $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
+ $content = $q[0];
+ if( count( $q ) < 3 ) {
# No end tag -- let it run out to the end of the text.
- break;
+ $tail = '';
+ $text = '';
} else {
- $text = $q[1];
+ $tail = $q[1];
+ $text = $q[2];
}
}
+
+ $matches[$marker] = array( $element,
+ $content,
+ Sanitizer::decodeTagAttributes( $attributes ),
+ "<$element$attributes$close$content$tail" );
}
return $stripped;
}
- /**
- * Wrapper function for extractTagsAndParams
- * for cases where $tags and $params isn't needed
- * i.e. where tags will never have params, like <nowiki>
- *
- * @access private
- * @static
- */
- function extractTags( $tag, $text, &$content, $uniq_prefix = '' ) {
- $dummy_tags = array();
- $dummy_params = array();
-
- return Parser::extractTagsAndParams( $tag, $text, $content,
- $dummy_tags, $dummy_params, $uniq_prefix );
- }
-
/**
* Strips and renders nowiki, pre, math, hiero
* If $render is set, performs necessary rendering operations on plugins
* for section editing, where these comments cause confusion when
* counting the sections in the wikisource
*
- * @access private
+ * @private
*/
function strip( $text, &$state, $stripcomments = false ) {
$render = ($this->mOutputType == OT_HTML);
- $html_content = array();
- $nowiki_content = array();
- $math_content = array();
- $pre_content = array();
- $comment_content = array();
- $ext_content = array();
- $ext_tags = array();
- $ext_params = array();
- $gallery_content = array();
# Replace any instances of the placeholders
$uniq_prefix = $this->mUniqPrefix;
#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
-
- # html
+ $commentState = array();
+
+ $elements = array_merge(
+ array( 'nowiki', 'gallery' ),
+ array_keys( $this->mTagHooks ) );
global $wgRawHtml;
if( $wgRawHtml ) {
- $text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
- foreach( $html_content as $marker => $content ) {
- if ($render ) {
- # Raw and unchecked for validity.
- $html_content[$marker] = $content;
- } else {
- $html_content[$marker] = '<html>'.$content.'</html>';
- }
- }
+ $elements[] = 'html';
}
-
- # nowiki
- $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
- foreach( $nowiki_content as $marker => $content ) {
- if( $render ){
- $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
- } else {
- $nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
- }
- }
-
- # math
if( $this->mOptions->getUseTeX() ) {
- $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
- foreach( $math_content as $marker => $content ){
- if( $render ) {
- $math_content[$marker] = renderMath( $content );
- } else {
- $math_content[$marker] = '<math>'.$content.'</math>';
- }
- }
- }
-
- # pre
- $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
- foreach( $pre_content as $marker => $content ){
- if( $render ){
- $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
- } else {
- $pre_content[$marker] = '<pre>'.$content.'</pre>';
- }
- }
-
- # gallery
- $text = Parser::extractTags('gallery', $text, $gallery_content, $uniq_prefix);
- foreach( $gallery_content as $marker => $content ) {
- require_once( 'ImageGallery.php' );
- if ( $render ) {
- $gallery_content[$marker] = $this->renderImageGallery( $content );
- } else {
- $gallery_content[$marker] = '<gallery>'.$content.'</gallery>';
- }
- }
-
- # Comments
- $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
- foreach( $comment_content as $marker => $content ){
- $comment_content[$marker] = '<!--'.$content.'-->';
+ $elements[] = 'math';
}
+
- # Extensions
- foreach ( $this->mTagHooks as $tag => $callback ) {
- $ext_content[$tag] = array();
- $text = Parser::extractTagsAndParams( $tag, $text, $ext_content[$tag],
- $ext_tags[$tag], $ext_params[$tag], $uniq_prefix );
- foreach( $ext_content[$tag] as $marker => $content ) {
- $full_tag = $ext_tags[$tag][$marker];
- $params = $ext_params[$tag][$marker];
- if ( $render )
- $ext_content[$tag][$marker] = call_user_func_array( $callback, array( $content, $params, &$this ) );
- else {
- if ( is_null( $content ) ) {
- // Empty element tag
- $ext_content[$tag][$marker] = $full_tag;
+ $matches = array();
+ $text = Parser::extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
+
+ foreach( $matches as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ if( $render ) {
+ $tagName = strtolower( $element );
+ switch( $tagName ) {
+ case '!--':
+ // Comment
+ if( substr( $tag, -3 ) == '-->' ) {
+ $output = $tag;
+ } else {
+ // Unclosed comment in input.
+ // Close it so later stripping can remove it
+ $output = "$tag-->";
+ }
+ break;
+ case 'html':
+ if( $wgRawHtml ) {
+ $output = $content;
+ break;
+ }
+ // Shouldn't happen otherwise. :)
+ case 'nowiki':
+ $output = wfEscapeHTMLTagsOnly( $content );
+ break;
+ case 'math':
+ $output = MathRenderer::renderMath( $content );
+ break;
+ case 'gallery':
+ $output = $this->renderImageGallery( $content );
+ break;
+ default:
+ if( isset( $this->mTagHooks[$tagName] ) ) {
+ $output = call_user_func_array( $this->mTagHooks[$tagName],
+ array( $content, $params, $this ) );
} else {
- $ext_content[$tag][$marker] = "$full_tag$content</$tag>";
+ throw new MWException( "Invalid call hook $element" );
}
}
+ } else {
+ // Just stripping tags; keep the source
+ $output = $tag;
+ }
+ if( !$stripcomments && $element == '!--' ) {
+ $commentState[$marker] = $output;
+ } else {
+ $state[$element][$marker] = $output;
}
}
# not invoke any extension tags / parser hooks contained within
# a comment.)
if ( !$stripcomments ) {
- $tempstate = array( 'comment' => $comment_content );
- $text = $this->unstrip( $text, $tempstate );
- $comment_content = array();
+ // Put them all back and forget them
+ $text = strtr( $text, $commentState );
}
- # Merge state with the pre-existing state, if there is one
- if ( $state ) {
- $state['html'] = $state['html'] + $html_content;
- $state['nowiki'] = $state['nowiki'] + $nowiki_content;
- $state['math'] = $state['math'] + $math_content;
- $state['pre'] = $state['pre'] + $pre_content;
- $state['gallery'] = $state['gallery'] + $gallery_content;
- $state['comment'] = $state['comment'] + $comment_content;
-
- foreach( $ext_content as $tag => $array ) {
- if ( array_key_exists( $tag, $state ) ) {
- $state[$tag] = $state[$tag] + $array;
- }
- }
- } else {
- $state = array(
- 'html' => $html_content,
- 'nowiki' => $nowiki_content,
- 'math' => $math_content,
- 'pre' => $pre_content,
- 'gallery' => $gallery_content,
- 'comment' => $comment_content,
- ) + $ext_content;
- }
return $text;
}
/**
- * restores pre, math, and hiero removed by strip()
+ * Restores pre, math, and other extensions removed by strip()
*
* always call unstripNoWiki() after this one
- * @access private
+ * @private
*/
function unstrip( $text, &$state ) {
if ( !is_array( $state ) ) {
return $text;
}
- # Must expand in reverse order, otherwise nested tags will be corrupted
- foreach( array_reverse( $state, true ) as $tag => $contentDict ) {
+ # Gather every marker => content pair except those stored under
+ # 'nowiki' and 'html' (those are restored by unstripNoWiki()).
+ $replacements = array();
+ foreach( $state as $tag => $contentDict ) {
if( $tag != 'nowiki' && $tag != 'html' ) {
- foreach( array_reverse( $contentDict, true ) as $uniq => $content ) {
- $text = str_replace( $uniq, $content, $text );
+ foreach( $contentDict as $uniq => $content ) {
+ $replacements[$uniq] = $content;
}
}
}
+ # One strtr() pass replaces all markers at once; per the PHP manual,
+ # text that has been substituted is not searched again.
+ $text = strtr( $text, $replacements );
return $text;
}
/**
- * always call this after unstrip() to preserve the order
+ * Always call this after unstrip() to preserve the order
*
- * @access private
+ * @private
*/
function unstripNoWiki( $text, &$state ) {
if ( !is_array( $state ) ) {
return $text;
}
- # Must expand in reverse order, otherwise nested tags will be corrupted
- for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) {
- $text = str_replace( key( $state['nowiki'] ), $content, $text );
- }
-
- global $wgRawHtml;
- if ($wgRawHtml) {
- for ( $content = end($state['html']); $content !== false; $content = prev( $state['html'] ) ) {
- $text = str_replace( key( $state['html'] ), $content, $text );
+ # Gather only the marker => content pairs stored under 'nowiki' and
+ # 'html', i.e. exactly the groups that unstrip() leaves alone.
+ $replacements = array();
+ foreach( $state as $tag => $contentDict ) {
+ if( $tag == 'nowiki' || $tag == 'html' ) {
+ foreach( $contentDict as $uniq => $content ) {
+ $replacements[$uniq] = $content;
+ }
}
}
+ # Substitute all collected markers in a single strtr() pass.
+ $text = strtr( $text, $replacements );
return $text;
}
* Returns the unique tag which must be inserted into the stripped text
* The tag will be replaced with the original text in unstrip()
*
- * @access private
+ * @private
*/
function insertStripItem( $text, &$state ) {
$rnd = $this->mUniqPrefix . '-item' . Parser::getRandomString();
if ( !$state ) {
- $state = array(
- 'html' => array(),
- 'nowiki' => array(),
- 'math' => array(),
- 'pre' => array(),
- 'comment' => array(),
- 'gallery' => array(),
- );
+ $state = array();
}
$state['item'][$rnd] = $text;
return $rnd;
*
* @param string $text Hideous HTML input
* @return string Corrected HTML output
- * @access public
+ * @public
* @static
*/
function tidy( $text ) {
/**
* Spawn an external HTML tidy process and get corrected markup back from it.
*
- * @access private
+ * @private
* @static
*/
function externalTidy( $text ) {
*
* 'pear install tidy' should be able to compile the extension module.
*
- * @access private
+ * @private
* @static
*/
function internalTidy( $text ) {
/**
* parse the wiki syntax used to render tables
*
- * @access private
+ * @private
*/
function doTableStuff ( $t ) {
$fname = 'Parser::doTableStuff';
* Helper function for parse() that transforms wiki markup into
* HTML. Only called for $mOutputType == OT_HTML.
*
- * @access private
+ * @private
*/
function internalParse( $text ) {
$args = array();
$text = strtr( $text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ) );
$text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') );
$text = preg_replace( '/<includeonly>.*?<\/includeonly>/s', '', $text );
-
+
$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ) );
+
$text = $this->replaceVariables( $text, $args );
+ // Tables need to come after variable replacement for things to work
+ // properly; putting them before other transformations should keep
+ // exciting things like link expansions from showing up in surprising
+ // places.
+ $text = $this->doTableStuff( $text );
+
$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
+ $text = $this->stripToc( $text );
$text = $this->doHeadings( $text );
if($this->mOptions->getUseDynamicDates()) {
$df =& DateFormatter::getInstance();
$text = str_replace($this->mUniqPrefix."NOPARSE", "", $text);
$text = $this->doMagicLinks( $text );
- $text = $this->doTableStuff( $text );
$text = $this->formatHeadings( $text, $isMain );
wfProfileOut( $fname );
* Replace special strings like "ISBN xxx" and "RFC xxx" with
* magic external links.
*
- * @access private
+ * @private
*/
function &doMagicLinks( &$text ) {
$text = $this->magicISBN( $text );
/**
* Parse headers and return html
*
- * @access private
+ * @private
*/
function doHeadings( $text ) {
$fname = 'Parser::doHeadings';
wfProfileIn( $fname );
for ( $i = 6; $i >= 1; --$i ) {
$h = str_repeat( '=', $i );
- $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
+ $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
"<h{$i}>\\1</h{$i}>\\2", $text );
}
wfProfileOut( $fname );
/**
* Replace single quotes with HTML markup
- * @access private
+ * @private
* @return string the altered text
*/
function doAllQuotes( $text ) {
/**
* Helper function for doAllQuotes()
- * @access private
+ * @private
*/
function doQuotes( $text ) {
$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
* Note: this is all very hackish and the order of execution matters a lot.
* Make sure to run maintenance/parserTests.php if you change this code.
*
- * @access private
+ * @private
*/
function replaceExternalLinks( $text ) {
global $wgContLang;
# No link text, e.g. [http://domain.tld/some.link]
if ( $text == '' ) {
- # Autonumber if allowed
- if ( strpos( HTTP_PROTOCOLS, str_replace('/','\/', $protocol) ) !== false ) {
+ # Autonumber if allowed. See bug #5918
+ if ( strpos( wfUrlProtocols(), substr($protocol, 0, strpos($protocol, ':')) ) !== false ) {
$text = '[' . ++$this->mAutonumber . ']';
$linktype = 'autonumber';
} else {
$text = $wgContLang->markNoConversion($text);
- # Replace & from obsolete syntax with &.
- # All HTML entities will be escaped by makeExternalLink()
- $url = str_replace( '&', '&', $url );
+ # Normalize any HTML entities in input. They will be
+ # re-escaped by makeExternalLink().
+ $url = Sanitizer::decodeCharReferences( $url );
# Process the trail (i.e. everything after this link up until start of the next link),
# replacing any non-bracketed links
# This means that users can paste URLs directly into the text
# Funny characters like ö aren't valid in URLs anyway
# This was changed in August 2004
- $s .= $sk->makeExternalLink( $url, $text, false, $linktype ) . $dtrail . $trail;
+ $s .= $sk->makeExternalLink( $url, $text, false, $linktype, $this->mTitle->getNamespace() ) . $dtrail . $trail;
# Register link in the output object.
# Replace unnecessary URL escape codes with the referenced character
/**
* Replace anything that looks like a URL with a link
- * @access private
+ * @private
*/
function replaceFreeExternalLinks( $text ) {
global $wgContLang;
preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $bits[$i + 1], $m ))
{
# add protocol, arg
- $url .= $bits[$i] . $bits[$i + 1]; # protocol, url as arg to previous link
+ $url .= $bits[$i] . $m[1]; # protocol, url as arg to previous link
$i += 2;
$trail = $m[2];
}
$url = substr( $url, 0, -$numSepChars );
}
- # Replace & from obsolete syntax with &.
- # All HTML entities will be escaped by makeExternalLink()
- # or maybeMakeExternalImage()
- $url = str_replace( '&', '&', $url );
+ # Normalize any HTML entities in input. They will be
+ # re-escaped by makeExternalLink() or maybeMakeExternalImage()
+ $url = Sanitizer::decodeCharReferences( $url );
# Is this an external image?
$text = $this->maybeMakeExternalImage( $url );
if ( $text === false ) {
# Not an image, make a link
- $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free' );
+ $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free', $this->mTitle->getNamespace() );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
$pasteurized = Parser::replaceUnusualEscapes( $url );
* Callback function used in replaceUnusualEscapes().
* Replaces unusual URL escape codes with their equivalent character
* @static
- * @access private
+ * @private
*/
function replaceUnusualEscapesCallback( $matches ) {
$char = urldecode( $matches[0] );
/**
* make an image if it's allowed, either through the global
* option or through the exception
- * @access private
+ * @private
*/
function maybeMakeExternalImage( $url ) {
$sk =& $this->mOptions->getSkin();
/**
* Process [[ ]] wikilinks
*
- * @access private
+ * @private
*/
function replaceInternalLinks( $s ) {
global $wgContLang;
$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
if( is_null( $this->mTitle ) ) {
- wfDebugDieBacktrace( 'nooo' );
+ throw new MWException( 'nooo' );
}
$nottalk = !$this->mTitle->isTalkPage();
* @param string $target the source of the link
* @param string &$text the link text, modified as necessary
* @return string the full name of the link
- * @access private
+ * @private
*/
function maybeDoSubpageLink($target, &$text) {
# Valid link forms:
/**#@+
* Used by doBlockLevels()
- * @access private
+ * @private
*/
/* private */ function closeParagraph() {
$result = '';
/**
* Make lists from lines starting with ':', '*', '#', etc.
*
- * @access private
+ * @private
* @return string the lists rendered as HTML
*/
function doBlockLevels( $text, $linestart ) {
wfProfileIn( "$fname-paragraph" );
# No prefix (not in list)--go to paragraph mode
// XXX: use a stack for nestable elements like span, table and div
- $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
+ $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/center|<\\/tr|<\\/td|<\\/th)/iS', $t );
$closematch = preg_match(
'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
- '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul)/iS', $t );
+ '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<center)/iS', $t );
if ( $openmatch or $closematch ) {
$paragraphStack = false;
+ # TODO bug 5718: paragraph closed
$output .= $this->closeParagraph();
if ( $preOpenMatch and !$preCloseMatch ) {
$this->mInPre = true;
}
/**
- * Split up a string on ':', ignoring any occurences inside
- * <a>..</a> or <span>...</span>
+ * Split up a string on ':', ignoring any occurrences inside tags
+ * to prevent illegal overlapping.
* @param string $str the string to split
* @param string &$before set to everything before the ':'
* @param string &$after set to everything after the ':'
* return string the position of the ':', or false if none found
*/
function findColonNoLinks($str, &$before, &$after) {
- # I wonder if we should make this count all tags, not just <a>
- # and <span>. That would prevent us from matching a ':' that
- # comes in the middle of italics other such formatting....
- # -- Wil
$fname = 'Parser::findColonNoLinks';
wfProfileIn( $fname );
- $pos = 0;
- do {
- $colon = strpos($str, ':', $pos);
-
- if ($colon !== false) {
- $before = substr($str, 0, $colon);
- $after = substr($str, $colon + 1);
-
- # Skip any ':' within <a> or <span> pairs
- $a = substr_count($before, '<a');
- $s = substr_count($before, '<span');
- $ca = substr_count($before, '</a>');
- $cs = substr_count($before, '</span>');
-
- if ($a <= $ca and $s <= $cs) {
- # Tags are balanced before ':'; ok
+
+ # Every exit path below must call wfProfileOut() so the profiling
+ # section opened above is always closed.
+ $pos = strpos( $str, ':' );
+ if( $pos === false ) {
+ // Nothing to find!
+ wfProfileOut( $fname );
+ return false;
+ }
+
+ $lt = strpos( $str, '<' );
+ if( $lt === false || $lt > $pos ) {
+ // Easy; no tag nesting to worry about
+ $before = substr( $str, 0, $pos );
+ $after = substr( $str, $pos+1 );
+ wfProfileOut( $fname );
+ return $pos;
+ }
+
+ // Ugly state machine to walk through avoiding tags.
+ // $stack counts tags opened but not yet closed; a ':' only counts
+ // as the separator while $stack is zero.
+ $state = MW_COLON_STATE_TEXT;
+ $stack = 0;
+ $len = strlen( $str );
+ for( $i = 0; $i < $len; $i++ ) {
+ // Square-bracket offsets behave like the {} form, which newer
+ // PHP versions no longer accept.
+ $c = $str[$i];
+
+ switch( $state ) {
+ // (Using the number is a performance hack for common cases)
+ case 0: // MW_COLON_STATE_TEXT:
+ switch( $c ) {
+ case "<":
+ // Could be either a <start> tag or an </end> tag
+ $state = MW_COLON_STATE_TAGSTART;
+ break;
+ case ":":
+ if( $stack == 0 ) {
+ // We found it!
+ $before = substr( $str, 0, $i );
+ $after = substr( $str, $i + 1 );
+ wfProfileOut( $fname );
+ return $i;
+ }
+ // Embedded in a tag; don't break it.
+ break;
+ default:
+ // Skip ahead looking for something interesting
+ $colon = strpos( $str, ':', $i );
+ if( $colon === false ) {
+ // Nothing else interesting
+ wfProfileOut( $fname );
+ return false;
+ }
+ $lt = strpos( $str, '<', $i );
+ if( $stack === 0 ) {
+ if( $lt === false || $colon < $lt ) {
+ // We found it!
+ $before = substr( $str, 0, $colon );
+ $after = substr( $str, $colon + 1 );
+ wfProfileOut( $fname );
+ // Report the colon's offset (the split point),
+ // not the scan position.
+ return $colon;
+ }
+ }
+ if( $lt === false ) {
+ // Nothing else interesting to find; abort!
+ // We're nested, but there's no close tags left. Abort!
+ break 2;
+ }
+ // Skip ahead to next tag start
+ $i = $lt;
+ $state = MW_COLON_STATE_TAGSTART;
+ }
+ break;
+ case 1: // MW_COLON_STATE_TAG:
+ // In a <tag>
+ switch( $c ) {
+ case ">":
+ $stack++;
+ $state = MW_COLON_STATE_TEXT;
break;
+ case "/":
+ // Slash may be followed by >?
+ $state = MW_COLON_STATE_TAGSLASH;
+ break;
+ default:
+ // ignore
+ }
+ break;
+ case 2: // MW_COLON_STATE_TAGSTART:
+ switch( $c ) {
+ case "/":
+ $state = MW_COLON_STATE_CLOSETAG;
+ break;
+ case "!":
+ $state = MW_COLON_STATE_COMMENT;
+ break;
+ case ">":
+ // Illegal early close? This shouldn't happen D:
+ $state = MW_COLON_STATE_TEXT;
+ break;
+ default:
+ $state = MW_COLON_STATE_TAG;
+ }
+ break;
+ case 3: // MW_COLON_STATE_CLOSETAG:
+ // In a </tag>
+ if( $c == ">" ) {
+ $stack--;
+ if( $stack < 0 ) {
+ wfDebug( "Invalid input in $fname; too many close tags\n" );
+ wfProfileOut( $fname );
+ return false;
+ }
+ $state = MW_COLON_STATE_TEXT;
+ }
+ break;
+ case MW_COLON_STATE_TAGSLASH:
+ if( $c == ">" ) {
+ // Yes, a self-closed tag <blah/>
+ $state = MW_COLON_STATE_TEXT;
+ } else {
+ // Probably we're jumping the gun, and this is an attribute
+ $state = MW_COLON_STATE_TAG;
+ }
+ break;
+ case 5: // MW_COLON_STATE_COMMENT:
+ if( $c == "-" ) {
+ $state = MW_COLON_STATE_COMMENTDASH;
+ }
+ break;
+ case MW_COLON_STATE_COMMENTDASH:
+ if( $c == "-" ) {
+ $state = MW_COLON_STATE_COMMENTDASHDASH;
+ } else {
+ $state = MW_COLON_STATE_COMMENT;
}
- $pos = $colon + 1;
+ break;
+ case MW_COLON_STATE_COMMENTDASHDASH:
+ if( $c == ">" ) {
+ $state = MW_COLON_STATE_TEXT;
+ } else {
+ $state = MW_COLON_STATE_COMMENT;
+ }
+ break;
+ default:
+ throw new MWException( "State machine error in $fname" );
}
- } while ($colon !== false);
+ }
+ if( $stack > 0 ) {
+ wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" );
+ // Close the profiling section on this exit path too.
+ wfProfileOut( $fname );
+ return false;
+ }
wfProfileOut( $fname );
- return $colon;
+ return false;
}
/**
* Return value of a magic variable (like PAGENAME)
*
- * @access private
+ * @private
*/
function getVariableValue( $index ) {
global $wgContLang, $wgSitename, $wgServer, $wgServerName, $wgScriptPath;
return $this->mTitle->getSubpageText();
case MAG_SUBPAGENAMEE:
return $this->mTitle->getSubpageUrlForm();
+ case MAG_BASEPAGENAME:
+ return $this->mTitle->getBaseText();
+ case MAG_BASEPAGENAMEE:
+ return wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+ case MAG_TALKPAGENAME:
+ if( $this->mTitle->canTalk() ) {
+ $talkPage = $this->mTitle->getTalkPage();
+ return $talkPage->getPrefixedText();
+ } else {
+ return '';
+ }
+ case MAG_TALKPAGENAMEE:
+ if( $this->mTitle->canTalk() ) {
+ $talkPage = $this->mTitle->getTalkPage();
+ return $talkPage->getPrefixedUrl();
+ } else {
+ return '';
+ }
+ case MAG_SUBJECTPAGENAME:
+ $subjPage = $this->mTitle->getSubjectPage();
+ return $subjPage->getPrefixedText();
+ case MAG_SUBJECTPAGENAMEE:
+ $subjPage = $this->mTitle->getSubjectPage();
+ return $subjPage->getPrefixedUrl();
case MAG_REVISIONID:
return $this->mRevisionId;
case MAG_NAMESPACE:
- return $wgContLang->getNsText( $this->mTitle->getNamespace() );
+ return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) );
case MAG_NAMESPACEE:
return wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
+ case MAG_TALKSPACE:
+ return $this->mTitle->canTalk() ? str_replace('_',' ',$this->mTitle->getTalkNsText()) : '';
+ case MAG_TALKSPACEE:
+ return $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
+ case MAG_SUBJECTSPACE:
+ return $this->mTitle->getSubjectNsText();
+ case MAG_SUBJECTSPACEE:
+ return( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
case MAG_CURRENTDAYNAME:
return $varCache[$index] = $wgContLang->getWeekdayName( date( 'w', $ts ) + 1 );
case MAG_CURRENTYEAR:
return $varCache[$index] = $wgContLang->formatNum( wfNumberOfArticles() );
case MAG_NUMBEROFFILES:
return $varCache[$index] = $wgContLang->formatNum( wfNumberOfFiles() );
+ case MAG_NUMBEROFUSERS:
+ return $varCache[$index] = $wgContLang->formatNum( wfNumberOfUsers() );
+ case MAG_NUMBEROFPAGES:
+ return $varCache[$index] = $wgContLang->formatNum( wfNumberOfPages() );
+ case MAG_CURRENTTIMESTAMP:
+ return $varCache[$index] = wfTimestampNow();
+ case MAG_CURRENTVERSION:
+ global $wgVersion;
+ return $wgVersion;
case MAG_SITENAME:
return $wgSitename;
case MAG_SERVER:
return $wgServerName;
case MAG_SCRIPTPATH:
return $wgScriptPath;
+ case MAG_DIRECTIONMARK:
+ return $wgContLang->getDirMark();
+ case MAG_CONTENTLANGUAGE:
+ global $wgContLanguageCode;
+ return $wgContLanguageCode;
default:
$ret = null;
if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) )
/**
* initialise the magic variables (like CURRENTMONTHNAME)
*
- * @access private
+ * @private
*/
function initialiseVariables() {
$fname = 'Parser::initialiseVariables';
* 4 => callback # replacement callback to call if {{{{..}}}} is found
* )
* )
- * @access private
+ * @private
*/
function replace_callback ($text, $callbacks) {
$openingBraceStack = array(); # this array will hold a stack of parentheses which are not closed yet
* @param string $tex The text to transform
* @param array $args Key-value pairs representing template parameters to substitute
* @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion
- * @access private
+ * @private
*/
function replaceVariables( $text, $args = array(), $argsOnly = false ) {
# Prevent too big inclusions
wfProfileOut( $fname );
return $text;
}
-
+
/**
* Replace magic variables
- * @access private
+ * @private
*/
function variableSubstitution( $matches ) {
$fname = 'Parser::variableSubstitution';
* $piece['title']: the title, i.e. the part before the |
* $piece['parts']: the parameter array
* @return string the text of the template
- * @access private
+ * @private
*/
function braceSubstitution( $piece ) {
- global $wgContLang;
+ global $wgContLang, $wgLang, $wgAllowDisplayTitle, $action;
$fname = 'Parser::braceSubstitution';
wfProfileIn( $fname );
}
}
+ # URLENCODE
+ if( !$found ) {
+ $urlencode =& MagicWord::get( MAG_URLENCODE );
+ if( $urlencode->matchStartAndRemove( $part1 ) ) {
+ $text = $linestart . urlencode( $part1 );
+ $found = true;
+ }
+ }
+
# LCFIRST, UCFIRST, LC and UC
if ( !$found ) {
$lcfirst =& MagicWord::get( MAG_LCFIRST );
if ( $func !== false ) {
$title = Title::newFromText( $part1 );
+ # Due to order of execution of a lot of bits, the values might be encoded
+ # before arriving here; if that's true, then the title can't be created
+ # and the variable will fail. If we can't get a decent title from the first
+ # attempt, url-decode and try for a second.
+ if( is_null( $title ) )
+ $title = Title::newFromUrl( urldecode( $part1 ) );
if ( !is_null( $title ) ) {
if ( $argc > 0 ) {
$text = $linestart . $title->$func( $args[0] );
}
}
+ $lang = $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
# GRAMMAR
if ( !$found && $argc == 1 ) {
$mwGrammar =& MagicWord::get( MAG_GRAMMAR );
if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
- $text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
+ $text = $linestart . $lang->convertGrammar( $args[0], $part1 );
$found = true;
}
}
if ( !$found && $argc >= 2 ) {
$mwPluralForm =& MagicWord::get( MAG_PLURAL );
if ( $mwPluralForm->matchStartAndRemove( $part1 ) ) {
- if ($argc==2) {$args[2]=$args[1];}
- $text = $linestart . $wgContLang->convertPlural( $part1, $args[0], $args[1], $args[2]);
+ while ( count($args) < 5 ) { $args[] = $args[count($args)-1]; }
+ $text = $linestart . $lang->convertPlural( $part1, $args[0], $args[1],
+ $args[2], $args[3], $args[4]);
$found = true;
}
}
+
+ # DISPLAYTITLE
+ if ( !$found && $argc == 1 && $wgAllowDisplayTitle ) {
+ $mwDT =& MagicWord::get( MAG_DISPLAYTITLE );
+ if ( $mwDT->matchStartAndRemove( $part1 ) ) {
+
+ # Set title in parser output object
+ $param = $args[0];
+ $parserOptions = new ParserOptions;
+ $local_parser = new Parser ();
+ $t2 = $local_parser->parse ( $param, $this->mTitle, $parserOptions, false );
+ $this->mOutput->mHTMLtitle = $t2->GetText();
+
+ # Add subtitle
+ $t = $this->mTitle->getPrefixedText();
+ $this->mOutput->mSubtitle .= wfMsg('displaytitle', $t);
+ $text = "" ;
+ $found = true ;
+ }
+ }
+
+ # NUMBEROFPAGES, NUMBEROFUSERS, NUMBEROFARTICLES, and NUMBEROFFILES
+ if( !$found ) {
+ $mwWordsToCheck = array( MAG_NUMBEROFPAGES => 'wfNumberOfPages',
+ MAG_NUMBEROFUSERS => 'wfNumberOfUsers',
+ MAG_NUMBEROFARTICLES => 'wfNumberOfArticles',
+ MAG_NUMBEROFFILES => 'wfNumberOfFiles' );
+ foreach( $mwWordsToCheck as $word => $func ) {
+ $mwCurrentWord =& MagicWord::get( $word );
+ if( $mwCurrentWord->matchStartAndRemove( $part1 ) ) {
+ $mwRawSuffix =& MagicWord::get( MAG_RAWSUFFIX );
+ if( $mwRawSuffix->match( $args[0] ) ) {
+ # Raw and unformatted
+ $text = $linestart . call_user_func( $func );
+ } else {
+ # Formatted according to the content default
+ $text = $linestart . $wgContLang->formatNum( call_user_func( $func ) );
+ }
+ $found = true;
+ }
+ }
+ }
+
+ # #LANGUAGE:
+ if( !$found ) {
+ $mwLanguage =& MagicWord::get( MAG_LANGUAGE );
+ if( $mwLanguage->matchStartAndRemove( $part1 ) ) {
+ $lang = $wgContLang->getLanguageName( strtolower( $part1 ) );
+ $text = $linestart . ( $lang != '' ? $lang : $part1 );
+ $found = true;
+ }
+ }
+
+ # Extensions
+ if ( !$found && substr( $part1, 0, 1 ) == '#' ) {
+ $colonPos = strpos( $part1, ':' );
+ if ( $colonPos !== false ) {
+ $function = strtolower( substr( $part1, 1, $colonPos - 1 ) );
+ if ( isset( $this->mFunctionHooks[$function] ) ) {
+ $funcArgs = array_map( 'trim', $args );
+ $funcArgs = array_merge( array( &$this, trim( substr( $part1, $colonPos + 1 ) ) ), $funcArgs );
+ $result = call_user_func_array( $this->mFunctionHooks[$function], $funcArgs );
+ $found = true;
+
+ // The text is usually already parsed, doesn't need triple-brace tags expanded, etc.
+ //$noargs = true;
+ //$noparse = true;
+
+ if ( is_array( $result ) ) {
+ $text = $linestart . $result[0];
+ unset( $result[0] );
+
+ // Extract flags into the local scope
+ // This allows callers to set flags such as nowiki, noparse, found, etc.
+ extract( $result );
+ } else {
+ $text = $linestart . $result;
+ }
+ }
+ }
+ }
# Template table test
# Check for excessive inclusion
$dbk = $title->getPrefixedDBkey();
if ( $this->incrementIncludeCount( $dbk ) ) {
- if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() ) {
- # Capture special page output
+ if ( $title->getNamespace() == NS_SPECIAL && $this->mOptions->getAllowSpecialInclusion() && $this->mOutputType != OT_WIKI ) {
$text = SpecialPage::capturePath( $title );
if ( is_string( $text ) ) {
$found = true;
# Use the original $piece['title'] not the mangled $part1, so that
# modifiers such as RAW: produce separate cache entries
if( $found ) {
- $this->mTemplates[$piece['title']] = $text;
+ if( $isHTML ) {
+ // A special page; don't store it in the template cache.
+ } else {
+ $this->mTemplates[$piece['title']] = $text;
+ }
$text = $linestart . $text;
}
}
if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
$text = wfEscapeWikiText( $text );
} elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found ) {
- if ( !$noargs ) {
+ if ( $noargs ) {
+ $assocArgs = array();
+ } else {
# Clean up argument array
$assocArgs = array();
$index = 1;
/**
* Triple brace replacement -- used for template arguments
- * @access private
+ * @private
*/
function argSubstitution( $matches ) {
$arg = trim( $matches['title'] );
/**
* Returns true if the function is allowed to include this entity
- * @access private
+ * @private
*/
function incrementIncludeCount( $dbk ) {
if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
}
}
+ /**
+  * Process the table-of-contents switches found in the text.
+  *
+  * __NOTOC__ turns the TOC off. A __TOC__ turns it (back) on and pins its
+  * position: the first occurrence becomes the <!--MWTOC--> placeholder,
+  * which formatHeadings() later swaps for the generated TOC, and any
+  * further occurrences are blanked.
+  *
+  * @param string $text
+  * @return string the text after the magic words have been handled
+  */
+ function stripToc( $text ) {
+     # __NOTOC__ (not case-sensitive) anywhere in the text: do not add a TOC
+     $noTocWord = MagicWord::get( MAG_NOTOC );
+     if( $noTocWord->matchAndRemove( $text ) ) {
+         $this->mShowToc = false;
+     }
+
+     $tocWord = MagicWord::get( MAG_TOC );
+     if( !$tocWord->match( $text ) ) {
+         # No explicit __TOC__, so there is no position to mark
+         return $text;
+     }
+
+     $this->mShowToc = true;
+     $this->mForceTocPosition = true;
+
+     # Set a placeholder; at the end it is filled in with the TOC
+     $text = $tocWord->replace( '<!--MWTOC-->', $text, 1 );
+
+     # Only the first one is kept
+     $text = $tocWord->replace( '', $text );
+     return $text;
+ }
+ }
+
/**
* This function accomplishes several tasks:
* 1) Auto-number headings if that option is enabled
*
* @param string $text
* @param boolean $isMain
- * @access private
+ * @private
*/
function formatHeadings( $text, $isMain=true ) {
global $wgMaxTocLevel, $wgContLang;
$doNumberHeadings = $this->mOptions->getNumberHeadings();
- $doShowToc = true;
- $forceTocHere = false;
if( !$this->mTitle->userCanEdit() ) {
$showEditLink = 0;
} else {
if( $esw->matchAndRemove( $text ) ) {
$showEditLink = 0;
}
- # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
- # do not add TOC
- $mw =& MagicWord::get( MAG_NOTOC );
- if( $mw->matchAndRemove( $text ) ) {
- $doShowToc = false;
- }
# Get all headlines for numbering them and adding funky stuff like [edit]
# links - this is for later, but we need the number of headlines right now
$numMatches = preg_match_all( '/<H([1-6])(.*?'.'>)(.*?)<\/H[1-6] *>/i', $text, $matches );
# if there are fewer than 4 headlines in the article, do not show TOC
- if( $numMatches < 4 ) {
- $doShowToc = false;
- }
-
- # if the string __TOC__ (not case-sensitive) occurs in the HTML,
- # override above conditions and always show TOC at that place
-
- $mw =& MagicWord::get( MAG_TOC );
- if($mw->match( $text ) ) {
- $doShowToc = true;
- $forceTocHere = true;
- } else {
- # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
- # override above conditions and always show TOC above first header
- $mw =& MagicWord::get( MAG_FORCETOC );
- if ($mw->matchAndRemove( $text ) ) {
- $doShowToc = true;
- }
+ # unless it's been explicitly enabled.
+ $enoughToc = $this->mShowToc &&
+ (($numMatches >= 4) || $this->mForceTocPosition);
+
+ # Allow user to stipulate that a page should have a "new section"
+ # link added via __NEWSECTIONLINK__
+ $mw =& MagicWord::get( MAG_NEWSECTIONLINK );
+ if( $mw->matchAndRemove( $text ) )
+ $this->mOutput->setNewSection( true );
+
+ # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
+ # override above conditions and always show TOC above first header
+ $mw =& MagicWord::get( MAG_FORCETOC );
+ if ($mw->matchAndRemove( $text ) ) {
+ $this->mShowToc = true;
+ $enoughToc = true;
}
# Never ever show TOC if no headers
if( $numMatches < 1 ) {
- $doShowToc = false;
+ $enoughToc = false;
}
# We need this to perform operations on the HTML
}
$level = $matches[1][$headlineCount];
- if( $doNumberHeadings || $doShowToc ) {
+ if( $doNumberHeadings || $enoughToc ) {
if ( $level > $prevlevel ) {
# Increase TOC level
$toclevel++;
$sublevelCount[$toclevel] = 0;
- $toc .= $sk->tocIndent();
+ if( $toclevel<$wgMaxTocLevel ) {
+ $toc .= $sk->tocIndent();
+ }
}
elseif ( $level < $prevlevel && $toclevel > 1 ) {
# Decrease TOC level, find level to jump to
}
}
}
-
- $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel );
+ if( $toclevel<$wgMaxTocLevel ) {
+ $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel );
+ }
}
else {
# No change in level, end TOC line
- $toc .= $sk->tocLineEnd();
+ if( $toclevel<$wgMaxTocLevel ) {
+ $toc .= $sk->tocLineEnd();
+ }
}
$levelCount[$toclevel] = $level;
if($refcount[$headlineCount] > 1 ) {
$anchor .= '_' . $refcount[$headlineCount];
}
- if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
+ if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
$toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel);
}
if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
$sectionCount++;
}
- if( $doShowToc ) {
- $toc .= $sk->tocUnindent( $toclevel - 1 );
+ if( $enoughToc ) {
+ if( $toclevel<$wgMaxTocLevel ) {
+ $toc .= $sk->tocUnindent( $toclevel - 1 );
+ }
$toc = $sk->tocList( $toc );
}
# $full .= $sk->editSectionLink(0);
}
$full .= $block;
- if( $doShowToc && !$i && $isMain && !$forceTocHere) {
- # Top anchor now in skin
+ if( $enoughToc && !$i && $isMain && !$this->mForceTocPosition ) {
+ # Top anchor now in skin
$full = $full.$toc;
}
}
$i++;
}
- if($forceTocHere) {
- $mw =& MagicWord::get( MAG_TOC );
- return $mw->replace( $toc, $full );
+ if( $this->mForceTocPosition ) {
+ return str_replace( '<!--MWTOC-->', $toc, $full );
} else {
return $full;
}
/**
* Return an HTML link for the "ISBN 123456" text
- * @access private
+ * @private
*/
function magicISBN( $text ) {
$fname = 'Parser::magicISBN';
/**
* Return an HTML link for the "RFC 1234" text
*
- * @access private
+ * @private
* @param string $text Text to be processed
* @param string $keyword Magic keyword to use (default RFC)
* @param string $urlmsg Interface message to use (default rfcurl)
* @param ParserOptions $options parsing options
* @param bool $clearState whether to clear the parser state first
* @return string the altered wiki markup
- * @access public
+ * @public
*/
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
$this->mOptions = $options;
);
$text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
$text = $this->strip( $text, $stripState, true );
- $text = $this->pstPass2( $text, $user );
+ $text = $this->pstPass2( $text, $stripState, $user );
$text = $this->unstrip( $text, $stripState );
$text = $this->unstripNoWiki( $text, $stripState );
return $text;
/**
* Pre-save transform helper function
- * @access private
+ * @private
*/
- function pstPass2( $text, &$user ) {
+ function pstPass2( $text, &$stripState, &$user ) {
global $wgContLang, $wgLocaltimezone;
/* Note: This is the timestamp saved as hardcoded wikitext to
* the database, we use $wgContLang here in order to give
- * everyone the same signiture and use the default one rather
- * than the one selected in each users preferences.
+ * everyone the same signature and use the default one rather
+ * than the one selected in each user's preferences.
*/
if ( isset( $wgLocaltimezone ) ) {
$oldtz = getenv( 'TZ' );
# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
$text = $this->replaceVariables( $text );
+ # Strip out <nowiki> etc. added via replaceVariables
+ $text = $this->strip( $text, $stripState );
+
# Signatures
$sigText = $this->getUserSig( $user );
$text = strtr( $text, array(
*
* @param User $user
* @return string
- * @access private
+ * @private
*/
function getUserSig( &$user ) {
$username = $user->getName();
# Sig. might contain markup; validate this
if( $this->validateSig( $nickname ) !== false ) {
# Validated; clean up (if needed) and return it
- return( $this->cleanSig( $nickname ) );
+ return $this->cleanSig( $nickname, true );
} else {
# Failed to validate; fall back to the default
$nickname = $username;
* 2) Substitute all transclusions
*
* @param string $text
+ * @param $parsing Whether we're cleaning (preferences save) or parsing
* @return string Signature text
*/
- function cleanSig( $text ) {
+ function cleanSig( $text, $parsing = false ) {
+ global $wgTitle;
+ # Output type is OT_WIKI when $parsing is true, OT_MSG otherwise.
+ # NOTE(review): startExternalParse() here and clearState() below reset this
+ # parser's state, and getUserSig() calls cleanSig( ..., true ) from within
+ # pstPass2() - confirm the caller does not rely on its title/options/state
+ # surviving this call.
+ $this->startExternalParse( $wgTitle, new ParserOptions(), $parsing ? OT_WIKI : OT_MSG );
+
 $substWord = MagicWord::get( MAG_SUBST );
+ # Matches every "{{" that is not already followed by the subst keyword ...
 $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
+ # ... and rewrites it to "{{" plus the first synonym of the subst keyword.
 $substText = '{{' . $substWord->getSynonym( 0 );
 $text = preg_replace( $substRegex, $substText, $text );
+ # Remove runs of three to five tildes
 $text = preg_replace( '/~{3,5}/', '', $text );
 $text = $this->replaceVariables( $text );
-
+
+ $this->clearState();
 return $text;
 }
/**
* Set up some variables which are usually set up in parse()
* so that an external function can call some class members with confidence
- * @access public
+ * @public
*/
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
$this->mTitle =& $title;
* @param string $text the text to transform
* @param ParserOptions $options options
* @return string the text with variables substituted
- * @access public
+ * @public
*/
function transformMsg( $text, $options ) {
global $wgTitle;
* Transform and return $text. Use $parser for any required context, e.g. use
* $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
*
- * @access public
+ * @public
*
* @param mixed $tag The tag to use, e.g. 'hook' for <hook>
* @param mixed $callback The callback function (and object) to use for the tag
* @return The old value of the mTagHooks array associated with the hook
*/
 function setHook( $tag, $callback ) {
+ // Hooks are stored under the lower-cased tag name.
+ $tag = strtolower( $tag );
+ // The @ silences the notice raised when no hook exists for this tag yet;
+ // $oldVal is then null.
 $oldVal = @$this->mTagHooks[$tag];
 $this->mTagHooks[$tag] = $callback;
 return $oldVal;
 }
+ /**
+  * Register a parser function, called from wikitext as e.g. {{#sum:1|2|3}}.
+  * braceSubstitution() only consults these hooks for calls that begin with
+  * '#' and contain a colon; the name between the two is lower-cased before
+  * the lookup.
+  *
+  * The callback function should have the form:
+  *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
+  * where $arg1 is the trimmed text after the colon and the remaining
+  * arguments are the trimmed pipe-separated parameters.
+  *
+  * The callback may either return the text result of the function, or an array with the text
+  * in element 0, and a number of flags in the other elements. The names of the flags are
+  * specified in the keys. Valid flags are:
+  *   found     The text returned is valid, stop processing the template. This
+  *             is on by default.
+  *   nowiki    Wiki markup in the return value should be escaped
+  *   noparse   Unsafe HTML tags should not be stripped, etc.
+  *   noargs    Don't replace triple-brace arguments in the return value
+  *   isHTML    The returned text is HTML, armour it against wikitext transformation
+  *
+  * @public
+  *
+  * @param string $name The function name. Function names are case-insensitive.
+  * @param mixed $callback The callback function (and object) to use
+  *
+  * @return mixed The old callback function for this name, or null if there was none
+  */
+ function setFunctionHook( $name, $callback ) {
+     $name = strtolower( $name );
+     // Explicit isset() test instead of @-suppression: same result (null
+     // when nothing was registered) without raising and hiding a notice.
+     $oldVal = isset( $this->mFunctionHooks[$name] ) ? $this->mFunctionHooks[$name] : null;
+     $this->mFunctionHooks[$name] = $callback;
+     return $oldVal;
+ }
+
/**
* Replace <!--LINK--> link placeholders with actual links, in the buffer
* Placeholders created in Skin::makeLinkObj()
/**
* @param array $matches
* @return string
- * @access private
+ * @private
*/
function replaceLinkHoldersTextCallback( $matches ) {
$type = $matches[1];
return $matches[0];
}
+ /**
+  * Tag hook handler for 'pre'; the constructor registers it via setHook().
+  *
+  * @param $text    Content found between the opening and closing tag
+  * @param $attribs Tag attributes, validated through the Sanitizer for 'pre'
+  * @param $parser  Not used by this handler
+  * @return string the rendered <pre> element
+  */
+ function renderPreTag( $text, $attribs, $parser ) {
+     // Backwards-compatibility hack: unwrap <nowiki>...</nowiki> pairs,
+     // keeping what they enclose
+     $inner = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $text );
+
+     $safeAttribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
+
+     $html = wfOpenElement( 'pre', $safeAttribs );
+     $html .= wfEscapeHTMLTagsOnly( $inner );
+     $html .= '</pre>';
+     return $html;
+ }
+
/**
* Renders an image gallery from a text with one line per image.
* text labels may be given by using |-style alternative text. E.g.
* 'A tree'.
*/
function renderImageGallery( $text ) {
- # Setup the parser
- $parserOptions = new ParserOptions;
- $localParser = new Parser();
-
$ig = new ImageGallery();
$ig->setShowBytes( false );
$ig->setShowFilename( false );
+ $ig->setParsing();
$lines = explode( "\n", $text );
foreach ( $lines as $line ) {
$label = '';
}
- $pout = $localParser->parse( $label , $this->mTitle, $parserOptions );
+ $pout = $this->parse( $label,
+ $this->mTitle,
+ $this->mOptions,
+ false, // Strip whitespace...?
+ false // Don't clear state!
+ );
$html = $pout->getText();
$ig->add( new Image( $nt ), $html );
- $this->mOutput->addImage( $nt->getDBkey() );
+
+ # Only add real images (bug #5586)
+ if ( $nt->getNamespace() == NS_IMAGE ) {
+ $this->mOutput->addImage( $nt->getDBkey() );
+ }
}
return $ig->toHTML();
}
* shouldn't be cached.
*/
 function disableCache() {
+ wfDebug( "Parser output marked as uncacheable.\n" );
+ // A cache time of -1 is the "uncacheable" marker reported by the debug line above.
 $this->mOutput->mCacheTime = -1;
 }
* @param string $text
* @param array $args
* @return string
- * @access private
+ * @private
*/
function attributeStripCallback( &$text, $args ) {
$text = $this->replaceVariables( $text, $args );
*/
function getTags() { return array_keys( $this->mTagHooks ); }
/**#@-*/
+
+
+ /**
+ * Break wikitext input into sections, and either pull or replace
+ * some particular section's text.
+ *
+ * External callers should use the getSection and replaceSection methods.
+ *
+ * The text is stripped first, split on wiki (== x ==) and HTML (<h1>..<h6>)
+ * headings, and the stripped pieces are re-inserted into the result, which
+ * is finally passed through trim().
+ *
+ * @param $text Page wikitext
+ * @param $section Numbered section. 0 pulls the text before the first
+ * heading; other numbers will pull the given section
+ * along with its lower-level subsections.
+ * @param $mode One of "get" or "replace"
+ * @param $newtext Replacement text for section data.
+ * @return string for "get", the extracted section text.
+ * for "replace", the whole page with the section replaced.
+ */
+ private function extractSections( $text, $section, $mode, $newtext='' ) {
+ # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
+ # comments to be stripped as well)
+ $striparray = array();
+
+ # strip() runs with default options and OT_WIKI output; the caller's
+ # options and output type are saved here and restored right afterwards.
+ $oldOutputType = $this->mOutputType;
+ $oldOptions = $this->mOptions;
+ $this->mOptions = new ParserOptions();
+ $this->mOutputType = OT_WIKI;
+
+ $striptext = $this->strip( $text, $striparray, true );
+
+ $this->mOutputType = $oldOutputType;
+ $this->mOptions = $oldOptions;
+
+ # now that we can be sure that no pseudo-sections are in the source,
+ # split it up by section
+ $uniq = preg_quote( $this->uniqPrefix(), '/' );
+ # Pattern for a stripped HTML comment marker
+ $comment = "(?:$uniq-!--.*?QINU)";
+ # Layout of $secs as consumed by the loop below: element 0 is the text
+ # before the first heading; each heading then contributes its full heading
+ # line (group 1), the run of '=' (group 2, empty for an HTML heading, in
+ # which case the level digit follows as group 3), and finally the text up
+ # to the next heading.
+ # NOTE(review): this relies on preg_split() emitting no entry for the
+ # unmatched trailing group 3 after a wiki heading - confirm for the
+ # PHP/PCRE version in use.
+ $secs = preg_split(
+ /*
+ "/
+ ^(
+ (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+ (?:
+ (=+) # Should this be limited to 6?
+ .+? # Section title...
+ \\2 # Ending = count must match start
+ |
+ ^
+ <h([1-6])\b.*?>
+ .*?
+ <\/h\\3\s*>
+ )
+ (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+ )$
+ /mix",
+ */
+ "/
+ (
+ ^
+ (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+ (=+) # Should this be limited to 6?
+ .+? # Section title...
+ \\2 # Ending = count must match start
+ (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+ $
+ |
+ <h([1-6])\b.*?>
+ .*?
+ <\/h\\3\s*>
+ )
+ /mix",
+ $striptext, -1,
+ PREG_SPLIT_DELIM_CAPTURE);
+
+ # Seed the result. NOTE(review): $rv is only initialised for the modes
+ # "get" and "replace"; any other $mode reaches unstrip() with $rv unset.
+ if( $mode == "get" ) {
+ if( $section == 0 ) {
+ // "Section 0" returns the content before any other section.
+ $rv = $secs[0];
+ } else {
+ $rv = "";
+ }
+ } elseif( $mode == "replace" ) {
+ if( $section == 0 ) {
+ $rv = $newtext . "\n\n";
+ // Section 0 has no heading level: copy everything that follows.
+ $remainder = true;
+ } else {
+ $rv = $secs[0];
+ $remainder = false;
+ }
+ }
+ // $count numbers the headings seen so far (1-based); $sectionLevel is the
+ // heading level of the requested section once it has been reached.
+ $count = 0;
+ $sectionLevel = 0;
+ // No increment clause: the body advances $index through one heading's
+ // worth of entries per iteration.
+ for( $index = 1; $index < count( $secs ); ) {
+ $headerLine = $secs[$index++];
+ if( $secs[$index] ) {
+ // A wiki header
+ $headerLevel = strlen( $secs[$index++] );
+ } else {
+ // An HTML header
+ $index++;
+ $headerLevel = intval( $secs[$index++] );
+ }
+ $content = $secs[$index++];
+
+ $count++;
+ if( $mode == "get" ) {
+ if( $count == $section ) {
+ $rv = $headerLine . $content;
+ $sectionLevel = $headerLevel;
+ } elseif( $count > $section ) {
+ // Past the requested heading: keep appending while the headings
+ // are deeper than the requested one.
+ if( $sectionLevel && $headerLevel > $sectionLevel ) {
+ $rv .= $headerLine . $content;
+ } else {
+ // Broke out to a higher-level section
+ break;
+ }
+ }
+ } elseif( $mode == "replace" ) {
+ if( $count < $section ) {
+ // Everything before the target section is copied unchanged.
+ $rv .= $headerLine . $content;
+ } elseif( $count == $section ) {
+ // The target's heading and body are replaced by the new text.
+ $rv .= $newtext . "\n\n";
+ $sectionLevel = $headerLevel;
+ } elseif( $count > $section ) {
+ // Deeper headings right after the target belong to it and are
+ // dropped; copying resumes at the first heading of the same or
+ // a higher level.
+ if( $headerLevel <= $sectionLevel ) {
+ // Passed the section's sub-parts.
+ $remainder = true;
+ }
+ if( $remainder ) {
+ $rv .= $headerLine . $content;
+ }
+ }
+ }
+ }
+ # reinsert stripped tags
+ $rv = $this->unstrip( $rv, $striparray );
+ $rv = $this->unstripNoWiki( $rv, $striparray );
+ $rv = trim( $rv );
+ return $rv;
+ }
+
+ /**
+  * Fetch the wikitext of a single section, identified by its number.
+  *
+  * Sections are delimited by headings such as == Heading == or
+  * \<h1\>Heading\</h1\>; section 0 is whatever precedes the first heading.
+  * Subsections nested below the requested section are returned with it.
+  *
+  * @param $text String: text to look in
+  * @param $section Integer: section number
+  * @return string text of the requested section
+  */
+ function getSection( $text, $section ) {
+     $sectionText = $this->extractSections( $text, $section, "get" );
+     return $sectionText;
+ }
+
+ /**
+  * Return the page text with one numbered section replaced.
+  *
+  * Counterpart of getSection(); the work is done by extractSections() in
+  * "replace" mode.
+  *
+  * @param $oldtext String: current text of the whole page
+  * @param $section Integer: number of the section to replace
+  * @param $text String: replacement text for that section
+  * @return string the whole page with the section replaced
+  */
+ function replaceSection( $oldtext, $section, $text ) {
+     $mode = "replace";
+     return $this->extractSections( $oldtext, $section, $mode, $text );
+ }
+
}
/**
$mLinks, # 2-D map of NS/DBK to ID for the links in the document. ID=zero for broken.
$mTemplates, # 2-D map of NS/DBK to ID for the template references. ID=zero for broken.
$mImages, # DB keys of the images used, in the array key only
- $mExternalLinks; # External link URLs, in the key only
+ $mExternalLinks, # External link URLs, in the key only
+ $mHTMLtitle, # Display HTML title
+ $mSubtitle, # Additional subtitle
+ $mNewSection; # Show a new section link?
function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
$containsOldMagic = false, $titletext = '' )
$this->mTemplates = array();
$this->mImages = array();
$this->mExternalLinks = array();
+ $this->mHTMLtitle = "" ;
+ $this->mSubtitle = "" ;
+ $this->mNewSection = false;
}
function getText() { return $this->mText; }
- function getLanguageLinks() { return $this->mLanguageLinks; }
+ function &getLanguageLinks() { return $this->mLanguageLinks; }
function getCategoryLinks() { return array_keys( $this->mCategories ); }
function &getCategories() { return $this->mCategories; }
function getCacheTime() { return $this->mCacheTime; }
function addImage( $name ) { $this->mImages[$name] = 1; }
function addLanguageLink( $t ) { $this->mLanguageLinks[] = $t; }
function addExternalLink( $url ) { $this->mExternalLinks[$url] = 1; }
+
+ /**
+  * Record whether a "new section" link should be shown.
+  * @param $value mixed: stored as a boolean
+  */
+ function setNewSection( $value ) {
+     $this->mNewSection = $value ? true : false;
+ }
+ /**
+  * Tell whether a "new section" link should be shown.
+  * @return bool
+  */
+ function getNewSection() {
+     return $this->mNewSection ? true : false;
+ }
function addLink( $title, $id ) {
$ns = $title->getNamespace();
$this->mTemplates[$ns][$dbk] = $id;
}
- /**
- * @deprecated
- */
- /*
- function merge( $other ) {
- $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
- $this->mCategories = array_merge( $this->mCategories, $this->mLanguageLinks );
- $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
- }*/
-
/**
* Return true if this cached output object predates the global or
* per-article cache invalidation timestamps, or if it comes from
*
* @param string $touched the affected article's last touched timestamp
* @return bool
- * @access public
+ * @public
*/
function expired( $touched ) {
global $wgCacheEpoch;
var $mEditSection; # Create "edit section" links
var $mNumberHeadings; # Automatically number headings
var $mAllowSpecialInclusion; # Allow inclusion of special pages
- var $mTidy; # Ask for tidy cleanup
+ var $mTidy; # Ask for tidy cleanup
+ var $mInterfaceMessage; # Which lang to call for PLURAL and GRAMMAR
function getUseTeX() { return $this->mUseTeX; }
function getUseDynamicDates() { return $this->mUseDynamicDates; }
function getEditSection() { return $this->mEditSection; }
function getNumberHeadings() { return $this->mNumberHeadings; }
function getAllowSpecialInclusion() { return $this->mAllowSpecialInclusion; }
- function getTidy() { return $this->mTidy; }
+ function getTidy() { return $this->mTidy; }
+ function getInterfaceMessage() { return $this->mInterfaceMessage; }
function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
function setAllowSpecialInclusion( $x ) { return wfSetVar( $this->mAllowSpecialInclusion, $x ); }
- function setTidy( $x ) { return wfSetVar( $this->mTidy, $x); }
+ function setTidy( $x ) { return wfSetVar( $this->mTidy, $x); }
function setSkin( &$x ) { $this->mSkin =& $x; }
+ function setInterfaceMessage( $x ) { return wfSetVar( $this->mInterfaceMessage, $x); }
function ParserOptions() {
global $wgUser;
$this->mNumberHeadings = $user->getOption( 'numberheadings' );
$this->mAllowSpecialInclusion = $wgAllowSpecialInclusion;
$this->mTidy = false;
+ $this->mInterfaceMessage = false;
wfProfileOut( $fname );
}
}
return $numImages;
}
+/**
+ * Read the number of user accounts (ss_users) from the site_stats table
+ * @return integer
+ */
+function wfNumberOfUsers() {
+    $fname = 'wfNumberOfUsers';
+    wfProfileIn( $fname );
+
+    $dbr =& wfGetDB( DB_SLAVE );
+    $users = (int)$dbr->selectField( 'site_stats', 'ss_users', array(), $fname );
+
+    wfProfileOut( $fname );
+    return $users;
+}
+
+/**
+ * Read the total number of pages (ss_total_pages) from the site_stats table
+ * @return integer
+ */
+function wfNumberOfPages() {
+    $fname = 'wfNumberOfPages';
+    wfProfileIn( $fname );
+
+    $dbr =& wfGetDB( DB_SLAVE );
+    $pages = (int)$dbr->selectField( 'site_stats', 'ss_total_pages', array(), $fname );
+
+    wfProfileOut( $fname );
+    return $pages;
+}
+
/**
* Get various statistics from the database
- * @access private
+ * @private
*/
function wfLoadSiteStats() {
global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
* Escape html tags
 * Basically replacing " > and < with HTML entities ( &quot;, &gt;, &lt;)
*
- * @param string $in Text that might contain HTML tags
+ * @param $in String: text that might contain HTML tags.
* @return string Escaped string
*/
function wfEscapeHTMLTagsOnly( $in ) {