/** */
require_once( 'Sanitizer.php' );
require_once( 'HttpFunctions.php' );
-require_once( 'ImageGallery.php' );
/**
* Update this version number when the ParserOutput format
define( 'HTTP_PROTOCOLS', 'http:\/\/|https:\/\/' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]' );
-# Including space
-define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
+# Including space, but excluding newlines
+define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
-define( 'EXT_LINK_BRACKETED', '/\[(\b(' . wfUrlProtocols() . ')'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
+define( 'EXT_LINK_BRACKETED', '/\[(\b(' . wfUrlProtocols() . ')'.
+ EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
define( 'EXT_IMAGE_REGEX',
'/^('.HTTP_PROTOCOLS.')'. # Protocol
'('.EXT_LINK_URL_CLASS.'+)\\/'. # Hostname and path
'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);
+// State constants for the definition list colon extraction (the state machine in Parser::findColonNoLinks). That method switches on some of these by literal number, so the values must not be renumbered.
+define( 'MW_COLON_STATE_TEXT', 0 ); // plain text outside any tag; '<' and ':' are the interesting characters
+define( 'MW_COLON_STATE_TAG', 1 ); // inside a start tag; '>' opens an element, '/' may begin a self-close
+define( 'MW_COLON_STATE_TAGSTART', 2 ); // just after '<'; the next character picks close tag, comment or start tag
+define( 'MW_COLON_STATE_CLOSETAG', 3 ); // inside '</...'; '>' closes one open element
+define( 'MW_COLON_STATE_TAGSLASH', 4 ); // saw '/' inside a start tag; '>' means self-closed, anything else returns to TAG
+define( 'MW_COLON_STATE_COMMENT', 5 ); // entered after '<!'; waiting for a '-'
+define( 'MW_COLON_STATE_COMMENTDASH', 6 ); // saw one '-' inside a comment
+define( 'MW_COLON_STATE_COMMENTDASHDASH', 7 ); // saw '--' inside a comment; '>' ends it
+
/**
* PHP Parser
*
$this->mTagHooks = array();
$this->mFunctionHooks = array();
$this->clearState();
+ $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
}
/**
'/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1 \\2',
# french spaces, Guillemet-right
'/(\\302\\253) /' => '\\1 ',
- '/<center *>(.*)<\\/center *>/i' => '<div class="center">\\1</div>',
);
$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
$inside = $p[4];
}
- $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++);
+ $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';
$stripped .= $marker;
if ( $close === '/>' ) {
$commentState = array();
$elements = array_merge(
- array( 'nowiki', 'pre', 'gallery' ),
+ array( 'nowiki', 'gallery' ),
array_keys( $this->mTagHooks ) );
global $wgRawHtml;
if( $wgRawHtml ) {
$output = wfEscapeHTMLTagsOnly( $content );
break;
case 'math':
- $output = renderMath( $content );
- break;
- case 'pre':
- // Backwards-compatibility hack
- $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $content );
- $output = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
+ $output = MathRenderer::renderMath( $content );
break;
case 'gallery':
$output = $this->renderImageGallery( $content );
$output = call_user_func_array( $this->mTagHooks[$tagName],
array( $content, $params, $this ) );
} else {
- wfDebugDieBacktrace( "Invalid call hook $element" );
+ throw new MWException( "Invalid call hook $element" );
}
}
} else {
wfProfileIn( $fname );
for ( $i = 6; $i >= 1; --$i ) {
$h = str_repeat( '=', $i );
- $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
+ $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
"<h{$i}>\\1</h{$i}>\\2", $text );
}
wfProfileOut( $fname );
$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
if( is_null( $this->mTitle ) ) {
- wfDebugDieBacktrace( 'nooo' );
+ throw new MWException( 'nooo' );
}
$nottalk = !$this->mTitle->isTalkPage();
wfProfileIn( "$fname-paragraph" );
# No prefix (not in list)--go to paragraph mode
// XXX: use a stack for nestable elements like span, table and div
- $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
+ $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/center|<\\/tr|<\\/td|<\\/th)/iS', $t );
$closematch = preg_match(
'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
- '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul)/iS', $t );
+ '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<center)/iS', $t );
if ( $openmatch or $closematch ) {
$paragraphStack = false;
# TODO bug 5718: paragraph closed
}
/**
- * Split up a string on ':', ignoring any occurences inside
- * <a>..</a> or <span>...</span>
+ * Split up a string on ':', ignoring any occurrences inside tags
+ * to prevent illegal overlapping.
+ *
+ * A ':' is only accepted while no element is open: colons inside a tag,
+ * inside an element that has not been closed yet, or inside an HTML
+ * comment are skipped. $before and $after are only assigned when a
+ * ':' is accepted; they are left untouched when false is returned.
* @param string $str the string to split
* @param string &$before set to everything before the ':'
* @param string &$after set to everything after the ':'
* return string the position of the ':', or false if none found
*/
function findColonNoLinks($str, &$before, &$after) {
- # I wonder if we should make this count all tags, not just <a>
- # and <span>. That would prevent us from matching a ':' that
- # comes in the middle of italics other such formatting....
- # -- Wil
$fname = 'Parser::findColonNoLinks';
wfProfileIn( $fname );
- $pos = 0;
- do {
- $colon = strpos($str, ':', $pos);
-
- if ($colon !== false) {
- $before = substr($str, 0, $colon);
- $after = substr($str, $colon + 1);
-
- # Skip any ':' within <a> or <span> pairs
- $a = substr_count($before, '<a');
- $s = substr_count($before, '<span');
- $ca = substr_count($before, '</a>');
- $cs = substr_count($before, '</span>');
-
- if ($a <= $ca and $s <= $cs) {
- # Tags are balanced before ':'; ok
+
+ $pos = strpos( $str, ':' );
+ if( $pos === false ) {
+ // Nothing to find!
+ wfProfileOut( $fname );
+ return false;
+ }
+
+ $lt = strpos( $str, '<' );
+ if( $lt === false || $lt > $pos ) {
+ // Easy; no tag nesting to worry about
+ $before = substr( $str, 0, $pos );
+ $after = substr( $str, $pos+1 );
+ wfProfileOut( $fname );
+ return $pos;
+ }
+
+ // Ugly state machine to walk through avoiding tags.
+ $state = MW_COLON_STATE_TEXT;
+ // Number of start tags seen so far that have not been closed again
+ $stack = 0;
+ $len = strlen( $str );
+ for( $i = 0; $i < $len; $i++ ) {
+ // Square-bracket offset: the curly-brace form is rejected by PHP 8
+ $c = $str[$i];
+
+ switch( $state ) {
+ // (Using the number is a performance hack for common cases)
+ case 0: // MW_COLON_STATE_TEXT:
+ switch( $c ) {
+ case "<":
+ // Could be either a <start> tag or an </end> tag
+ $state = MW_COLON_STATE_TAGSTART;
+ break;
+ case ":":
+ if( $stack == 0 ) {
+ // We found it!
+ $before = substr( $str, 0, $i );
+ $after = substr( $str, $i + 1 );
+ wfProfileOut( $fname );
+ return $i;
+ }
+ // Embedded in a tag; don't break it.
+ break;
+ default:
+ // Skip ahead looking for something interesting
+ $colon = strpos( $str, ':', $i );
+ if( $colon === false ) {
+ // Nothing else interesting
+ wfProfileOut( $fname );
+ return false;
+ }
+ $lt = strpos( $str, '<', $i );
+ if( $stack === 0 ) {
+ if( $lt === false || $colon < $lt ) {
+ // We found it!
+ $before = substr( $str, 0, $colon );
+ $after = substr( $str, $colon + 1 );
+ wfProfileOut( $fname );
+ // Report the colon itself, not the offset the skip started from
+ return $colon;
+ }
+ }
+ if( $lt === false ) {
+ // Nothing else interesting to find; abort!
+ // We're nested, but there's no close tags left. Abort!
+ // switch counts as a level for break: 1 = switch($c),
+ // 2 = switch($state), 3 = the for loop we want to leave.
+ break 3;
+ }
+ // Skip ahead to next tag start
+ $i = $lt;
+ $state = MW_COLON_STATE_TAGSTART;
+ }
+ break;
+ case 1: // MW_COLON_STATE_TAG:
+ // In a <tag>
+ switch( $c ) {
+ case ">":
+ $stack++;
+ $state = MW_COLON_STATE_TEXT;
+ break;
+ case "/":
+ // Slash may be followed by >?
+ $state = MW_COLON_STATE_TAGSLASH;
+ break;
+ default:
+ // ignore
+ }
+ break;
+ case 2: // MW_COLON_STATE_TAGSTART:
+ switch( $c ) {
+ case "/":
+ $state = MW_COLON_STATE_CLOSETAG;
break;
+ case "!":
+ $state = MW_COLON_STATE_COMMENT;
+ break;
+ case ">":
+ // Illegal early close? This shouldn't happen D:
+ $state = MW_COLON_STATE_TEXT;
+ break;
+ default:
+ $state = MW_COLON_STATE_TAG;
+ }
+ break;
+ case 3: // MW_COLON_STATE_CLOSETAG:
+ // In a </tag>
+ if( $c == ">" ) {
+ $stack--;
+ if( $stack < 0 ) {
+ wfDebug( "Invalid input in $fname; too many close tags\n" );
+ wfProfileOut( $fname );
+ return false;
+ }
+ $state = MW_COLON_STATE_TEXT;
+ }
+ break;
+ case MW_COLON_STATE_TAGSLASH:
+ if( $c == ">" ) {
+ // Yes, a self-closed tag <blah/>
+ $state = MW_COLON_STATE_TEXT;
+ } else {
+ // Probably we're jumping the gun, and this is an attribute
+ $state = MW_COLON_STATE_TAG;
}
- $pos = $colon + 1;
+ break;
+ case 5: // MW_COLON_STATE_COMMENT:
+ if( $c == "-" ) {
+ $state = MW_COLON_STATE_COMMENTDASH;
+ }
+ break;
+ case MW_COLON_STATE_COMMENTDASH:
+ if( $c == "-" ) {
+ $state = MW_COLON_STATE_COMMENTDASHDASH;
+ } else {
+ $state = MW_COLON_STATE_COMMENT;
+ }
+ break;
+ case MW_COLON_STATE_COMMENTDASHDASH:
+ if( $c == ">" ) {
+ $state = MW_COLON_STATE_TEXT;
+ } elseif( $c != "-" ) {
+ $state = MW_COLON_STATE_COMMENT;
+ }
+ // A further "-" keeps us in this state, so "--->" still ends the comment
+ break;
+ default:
+ throw new MWException( "State machine error in $fname" );
}
- } while ($colon !== false);
+ }
+ if( $stack > 0 ) {
+ wfDebug( "Invalid input in $fname; not enough close tags (stack $stack, state $state)\n" );
+ // Balance the wfProfileIn() at the top on this exit path too
+ wfProfileOut( $fname );
+ return false;
+ }
wfProfileOut( $fname );
- return $colon;
+ return false;
}
/**
return $wgScriptPath;
case MAG_DIRECTIONMARK:
return $wgContLang->getDirMark();
+ case MAG_CONTENTLANGUAGE:
+ global $wgContLanguageCode;
+ return $wgContLanguageCode;
default:
$ret = null;
if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) )
if ( !$found && $argc >= 2 ) {
$mwPluralForm =& MagicWord::get( MAG_PLURAL );
if ( $mwPluralForm->matchStartAndRemove( $part1 ) ) {
- if ($argc==2) {$args[2]=$args[1];}
- $text = $linestart . $lang->convertPlural( $part1, $args[0], $args[1], $args[2]);
+ while ( count($args) < 5 ) { $args[] = $args[count($args)-1]; }
+ $text = $linestart . $lang->convertPlural( $part1, $args[0], $args[1],
+ $args[2], $args[3], $args[4]);
$found = true;
}
}
# Use the original $piece['title'] not the mangled $part1, so that
# modifiers such as RAW: produce separate cache entries
if( $found ) {
- $this->mTemplates[$piece['title']] = $text;
+ if( $isHTML ) {
+ // A special page; don't store it in the template cache.
+ } else {
+ $this->mTemplates[$piece['title']] = $text;
+ }
$text = $linestart . $text;
}
}
return $matches[0];
}
+	/**
+	 * Tag hook callback for 'pre', registered through setHook( 'pre', ... ).
+	 *
+	 * @param $text    Raw text found between the opening and closing tag
+	 * @param $attribs Attributes given on the tag; validated for 'pre' before output
+	 * @param $parser  Passed in by the tag hook caller; not used here
+	 * @return string  A <pre> element whose content has had its HTML tags escaped
+	 */
+	function renderPreTag( $text, $attribs, $parser ) {
+		// Backwards-compatibility hack: drop <nowiki> wrappers, keep what they enclose
+		$unwrapped = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $text );
+		$cleanAttribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
+
+		$html = wfOpenElement( 'pre', $cleanAttribs );
+		$html .= wfEscapeHTMLTagsOnly( $unwrapped );
+		$html .= '</pre>';
+		return $html;
+	}
+
/**
* Renders an image gallery from a text with one line per image.
* text labels may be given by using |-style alternative text. E.g.
*/
function getTags() { return array_keys( $this->mTagHooks ); }
/**#@-*/
+
+
+ /**
+ * Break wikitext input into sections, and either pull or replace
+ * some particular section's text.
+ *
+ * External callers should use the getSection and replaceSection methods.
+ *
+ * The text is first passed through strip() (with a temporary fresh
+ * ParserOptions and the OT_WIKI output type) so that heading-like text
+ * inside stripped elements is not taken for a section boundary. The
+ * stripped pieces are put back into the result, which is then trim()med.
+ *
+ * NOTE(review): for a $mode other than "get" or "replace" the result
+ * variable is never assigned before it is used.
+ *
+ * @param $text Page wikitext
+ * @param $section Numbered section. 0 pulls the text before the first
+ * heading; other numbers will pull the given section
+ * along with its lower-level subsections.
+ * @param $mode One of "get" or "replace"
+ * @param $newtext Replacement text for section data.
+ * @return string for "get", the extracted section text.
+ * for "replace", the whole page with the section replaced.
+ */
+ private function extractSections( $text, $section, $mode, $newtext='' ) {
+ # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
+ # comments to be stripped as well)
+ $striparray = array();
+
+ $oldOutputType = $this->mOutputType; // remembered here, restored right after strip()
+ $oldOptions = $this->mOptions; // remembered here, restored right after strip()
+ $this->mOptions = new ParserOptions();
+ $this->mOutputType = OT_WIKI;
+
+ $striptext = $this->strip( $text, $striparray, true );
+
+ $this->mOutputType = $oldOutputType;
+ $this->mOptions = $oldOptions;
+
+ # now that we can be sure that no pseudo-sections are in the source,
+ # split it up by section
+ $uniq = preg_quote( $this->uniqPrefix(), '/' );
+ $comment = "(?:$uniq-!--.*?QINU)"; // presumably the strip marker standing in for an HTML comment -- confirm against strip()
+ $secs = preg_split(
+ /*
+ "/
+ ^(
+ (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+ (?:
+ (=+) # Should this be limited to 6?
+ .+? # Section title...
+ \\2 # Ending = count must match start
+ |
+ ^
+ <h([1-6])\b.*?>
+ .*?
+ <\/h\\3\s*>
+ )
+ (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+ )$
+ /mix",
+ */
+ "/
+ (
+ ^
+ (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+ (=+) # Should this be limited to 6?
+ .+? # Section title...
+ \\2 # Ending = count must match start
+ (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+ $
+ |
+ <h([1-6])\b.*?>
+ .*?
+ <\/h\\3\s*>
+ )
+ /mix",
+ $striptext, -1,
+ PREG_SPLIT_DELIM_CAPTURE); // captured header pieces are returned in $secs between the text chunks
+
+ if( $mode == "get" ) {
+ if( $section == 0 ) {
+ // "Section 0" returns the content before any other section.
+ $rv = $secs[0];
+ } else {
+ $rv = "";
+ }
+ } elseif( $mode == "replace" ) {
+ if( $section == 0 ) {
+ $rv = $newtext . "\n\n";
+ $remainder = true; // everything after the lead text is kept as-is
+ } else {
+ $rv = $secs[0];
+ $remainder = false; // becomes true once the replaced section and its subsections are passed
+ }
+ }
+ $count = 0; // number of headers walked so far
+ $sectionLevel = 0; // heading level of the requested section; stays 0 until it is reached
+ for( $index = 1; $index < count( $secs ); ) { // $index is advanced inside the body
+ $headerLine = $secs[$index++]; // the whole header, as matched by the outer capture group
+ if( $secs[$index] ) {
+ // A wiki header: this element is the run of '=' characters
+ $headerLevel = strlen( $secs[$index++] );
+ } else {
+ // An HTML header: skip the empty '=' element, then read the digit from <hN>
+ $index++;
+ $headerLevel = intval( $secs[$index++] );
+ }
+ $content = $secs[$index++]; // text between this header and the next one
+
+ $count++;
+ if( $mode == "get" ) {
+ if( $count == $section ) {
+ $rv = $headerLine . $content;
+ $sectionLevel = $headerLevel;
+ } elseif( $count > $section ) {
+ if( $sectionLevel && $headerLevel > $sectionLevel ) { // deeper heading: part of the requested section
+ $rv .= $headerLine . $content;
+ } else {
+ // Broke out to a higher-level section
+ break;
+ }
+ }
+ } elseif( $mode == "replace" ) {
+ if( $count < $section ) {
+ $rv .= $headerLine . $content;
+ } elseif( $count == $section ) {
+ $rv .= $newtext . "\n\n"; // the old header line and body of this section are not copied
+ $sectionLevel = $headerLevel;
+ } elseif( $count > $section ) {
+ if( $headerLevel <= $sectionLevel ) {
+ // Passed the section's sub-parts.
+ $remainder = true;
+ }
+ if( $remainder ) {
+ $rv .= $headerLine . $content;
+ }
+ }
+ }
+ }
+ # reinsert stripped tags
+ $rv = $this->unstrip( $rv, $striparray );
+ $rv = $this->unstripNoWiki( $rv, $striparray );
+ $rv = trim( $rv );
+ return $rv;
+ }
+
+	/**
+	 * Fetch the wikitext of one numbered section of a page.
+	 *
+	 * A section is the text under a heading such as == Heading == or
+	 * \<h1\>Heading\</h1\>; section 0 is whatever precedes the first heading.
+	 * Subsections nested under the requested section are returned with it.
+	 * The work is delegated to extractSections() in "get" mode.
+	 *
+	 * @param $text String: text to look in
+	 * @param $section Integer: section number
+	 * @return string text of the requested section
+	 */
+	function getSection( $text, $section ) {
+		$mode = "get";
+		return $this->extractSections( $text, $section, $mode );
+	}
+
+	/**
+	 * Swap the text of one numbered section for new text.
+	 * The work is delegated to extractSections() in "replace" mode.
+	 *
+	 * @param $oldtext String: current wikitext of the whole page
+	 * @param $section Integer: number of the section to replace
+	 * @param $text String: replacement text for that section
+	 * @return string the whole page with the section replaced
+	 */
+	function replaceSection( $oldtext, $section, $text ) {
+		$mode = "replace";
+		$updated = $this->extractSections( $oldtext, $section, $mode, $text );
+		return $updated;
+	}
+
}
/**