}
/**
- * A page get its title converted except:
- * a) Language conversion is globally disabled
- * b) Title convert is globally disabled
- * c) The page is a redirect page
- * d) User request with a "linkconvert" set to "no"
- * e) A "nocontentconvert" magic word has been set
- * f) A "notitleconvert" magic word has been set
- * g) User sets "noconvertlink" in his/her preference
- *
- * Note that if a user tries to set a title in a conversion
- * rule but content conversion was not done, then the parser
- * won't pick it up. This is probably expected behavior.
+ * A converted title will be provided in the output object if title and
+ * content conversion are enabled, the article text does not contain
+ * a conversion-suppressing double-underscore tag, and no
+ * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
+ * automatic link conversion.
*/
if ( !( $wgDisableLangConversion
|| $wgDisableTitleConversion
$has_opened_tr = array(); # Did this table open a <tr> element?
$indent_level = 0; # indent level of the table
- foreach ( $lines as $outLine ) {
+ # Keep pulling lines off the front of the array until they're all gone.
+ # We want to be able to push lines back on to the front of the stream,
+ # but StringUtils::explode() returns funky optimised Iterators which don't
+ # support insertion. So maintain a separate buffer and draw on that first if
+ # there's anything in it.
+ $extraLines = array();
+ $lines->rewind();
+ do {
+ if( $extraLines ){
+ $outLine = array_shift( $extraLines );
+ } elseif( $lines->valid() ) {
+ $outLine = $lines->current();
+ $lines->next();
+ } else {
+ break;
+ }
$line = trim( $outLine );
if ( $line === '' ) { # empty line, go to next line
} elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 ) === '|+' ) {
# This might be cell elements, td, th or captions
if ( substr( $line , 0 , 2 ) === '|+' ) {
- $first_character = '+';
- $line = substr( $line , 1 );
+ $first_character = '|+';
}
- $line = substr( $line , 1 );
+ $line = substr( $line , strlen( $first_character ) );
if ( $first_character === '!' ) {
$line = str_replace( '!!' , '||' , $line );
# by earlier parser steps, but should avoid splitting up eg
# attribute values containing literal "||".
$cells = StringUtils::explodeMarkup( '||' , $line );
-
- $outLine = '';
-
- # Loop through each table cell
- foreach ( $cells as $cell ) {
- $previous = '';
- if ( $first_character !== '+' ) {
- $tr_after = array_pop( $tr_attributes );
- if ( !array_pop( $tr_history ) ) {
- $previous = "<tr{$tr_after}>\n";
- }
- array_push( $tr_history , true );
- array_push( $tr_attributes , '' );
- array_pop( $has_opened_tr );
- array_push( $has_opened_tr , true );
+ $cell = array_shift( $cells );
+
+ # Inject cells back into the stream to be dealt with later
+ # TODO: really we should do the whole thing as a stream...
+ # but that would be too much like a sensible implementation :P
+ if( count( $cells ) ){
+ foreach( array_reverse( $cells ) as $extraCell ){
+ array_unshift( $extraLines, $first_character . $extraCell );
}
+ }
- $last_tag = array_pop( $last_tag_history );
+ $outLine = '';
- if ( array_pop( $td_history ) ) {
- $previous = "</{$last_tag}>\n{$previous}";
+ $previous = '';
+ if ( $first_character !== '|+' ) {
+ $tr_after = array_pop( $tr_attributes );
+ if ( !array_pop( $tr_history ) ) {
+ $previous = "<tr{$tr_after}>\n";
}
+ array_push( $tr_history , true );
+ array_push( $tr_attributes , '' );
+ array_pop( $has_opened_tr );
+ array_push( $has_opened_tr , true );
+ }
- if ( $first_character === '|' ) {
- $last_tag = 'td';
- } elseif ( $first_character === '!' ) {
- $last_tag = 'th';
- } elseif ( $first_character === '+' ) {
- $last_tag = 'caption';
- } else {
- $last_tag = '';
- }
+ $last_tag = array_pop( $last_tag_history );
- array_push( $last_tag_history , $last_tag );
+ if ( array_pop( $td_history ) ) {
+ $previous = "</{$last_tag}>\n{$previous}";
+ }
- # A cell could contain both parameters and data
- $cell_data = explode( '|' , $cell , 2 );
+ if ( $first_character === '|' ) {
+ $last_tag = 'td';
+ } elseif ( $first_character === '!' ) {
+ $last_tag = 'th';
+ } elseif ( $first_character === '|+' ) {
+ $last_tag = 'caption';
+ } else {
+ $last_tag = '';
+ }
- # Bug 553: Note that a '|' inside an invalid link should not
- # be mistaken as delimiting cell parameters
- if ( strpos( $cell_data[0], '[[' ) !== false ) {
- $cell = "{$previous}<{$last_tag}>{$cell}";
- } elseif ( count( $cell_data ) == 1 ) {
- $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
- } else {
- $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
- $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
- $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
- }
+ array_push( $last_tag_history , $last_tag );
+
+ # A cell could contain both parameters and data... but the pipe could
+ # also be the start of a nested table, or a raw pipe inside an invalid
+ # link (bug 553).
+ $cell_data = preg_split( '/(?<!\{)\|/', $cell, 2 );
+
+ # Bug 553: a '|' inside an invalid link should not
+ # be mistaken as delimiting cell parameters
+ if ( strpos( $cell_data[0], '[[' ) !== false ) {
+ $data = $cell;
+ $cell = "{$previous}<{$last_tag}>";
+ } elseif ( count( $cell_data ) == 1 ) {
+ $cell = "{$previous}<{$last_tag}>";
+ $data = $cell_data[0];
+ } else {
+ $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
+ $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
+ $cell = "{$previous}<{$last_tag}{$attributes}>";
+ $data = $cell_data[1];
+ }
- $outLine .= $cell;
- array_push( $td_history , true );
+ # Bug 529: the start of a table cell should be a linestart context for
+ # processing other block markup, including nested tables. The original
+ # implementation of this was to add a newline before every brace construct,
+ # which broke all manner of other things. Instead, push the contents
+ # of the cell back into the stream and come back to it later. But don't
+ # do that if the first line is empty, or you may get extra whitespace
+ if( $data ){
+ array_unshift( $extraLines, trim( $data ) );
}
+
+ $outLine .= $cell;
+ array_push( $td_history , true );
}
$out .= $outLine . "\n";
- }
+ } while( $lines->valid() || count( $extraLines ) );
# Closing open td, tr && table
while ( count( $td_history ) > 0 ) {
'/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
'<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
if ( $openmatch or $closematch ) {
+
$paragraphStack = false;
# TODO bug 5718: paragraph closed
$output .= $this->closeParagraph();
$value = wfEscapeWikiText( $this->mTitle->getText() );
break;
case 'pagenamee':
- $value = $this->mTitle->getPartialURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
break;
case 'fullpagename':
$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
break;
case 'fullpagenamee':
- $value = $this->mTitle->getPrefixedURL();
+ $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
break;
case 'subpagename':
$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
break;
case 'subpagenamee':
- $value = $this->mTitle->getSubpageUrlForm();
+ $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
break;
case 'basepagename':
$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
break;
case 'basepagenamee':
- $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+ $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
break;
case 'talkpagename':
if ( $this->mTitle->canTalk() ) {
case 'talkpagenamee':
if ( $this->mTitle->canTalk() ) {
$talkPage = $this->mTitle->getTalkPage();
- $value = $talkPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() );
} else {
$value = '';
}
break;
case 'subjectpagenamee':
$subjPage = $this->mTitle->getSubjectPage();
- $value = $subjPage->getPrefixedUrl();
+ $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() );
break;
case 'revisionid':
# Let the edit saving system know we should parse the page
$text = wfEscapeWikiText( $text );
} elseif ( is_string( $text )
&& !$piece['lineStart']
- && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) )
+ && preg_match( '/^{\\|/', $text ) )
{
- # Bug 529: if the template begins with a table or block-level
- # element, it should be treated as beginning a new line.
- # This behaviour is somewhat controversial.
+ # Bug 529: if the template begins with a table, it should be treated as
+ # beginning a new line. This previously handled other block-level elements
+ # such as #, :, etc, but these have many false-positives (bug 12974).
$text = "\n" . $text;
}
if ( !$title->equals( $cacheTitle ) ) {
$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
- array( $title->getNamespace(), $cdb = $title->getDBkey() );
+ array( $title->getNamespace(), $title->getDBkey() );
}
return array( $dom, $title );
$text = $rev->getText();
} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
global $wgContLang;
- $message = $wgContLang->lcfirst( $title->getText() );
- $text = wfMsgForContentNoTrans( $message );
- if ( wfEmptyMsg( $message, $text ) ) {
+ $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
+ if ( !$message->exists() ) {
$text = false;
break;
}
+ $text = $message->plain();
} else {
break;
}
$showEditLink = $this->mOptions->getEditSection();
}
if ( $showEditLink ) {
- $editLinkAsToken = $this->mOptions->getEditSectionTokens();
- if ( $editLinkAsToken ) {
- $this->mOutput->setEditSectionTokens( "{$this->mUniqPrefix}-editsection-", self::MARKER_SUFFIX );
- }
+ $this->mOutput->setEditSectionTokens( true );
}
# Get all headlines for numbering them and adding funky stuff like [edit]
# give headline the correct <h#> tag
if ( $showEditLink && $sectionIndex !== false ) {
- if ( $editLinkAsToken ) {
- // Output edit section links as markers with styles that can be customized by skins
- if ( $isTemplate ) {
- # Put a T flag in the section identifier, to indicate to extractSections()
- # that sections inside <includeonly> should be counted.
- $editlinkArgs = array( $titleText, "T-$sectionIndex", null );
- } else {
- $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
- }
- // We use nearly the same structure as uniqPrefix and the marker stuffix (besides there being nothing random)
- // However the this is output into the parser output itself not replaced early, so we hardcode this in case
- // the constants change in a different version of MediaWiki, which would break this code.
- $editlink = "{$this->mUniqPrefix}-editsection-" . serialize($editlinkArgs) . self::MARKER_SUFFIX;
+ // Output edit section links as markers with styles that can be customized by skins
+ if ( $isTemplate ) {
+ # Put a T flag in the section identifier, to indicate to extractSections()
+ # that sections inside <includeonly> should be counted.
+ $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
} else {
- // Output edit section links directly as markup like we used to
- if ( $isTemplate ) {
- # Put a T flag in the section identifier, to indicate to extractSections()
- # that sections inside <includeonly> should be counted.
- $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex", null, $this->mOptions->getUserLang() );
- } else {
- $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint, $this->mOptions->getUserLang() );
- }
+ $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
+ }
+ // We use a bit of pseudo-xml for editsection markers. The language converter is run later on,
+ // and using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff.
+ // Trying to insert strip tags fails too. At this point all real inputted tags have already been escaped,
+ // so we don't have to worry about a user trying to input one of these markers directly.
+ // We use a page and section attribute to stop the language converter from converting these important bits
+ // of data, but put the headline hint inside a content block because the language converter is supposed to
+ // be able to convert that piece of data.
+ $editlink = '<editsection page="' . htmlspecialchars($editlinkArgs[0]);
+ $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
+ if ( isset($editlinkArgs[2]) ) {
+ $editlink .= '>' . $editlinkArgs[2] . '</editsection>';
+ } else {
+ $editlink .= '/>';
}
} else {
$editlink = '';
"\r\n" => "\n",
);
$text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
- $text = $this->pstPass2( $text, $user );
+ if( $options->getPreSaveTransform() ) {
+ $text = $this->pstPass2( $text, $user );
+ }
$text = $this->mStripState->unstripBoth( $text );
$this->setUser( null ); #Reset
# whatever crap the system uses, localised or not, so we cannot
# ship premade translations.
$key = 'timezone-' . strtolower( trim( $tzMsg ) );
- $value = wfMsgForContent( $key );
- if ( !wfEmptyMsg( $key, $value ) ) {
- $tzMsg = $value;
+ $msg = wfMessage( $key )->inContentLanguage();
+ if ( $msg->exists() ) {
+ $tzMsg = $msg->text();
}
date_default_timezone_set( $oldtz );
*/
public function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
$this->setTitle( $title );
- $options->resetUsage();
$this->mOptions = $options;
$this->setOutputType( $outputType );
if ( $clearState ) {
*/
public function setHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
$this->mTagHooks[$tag] = $callback;
if ( !in_array( $tag, $this->mStripList ) ) {
function setTransparentTagHook( $tag, $callback ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
$this->mTransparentTagHooks[$tag] = $callback;
*/
function setFunctionTagHook( $tag, $callback, $flags ) {
$tag = strtolower( $tag );
+ if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
$old = isset( $this->mFunctionTagHooks[$tag] ) ?
$this->mFunctionTagHooks[$tag] : null;
$this->mFunctionTagHooks[$tag] = array( $callback, $flags );
if ( strpos( $matches[0], '%' ) !== false ) {
$matches[1] = rawurldecode( $matches[1] );
}
- $tp = Title::newFromText( $matches[1] );
+ $tp = Title::newFromText( $matches[1], NS_FILE );
$nt =& $tp;
if ( is_null( $nt ) ) {
# Bogus title. Ignore these so we don't bomb out later.
$title = Title::newFromText( $title );
}
$this->mTitle = $title;
- $options->resetUsage();
$this->setOutputType( $outputType );
$text = $this->replaceVariables( $text );
$text = $this->mStripState->unstripBoth( $text );