Remove a couple of calls to ParserOptions::resetUsage(), missed on its removal in...

[lhc/web/wiklou.git] / includes / parser / Parser.php
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php

index b723bea..99df3d4 100644 (file)
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -329,18 +329,11 @@ class Parser {
                 }
  
                 /**
-                * A page get its title converted except:
-                * a) Language conversion is globally disabled
-                * b) Title convert is globally disabled
-                * c) The page is a redirect page
-                * d) User request with a "linkconvert" set to "no"
-                * e) A "nocontentconvert" magic word has been set
-                * f) A "notitleconvert" magic word has been set
-                * g) User sets "noconvertlink" in his/her preference
-                *
-                * Note that if a user tries to set a title in a conversion
-                * rule but content conversion was not done, then the parser
-                * won't pick it up.  This is probably expected behavior.
+                * A converted title will be provided in the output object if title and
+                * content conversion are enabled, the article text does not contain 
+                * a conversion-suppressing double-underscore tag, and no 
+                * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
+                * automatic link conversion.
                  */
                 if ( !( $wgDisableLangConversion
                                 || $wgDisableTitleConversion
@@ -832,7 +825,22 @@ class Parser {
                 $has_opened_tr = array(); # Did this table open a <tr> element?
                 $indent_level = 0; # indent level of the table
  
-               foreach ( $lines as $outLine ) {
+               # Keep pulling lines off the front of the array until they're all gone.
+               # We want to be able to push lines back onto the front of the stream,
+               # but StringUtils::explode() returns funky optimised Iterators which don't
+               # support insertion.  So maintain a separate buffer and draw on that first if
+               # there's anything in it
+               $extraLines = array();
+               $lines->rewind();
+               do {
+                       if( $extraLines ){
+                               $outLine = array_shift( $extraLines );
+                       } elseif( $lines->valid() ) {
+                               $outLine = $lines->current();
+                               $lines->next();
+                       } else {
+                               break;
+                       }
                         $line = trim( $outLine );
  
                         if ( $line === '' ) { # empty line, go to next line
@@ -908,11 +916,10 @@ class Parser {
                         } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 )  === '|+' ) {
                                 # This might be cell elements, td, th or captions
                                 if ( substr( $line , 0 , 2 ) === '|+' ) {
-                                       $first_character = '+';
-                                       $line = substr( $line , 1 );
+                                       $first_character = '|+';
                                 }
  
-                               $line = substr( $line , 1 );
+                               $line = substr( $line , strlen( $first_character ) );
  
                                 if ( $first_character === '!' ) {
                                         $line = str_replace( '!!' , '||' , $line );
@@ -923,62 +930,84 @@ class Parser {
                                 # by earlier parser steps, but should avoid splitting up eg
                                 # attribute values containing literal "||".
                                 $cells = StringUtils::explodeMarkup( '||' , $line );
-
-                               $outLine = '';
-
-                               # Loop through each table cell
-                               foreach ( $cells as $cell ) {
-                                       $previous = '';
-                                       if ( $first_character !== '+' ) {
-                                               $tr_after = array_pop( $tr_attributes );
-                                               if ( !array_pop( $tr_history ) ) {
-                                                       $previous = "<tr{$tr_after}>\n";
-                                               }
-                                               array_push( $tr_history , true );
-                                               array_push( $tr_attributes , '' );
-                                               array_pop( $has_opened_tr );
-                                               array_push( $has_opened_tr , true );
+                               $cell = array_shift( $cells );
+
+                               # Inject cells back into the stream to be dealt with later
+                               # TODO: really we should do the whole thing as a stream...
+                               # but that would be too much like a sensible implementation :P
+                               if( count( $cells ) ){
+                                       foreach( array_reverse( $cells ) as $extraCell ){
+                                               array_unshift( $extraLines, $first_character . $extraCell );
                                         }
+                               }
  
-                                       $last_tag = array_pop( $last_tag_history );
+                               $outLine = '';
  
-                                       if ( array_pop( $td_history ) ) {
-                                               $previous = "</{$last_tag}>\n{$previous}";
+                               $previous = '';
+                               if ( $first_character !== '|+' ) {
+                                       $tr_after = array_pop( $tr_attributes );
+                                       if ( !array_pop( $tr_history ) ) {
+                                               $previous = "<tr{$tr_after}>\n";
                                         }
+                                       array_push( $tr_history , true );
+                                       array_push( $tr_attributes , '' );
+                                       array_pop( $has_opened_tr );
+                                       array_push( $has_opened_tr , true );
+                               }
  
-                                       if ( $first_character === '|' ) {
-                                               $last_tag = 'td';
-                                       } elseif ( $first_character === '!' ) {
-                                               $last_tag = 'th';
-                                       } elseif ( $first_character === '+' ) {
-                                               $last_tag = 'caption';
-                                       } else {
-                                               $last_tag = '';
-                                       }
+                               $last_tag = array_pop( $last_tag_history );
  
-                                       array_push( $last_tag_history , $last_tag );
+                               if ( array_pop( $td_history ) ) {
+                                       $previous = "</{$last_tag}>\n{$previous}";
+                               }
  
-                                       # A cell could contain both parameters and data
-                                       $cell_data = explode( '|' , $cell , 2 );
+                               if ( $first_character === '|' ) {
+                                       $last_tag = 'td';
+                               } elseif ( $first_character === '!' ) {
+                                       $last_tag = 'th';
+                               } elseif ( $first_character === '|+' ) {
+                                       $last_tag = 'caption';
+                               } else {
+                                       $last_tag = '';
+                               }
  
-                                       # Bug 553: Note that a '|' inside an invalid link should not
-                                       # be mistaken as delimiting cell parameters
-                                       if ( strpos( $cell_data[0], '[[' ) !== false ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell}";
-                                       } elseif ( count( $cell_data ) == 1 ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
-                                       } else {
-                                               $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
-                                               $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
-                                               $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
-                                       }
+                               array_push( $last_tag_history , $last_tag );
+
+                               # A cell could contain both parameters and data... but the pipe could
+                               # also be the start of a nested table, or a raw pipe inside an invalid
+                               # link (bug 553).  
+                               $cell_data = preg_split( '/(?<!\{)\|/', $cell, 2 );
+
+                               # Bug 553: a '|' inside an invalid link should not
+                               # be mistaken as delimiting cell parameters
+                               if ( strpos( $cell_data[0], '[[' ) !== false ) {
+                                       $data = $cell;
+                                       $cell = "{$previous}<{$last_tag}>";
+                               } elseif ( count( $cell_data ) == 1 ) {
+                                       $cell = "{$previous}<{$last_tag}>";
+                                       $data = $cell_data[0];
+                               } else {
+                                       $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
+                                       $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
+                                       $cell = "{$previous}<{$last_tag}{$attributes}>";
+                                       $data = $cell_data[1];
+                               }
  
-                                       $outLine .= $cell;
-                                       array_push( $td_history , true );
+                               # Bug 529: the start of a table cell should be a linestart context for
+                               # processing other block markup, including nested tables.  The original
+                               # implementation of this was to add a newline before every brace construct,
+                               # which broke all manner of other things.  Instead, push the contents
+                               # of the cell back into the stream and come back to it later.  But don't
+                               # do that if the first line is empty, or you may get extra whitespace
+                               if( $data ){
+                                       array_unshift( $extraLines, trim( $data ) );
                                 }
+
+                               $outLine .= $cell;
+                               array_push( $td_history , true );
                         }
                         $out .= $outLine . "\n";
-               }
+               } while( $lines->valid() || count( $extraLines ) );
  
                 # Closing open td, tr && table
                 while ( count( $td_history ) > 0 ) {
@@ -2242,6 +2271,7 @@ class Parser {
                                         '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                                         '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
                                 if ( $openmatch or $closematch ) {
+
                                         $paragraphStack = false;
                                         # TODO bug 5718: paragraph closed
                                         $output .= $this->closeParagraph();
@@ -2563,25 +2593,25 @@ class Parser {
                                 $value = wfEscapeWikiText( $this->mTitle->getText() );
                                 break;
                         case 'pagenamee':
-                               $value = $this->mTitle->getPartialURL();
+                               $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
                                 break;
                         case 'fullpagename':
                                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
                                 break;
                         case 'fullpagenamee':
-                               $value = $this->mTitle->getPrefixedURL();
+                               $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
                                 break;
                         case 'subpagename':
                                 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
                                 break;
                         case 'subpagenamee':
-                               $value = $this->mTitle->getSubpageUrlForm();
+                               $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
                                 break;
                         case 'basepagename':
                                 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
                                 break;
                         case 'basepagenamee':
-                               $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+                               $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
                                 break;
                         case 'talkpagename':
                                 if ( $this->mTitle->canTalk() ) {
@@ -2594,7 +2624,7 @@ class Parser {
                         case 'talkpagenamee':
                                 if ( $this->mTitle->canTalk() ) {
                                         $talkPage = $this->mTitle->getTalkPage();
-                                       $value = $talkPage->getPrefixedUrl();
+                                       $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() );
                                 } else {
                                         $value = '';
                                 }
@@ -2605,7 +2635,7 @@ class Parser {
                                 break;
                         case 'subjectpagenamee':
                                 $subjPage = $this->mTitle->getSubjectPage();
-                               $value = $subjPage->getPrefixedUrl();
+                               $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() );
                                 break;
                         case 'revisionid':
                                 # Let the edit saving system know we should parse the page
@@ -3232,11 +3262,11 @@ class Parser {
                         $text = wfEscapeWikiText( $text );
                 } elseif ( is_string( $text )
                         && !$piece['lineStart']
-                       && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) )
+                       && preg_match( '/^{\\|/', $text ) )
                 {
-                       # Bug 529: if the template begins with a table or block-level
-                       # element, it should be treated as beginning a new line.
-                       # This behaviour is somewhat controversial.
+                       # Bug 529: if the template begins with a table, it should be treated as
+                       # beginning a new line.  This previously handled other block-level elements
+                       # such as #, :, etc, but these have many false-positives (bug 12974).
                         $text = "\n" . $text;
                 }
  
@@ -3295,7 +3325,7 @@ class Parser {
  
                 if ( !$title->equals( $cacheTitle ) ) {
                         $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
-                               array( $title->getNamespace(), $cdb = $title->getDBkey() );
+                               array( $title->getNamespace(), $title->getDBkey() );
                 }
  
                 return array( $dom, $title );
@@ -3362,12 +3392,12 @@ class Parser {
                                 $text = $rev->getText();
                         } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
                                 global $wgContLang;
-                               $message = $wgContLang->lcfirst( $title->getText() );
-                               $text = wfMsgForContentNoTrans( $message );
-                               if ( wfEmptyMsg( $message, $text ) ) {
+                               $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
+                               if ( !$message->exists() ) {
                                         $text = false;
                                         break;
                                 }
+                               $text = $message->plain();
                         } else {
                                 break;
                         }
@@ -3697,10 +3727,7 @@ class Parser {
                         $showEditLink = $this->mOptions->getEditSection();
                 }
                 if ( $showEditLink ) {
-                       $editLinkAsToken = $this->mOptions->getEditSectionTokens();
-                       if ( $editLinkAsToken ) {
-                               $this->mOutput->setEditSectionTokens( "{$this->mUniqPrefix}-editsection-", self::MARKER_SUFFIX );
-                       }
+                       $this->mOutput->setEditSectionTokens( true );
                 }
  
                 # Get all headlines for numbering them and adding funky stuff like [edit]
@@ -3955,28 +3982,27 @@ class Parser {
  
                         # give headline the correct <h#> tag
                         if ( $showEditLink && $sectionIndex !== false ) {
-                               if ( $editLinkAsToken ) {
-                                       // Output edit section links as markers with styles that can be customized by skins
-                                       if ( $isTemplate ) {
-                                               # Put a T flag in the section identifier, to indicate to extractSections()
-                                               # that sections inside <includeonly> should be counted.
-                                               $editlinkArgs = array( $titleText, "T-$sectionIndex", null );
-                                       } else {
-                                               $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
-                                       }
-                                       // We use nearly the same structure as uniqPrefix and the marker stuffix (besides there being nothing random)
-                                       // However the this is output into the parser output itself not replaced early, so we hardcode this in case
-                                       // the constants change in a different version of MediaWiki, which would break this code.
-                                       $editlink = "{$this->mUniqPrefix}-editsection-" . serialize($editlinkArgs) . self::MARKER_SUFFIX;
+                               // Output edit section links as markers with styles that can be customized by skins
+                               if ( $isTemplate ) {
+                                       # Put a T flag in the section identifier, to indicate to extractSections()
+                                       # that sections inside <includeonly> should be counted.
+                                       $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
                                 } else {
-                                       // Output edit section links directly as markup like we used to
-                                       if ( $isTemplate ) {
-                                               # Put a T flag in the section identifier, to indicate to extractSections()
-                                               # that sections inside <includeonly> should be counted.
-                                               $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex", null, $this->mOptions->getUserLang() );
-                                       } else {
-                                               $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint, $this->mOptions->getUserLang() );
-                                       }
+                                       $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
+                               }
+                               // We use a bit of pseudo-xml for editsection markers. The language converter is run later on.
+                               // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff,
+                               // and trying to insert strip tags fails too. At this point all real inputted tags have already been escaped,
+                               // so we don't have to worry about a user trying to input one of these markers directly.
+                               // We use a page and section attribute to stop the language converter from converting these important bits
+                               // of data, but put the headline hint inside a content block because the language converter is supposed to
+                               // be able to convert that piece of data.
+                               $editlink = '<editsection page="' . htmlspecialchars($editlinkArgs[0]);
+                               $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
+                               if ( isset($editlinkArgs[2]) ) {
+                                       $editlink .= '>' . $editlinkArgs[2] . '</editsection>';
+                               } else {
+                                       $editlink .= '/>';
                                 }
                         } else {
                                 $editlink = '';
@@ -4064,7 +4090,9 @@ class Parser {
                         "\r\n" => "\n",
                 );
                 $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
-               $text = $this->pstPass2( $text, $user );
+               if( $options->getPreSaveTransform() ) {
+                       $text = $this->pstPass2( $text, $user );
+               }
                 $text = $this->mStripState->unstripBoth( $text );
  
                 $this->setUser( null ); #Reset
@@ -4101,9 +4129,9 @@ class Parser {
                 # whatever crap the system uses, localised or not, so we cannot
                 # ship premade translations.
                 $key = 'timezone-' . strtolower( trim( $tzMsg ) );
-               $value = wfMsgForContent( $key );
-               if ( !wfEmptyMsg( $key, $value ) ) {
-                       $tzMsg = $value;
+               $msg = wfMessage( $key )->inContentLanguage();
+               if ( $msg->exists() ) {
+                       $tzMsg = $msg->text();
                 }
  
                 date_default_timezone_set( $oldtz );
@@ -4279,7 +4307,6 @@ class Parser {
          */
         public function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
                 $this->setTitle( $title );
-               $options->resetUsage();
                 $this->mOptions = $options;
                 $this->setOutputType( $outputType );
                 if ( $clearState ) {
@@ -4326,6 +4353,7 @@ class Parser {
          */
         public function setHook( $tag, $callback ) {
                 $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
                 $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
                 $this->mTagHooks[$tag] = $callback;
                 if ( !in_array( $tag, $this->mStripList ) ) {
@@ -4337,6 +4365,7 @@ class Parser {
  
         function setTransparentTagHook( $tag, $callback ) {
                 $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
                 $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
                 $this->mTransparentTagHooks[$tag] = $callback;
  
@@ -4441,6 +4470,7 @@ class Parser {
          */
         function setFunctionTagHook( $tag, $callback, $flags ) {
                 $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
                 $old = isset( $this->mFunctionTagHooks[$tag] ) ?
                         $this->mFunctionTagHooks[$tag] : null;
                 $this->mFunctionTagHooks[$tag] = array( $callback, $flags );
@@ -4530,7 +4560,7 @@ class Parser {
                         if ( strpos( $matches[0], '%' ) !== false ) {
                                 $matches[1] = rawurldecode( $matches[1] );
                         }
-                       $tp = Title::newFromText( $matches[1] );
+                       $tp = Title::newFromText( $matches[1], NS_FILE );
                         $nt =& $tp;
                         if ( is_null( $nt ) ) {
                                 # Bogus title. Ignore these so we don't bomb out later.
@@ -5179,7 +5209,6 @@ class Parser {
                         $title = Title::newFromText( $title );
                 }
                 $this->mTitle = $title;
-               $options->resetUsage();
                 $this->setOutputType( $outputType );
                 $text = $this->replaceVariables( $text );
                 $text = $this->mStripState->unstripBoth( $text );