Remove a couple of calls to ParserOptions::resetUsage(), missed on its removal in...
[lhc/web/wiklou.git] / includes / parser / Parser.php
index b723bea..99df3d4 100644 (file)
@@ -329,18 +329,11 @@ class Parser {
                }
 
                /**
-                * A page get its title converted except:
-                * a) Language conversion is globally disabled
-                * b) Title convert is globally disabled
-                * c) The page is a redirect page
-                * d) User request with a "linkconvert" set to "no"
-                * e) A "nocontentconvert" magic word has been set
-                * f) A "notitleconvert" magic word has been set
-                * g) User sets "noconvertlink" in his/her preference
-                *
-                * Note that if a user tries to set a title in a conversion
-                * rule but content conversion was not done, then the parser
-                * won't pick it up.  This is probably expected behavior.
+                * A converted title will be provided in the output object if title and
+                * content conversion are enabled, the article text does not contain 
+                * a conversion-suppressing double-underscore tag, and no 
+                * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
+                * automatic link conversion.
                 */
                if ( !( $wgDisableLangConversion
                                || $wgDisableTitleConversion
@@ -832,7 +825,22 @@ class Parser {
                $has_opened_tr = array(); # Did this table open a <tr> element?
                $indent_level = 0; # indent level of the table
 
-               foreach ( $lines as $outLine ) {
+               # Keep pulling lines off the front of the array until they're all gone.
+               # We want to be able to push lines back onto the front of the stream,
+               # but StringUtils::explode() returns funky optimised Iterators which don't
+               # support insertion.  So maintain a separate buffer and draw on that first if
+               # there's anything in it.
+               $extraLines = array();
+               $lines->rewind();
+               do {
+                       if( $extraLines ){
+                               $outLine = array_shift( $extraLines );
+                       } elseif( $lines->valid() ) {
+                               $outLine = $lines->current();
+                               $lines->next();
+                       } else {
+                               break;
+                       }
                        $line = trim( $outLine );
 
                        if ( $line === '' ) { # empty line, go to next line
@@ -908,11 +916,10 @@ class Parser {
                        } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 )  === '|+' ) {
                                # This might be cell elements, td, th or captions
                                if ( substr( $line , 0 , 2 ) === '|+' ) {
-                                       $first_character = '+';
-                                       $line = substr( $line , 1 );
+                                       $first_character = '|+';
                                }
 
-                               $line = substr( $line , 1 );
+                               $line = substr( $line , strlen( $first_character ) );
 
                                if ( $first_character === '!' ) {
                                        $line = str_replace( '!!' , '||' , $line );
@@ -923,62 +930,84 @@ class Parser {
                                # by earlier parser steps, but should avoid splitting up eg
                                # attribute values containing literal "||".
                                $cells = StringUtils::explodeMarkup( '||' , $line );
-
-                               $outLine = '';
-
-                               # Loop through each table cell
-                               foreach ( $cells as $cell ) {
-                                       $previous = '';
-                                       if ( $first_character !== '+' ) {
-                                               $tr_after = array_pop( $tr_attributes );
-                                               if ( !array_pop( $tr_history ) ) {
-                                                       $previous = "<tr{$tr_after}>\n";
-                                               }
-                                               array_push( $tr_history , true );
-                                               array_push( $tr_attributes , '' );
-                                               array_pop( $has_opened_tr );
-                                               array_push( $has_opened_tr , true );
+                               $cell = array_shift( $cells );
+
+                               # Inject cells back into the stream to be dealt with later
+                               # TODO: really we should do the whole thing as a stream...
+                               # but that would be too much like a sensible implementation :P
+                               if( count( $cells ) ){
+                                       foreach( array_reverse( $cells ) as $extraCell ){
+                                               array_unshift( $extraLines, $first_character . $extraCell );
                                        }
+                               }
 
-                                       $last_tag = array_pop( $last_tag_history );
+                               $outLine = '';
 
-                                       if ( array_pop( $td_history ) ) {
-                                               $previous = "</{$last_tag}>\n{$previous}";
+                               $previous = '';
+                               if ( $first_character !== '|+' ) {
+                                       $tr_after = array_pop( $tr_attributes );
+                                       if ( !array_pop( $tr_history ) ) {
+                                               $previous = "<tr{$tr_after}>\n";
                                        }
+                                       array_push( $tr_history , true );
+                                       array_push( $tr_attributes , '' );
+                                       array_pop( $has_opened_tr );
+                                       array_push( $has_opened_tr , true );
+                               }
 
-                                       if ( $first_character === '|' ) {
-                                               $last_tag = 'td';
-                                       } elseif ( $first_character === '!' ) {
-                                               $last_tag = 'th';
-                                       } elseif ( $first_character === '+' ) {
-                                               $last_tag = 'caption';
-                                       } else {
-                                               $last_tag = '';
-                                       }
+                               $last_tag = array_pop( $last_tag_history );
 
-                                       array_push( $last_tag_history , $last_tag );
+                               if ( array_pop( $td_history ) ) {
+                                       $previous = "</{$last_tag}>\n{$previous}";
+                               }
 
-                                       # A cell could contain both parameters and data
-                                       $cell_data = explode( '|' , $cell , 2 );
+                               if ( $first_character === '|' ) {
+                                       $last_tag = 'td';
+                               } elseif ( $first_character === '!' ) {
+                                       $last_tag = 'th';
+                               } elseif ( $first_character === '|+' ) {
+                                       $last_tag = 'caption';
+                               } else {
+                                       $last_tag = '';
+                               }
 
-                                       # Bug 553: Note that a '|' inside an invalid link should not
-                                       # be mistaken as delimiting cell parameters
-                                       if ( strpos( $cell_data[0], '[[' ) !== false ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell}";
-                                       } elseif ( count( $cell_data ) == 1 ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
-                                       } else {
-                                               $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
-                                               $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
-                                               $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
-                                       }
+                               array_push( $last_tag_history , $last_tag );
+
+                               # A cell could contain both parameters and data... but the pipe could
+                               # also be the start of a nested table, or a raw pipe inside an invalid
+                               # link (bug 553).  
+                               $cell_data = preg_split( '/(?<!\{)\|/', $cell, 2 );
+
+                               # Bug 553: a '|' inside an invalid link should not
+                               # be mistaken as delimiting cell parameters
+                               if ( strpos( $cell_data[0], '[[' ) !== false ) {
+                                       $data = $cell;
+                                       $cell = "{$previous}<{$last_tag}>";
+                               } elseif ( count( $cell_data ) == 1 ) {
+                                       $cell = "{$previous}<{$last_tag}>";
+                                       $data = $cell_data[0];
+                               } else {
+                                       $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
+                                       $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
+                                       $cell = "{$previous}<{$last_tag}{$attributes}>";
+                                       $data = $cell_data[1];
+                               }
 
-                                       $outLine .= $cell;
-                                       array_push( $td_history , true );
+                               # Bug 529: the start of a table cell should be a linestart context for
+                               # processing other block markup, including nested tables.  The original
+                               # implementation of this was to add a newline before every brace construct,
+                               # which broke all manner of other things.  Instead, push the contents
+                               # of the cell back into the stream and come back to it later.  But don't
+                               # do that if the first line is empty, or you may get extra whitespace
+                               if( $data ){
+                                       array_unshift( $extraLines, trim( $data ) );
                                }
+
+                               $outLine .= $cell;
+                               array_push( $td_history , true );
                        }
                        $out .= $outLine . "\n";
-               }
+               } while( $lines->valid() || count( $extraLines ) );
 
                # Closing open td, tr && table
                while ( count( $td_history ) > 0 ) {
@@ -2242,6 +2271,7 @@ class Parser {
                                        '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                                        '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
                                if ( $openmatch or $closematch ) {
+
                                        $paragraphStack = false;
                                        # TODO bug 5718: paragraph closed
                                        $output .= $this->closeParagraph();
@@ -2563,25 +2593,25 @@ class Parser {
                                $value = wfEscapeWikiText( $this->mTitle->getText() );
                                break;
                        case 'pagenamee':
-                               $value = $this->mTitle->getPartialURL();
+                               $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
                                break;
                        case 'fullpagename':
                                $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
                                break;
                        case 'fullpagenamee':
-                               $value = $this->mTitle->getPrefixedURL();
+                               $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
                                break;
                        case 'subpagename':
                                $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
                                break;
                        case 'subpagenamee':
-                               $value = $this->mTitle->getSubpageUrlForm();
+                               $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
                                break;
                        case 'basepagename':
                                $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
                                break;
                        case 'basepagenamee':
-                               $value = wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) );
+                               $value = wfEscapeWikiText( wfUrlEncode( str_replace( ' ', '_', $this->mTitle->getBaseText() ) ) );
                                break;
                        case 'talkpagename':
                                if ( $this->mTitle->canTalk() ) {
@@ -2594,7 +2624,7 @@ class Parser {
                        case 'talkpagenamee':
                                if ( $this->mTitle->canTalk() ) {
                                        $talkPage = $this->mTitle->getTalkPage();
-                                       $value = $talkPage->getPrefixedUrl();
+                                       $value = wfEscapeWikiText( $talkPage->getPrefixedUrl() );
                                } else {
                                        $value = '';
                                }
@@ -2605,7 +2635,7 @@ class Parser {
                                break;
                        case 'subjectpagenamee':
                                $subjPage = $this->mTitle->getSubjectPage();
-                               $value = $subjPage->getPrefixedUrl();
+                               $value = wfEscapeWikiText( $subjPage->getPrefixedUrl() );
                                break;
                        case 'revisionid':
                                # Let the edit saving system know we should parse the page
@@ -3232,11 +3262,11 @@ class Parser {
                        $text = wfEscapeWikiText( $text );
                } elseif ( is_string( $text )
                        && !$piece['lineStart']
-                       && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) )
+                       && preg_match( '/^{\\|/', $text ) )
                {
-                       # Bug 529: if the template begins with a table or block-level
-                       # element, it should be treated as beginning a new line.
-                       # This behaviour is somewhat controversial.
+                       # Bug 529: if the template begins with a table, it should be treated as
+                       # beginning a new line.  This previously handled other block-level elements
+                       # such as #, :, etc, but these have many false-positives (bug 12974).
                        $text = "\n" . $text;
                }
 
@@ -3295,7 +3325,7 @@ class Parser {
 
                if ( !$title->equals( $cacheTitle ) ) {
                        $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
-                               array( $title->getNamespace(), $cdb = $title->getDBkey() );
+                               array( $title->getNamespace(), $title->getDBkey() );
                }
 
                return array( $dom, $title );
@@ -3362,12 +3392,12 @@ class Parser {
                                $text = $rev->getText();
                        } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
                                global $wgContLang;
-                               $message = $wgContLang->lcfirst( $title->getText() );
-                               $text = wfMsgForContentNoTrans( $message );
-                               if ( wfEmptyMsg( $message, $text ) ) {
+                               $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
+                               if ( !$message->exists() ) {
                                        $text = false;
                                        break;
                                }
+                               $text = $message->plain();
                        } else {
                                break;
                        }
@@ -3697,10 +3727,7 @@ class Parser {
                        $showEditLink = $this->mOptions->getEditSection();
                }
                if ( $showEditLink ) {
-                       $editLinkAsToken = $this->mOptions->getEditSectionTokens();
-                       if ( $editLinkAsToken ) {
-                               $this->mOutput->setEditSectionTokens( "{$this->mUniqPrefix}-editsection-", self::MARKER_SUFFIX );
-                       }
+                       $this->mOutput->setEditSectionTokens( true );
                }
 
                # Get all headlines for numbering them and adding funky stuff like [edit]
@@ -3955,28 +3982,27 @@ class Parser {
 
                        # give headline the correct <h#> tag
                        if ( $showEditLink && $sectionIndex !== false ) {
-                               if ( $editLinkAsToken ) {
-                                       // Output edit section links as markers with styles that can be customized by skins
-                                       if ( $isTemplate ) {
-                                               # Put a T flag in the section identifier, to indicate to extractSections()
-                                               # that sections inside <includeonly> should be counted.
-                                               $editlinkArgs = array( $titleText, "T-$sectionIndex", null );
-                                       } else {
-                                               $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
-                                       }
-                                       // We use nearly the same structure as uniqPrefix and the marker stuffix (besides there being nothing random)
-                                       // However the this is output into the parser output itself not replaced early, so we hardcode this in case
-                                       // the constants change in a different version of MediaWiki, which would break this code.
-                                       $editlink = "{$this->mUniqPrefix}-editsection-" . serialize($editlinkArgs) . self::MARKER_SUFFIX;
+                               // Output edit section links as markers with styles that can be customized by skins
+                               if ( $isTemplate ) {
+                                       # Put a T flag in the section identifier, to indicate to extractSections()
+                                       # that sections inside <includeonly> should be counted.
+                                       $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
                                } else {
-                                       // Output edit section links directly as markup like we used to
-                                       if ( $isTemplate ) {
-                                               # Put a T flag in the section identifier, to indicate to extractSections()
-                                               # that sections inside <includeonly> should be counted.
-                                               $editlink = $sk->doEditSectionLink( Title::newFromText( $titleText ), "T-$sectionIndex", null, $this->mOptions->getUserLang() );
-                                       } else {
-                                               $editlink = $sk->doEditSectionLink( $this->mTitle, $sectionIndex, $headlineHint, $this->mOptions->getUserLang() );
-                                       }
+                                       $editlinkArgs = array( $this->mTitle->getPrefixedText(), $sectionIndex, $headlineHint );
+                               }
+                               // We use a bit of pseudo-xml for editsection markers. The language converter is run later on.
+                               // Using a UNIQ style marker leads to the converter screwing up the tokens when it converts stuff,
+                               // and trying to insert strip tags fails too. At this point all real inputted tags have already been escaped,
+                               // so we don't have to worry about a user trying to input one of these markers directly.
+                               // We use a page and section attribute to stop the language converter from converting these important bits
+                               // of data, but put the headline hint inside a content block because the language converter is supposed to
+                               // be able to convert that piece of data.
+                               $editlink = '<editsection page="' . htmlspecialchars($editlinkArgs[0]);
+                               $editlink .= '" section="' . htmlspecialchars($editlinkArgs[1]) .'"';
+                               if ( isset($editlinkArgs[2]) ) {
+                                       $editlink .= '>' . $editlinkArgs[2] . '</editsection>';
+                               } else {
+                                       $editlink .= '/>';
                                }
                        } else {
                                $editlink = '';
@@ -4064,7 +4090,9 @@ class Parser {
                        "\r\n" => "\n",
                );
                $text = str_replace( array_keys( $pairs ), array_values( $pairs ), $text );
-               $text = $this->pstPass2( $text, $user );
+               if( $options->getPreSaveTransform() ) {
+                       $text = $this->pstPass2( $text, $user );
+               }
                $text = $this->mStripState->unstripBoth( $text );
 
                $this->setUser( null ); #Reset
@@ -4101,9 +4129,9 @@ class Parser {
                # whatever crap the system uses, localised or not, so we cannot
                # ship premade translations.
                $key = 'timezone-' . strtolower( trim( $tzMsg ) );
-               $value = wfMsgForContent( $key );
-               if ( !wfEmptyMsg( $key, $value ) ) {
-                       $tzMsg = $value;
+               $msg = wfMessage( $key )->inContentLanguage();
+               if ( $msg->exists() ) {
+                       $tzMsg = $msg->text();
                }
 
                date_default_timezone_set( $oldtz );
@@ -4279,7 +4307,6 @@ class Parser {
         */
        public function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
                $this->setTitle( $title );
-               $options->resetUsage();
                $this->mOptions = $options;
                $this->setOutputType( $outputType );
                if ( $clearState ) {
@@ -4326,6 +4353,7 @@ class Parser {
         */
        public function setHook( $tag, $callback ) {
                $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
                $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
                $this->mTagHooks[$tag] = $callback;
                if ( !in_array( $tag, $this->mStripList ) ) {
@@ -4337,6 +4365,7 @@ class Parser {
 
        function setTransparentTagHook( $tag, $callback ) {
                $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
                $oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
                $this->mTransparentTagHooks[$tag] = $callback;
 
@@ -4441,6 +4470,7 @@ class Parser {
         */
        function setFunctionTagHook( $tag, $callback, $flags ) {
                $tag = strtolower( $tag );
+               if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
                $old = isset( $this->mFunctionTagHooks[$tag] ) ?
                        $this->mFunctionTagHooks[$tag] : null;
                $this->mFunctionTagHooks[$tag] = array( $callback, $flags );
@@ -4530,7 +4560,7 @@ class Parser {
                        if ( strpos( $matches[0], '%' ) !== false ) {
                                $matches[1] = rawurldecode( $matches[1] );
                        }
-                       $tp = Title::newFromText( $matches[1] );
+                       $tp = Title::newFromText( $matches[1], NS_FILE );
                        $nt =& $tp;
                        if ( is_null( $nt ) ) {
                                # Bogus title. Ignore these so we don't bomb out later.
@@ -5179,7 +5209,6 @@ class Parser {
                        $title = Title::newFromText( $title );
                }
                $this->mTitle = $title;
-               $options->resetUsage();
                $this->setOutputType( $outputType );
                $text = $this->replaceVariables( $text );
                $text = $this->mStripState->unstripBoth( $text );