Merge "Make TOC hideable"
[lhc/web/wiklou.git] / includes / parser / Parser.php
index 1ab29eb..1f14223 100644 (file)
@@ -115,6 +115,10 @@ class Parser {
        # Marker Suffix needs to be accessible staticly.
        const MARKER_SUFFIX = "-QINU\x7f";
 
+       # Markers used for wrapping the table of contents
+       const TOC_START = '<mw:toc>';
+       const TOC_END = '</mw:toc>';
+
        # Persistent:
        var $mTagHooks = array();
        var $mTransparentTagHooks = array();
@@ -191,6 +195,7 @@ class Parser {
        var $mRevisionId;   # ID to display in {{REVISIONID}} tags
        var $mRevisionTimestamp; # The timestamp of the specified revision ID
        var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
+       var $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
        var $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
        var $mInputSize = false; # For {{PAGESIZE}} on current page.
 
@@ -292,7 +297,7 @@ class Parser {
                $this->mLinkHolders = new LinkHolderArray( $this );
                $this->mLinkID = 0;
                $this->mRevisionObject = $this->mRevisionTimestamp =
-                       $this->mRevisionId = $this->mRevisionUser = null;
+                       $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
                $this->mVarCache = array();
                $this->mUser = null;
                $this->mLangLinkLanguages = array();
@@ -354,7 +359,7 @@ class Parser {
                 * to internalParse() which does all the real work.
                 */
 
-               global $wgUseTidy, $wgAlwaysUseTidy;
+               global $wgUseTidy, $wgAlwaysUseTidy, $wgShowHostnames;
                $fname = __METHOD__ . '-' . wfGetCaller();
                wfProfileIn( __METHOD__ );
                wfProfileIn( $fname );
@@ -375,11 +380,13 @@ class Parser {
                $oldRevisionObject = $this->mRevisionObject;
                $oldRevisionTimestamp = $this->mRevisionTimestamp;
                $oldRevisionUser = $this->mRevisionUser;
+               $oldRevisionSize = $this->mRevisionSize;
                if ( $revid !== null ) {
                        $this->mRevisionId = $revid;
                        $this->mRevisionObject = null;
                        $this->mRevisionTimestamp = null;
                        $this->mRevisionUser = null;
+                       $this->mRevisionSize = null;
                }
 
                wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
@@ -529,6 +536,9 @@ class Parser {
                        wfRunHooks( 'ParserLimitReportPrepare', array( $this, $this->mOutput ) );
 
                        $limitReport = "NewPP limit report\n";
+                       if ( $wgShowHostnames ) {
+                               $limitReport .= 'Parsed by ' . wfHostname() . "\n";
+                       }
                        foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
                                if ( wfRunHooks( 'ParserLimitReportFormat',
                                        array( $key, $value, &$limitReport, false, false )
@@ -566,6 +576,7 @@ class Parser {
                $this->mRevisionObject = $oldRevisionObject;
                $this->mRevisionTimestamp = $oldRevisionTimestamp;
                $this->mRevisionUser = $oldRevisionUser;
+               $this->mRevisionSize = $oldRevisionSize;
                $this->mInputSize = false;
                wfProfileOut( $fname );
                wfProfileOut( __METHOD__ );
@@ -1414,7 +1425,8 @@ class Parser {
         */
        public function doQuotes( $text ) {
                $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
-               if ( count( $arr ) == 1 ) {
+               $countarr = count( $arr );
+               if ( $countarr == 1 ) {
                        return $text;
                }
 
@@ -1423,26 +1435,29 @@ class Parser {
                // of bold and italics mark-ups.
                $numbold = 0;
                $numitalics = 0;
-               for ( $i = 1; $i < count( $arr ); $i += 2 ) {
+               for ( $i = 1; $i < $countarr; $i += 2 ) {
+                       $thislen = strlen( $arr[$i] );
                        // If there are ever four apostrophes, assume the first is supposed to
                        // be text, and the remaining three constitute mark-up for bold text.
                        // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
-                       if ( strlen( $arr[$i] ) == 4 ) {
+                       if ( $thislen == 4 ) {
                                $arr[$i - 1] .= "'";
                                $arr[$i] = "'''";
-                       } elseif ( strlen( $arr[$i] ) > 5 ) {
+                               $thislen = 3;
+                       } elseif ( $thislen > 5 ) {
                                // If there are more than 5 apostrophes in a row, assume they're all
                                // text except for the last 5.
                                // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
-                               $arr[$i - 1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
+                               $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
                                $arr[$i] = "'''''";
+                               $thislen = 5;
                        }
                        // Count the number of occurrences of bold and italics mark-ups.
-                       if ( strlen( $arr[$i] ) == 2 ) {
+                       if ( $thislen == 2 ) {
                                $numitalics++;
-                       } elseif ( strlen( $arr[$i] ) == 3 ) {
+                       } elseif ( $thislen == 3 ) {
                                $numbold++;
-                       } elseif ( strlen( $arr[$i] ) == 5 ) {
+                       } elseif ( $thislen == 5 ) {
                                $numitalics++;
                                $numbold++;
                        }
@@ -1456,7 +1471,7 @@ class Parser {
                        $firstsingleletterword = -1;
                        $firstmultiletterword = -1;
                        $firstspace = -1;
-                       for ( $i = 1; $i < count( $arr ); $i += 2 ) {
+                       for ( $i = 1; $i < $countarr; $i += 2 ) {
                                if ( strlen( $arr[$i] ) == 3 ) {
                                        $x1 = substr( $arr[$i - 1], -1 );
                                        $x2 = substr( $arr[$i - 1], -2, 1 );
@@ -1467,6 +1482,9 @@ class Parser {
                                        } elseif ( $x2 === ' ' ) {
                                                if ( $firstsingleletterword == -1 ) {
                                                        $firstsingleletterword = $i;
+                                                       // if $firstsingleletterword is set, we don't
+                                                       // look at the other options, so we can bail early.
+                                                       break;
                                                }
                                        } else {
                                                if ( $firstmultiletterword == -1 ) {
@@ -1506,7 +1524,8 @@ class Parser {
                                        $output .= $r;
                                }
                        } else {
-                               if ( strlen( $r ) == 2 ) {
+                               $thislen = strlen( $r );
+                               if ( $thislen == 2 ) {
                                        if ( $state === 'i' ) {
                                                $output .= '</i>';
                                                $state = '';
@@ -1523,7 +1542,7 @@ class Parser {
                                                $output .= '<i>';
                                                $state .= 'i';
                                        }
-                               } elseif ( strlen( $r ) == 3 ) {
+                               } elseif ( $thislen == 3 ) {
                                        if ( $state === 'b' ) {
                                                $output .= '</b>';
                                                $state = '';
@@ -1540,7 +1559,7 @@ class Parser {
                                                $output .= '<b>';
                                                $state .= 'b';
                                        }
-                               } elseif ( strlen( $r ) == 5 ) {
+                               } elseif ( $thislen == 5 ) {
                                        if ( $state === 'b' ) {
                                                $output .= '</b><i>';
                                                $state = 'i';
@@ -2098,16 +2117,12 @@ class Parser {
                                }
                        }
 
-                       # Self-link checking
-                       if ( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
-                               if ( $nt->equals( $this->mTitle ) || ( !$nt->isKnown() && in_array(
-                                       $this->mTitle->getPrefixedText(),
-                                       $this->getConverterLanguage()->autoConvertToAllVariants( $nt->getPrefixedText() ),
-                                       true
-                               ) ) ) {
-                                       $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
-                                       continue;
-                               }
+                       # Self-link checking. For some languages, variants of the title are checked in
+                       # LinkHolderArray::doVariants() to allow batching the existence checks necessary
+                       # for linking to a different variant.
+                       if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && $nt->getFragment() === '' ) {
+                               $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
+                               continue;
                        }
 
                        # NS_MEDIA is a pseudo-namespace for linking directly to a file
@@ -2266,13 +2281,13 @@ class Parser {
                $result = $this->closeParagraph();
 
                if ( '*' === $char ) {
-                       $result .= '<ul><li>';
+                       $result .= "<ul>\n<li>";
                } elseif ( '#' === $char ) {
-                       $result .= '<ol><li>';
+                       $result .= "<ol>\n<li>";
                } elseif ( ':' === $char ) {
-                       $result .= '<dl><dd>';
+                       $result .= "<dl>\n<dd>";
                } elseif ( ';' === $char ) {
-                       $result .= '<dl><dt>';
+                       $result .= "<dl>\n<dt>";
                        $this->mDTopen = true;
                } else {
                        $result = '<!-- ERR 1 -->';
@@ -2290,11 +2305,11 @@ class Parser {
         */
        function nextItem( $char ) {
                if ( '*' === $char || '#' === $char ) {
-                       return '</li><li>';
+                       return "</li>\n<li>";
                } elseif ( ':' === $char || ';' === $char ) {
-                       $close = '</dd>';
+                       $close = "</dd>\n";
                        if ( $this->mDTopen ) {
-                               $close = '</dt>';
+                               $close = "</dt>\n";
                        }
                        if ( ';' === $char ) {
                                $this->mDTopen = true;
@@ -2316,15 +2331,15 @@ class Parser {
         */
        function closeList( $char ) {
                if ( '*' === $char ) {
-                       $text = '</li></ul>';
+                       $text = "</li>\n</ul>";
                } elseif ( '#' === $char ) {
-                       $text = '</li></ol>';
+                       $text = "</li>\n</ol>";
                } elseif ( ':' === $char ) {
                        if ( $this->mDTopen ) {
                                $this->mDTopen = false;
-                               $text = '</dt></dl>';
+                               $text = "</dt>\n</dl>";
                        } else {
-                               $text = '</dd></dl>';
+                               $text = "</dd>\n</dl>";
                        }
                } else {
                        return '<!-- ERR 3 -->';
@@ -2354,6 +2369,7 @@ class Parser {
                $this->mDTopen = $inBlockElem = false;
                $prefixLength = 0;
                $paragraphStack = false;
+               $inBlockquote = false;
 
                foreach ( $textLines as $oLine ) {
                        # Fix up $linestart
@@ -2447,10 +2463,10 @@ class Parser {
                                wfProfileIn( __METHOD__ . "-paragraph" );
                                # No prefix (not in list)--go to paragraph mode
                                # XXX: use a stack for nestable elements like span, table and div
-                               $openmatch = preg_match( '/(?:<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
+                               $openmatch = preg_match( '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
                                $closematch = preg_match(
-                                       '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' .
-                                       '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t );
+                                       '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|' .
+                                       '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|' . $this->mUniqPrefix . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS', $t );
                                if ( $openmatch or $closematch ) {
                                        $paragraphStack = false;
                                        # TODO bug 5718: paragraph closed
@@ -2458,9 +2474,14 @@ class Parser {
                                        if ( $preOpenMatch and !$preCloseMatch ) {
                                                $this->mInPre = true;
                                        }
+                                       $bqOffset = 0;
+                                       while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset ) ) {
+                                               $inBlockquote = !$bqMatch[1][0]; // true for an opening tag, false for a closing tag
+                                               $bqOffset = $bqMatch[0][1] + strlen( $bqMatch[0][0] );
+                                       }
                                        $inBlockElem = !$closematch;
                                } elseif ( !$inBlockElem && !$this->mInPre ) {
-                                       if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) ) {
+                                       if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' || trim( $t ) != '' ) and !$inBlockquote ) {
                                                # pre
                                                if ( $this->mLastSection !== 'pre' ) {
                                                        $paragraphStack = false;
@@ -2890,6 +2911,13 @@ class Parser {
                                wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-revision...\n" );
                                $value = $this->getRevisionUser();
                                break;
+                       case 'revisionsize':
+                               # Let the edit saving system know we should parse the page
+                               # *after* a revision ID has been assigned. This is for null edits.
+                               $this->mOutput->setFlag( 'vary-revision' );
+                               wfDebug( __METHOD__ . ": {{REVISIONSIZE}} used, setting vary-revision...\n" );
+                               $value = $this->getRevisionSize();
+                               break;
                        case 'namespace':
                                $value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
                                break;
@@ -3180,7 +3208,7 @@ class Parser {
        function limitationWarn( $limitationType, $current = '', $max = '' ) {
                # does no harm if $current and $max are present but are unnecessary for the message
                $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max )
-                       ->inContentLanguage()->escaped();
+                       ->inLanguage( $this->mOptions->getUserLangObj() )->text();
                $this->mOutput->addWarning( $warning );
                $this->addTrackingCategory( "$limitationType-category" );
        }
@@ -4422,7 +4450,8 @@ class Parser {
 
                        # Add the section to the section tree
                        # Find the DOM node for this header
-                       while ( $node && !$isTemplate ) {
+                       $noOffset = ( $isTemplate || $sectionIndex === false );
+                       while ( $node && !$noOffset ) {
                                if ( $node->getName() === 'h' ) {
                                        $bits = $node->splitHeading();
                                        if ( $bits['i'] == $sectionIndex ) {
@@ -4440,7 +4469,7 @@ class Parser {
                                'number' => $numbering,
                                'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
                                'fromtitle' => $titleText,
-                               'byteoffset' => ( $isTemplate ? null : $byteOffset ),
+                               'byteoffset' => ( $noOffset ? null : $byteOffset ),
                                'anchor' => $anchor,
                        );
 
@@ -4491,6 +4520,7 @@ class Parser {
                        }
                        $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
                        $this->mOutput->setTOCHTML( $toc );
+                       $toc = self::TOC_START . $toc . self::TOC_END;
                }
 
                if ( $isMain ) {
@@ -5789,6 +5819,27 @@ class Parser {
                return $this->mRevisionUser;
        }
 
+       /**
+        * Get the size of the revision
+        *
+        * @return int|null revision size
+        */
+       function getRevisionSize() {
+               if ( is_null( $this->mRevisionSize ) ) {
+                       $revObject = $this->getRevisionObject();
+
+                       # If this variable is subst'ed, the revision ID will be blank,
+                       # so just use the parser input size, because the substitution
+                       # itself will change the size.
+                       if ( $revObject ) {
+                               $this->mRevisionSize = $revObject->getSize();
+                       } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
+                               $this->mRevisionSize = $this->mInputSize;
+                       }
+               }
+               return $this->mRevisionSize;
+       }
+
        /**
         * Mutator for $mDefaultSort
         *