Merge "Add a sort parameter to SearchEngine"
[lhc/web/wiklou.git] / includes / parser / Parser.php
index a9daa22..3027ab6 100644 (file)
@@ -90,6 +90,9 @@ class Parser {
        const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
                \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
 
+       # Regular expression for a non-newline space
+       const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
+
        # State constants for the definition list colon extraction
        const COLON_STATE_TEXT = 0;
        const COLON_STATE_TAG = 1;
@@ -143,7 +146,8 @@ class Parser {
         * @var MagicWordArray
         */
        public $mSubstWords;
-       public $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
+       # Initialised in constructor
+       public $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols;
 
        # Cleared with clearState():
        /**
@@ -295,14 +299,12 @@ class Parser {
                }
                $this->mFirstCall = false;
 
-               wfProfileIn( __METHOD__ );
 
                CoreParserFunctions::register( $this );
                CoreTagHooks::register( $this );
                $this->initialiseVariables();
 
                Hooks::run( 'ParserFirstCallInit', array( &$this ) );
-               wfProfileOut( __METHOD__ );
        }
 
        /**
@@ -311,7 +313,6 @@ class Parser {
         * @private
         */
        public function clearState() {
-               wfProfileIn( __METHOD__ );
                if ( $this->mFirstCall ) {
                        $this->firstCallInit();
                }
@@ -370,7 +371,6 @@ class Parser {
                $this->mProfiler = new SectionProfiler();
 
                Hooks::run( 'ParserClearState', array( &$this ) );
-               wfProfileOut( __METHOD__ );
        }
 
        /**
@@ -395,7 +395,6 @@ class Parser {
 
                global $wgShowHostnames;
                $fname = __METHOD__ . '-' . wfGetCaller();
-               wfProfileIn( __METHOD__ );
                wfProfileIn( $fname );
 
                if ( $clearState ) {
@@ -558,7 +557,6 @@ class Parser {
                $this->mInputSize = false;
                $this->currentRevisionCache = null;
                wfProfileOut( $fname );
-               wfProfileOut( __METHOD__ );
 
                return $this->mOutput;
        }
@@ -586,11 +584,9 @@ class Parser {
         * @return string UNSAFE half-parsed HTML
         */
        public function recursiveTagParse( $text, $frame = false ) {
-               wfProfileIn( __METHOD__ );
                Hooks::run( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
                Hooks::run( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
                $text = $this->internalParse( $text, false, $frame );
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -612,10 +608,8 @@ class Parser {
         * @return string Fully parsed HTML
         */
        public function recursiveTagParseFully( $text, $frame = false ) {
-               wfProfileIn( __METHOD__ );
                $text = $this->recursiveTagParse( $text, $frame );
                $text = $this->internalParseHalfParsed( $text, false );
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -630,8 +624,9 @@ class Parser {
         * @param bool|PPFrame $frame
         * @return mixed|string
         */
-       public function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null, $frame = false ) {
-               wfProfileIn( __METHOD__ );
+       public function preprocess( $text, Title $title = null,
+               ParserOptions $options, $revid = null, $frame = false
+       ) {
                $magicScopeVariable = $this->lock();
                $this->startParse( $title, $options, self::OT_PREPROCESS, true );
                if ( $revid !== null ) {
@@ -641,7 +636,6 @@ class Parser {
                Hooks::run( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
                $text = $this->replaceVariables( $text, $frame );
                $text = $this->mStripState->unstripBoth( $text );
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -655,10 +649,8 @@ class Parser {
         * @since 1.19
         */
        public function recursivePreprocess( $text, $frame = false ) {
-               wfProfileIn( __METHOD__ );
                $text = $this->replaceVariables( $text, $frame );
                $text = $this->mStripState->unstripBoth( $text );
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -1013,7 +1005,6 @@ class Parser {
         * @return string
         */
        public function doTableStuff( $text ) {
-               wfProfileIn( __METHOD__ );
 
                $lines = StringUtils::explode( "\n", $text );
                $out = '';
@@ -1200,7 +1191,6 @@ class Parser {
                        $out = '';
                }
 
-               wfProfileOut( __METHOD__ );
 
                return $out;
        }
@@ -1218,13 +1208,11 @@ class Parser {
         * @return string
         */
        public function internalParse( $text, $isMain = true, $frame = false ) {
-               wfProfileIn( __METHOD__ );
 
                $origText = $text;
 
                # Hook to suspend the parser in this state
                if ( !Hooks::run( 'ParserBeforeInternalParse', array( &$this, &$text, &$this->mStripState ) ) ) {
-                       wfProfileOut( __METHOD__ );
                        return $text;
                }
 
@@ -1275,7 +1263,6 @@ class Parser {
                $text = $this->doMagicLinks( $text );
                $text = $this->formatHeadings( $text, $origText, $isMain );
 
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -1386,22 +1373,24 @@ class Parser {
         * @return string
         */
        public function doMagicLinks( $text ) {
-               wfProfileIn( __METHOD__ );
                $prots = wfUrlProtocolsWithoutProtRel();
                $urlChar = self::EXT_LINK_URL_CLASS;
+               $space = self::SPACE_NOT_NL; #  non-newline space
+               $spdash = "(?:-|$space)"; # a dash or a non-newline space
+               $spaces = "$space++"; # possessive match of 1 or more spaces
                $text = preg_replace_callback(
                        '!(?:                           # Start cases
                                (<a[ \t\r\n>].*?</a>) |     # m[1]: Skip link text
                                (<.*?>) |                   # m[2]: Skip stuff inside HTML elements' . "
-                               (\b(?i:$prots)$urlChar+) |  # m[3]: Free external links" . '
-                               \b(?:RFC|PMID)\s+([0-9]+)\b |# m[4]: RFC or PMID, capture number
-                               \bISBN\s+(                  # m[5]: ISBN, capture number
-                                       (?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
-                                       (?: [0-9]  [\ \-]? ){9} # 9 digits with opt. delimiters
+                               (\b(?i:$prots)$urlChar+) |  # m[3]: Free external links
+                               \b(?:RFC|PMID) $spaces      # m[4]: RFC or PMID, capture number
+                                       ([0-9]+)\b |
+                               \bISBN $spaces (            # m[5]: ISBN, capture number
+                                       (?: 97[89] $spdash? )?   # optional 13-digit ISBN prefix
+                                       (?: [0-9]  $spdash? ){9} # 9 digits with opt. delimiters
                                        [0-9Xx]                 # check digit
-                                       )\b
-                       )!xu', array( &$this, 'magicLinkCallback' ), $text );
-               wfProfileOut( __METHOD__ );
+                               )\b
+                       )!xu", array( &$this, 'magicLinkCallback' ), $text );
                return $text;
        }
 
@@ -1441,6 +1430,8 @@ class Parser {
                } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
                        # ISBN
                        $isbn = $m[5];
+                       $space = self::SPACE_NOT_NL; #  non-newline space
+                       $isbn = preg_replace( "/$space/", ' ', $isbn );
                        $num = strtr( $isbn, array(
                                '-' => '',
                                ' ' => '',
@@ -1464,7 +1455,6 @@ class Parser {
         * @private
         */
        public function makeFreeExternalLink( $url ) {
-               wfProfileIn( __METHOD__ );
 
                $trail = '';
 
@@ -1484,7 +1474,20 @@ class Parser {
                        $sep .= ')';
                }
 
-               $numSepChars = strspn( strrev( $url ), $sep );
+               $urlRev = strrev( $url );
+               $numSepChars = strspn( $urlRev, $sep );
+               # Don't break a trailing HTML entity by moving the ; into $trail
+               # This is in hot code, so use substr_compare to avoid having to
+               # create a new string object for the comparison
+               if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0) {
+                       # more optimization: instead of running preg_match with a $
+                       # anchor, which can be slow, do the match on the reversed
+                       # string starting at the desired offset.
+                       # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
+                       if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
+                               $numSepChars--;
+                       }
+               }
                if ( $numSepChars ) {
                        $trail = substr( $url, -$numSepChars ) . $trail;
                        $url = substr( $url, 0, -$numSepChars );
@@ -1505,7 +1508,6 @@ class Parser {
                        $pasteurized = self::normalizeLinkUrl( $url );
                        $this->mOutput->addExternalLink( $pasteurized );
                }
-               wfProfileOut( __METHOD__ );
                return $text . $trail;
        }
 
@@ -1519,12 +1521,10 @@ class Parser {
         * @return string
         */
        public function doHeadings( $text ) {
-               wfProfileIn( __METHOD__ );
                for ( $i = 6; $i >= 1; --$i ) {
                        $h = str_repeat( '=', $i );
                        $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text );
                }
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -1537,14 +1537,12 @@ class Parser {
         * @return string The altered text
         */
        public function doAllQuotes( $text ) {
-               wfProfileIn( __METHOD__ );
                $outtext = '';
                $lines = StringUtils::explode( "\n", $text );
                foreach ( $lines as $line ) {
                        $outtext .= $this->doQuotes( $line ) . "\n";
                }
                $outtext = substr( $outtext, 0, -1 );
-               wfProfileOut( __METHOD__ );
                return $outtext;
        }
 
@@ -1746,11 +1744,9 @@ class Parser {
         * @return string
         */
        public function replaceExternalLinks( $text ) {
-               wfProfileIn( __METHOD__ );
 
                $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
                if ( $bits === false ) {
-                       wfProfileOut( __METHOD__ );
                        throw new MWException( "PCRE needs to be compiled with "
                                . "--enable-unicode-properties in order for MediaWiki to function" );
                }
@@ -1814,7 +1810,6 @@ class Parser {
                        $this->mOutput->addExternalLink( $pasteurized );
                }
 
-               wfProfileOut( __METHOD__ );
                return $s;
        }
 
@@ -2012,7 +2007,6 @@ class Parser {
         */
        public function replaceInternalLinks2( &$s ) {
                global $wgExtraInterlanguageLinkPrefixes;
-               wfProfileIn( __METHOD__ );
 
                wfProfileIn( __METHOD__ . '-setup' );
                static $tc = false, $e1, $e1_img;
@@ -2047,7 +2041,6 @@ class Parser {
 
                if ( is_null( $this->mTitle ) ) {
                        wfProfileOut( __METHOD__ . '-setup' );
-                       wfProfileOut( __METHOD__ );
                        throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
                }
                $nottalk = !$this->mTitle->isTalkPage();
@@ -2344,7 +2337,6 @@ class Parser {
                        }
                        wfProfileOut( __METHOD__ . "-always_known" );
                }
-               wfProfileOut( __METHOD__ );
                return $holders;
        }
 
@@ -2543,7 +2535,6 @@ class Parser {
         * @return string The lists rendered as HTML
         */
        public function doBlockLevels( $text, $linestart ) {
-               wfProfileIn( __METHOD__ );
 
                # Parsing through the text line by line.  The main thing
                # happening here is handling of block-level elements p, pre,
@@ -2746,7 +2737,6 @@ class Parser {
                        $this->mLastSection = '';
                }
 
-               wfProfileOut( __METHOD__ );
                return $output;
        }
 
@@ -2761,12 +2751,10 @@ class Parser {
         * @return string The position of the ':', or false if none found
         */
        public function findColonNoLinks( $str, &$before, &$after ) {
-               wfProfileIn( __METHOD__ );
 
                $pos = strpos( $str, ':' );
                if ( $pos === false ) {
                        # Nothing to find!
-                       wfProfileOut( __METHOD__ );
                        return false;
                }
 
@@ -2775,7 +2763,6 @@ class Parser {
                        # Easy; no tag nesting to worry about
                        $before = substr( $str, 0, $pos );
                        $after = substr( $str, $pos + 1 );
-                       wfProfileOut( __METHOD__ );
                        return $pos;
                }
 
@@ -2799,7 +2786,6 @@ class Parser {
                                                # We found it!
                                                $before = substr( $str, 0, $i );
                                                $after = substr( $str, $i + 1 );
-                                               wfProfileOut( __METHOD__ );
                                                return $i;
                                        }
                                        # Embedded in a tag; don't break it.
@@ -2809,7 +2795,6 @@ class Parser {
                                        $colon = strpos( $str, ':', $i );
                                        if ( $colon === false ) {
                                                # Nothing else interesting
-                                               wfProfileOut( __METHOD__ );
                                                return false;
                                        }
                                        $lt = strpos( $str, '<', $i );
@@ -2818,7 +2803,6 @@ class Parser {
                                                        # We found it!
                                                        $before = substr( $str, 0, $colon );
                                                        $after = substr( $str, $colon + 1 );
-                                                       wfProfileOut( __METHOD__ );
                                                        return $i;
                                                }
                                        }
@@ -2869,7 +2853,6 @@ class Parser {
                                        $stack--;
                                        if ( $stack < 0 ) {
                                                wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" );
-                                               wfProfileOut( __METHOD__ );
                                                return false;
                                        }
                                        $state = self::COLON_STATE_TEXT;
@@ -2904,16 +2887,13 @@ class Parser {
                                }
                                break;
                        default:
-                               wfProfileOut( __METHOD__ );
                                throw new MWException( "State machine error in " . __METHOD__ );
                        }
                }
                if ( $stack > 0 ) {
                        wfDebug( __METHOD__ . ": Invalid input; not enough close tags (stack $stack, state $state)\n" );
-                       wfProfileOut( __METHOD__ );
                        return false;
                }
-               wfProfileOut( __METHOD__ );
                return false;
        }
 
@@ -3281,13 +3261,11 @@ class Parser {
         * @private
         */
        public function initialiseVariables() {
-               wfProfileIn( __METHOD__ );
                $variableIDs = MagicWord::getVariableIDs();
                $substIDs = MagicWord::getSubstIDs();
 
                $this->mVariables = new MagicWordArray( $variableIDs );
                $this->mSubstWords = new MagicWordArray( $substIDs );
-               wfProfileOut( __METHOD__ );
        }
 
        /**
@@ -3362,7 +3340,6 @@ class Parser {
                if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
                        return $text;
                }
-               wfProfileIn( __METHOD__ );
 
                if ( $frame === false ) {
                        $frame = $this->getPreprocessor()->newFrame();
@@ -3376,7 +3353,6 @@ class Parser {
                $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
                $text = $frame->expand( $dom, $flags );
 
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -3454,7 +3430,6 @@ class Parser {
         * @return string The text of the template
         */
        public function braceSubstitution( $piece, $frame ) {
-               wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ . '-setup' );
 
                // Flags
@@ -3567,7 +3542,6 @@ class Parser {
                                        $result = $this->callParserFunction( $frame, $func, $funcArgs );
                                } catch ( Exception $ex ) {
                                        wfProfileOut( __METHOD__ . '-pfunc' );
-                                       wfProfileOut( __METHOD__ );
                                        throw $ex;
                                }
 
@@ -3696,7 +3670,6 @@ class Parser {
                        if ( $profileSection ) {
                                $this->mProfiler->scopedProfileOut( $profileSection );
                        }
-                       wfProfileOut( __METHOD__ );
                        return array( 'object' => $text );
                }
 
@@ -3762,7 +3735,6 @@ class Parser {
                        $ret = array( 'text' => $text );
                }
 
-               wfProfileOut( __METHOD__ );
                return $ret;
        }
 
@@ -3788,7 +3760,6 @@ class Parser {
        public function callParserFunction( $frame, $function, array $args = array() ) {
                global $wgContLang;
 
-               wfProfileIn( __METHOD__ );
 
                # Case sensitive functions
                if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
@@ -3799,7 +3770,6 @@ class Parser {
                        if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
                                $function = $this->mFunctionSynonyms[0][$function];
                        } else {
-                               wfProfileOut( __METHOD__ );
                                return array( 'found' => false );
                        }
                }
@@ -3810,7 +3780,6 @@ class Parser {
                # Workaround for PHP bug 35229 and similar
                if ( !is_callable( $callback ) ) {
                        wfProfileOut( __METHOD__ . '-pfunc-' . $function );
-                       wfProfileOut( __METHOD__ );
                        throw new MWException( "Tag hook for $function is not callable\n" );
                }
 
@@ -3876,7 +3845,6 @@ class Parser {
                        $result['isChildObj'] = true;
                }
                wfProfileOut( __METHOD__ . '-pfunc-' . $function );
-               wfProfileOut( __METHOD__ );
 
                return $result;
        }
@@ -4206,7 +4174,6 @@ class Parser {
         * @return array
         */
        public function argSubstitution( $piece, $frame ) {
-               wfProfileIn( __METHOD__ );
 
                $error = false;
                $parts = $piece['parts'];
@@ -4241,7 +4208,6 @@ class Parser {
                        $ret = array( 'text' => $text );
                }
 
-               wfProfileOut( __METHOD__ );
                return $ret;
        }
 
@@ -4372,7 +4338,6 @@ class Parser {
         * @return string
         */
        public function doDoubleUnderscore( $text ) {
-               wfProfileIn( __METHOD__ );
 
                # The position of __TOC__ needs to be recorded
                $mw = MagicWord::get( 'toc' );
@@ -4420,7 +4385,6 @@ class Parser {
                        $this->mOutput->setProperty( $key, '' );
                }
 
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -4614,14 +4578,15 @@ class Parser {
                        # * <sup> and <sub> (bug 8393)
                        # * <i> (bug 26375)
                        # * <b> (r105284)
+                       # * <bdi> (bug 72884)
                        # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
                        #
                        # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
                        # to allow setting directionality in toc items.
                        $tocline = preg_replace(
                                array(
-                                       '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?' . '>#',
-                                       '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?' . '>#'
+                                       '#<(?!/?(span|sup|sub|bdi|i|b)(?: [^>]*)?>).*?' . '>#',
+                                       '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b))(?: .*?)?' . '>#'
                                ),
                                array( '', '<$1>' ),
                                $safeHeadline
@@ -5125,7 +5090,6 @@ class Parser {
                }
                $executing = true;
 
-               wfProfileIn( __METHOD__ );
                if ( !$title ) {
                        global $wgTitle;
                        $title = $wgTitle;
@@ -5134,7 +5098,6 @@ class Parser {
                $text = $this->preprocess( $text, $title, $options );
 
                $executing = false;
-               wfProfileOut( __METHOD__ );
                return $text;
        }
 
@@ -5361,7 +5324,6 @@ class Parser {
         * @return string HTML
         */
        public function renderImageGallery( $text, $params ) {
-               wfProfileIn( __METHOD__ );
 
                $mode = false;
                if ( isset( $params['mode'] ) ) {
@@ -5513,7 +5475,6 @@ class Parser {
                }
                $html = $ig->toHTML();
                Hooks::run( 'AfterParserFetchFileAndTitle', array( $this, $ig, &$html ) );
-               wfProfileOut( __METHOD__ );
                return $html;
        }
 
@@ -6085,7 +6046,6 @@ class Parser {
         */
        public function getRevisionTimestamp() {
                if ( is_null( $this->mRevisionTimestamp ) ) {
-                       wfProfileIn( __METHOD__ );
 
                        global $wgContLang;
 
@@ -6100,7 +6060,6 @@ class Parser {
                        # it needs to be consistent for all visitors.
                        $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' );
 
-                       wfProfileOut( __METHOD__ );
                }
                return $this->mRevisionTimestamp;
        }
@@ -6355,14 +6314,12 @@ class Parser {
         * @return array
         */
        public function serializeHalfParsedText( $text ) {
-               wfProfileIn( __METHOD__ );
                $data = array(
                        'text' => $text,
                        'version' => self::HALF_PARSED_VERSION,
                        'stripState' => $this->mStripState->getSubState( $text ),
                        'linkHolders' => $this->mLinkHolders->getSubArray( $text )
                );
-               wfProfileOut( __METHOD__ );
                return $data;
        }