Merge "Hard deprecate MWNamespace::canTalk()"
[lhc/web/wiklou.git] / includes / parser / Parser.php
index 11825fa..deeb858 100644 (file)
@@ -1029,7 +1029,7 @@ class Parser {
                $matches = [];
 
                $taglist = implode( '|', $elements );
-               $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";
+               $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
 
                while ( $text != '' ) {
                        $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
@@ -3146,7 +3146,7 @@ class Parser {
                # $args is a list of argument nodes, starting from index 0, not including $part1
                # @todo FIXME: If piece['parts'] is null then the call to getLength()
                # below won't work b/c this $args isn't an object
-               $args = ( null == $piece['parts'] ) ? [] : $piece['parts'];
+               $args = ( $piece['parts'] == null ) ? [] : $piece['parts'];
 
                $profileSection = null; // profile templates
 
@@ -3681,7 +3681,6 @@ class Parser {
                $deps = [];
 
                # Loop to fetch the article, with up to 1 redirect
-               // phpcs:ignore Generic.CodeAnalysis.ForLoopWithTestFunctionCall
                for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
                        # Give extensions a chance to select the revision instead
                        $id = false; # Assume current
@@ -4371,7 +4370,7 @@ class Parser {
                        $anchor = $safeHeadline;
                        $fallbackAnchor = $fallbackHeadline;
                        if ( isset( $refers[$arrayKey] ) ) {
-                               // phpcs:ignore Generic.CodeAnalysis.ForLoopWithTestFunctionCall,Generic.Formatting.DisallowMultipleStatements
+                               // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
                                for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
                                $anchor .= "_$i";
                                $linkAnchor .= "_$i";
@@ -4380,7 +4379,7 @@ class Parser {
                                $refers[$arrayKey] = true;
                        }
                        if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
-                               // phpcs:ignore Generic.CodeAnalysis.ForLoopWithTestFunctionCall,Generic.Formatting.DisallowMultipleStatements
+                               // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
                                for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
                                $fallbackAnchor .= "_$i";
                                $refers["${fallbackArrayKey}_$i"] = true;
@@ -5066,9 +5065,10 @@ class Parser {
                        $ig->setShowFilename( false );
                }
                if ( isset( $params['caption'] ) ) {
-                       $caption = $params['caption'];
-                       $caption = htmlspecialchars( $caption );
-                       $caption = $this->replaceInternalLinks( $caption );
+                       // NOTE: We aren't passing a frame here or below.  Frame info
+                       // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
+                       // See T107332#4030581
+                       $caption = $this->recursiveTagParse( $params['caption'] );
                        $ig->setCaptionHtml( $caption );
                }
                if ( isset( $params['perrow'] ) ) {
@@ -5469,6 +5469,7 @@ class Parser {
         * Adds an entry to appropriate link tables.
         *
         * @since 1.32
+        * @param string $value
         * @return array of `[ type, target ]`, where:
         *   - `type` is one of:
         *     - `null`: Given value is not a valid link target, use default
@@ -5524,6 +5525,40 @@ class Parser {
                # that are later expanded to html- so expand them now and
                # remove the tags
                $tooltip = $this->mStripState->unstripBoth( $tooltip );
+               # Compatibility hack!  In HTML certain entity references not terminated
+               # by a semicolon are decoded (but not if we're in an attribute; that's
+               # how link URLs get away without properly escaping & in queries).
+               # But wikitext has always required semicolon-termination of entities,
+               # so encode & where needed to avoid decoding of semicolon-less entities.
+               # See T209236 and
+               # https://www.w3.org/TR/html5/syntax.html#named-character-references
+               # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
+               $tooltip = preg_replace( "/
+                       &                       # 1. entity prefix
+                       (?=                     # 2. followed by:
+                       (?:                     #  a. one of the legacy semicolon-less named entities
+                               A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
+                               C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
+                               GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
+                               O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
+                               U(?:acute|circ|grave|uml)|Yacute|
+                               a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
+                               c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
+                               divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
+                               frac(?:1(?:2|4)|34)|
+                               gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
+                               i(?:acute|circ|excl|grave|quest|uml)|laquo|
+                               lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
+                               m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
+                               not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
+                               o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
+                               p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
+                               s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
+                               u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
+                       )
+                       (?:[^;]|$))     #  b. and not followed by a semicolon
+                       # S = study, for efficiency
+                       /Sx", '&amp;', $tooltip );
                $tooltip = Sanitizer::stripAllTags( $tooltip );
 
                return $tooltip;