* (bug 2374) Allow extension tags to be terminated empty elements
[lhc/web/wiklou.git] / includes / Parser.php
index c286003..a910afe 100644 (file)
@@ -140,7 +140,10 @@ class Parser
                $this->mStripState = array();
                $this->mArgStack = array();
                $this->mInPre = false;
-               $this->mInterwikiLinkHolders = array();
+               $this->mInterwikiLinkHolders = array(
+                       'texts' => array(),
+                       'titles' => array()
+               );
                $this->mLinkHolders = array(
                        'namespaces' => array(),
                        'dbkeys' => array(),
@@ -194,8 +197,6 @@ class Parser
                        '/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1 \\2',
                        # french spaces, Guillemet-right
                        '/(\\302\\253) /' => '\\1 ',
-                       '/<hr *>/i' => '<hr />',
-                       '/<br *>/i' => '<br />',
                        '/<center *>/i' => '<div class="center">',
                        '/<\\/center *>/i' => '</div>',
                );
@@ -255,38 +256,76 @@ class Parser
         * @access private
         * @static
         */
-       function extractTags($tag, $text, &$content, $uniq_prefix = ''){
+       function extractTagsAndParams($tag, $text, &$content, &$tags, &$params, $uniq_prefix = ''){
                $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
                if ( !$content ) {
                        $content = array( );
                }
                $n = 1;
                $stripped = '';
+               # $tags and $params are filled below, keyed by the same markers as $content.
+               if ( !$tags ) {
+                       $tags = array( );
+               }
+               
+               if ( !$params ) {
+                       $params = array( );
+               }
+
+               // Hack to support short XML style tags: rewrite <tag ... /> as <tag ...></tag>
+               $text = preg_replace( "/<$tag(\\s+[^>]*|\\s*)\\/>/i", "<$tag\\1></$tag>", $text );
+               # The empty () in the comment pattern keeps the split below at three parts.
+               if( $tag == STRIP_COMMENTS ) {
+                       $start = '/<!--()/';
+                       $end   = '/-->/';
+               } else {
+                       $start = "/<$tag(\\s+[^>]*|\\s*)>/i";
+                       $end   = "/<\\/$tag\\s*>/i";
+               }
 
                while ( '' != $text ) {
-                       if($tag==STRIP_COMMENTS) {
-                               $p = preg_split( '/<!--/', $text, 2 );
-                       } else {
-                               $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
-                       }
+                       $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
                        $stripped .= $p[0];
-                       if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
-                               $text = '';
+                       if( count( $p ) < 3 ) {
+                               break;
+                       }
+                       $attributes = $p[1];
+                       $inside     = $p[2];
+                       # Put a unique marker into the stripped text in place of the element.
+                       $marker = $rnd . sprintf('%08X', $n++);
+                       $stripped .= $marker;
+                       # Rebuild the opening tag and decode its attributes for this marker.
+                       $tags[$marker] = "<$tag$attributes>";
+                       $params[$marker] = Sanitizer::decodeTagAttributes( $attributes );
+                       # Everything before the first end tag is the element's content.
+                       $q = preg_split( $end, $inside, 2 );
+                       $content[$marker] = $q[0];
+                       if( count( $q ) < 2 ) {
+                               # No end tag -- let it run out to the end of the text.
+                               break;
                        } else {
-                               if($tag==STRIP_COMMENTS) {
-                                       $q = preg_split( '/-->/i', $p[1], 2 );
-                               } else {
-                                       $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
-                               }
-                               $marker = $rnd . sprintf('%08X', $n++);
-                               $content[$marker] = $q[0];
-                               $stripped .= $marker;
                                $text = $q[1];
                        }
                }
                return $stripped;
        }
 
+       /**
+        * Convenience wrapper around extractTagsAndParams for callers
+        * that only want the stripped text and $content, i.e. for tags
+        * that will never have params, like <nowiki>.
+        *
+        * @access private
+        * @static
+        */
+       function extractTags( $tag, $text, &$content, $uniq_prefix = '' ) {
+               $ignoredTags = array();
+               $ignoredParams = array();
+               $stripped = Parser::extractTagsAndParams( $tag, $text, $content,
+                       $ignoredTags, $ignoredParams, $uniq_prefix );
+               return $stripped;
+       }
+       
        /**
         * Strips and renders nowiki, pre, math, hiero
         * If $render is set, performs necessary rendering operations on plugins
@@ -308,6 +347,8 @@ class Parser
                $pre_content = array();
                $comment_content = array();
                $ext_content = array();
+               $ext_tags = array();
+               $ext_params = array();
                $gallery_content = array();
 
                # Replace any instances of the placeholders
@@ -384,12 +425,15 @@ class Parser
                # Extensions
                foreach ( $this->mTagHooks as $tag => $callback ) {
                        $ext_content[$tag] = array();
-                       $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
+                       $text = Parser::extractTagsAndParams( $tag, $text, $ext_content[$tag],
+                               $ext_tags[$tag], $ext_params[$tag], $uniq_prefix );
                        foreach( $ext_content[$tag] as $marker => $content ) {
+                               $full_tag = $ext_tags[$tag][$marker];
+                               $params = $ext_params[$tag][$marker];
                                if ( $render ) {
-                                       $ext_content[$tag][$marker] = $callback( $content );
+                                       $ext_content[$tag][$marker] = $callback( $content, $params );
                                } else {
-                                       $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
+                                       $ext_content[$tag][$marker] = "$full_tag$content</$tag>";
                                }
                        }
                }
@@ -710,7 +754,7 @@ class Parser
                $fname = 'Parser::internalParse';
                wfProfileIn( $fname );
 
-               $text = Sanitizer::removeHTMLtags( $text );
+               $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ) );
                $text = $this->replaceVariables( $text, $args );
 
                $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
@@ -1392,7 +1436,13 @@ class Parser
                                $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
                                continue;
                        }
-                       if ( $nt->isAlwaysKnown() ) {
+                       if( $nt->isLocal() && $nt->isAlwaysKnown() ) {
+                               /**
+                                * Skip lookups for special pages and self-links.
+                                * External interwiki links are not included here because
+                                * the HTTP urls would break output in the next parse step;
+                                * they will have placeholders kept.
+                                */
                                $s .= $sk->makeKnownLinkObj( $nt, $text, '', $trail, $prefix );
                        } else {
                                /**
@@ -1423,8 +1473,8 @@ class Parser
                        list( $inside, $trail ) = Linker::splitTrail( $trail );
                        
                        if ( $nt->isExternal() ) {
-                               $iwRecord = array( $nt->getPrefixedDBkey(), $prefix.$text.$inside );
-                               $nr = array_push($this->mInterwikiLinkHolders, $iwRecord);
+                               $nr = array_push( $this->mInterwikiLinkHolders['texts'], $prefix.$text.$inside );
+                               $this->mInterwikiLinkHolders['titles'][] =& $nt;
                                $retVal = '<!--IWLINK '. ($nr-1) ."-->{$trail}";
                        } else {
                                $nr = array_push( $this->mLinkHolders['namespaces'], $nt->getNamespace() );
@@ -2036,9 +2086,13 @@ class Parser
                        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
                        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                                $nowiki = true;
+                       } else {
+                               # Remove obsolete MSG:
+                               $mwMsg =& MagicWord::get( MAG_MSG );
+                               $mwMsg->matchStartAndRemove( $part1 );
                        }
 
-                       # int: is the wikitext equivalent of wfMsg()
+                       # Check if it is an internal message
                        $mwInt =& MagicWord::get( MAG_INT );
                        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
                                if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
@@ -2046,15 +2100,6 @@ class Parser
                                        $found = true;
                                }
                        }
-
-                       # msg: is the wikitext equivalent of wfMsgForContent()
-                       $mwMsg =& MagicWord::get( MAG_MSG );
-                       if ( $mwMsg->matchStartAndRemove( $part1 ) ) {
-                               if ( $this->incrementIncludeCount( 'msg:'.$part1 ) ) {
-                                        $text = $linestart . wfMsgReal( $part1, $args, true, true );
-                                        $found = true;
-                               }
-                       }
                }
 
                # NS
@@ -2209,7 +2254,7 @@ class Parser
 
                        if( $this->mOutputType == OT_HTML ) {
                                $text = $this->strip( $text, $this->mStripState );
-                               $text = Sanitizer::removeHTMLtags( $text );
+                               $text = Sanitizer::removeHTMLtags( $text, array( &$this, 'replaceVariables' ), $assocArgs );
                        }
                        $text = $this->replaceVariables( $text, $assocArgs );
 
@@ -2481,7 +2526,7 @@ class Parser
                                                            "\$this->mLinkHolders['texts'][\$1]",
                                                            $canonized_headline );
                        $canonized_headline = preg_replace( '/<!--IWLINK ([0-9]*)-->/e',
-                                                           "\$this->mInterwikiLinkHolders[\$1][1]",
+                                                           "\$this->mInterwikiLinkHolders['texts'][\$1]",
                                                            $canonized_headline );
 
                        # strip out HTML
@@ -2985,13 +3030,13 @@ class Parser
 
                # Now process interwiki link holders
                # This is quite a bit simpler than internal links
-               if ( !empty( $this->mInterwikiLinkHolders ) ) {
+               if ( !empty( $this->mInterwikiLinkHolders['texts'] ) ) {
                        wfProfileIn( $fname.'-interwiki' );
                        # Make interwiki link HTML
                        $wgOutputReplace = array();
-                       foreach( $this->mInterwikiLinkHolders as $i => $lh ) {
-                               $s = $sk->makeLink( $lh[0], $lh[1] );
-                               $wgOutputReplace[] = $s;
+                       foreach( $this->mInterwikiLinkHolders['texts'] as $key => $link ) {
+                               $title = $this->mInterwikiLinkHolders['titles'][$key];
+                               $wgOutputReplace[$key] = $sk->makeLinkObj( $title, $link );
                        }
                        
                        $text = preg_replace_callback(
@@ -3040,8 +3085,8 @@ class Parser
                                return $this->mLinkHolders['texts'][$key];
                        }
                } elseif( $type == 'IWLINK' ) {
-                       if( isset( $this->mInterwikiLinkHolders[$key][1] ) ) {
-                               return $this->mInterwikiLinkHolders[$key][1];
+                       if( isset( $this->mInterwikiLinkHolders['texts'][$key] ) ) {
+                               return $this->mInterwikiLinkHolders['texts'][$key];
                        }
                }
                return $matches[0];
@@ -3060,9 +3105,10 @@ class Parser
         */
        function renderImageGallery( $text ) {
                # Setup the parser
-               global $wgUser, $wgParser, $wgTitle;
+               global $wgUser, $wgTitle;
                $parserOptions = ParserOptions::newFromUser( $wgUser );
-       
+               $localParser = new Parser();
+               
                global $wgLinkCache;
                $ig = new ImageGallery();
                $ig->setShowBytes( false );
@@ -3088,7 +3134,7 @@ class Parser
                                $label = '';
                        }
                        
-                       $html = $wgParser->parse( $label , $wgTitle, $parserOptions );
+                       $html = $localParser->parse( $label , $wgTitle, $parserOptions );
                        $html = $html->mText;
                        
                        $ig->add( new Image( $nt ), $html );