* Fix regression(?) in behavior of initial-whitespace-pre in <center>

[lhc/web/wiklou.git] / includes / Parser.php
diff --git a/includes/Parser.php b/includes/Parser.php

index e580cc4..60aaaed 100644 (file)
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -9,7 +9,6 @@
  /** */
  require_once( 'Sanitizer.php' );
  require_once( 'HttpFunctions.php' );
-require_once( 'ImageGallery.php' );
  
  /**
   * Update this version number when the ParserOutput format
@@ -48,11 +47,12 @@ define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );
  define( 'HTTP_PROTOCOLS', 'http:\/\/|https:\/\/' );
  # Everything except bracket, space, or control characters
  define( 'EXT_LINK_URL_CLASS', '[^][<>"\\x00-\\x20\\x7F]' );
-# Including space
-define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
+# Including space, but excluding newlines
+define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x0a\\x0d]' );
  define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
-define( 'EXT_LINK_BRACKETED',  '/\[(\b(' . wfUrlProtocols() . ')'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
+define( 'EXT_LINK_BRACKETED',  '/\[(\b(' . wfUrlProtocols() . ')'.
+       EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
  define( 'EXT_IMAGE_REGEX',
         '/^('.HTTP_PROTOCOLS.')'.  # Protocol
         '('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
@@ -133,6 +133,7 @@ class Parser
                 $this->mTagHooks = array();
                 $this->mFunctionHooks = array();
                 $this->clearState();
+               $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
         }
  
         /**
@@ -248,7 +249,6 @@ class Parser
                         '/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1&nbsp;\\2',
                         # french spaces, Guillemet-right
                         '/(\\302\\253) /' => '\\1&nbsp;',
-                       '/<center *>(.*)<\\/center *>/i' => '<div class="center">\\1</div>',
                 );
                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
  
@@ -367,7 +367,7 @@ class Parser
                                 $inside     = $p[4];
                         }
  
-                       $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++);
+                       $marker = "$uniq_prefix-$element-$rand" . sprintf('%08X', $n++) . '-QINU';
                         $stripped .= $marker;
  
                         if ( $close === '/>' ) {
@@ -423,7 +423,7 @@ class Parser
                 $commentState = array();
                 
                 $elements = array_merge(
-                       array( 'nowiki', 'pre', 'gallery' ),
+                       array( 'nowiki', 'gallery' ),
                         array_keys( $this->mTagHooks ) );
                 global $wgRawHtml;
                 if( $wgRawHtml ) {
@@ -462,12 +462,7 @@ class Parser
                                         $output = wfEscapeHTMLTagsOnly( $content );
                                         break;
                                 case 'math':
-                                       $output = renderMath( $content );
-                                       break;
-                               case 'pre':
-                                       // Backwards-compatibility hack
-                                       $content = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $content );
-                                       $output = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
+                                       $output = MathRenderer::renderMath( $content );
                                         break;
                                 case 'gallery':
                                         $output = $this->renderImageGallery( $content );
@@ -477,7 +472,7 @@ class Parser
                                                 $output = call_user_func_array( $this->mTagHooks[$tagName],
                                                         array( $content, $params, $this ) );
                                         } else {
-                                               wfDebugDieBacktrace( "Invalid call hook $element" );
+                                               throw new MWException( "Invalid call hook $element" );
                                         }
                                 }
                         } else {
@@ -883,7 +878,7 @@ class Parser
                 wfProfileIn( $fname );
                 for ( $i = 6; $i >= 1; --$i ) {
                         $h = str_repeat( '=', $i );
-                       $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
+                       $text = preg_replace( "/^{$h}(.+){$h}\\s*$/m",
                           "<h{$i}>\\1</h{$i}>\\2", $text );
                 }
                 wfProfileOut( $fname );
@@ -1335,7 +1330,7 @@ class Parser
                 $useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
  
                 if( is_null( $this->mTitle ) ) {
-                       wfDebugDieBacktrace( 'nooo' );
+                       throw new MWException( 'nooo' );
                 }
                 $nottalk = !$this->mTitle->isTalkPage();
  
@@ -1896,10 +1891,10 @@ class Parser
                                 wfProfileIn( "$fname-paragraph" );
                                 # No prefix (not in list)--go to paragraph mode
                                 // XXX: use a stack for nestable elements like span, table and div
-                               $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/iS', $t );
+                               $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<ol|<li|<\\/center|<\\/tr|<\\/td|<\\/th)/iS', $t );
                                 $closematch = preg_match(
                                         '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
-                                       '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul)/iS', $t );
+                                       '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<center)/iS', $t );
                                 if ( $openmatch or $closematch ) {
                                         $paragraphStack = false;
                                         # TODO bug 5718: paragraph closed
@@ -2125,7 +2120,7 @@ class Parser
                                 }
                                 break;
                         default:
-                               wfDebugDieBacktrace( "State machine error in $fname" );
+                               throw new MWException( "State machine error in $fname" );
                         }
                 }
                 if( $stack > 0 ) {
@@ -2254,6 +2249,9 @@ class Parser
                                 return $wgScriptPath;
                         case MAG_DIRECTIONMARK:
                                 return $wgContLang->getDirMark();
+                       case MAG_CONTENTLANGUAGE:
+                               global $wgContLanguageCode;
+                               return $wgContLanguageCode;
                         default:
                                 $ret = null;
                                 if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) )
@@ -2749,8 +2747,9 @@ class Parser
                 if ( !$found && $argc >= 2 ) {
                         $mwPluralForm =& MagicWord::get( MAG_PLURAL );
                         if ( $mwPluralForm->matchStartAndRemove( $part1 ) ) {
-                               if ($argc==2) {$args[2]=$args[1];}
-                               $text = $linestart . $lang->convertPlural( $part1, $args[0], $args[1], $args[2]);
+                               while ( count($args) < 5 ) { $args[] = $args[count($args)-1]; }
+                               $text = $linestart . $lang->convertPlural( $part1, $args[0], $args[1],
+                                       $args[2], $args[3], $args[4]);
                                 $found = true;
                         }
                 }
@@ -2918,7 +2917,11 @@ class Parser
                                 # Use the original $piece['title'] not the mangled $part1, so that
                                 # modifiers such as RAW: produce separate cache entries
                                 if( $found ) {
-                                       $this->mTemplates[$piece['title']] = $text;
+                                       if( $isHTML ) {
+                                               // A special page; don't store it in the template cache.
+                                       } else {
+                                               $this->mTemplates[$piece['title']] = $text;
+                                       }
                                         $text = $linestart . $text;
                                 }
                         }
@@ -4028,6 +4031,19 @@ class Parser
                 return $matches[0];
         }
  
+       /**
+        * Tag hook callback for 'pre' elements.
+        *
+        * Unwraps any nested nowiki pairs (kept for backwards compatibility),
+        * validates the supplied attributes against the 'pre' element, and
+        * returns the escaped content wrapped in a pre element.
+        *
+        * @param $text String: raw content found between the pre tags
+        * @param $attribs Array: attributes given on the opening tag
+        * @param $parser Parser: the calling parser (not used here)
+        * @return string HTML for the pre element
+        */
+       function renderPreTag( $text, $attribs, $parser ) {
+               // Backwards-compatibility hack: drop <nowiki> wrappers but keep what they enclose
+               $unwrapped = preg_replace( '!<nowiki>(.*?)</nowiki>!is', '\\1', $text );
+
+               $safeAttribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
+               $openTag = wfOpenElement( 'pre', $safeAttribs );
+               $escapedBody = wfEscapeHTMLTagsOnly( $unwrapped );
+
+               return $openTag . $escapedBody . '</pre>';
+       }
+       
         /**
          * Renders an image gallery from a text with one line per image.
          * text labels may be given by using |-style alternative text. E.g.
@@ -4205,6 +4221,165 @@ class Parser
          */
         function getTags() { return array_keys( $this->mTagHooks ); }
         /**#@-*/
+
+
+       /**
+        * Break wikitext input into sections, and either pull or replace
+        * some particular section's text.
+        *
+        * External callers should use the getSection and replaceSection methods.
+        *
+        * The text is first passed through strip(), with mOptions and
+        * mOutputType temporarily swapped for a fresh ParserOptions and OT_WIKI
+        * and restored afterwards. The stripped text is then split on
+        * wiki-style (== Heading ==) and HTML-style (h1 to h6) headers, the
+        * requested output is assembled from the pieces, stripped elements are
+        * re-inserted and the result is trimmed.
+        *
+        * Headers are counted from 1 in the order they appear. If $section is
+        * larger than the number of headers found, "get" returns an empty
+        * string and "replace" returns the page without any replacement made.
+        *
+        * NOTE(review): any $mode other than "get" or "replace" leaves $rv
+        * unassigned before it is handed to unstrip().
+        *
+        * @param $text Page wikitext
+        * @param $section Numbered section. 0 pulls the text before the first
+        *                 heading; other numbers will pull the given section
+        *                 along with its lower-level subsections.
+        * @param $mode One of "get" or "replace"
+        * @param $newtext Replacement text for section data. In "replace" mode
+        *                 it is inserted followed by two newlines.
+        * @return string for "get", the extracted section text.
+        *                for "replace", the whole page with the section replaced.
+        */
+       private function extractSections( $text, $section, $mode, $newtext='' ) {
+               # strip NOWIKI etc. to avoid confusion (true-parameter causes HTML
+               # comments to be stripped as well)
+               $striparray = array();
+               
+               $oldOutputType = $this->mOutputType; // saved so it can be restored after strip()
+               $oldOptions = $this->mOptions; // saved so it can be restored after strip()
+               $this->mOptions = new ParserOptions();
+               $this->mOutputType = OT_WIKI;
+               
+               $striptext = $this->strip( $text, $striparray, true );
+               
+               $this->mOutputType = $oldOutputType;
+               $this->mOptions = $oldOptions;
+
+               # now that we can be sure that no pseudo-sections are in the source,
+               # split it up by section
+               $uniq = preg_quote( $this->uniqPrefix(), '/' );
+               $comment = "(?:$uniq-!--.*?QINU)"; // presumably the strip marker of an HTML comment -- confirm against strip()
+               $secs = preg_split(
+               /*
+                       "/
+                       ^(
+                       (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+                       (?:
+                               (=+) # Should this be limited to 6?
+                               .+?  # Section title...
+                               \\2  # Ending = count must match start
+                       |
+                               ^
+                               <h([1-6])\b.*?>
+                               .*?
+                               <\/h\\3\s*>
+                       )
+                       (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+                       )$
+                       /mix",
+               */
+                       "/
+                       (
+                               ^
+                               (?:$comment|<\/?noinclude>)* # Initial comments will be stripped
+                               (=+) # Should this be limited to 6?
+                               .+?  # Section title...
+                               \\2  # Ending = count must match start
+                               (?:$comment|<\/?noinclude>|\s+)* # Trailing whitespace ok
+                               $
+                       |
+                               <h([1-6])\b.*?>
+                               .*?
+                               <\/h\\3\s*>
+                       )
+                       /mix",
+                       $striptext, -1,
+                       PREG_SPLIT_DELIM_CAPTURE);
+               
+               if( $mode == "get" ) {
+                       if( $section == 0 ) {
+                               // "Section 0" returns the content before any other section.
+                               $rv = $secs[0];
+                       } else {
+                               $rv = ""; // stays empty if the requested section is never reached
+                       }
+               } elseif( $mode == "replace" ) {
+                       if( $section == 0 ) {
+                               $rv = $newtext . "\n\n"; // new text takes the place of the lead text in $secs[0]
+                               $remainder = true; // every following section is copied through
+                       } else {
+                               $rv = $secs[0]; // lead text is kept as-is
+                               $remainder = false; // set once the replaced section's sub-parts have been passed
+                       }
+               }
+               $count = 0; // number of headers consumed so far
+               $sectionLevel = 0; // header level of the requested section; 0 until it is found
+               for( $index = 1; $index < count( $secs ); ) {
+                       $headerLine = $secs[$index++]; // full header text (outer capture group)
+                       if( $secs[$index] ) {
+                               // A wiki header: the (=+) capture is non-empty and its length is the level
+                               $headerLevel = strlen( $secs[$index++] );
+                       } else {
+                               // An HTML header: skip the empty (=+) capture, then read the digit from <hN>
+                               $index++;
+                               $headerLevel = intval( $secs[$index++] );
+                       }
+                       $content = $secs[$index++]; // text between this header and the next split point
+
+                       $count++;
+                       if( $mode == "get" ) {
+                               if( $count == $section ) {
+                                       $rv = $headerLine . $content;
+                                       $sectionLevel = $headerLevel;
+                               } elseif( $count > $section ) {
+                                       if( $sectionLevel && $headerLevel > $sectionLevel ) {
+                                               $rv .= $headerLine . $content; // deeper header: a subsection of the requested one
+                                       } else {
+                                               // Broke out to a higher-level section
+                                               break;
+                                       }
+                               }
+                       } elseif( $mode == "replace" ) {
+                               if( $count < $section ) {
+                                       $rv .= $headerLine . $content; // sections before the target are copied unchanged
+                               } elseif( $count == $section ) {
+                                       $rv .= $newtext . "\n\n"; // old header and content are dropped in favour of $newtext
+                                       $sectionLevel = $headerLevel;
+                               } elseif( $count > $section ) {
+                                       if( $headerLevel <= $sectionLevel ) {
+                                               // Passed the section's sub-parts.
+                                               $remainder = true;
+                                       }
+                                       if( $remainder ) {
+                                               $rv .= $headerLine . $content; // until then, deeper sub-parts are omitted
+                                       }
+                               }
+                       }
+               }
+               # reinsert stripped tags
+               $rv = $this->unstrip( $rv, $striparray );
+               $rv = $this->unstripNoWiki( $rv, $striparray );
+               $rv = trim( $rv );
+               return $rv;
+       }
+       
+       /**
+        * Fetch the wikitext of one numbered section of a page.
+        *
+        * A section is the text under a heading such as == Heading == or
+        * \<h1\>Heading\</h1\>; section 0 is the text that precedes the first
+        * such heading. Subsections nested under the requested section are
+        * included in the result.
+        *
+        * @param $text String: text to look in
+        * @param $section Integer: section number
+        * @return string text of the requested section
+        */
+       function getSection( $text, $section ) {
+               $sectionText = $this->extractSections( $text, $section, "get" );
+               return $sectionText;
+       }
+       
+       /**
+        * Return the whole page text with one numbered section swapped for
+        * new text.
+        *
+        * Section 0 is the text before the first heading. For any other
+        * number the section is replaced together with its lower-level
+        * subsections.
+        *
+        * @param $oldtext String: current page wikitext
+        * @param $section Integer: section number
+        * @param $text String: replacement text for that section
+        * @return string the page text with the section replaced
+        */
+       function replaceSection( $oldtext, $section, $text ) {
+               $updatedPage = $this->extractSections( $oldtext, $section, "replace", $text );
+               return $updatedPage;
+       }
+
  }
  
  /**