Revert r61528, r61527, r61526, r61525, r61519, r61515, r61053, r61052 (Parser::doQuot...

author Tim Starling <tstarling@users.mediawiki.org>
Wed, 27 Jan 2010 02:41:22 +0000 (02:41 +0000)
committer Tim Starling <tstarling@users.mediawiki.org>
Wed, 27 Jan 2010 02:41:22 +0000 (02:41 +0000)
diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 7daff4b..3230382 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -711,8 +711,6 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
  * (bug 9794) User rights log entries for foreign user now links to the foreign
    user's page if possible
  * (bug 14717) Don't load nonexistent CSS fix files for non-Monobook skins
-* (bug 18765) Increased consistency of bold-italic markup for unbalanced quotes.
-  Improved representation of six quotes (may break existing markup).
  * (bug 22034) Use wfClientAcceptsGzip() in wfGzipHandler instead of
    reimplementing it.
  * (bug 19226) First line renders differently on many UI messages.
diff --git a/includes/StringUtils.php b/includes/StringUtils.php
index bab9be4..c437b3c 100644
--- a/includes/StringUtils.php
+++ b/includes/StringUtils.php
@@ -179,14 +179,6 @@ class StringUtils {
                         return new ArrayIterator( explode( $separator, $subject ) );
                 }
         }
-
-       /**
-        * Workalike for preg_split() with limited memory usage.
-        * Returns an Iterator
-        */
-       static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) {
-               return new PregSplitIterator( $pattern, $subject, $limit, $flags );
-       }
  }
  
  /**
@@ -417,82 +409,3 @@ class ExplodeIterator implements Iterator {
         }
  }
  
-
-/**
- * An iterator which works exactly like:
- * 
- * foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) {
- *    ...
- * }
- *
- * Except it doesn't use huge amounts of memory when $limit is -1
- *
- * The flag PREG_SPLIT_OFFSET_CAPTURE isn't supported.
- */
-class PregSplitIterator implements Iterator {
-       // The subject string
-       var $pattern, $subject, $originalLimit, $flags;
-
-       // The last extracted group of items.
-       var $smallArray;
-
-       // The position on the iterator.
-       var $curPos;
-
-       const MAX_LIMIT = 100;
-
-       /** 
-        * Construct a PregSplitIterator
-        */
-       function __construct( $pattern, $s, $limit, $flags) {
-               $this->pattern = $pattern;
-               $this->subject = $s;
-               $this->originalLimit = $limit;
-               $this->flags = $flags;
-
-               $this->rewind();
-       }
-
-       private function effectiveLimit() {
-               if ($this->originalLimit == -1) {
-                       return self::MAX_LIMIT + 1;
-               } else if ($this->limit > self::MAX_LIMIT) {
-                       $this->limit -= self::MAX_LIMIT;
-                       return self::MAX_LIMIT + 1;
-               } else {
-                       $old = $this->limit;
-                       $this->limit = 0;
-                       return $old;
-               }
-       }
-
-       function rewind() {
-               $this->curPos = 0;
-               $this->limit =  $this->originalLimit;
-               if ($this->limit == -1) $this->limit = self::MAX_LIMIT;
-               $this->smallArray = preg_split( $this->pattern, $this->subject, $this->effectiveLimit(), $this->flags);
-       }
-
-       function current() {
-               return $this->smallArray[$this->curPos % self::MAX_LIMIT];
-       }
-
-       function key() {
-               return $this->curPos;
-       }
-
-       function next() {
-               $this->curPos++;
-               if ( $this->curPos % self::MAX_LIMIT == 0 ) {
-                       # Last item contains the rest unsplitted.
-                       if ($this->limit > 0) {
-                               $this->smallArray = preg_split( $this->pattern, $this->smallArray[self::MAX_LIMIT], $this->effectiveLimit(), $this->flags);
-                       }
-               }
-               return;
-       }
-
-       function valid() {
-               return $this->curPos % self::MAX_LIMIT < count($this->smallArray);
-       }
-}
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 50e15dc..ee7383d 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -213,7 +213,7 @@ class Parser
                  * Must not consist of all title characters, or else it will change
                  * the behaviour of <nowiki> in a link.
                  */
-               # $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
+               #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
                 # Changed to \x7f to allow XML double-parsing -- TS
                 $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
  
@@ -338,7 +338,7 @@ class Parser
                         '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2',
                         # french spaces, Guillemet-right
                         '/(\\302\\253) /' => '\\1&nbsp;',
-                       '/&nbsp;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
+                       '/&nbsp;(!\s*important)/' => ' \\1', #Beware of CSS magic word !important, bug #11874.
                 );
                 $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
  
@@ -556,7 +556,7 @@ class Parser
                 $taglist = implode( '|', $elements );
                 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";
  
-               while ( $text !== '' ) {
+               while ( $text != '' ) {
                         $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
                         $stripped .= $p[0];
                         if( count( $p ) < 5 ) {
@@ -723,11 +723,11 @@ class Parser
                                 array_push( $tr_history , false );
                                 array_push( $tr_attributes , '' );
                                 array_push( $has_opened_tr , false );
-                       } elseif ( count ( $td_history ) == 0 ) {
+                       } else if ( count ( $td_history ) == 0 ) {
                                 // Don't do any of the following
                                 $out .= $outLine."\n";
                                 continue;
-                       } elseif ( substr ( $line , 0 , 2 ) === '|}' ) {
+                       } else if ( substr ( $line , 0 , 2 ) === '|}' ) {
                                 // We are ending a table
                                 $line = '</table>' . substr ( $line , 2 );
                                 $last_tag = array_pop ( $last_tag_history );
@@ -745,7 +745,7 @@ class Parser
                                 }
                                 array_pop ( $tr_attributes );
                                 $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
-                       } elseif ( substr ( $line , 0 , 2 ) === '|-' ) {
+                       } else if ( substr ( $line , 0 , 2 ) === '|-' ) {
                                 // Now we have a table row
                                 $line = preg_replace( '#^\|-+#', '', $line );
  
@@ -773,7 +773,7 @@ class Parser
                                 array_push ( $td_history , false );
                                 array_push ( $last_tag_history , '' );
                         }
-                       elseif ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 )  === '|+' ) {
+                       else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 )  === '|+' ) {
                                 // This might be cell elements, td, th or captions
                                 if ( substr ( $line , 0 , 2 ) === '|+' ) {
                                         $first_character = '+';
@@ -818,9 +818,9 @@ class Parser
  
                                         if ( $first_character === '|' ) {
                                                 $last_tag = 'td';
-                                       } elseif ( $first_character === '!' ) {
+                                       } else if ( $first_character === '!' ) {
                                                 $last_tag = 'th';
-                                       } elseif ( $first_character === '+' ) {
+                                       } else if ( $first_character === '+' ) {
                                                 $last_tag = 'caption';
                                         } else {
                                                 $last_tag = '';
@@ -835,7 +835,7 @@ class Parser
                                         // be mistaken as delimiting cell parameters
                                         if ( strpos( $cell_data[0], '[[' ) !== false ) {
                                                 $cell = "{$previous}<{$last_tag}>{$cell}";
-                                       } elseif ( count ( $cell_data ) == 1 )
+                                       } else if ( count ( $cell_data ) == 1 )
                                                 $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
                                         else {
                                                 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
@@ -1108,59 +1108,100 @@ class Parser
         }
  
         /**
-        * Processes bolds and italics on a single line.
          * Helper function for doAllQuotes()
          */
         public function doQuotes( $text ) {
-               # Counts the number of occurrences of bold and italics mark-ups.
-               self::countBoldAndItalic($text, $numbold, $numitalics);
-               
-               if ( ( $numbold == 0 ) && ( $numitalics == 0 ) )
+               $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+               if ( count( $arr ) == 1 )
                         return $text;
                 else
                 {
+                       # First, do some preliminary work. This may shift some apostrophes from
+                       # being mark-up to being text. It also counts the number of occurrences
+                       # of bold and italics mark-ups.
+                       $i = 0;
+                       $numbold = 0;
+                       $numitalics = 0;
+                       foreach ( $arr as $r )
+                       {
+                               if ( ( $i % 2 ) == 1 )
+                               {
+                                       # If there are ever four apostrophes, assume the first is supposed to
+                                       # be text, and the remaining three constitute mark-up for bold text.
+                                       if ( strlen( $arr[$i] ) == 4 )
+                                       {
+                                               $arr[$i-1] .= "'";
+                                               $arr[$i] = "'''";
+                                       }
+                                       # If there are more than 5 apostrophes in a row, assume they're all
+                                       # text except for the last 5.
+                                       else if ( strlen( $arr[$i] ) > 5 )
+                                       {
+                                               $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
+                                               $arr[$i] = "'''''";
+                                       }
+                                       # Count the number of occurrences of bold and italics mark-ups.
+                                       # We are not counting sequences of five apostrophes.
+                                       if ( strlen( $arr[$i] ) == 2 )      { $numitalics++;             }
+                                       else if ( strlen( $arr[$i] ) == 3 ) { $numbold++;                }
+                                       else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
+                               }
+                               $i++;
+                       }
+
                         # If there is an odd number of both bold and italics, it is likely
                         # that one of the bold ones was meant to be an apostrophe followed
                         # by italics. Which one we cannot know for certain, but it is more
                         # likely to be one that has a single-letter word before it.
                         if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
                         {
+                               $i = 0;
+                               $firstsingleletterword = -1;
+                               $firstmultiletterword = -1;
+                               $firstspace = -1;
+                               foreach ( $arr as $r )
+                               {
+                                       if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
+                                       {
+                                               $x1 = substr ($arr[$i-1], -1);
+                                               $x2 = substr ($arr[$i-1], -2, 1);
+                                               if ($x1 === ' ') {
+                                                       if ($firstspace == -1) $firstspace = $i;
+                                               } else if ($x2 === ' ') {
+                                                       if ($firstsingleletterword == -1) $firstsingleletterword = $i;
+                                               } else {
+                                                       if ($firstmultiletterword == -1) $firstmultiletterword = $i;
+                                               }
+                                       }
+                                       $i++;
+                               }
  
-                               # This algorithm moves the literal quote at the 
-                               # right of a single word, at the right of a 
-                               # multiletter word or at the right of a space.
-                               # Otherwise, it does nothing.
-                               #
-                               # The original if-based version can be found at
-                               # http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?revision=61519&view=markup
-                               #
-                               # Unlike the original one, here we convert the 
-                               # texty quotes to &#39; which shouldn't matter.
-
-                               $quoteBalancerReplacements = array( 
-                                                                                               "/(?<= [^ ])'''(?!')/"=>"&#39;''", 
-                                                                                               "/(?<=[^ '])'''(?!')/"=>"&#39;''", 
-                                                                                               "/(^|(?<=[^'])) '''(?!')/"=>" &#39;''");
-
-                               foreach( $quoteBalancerReplacements as $k => $v) {
-                                       $text = preg_replace($k, $v, $text, 1, $count);
-                                       if ($count != 0)
-                                               break;
+                               # If there is a single-letter word, use it!
+                               if ($firstsingleletterword > -1)
+                               {
+                                       $arr [ $firstsingleletterword ] = "''";
+                                       $arr [ $firstsingleletterword-1 ] .= "'";
+                               }
+                               # If not, but there's a multi-letter word, use that one.
+                               else if ($firstmultiletterword > -1)
+                               {
+                                       $arr [ $firstmultiletterword ] = "''";
+                                       $arr [ $firstmultiletterword-1 ] .= "'";
+                               }
+                               # ... otherwise use the first one that has neither.
+                               # (notice that it is possible for all three to be -1 if, for example,
+                               # there is only one pentuple-apostrophe in the line)
+                               else if ($firstspace > -1)
+                               {
+                                       $arr [ $firstspace ] = "''";
+                                       $arr [ $firstspace-1 ] .= "'";
                                 }
                         }
  
-                       # Split in groups of 2, 3, 5 or 6 apostrophes.
-                       # If there are ever four apostrophes, assume the first is supposed to
-                       # be text, and the remaining three constitute mark-up for bold text.
-                       # If there are more than 6 apostrophes in a row, assume they're all
-                       # text except for the last 6.           
-                       $arr = Stringutils::preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
-
-
                         # Now let's actually convert our apostrophic mush to HTML!
-                       $output = ''; # Processed text
-                       $buffer = ''; # Content if $state is 'both'
-                       $state = '';  # Flags with the order of open tags: '|b|i|bi|ib|both'
+                       $output = '';
+                       $buffer = '';
+                       $state = '';
                         $i = 0;
                         foreach ($arr as $r)
                         {
@@ -1177,58 +1218,43 @@ class Parser
                                         {
                                                 if ($state === 'i')
                                                 { $output .= '</i>'; $state = ''; }
-                                               elseif ($state === 'bi')
+                                               else if ($state === 'bi')
                                                 { $output .= '</i>'; $state = 'b'; }
-                                               elseif ($state === 'ib')
+                                               else if ($state === 'ib')
                                                 { $output .= '</b></i><b>'; $state = 'b'; }
-                                               elseif ($state === 'both')
+                                               else if ($state === 'both')
                                                 { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
                                                 else # $state can be 'b' or ''
                                                 { $output .= '<i>'; $state .= 'i'; }
                                         }
-                                       elseif (strlen ($r) == 3)
+                                       else if (strlen ($r) == 3)
                                         {
                                                 if ($state === 'b')
                                                 { $output .= '</b>'; $state = ''; }
-                                               elseif ($state === 'bi')
+                                               else if ($state === 'bi')
                                                 { $output .= '</i></b><i>'; $state = 'i'; }
-                                               elseif ($state === 'ib')
+                                               else if ($state === 'ib')
                                                 { $output .= '</b>'; $state = 'i'; }
-                                               elseif ($state === 'both')
+                                               else if ($state === 'both')
                                                 { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
                                                 else # $state can be 'i' or ''
                                                 { $output .= '<b>'; $state .= 'b'; }
                                         }
-                                       elseif (strlen ($r) == 5)
+                                       else if (strlen ($r) == 5)
                                         {
                                                 if ($state === 'b')
                                                 { $output .= '</b><i>'; $state = 'i'; }
-                                               elseif ($state === 'i')
+                                               else if ($state === 'i')
                                                 { $output .= '</i><b>'; $state = 'b'; }
-                                               elseif ($state === 'bi')
+                                               else if ($state === 'bi')
                                                 { $output .= '</i></b>'; $state = ''; }
-                                               elseif ($state === 'ib')
+                                               else if ($state === 'ib')
                                                 { $output .= '</b></i>'; $state = ''; }
-                                               elseif ($state === 'both')
+                                               else if ($state === 'both')
                                                 { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
                                                 else # ($state == '')
                                                 { $buffer = ''; $state = 'both'; }
                                         }
-                                       elseif (strlen ($r) == 6)
-                                       {
-                                               if ($state === 'b')
-                                               { $output .= '</b><b>'; $state = 'b'; }
-                                               elseif ($state === 'i')
-                                               { $output .= '\'</i><b>'; $state = 'b'; }
-                                               elseif ($state === 'bi')
-                                               { $output .= '\'</i></b>'; $state = ''; }
-                                               elseif ($state === 'ib')
-                                               { $output .= '\'</b></i>'; $state = ''; }
-                                               elseif ($state === 'both')
-                                               { $output .= '<i><b>'.$buffer.'</b><b>'; $state = 'ib'; }
-                                               else # ($state == '')
-                                               { $buffer = ''; $state = ''; }
-                                       }
                                 }
                                 $i++;
                         }
@@ -1246,57 +1272,6 @@ class Parser
                 }
         }
  
-       /**
-        * Counts the number of bold and italic items from a line of text.
-        * Helper function for doQuotes()
-        */
-       private static function countBoldAndItalic($text, &$numBold, &$numItalics) {
-               $numBold = 0;
-               $numItalics = 0;
-               $offset = 0;
-
-               do {
-                       $offset = strpos($text, "'", $offset);
-                       if ($offset === false)
-                               return;
-
-                       $quoteLen = strspn($text, "'", $offset);
-                       $offset += $quoteLen;
-
-                       switch ($quoteLen) {
-                               case 0:
-                               case 1:
-                                       break;
-
-                               case 2:
-                                       $numItalics++;
-                                       break;
-
-                               case 3:
-                                       $numBold++;
-                                       break;
-
-                               case 4:
-                                       # If there are ever four apostrophes, assume the first is supposed to
-                                       # be text, and the remaining three constitute mark-up for bold text.
-                                       $numBold++;
-                                       $numItalics++;
-                                       break;
-
-                               case 5:
-                                       $numItalics++;
-                                       $numBold++;
-                                       break;
-
-                               case 6:
-                               default:
-                                       # If there are more than 6 apostrophes in a row, assume they're all
-                                       # text except for the last 6.
-                                       $numBold+=2;
-                       }
-               } while (true);
-       }
-
         /**
          * Replace external links (REL)
          *
@@ -1538,9 +1513,9 @@ class Parser
                 $sk = $this->mOptions->getSkin();
                 $holders = new LinkHolderArray( $this );
  
-               # split the entire text string on occurences of [[
+               #split the entire text string on occurences of [[
                 $a = StringUtils::explode( '[[', ' ' . $s );
-               # get the first element (all text up to first [[), and remove the space we added
+               #get the first element (all text up to first [[), and remove the space we added
                 $s = $a->current();
                 $a->next();
                 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
@@ -1685,10 +1660,10 @@ class Parser
  
                         if ( $might_be_img ) { # if this is actually an invalid link
                                 wfProfileIn( __METHOD__."-might_be_img" );
-                               if ( $ns == NS_FILE && $noforce ) { # but might be an image
+                               if ( $ns == NS_FILE && $noforce ) { #but might be an image
                                         $found = false;
                                         while ( true ) {
-                                               # look at the next 'line' to see if we can close it there
+                                               #look at the next 'line' to see if we can close it there
                                                 $a->next();
                                                 $next_line = $a->current();
                                                 if ( $next_line === false || $next_line === null ) {
@@ -1702,24 +1677,24 @@ class Parser
                                                         $trail = $m[2];
                                                         break;
                                                 } elseif ( count( $m ) == 2 ) {
-                                                       # if there's exactly one ]] that's fine, we'll keep looking
+                                                       #if there's exactly one ]] that's fine, we'll keep looking
                                                         $text .= "[[{$m[0]}]]{$m[1]}";
                                                 } else {
-                                                       # if $next_line is invalid too, we need look no further
+                                                       #if $next_line is invalid too, we need look no further
                                                         $text .= '[[' . $next_line;
                                                         break;
                                                 }
                                         }
                                         if ( !$found ) {
                                                 # we couldn't find the end of this imageLink, so output it raw
-                                               # but don't ignore what might be perfectly normal links in the text we've examined
+                                               #but don't ignore what might be perfectly normal links in the text we've examined
                                                 $holders->merge( $this->replaceInternalLinks2( $text ) );
                                                 $s .= "{$prefix}[[$link|$text";
                                                 # note: no $trail, because without an end, there *is* no trail
                                                 wfProfileOut( __METHOD__."-might_be_img" );
                                                 continue;
                                         }
-                               } else { # it's not an image, so output it raw
+                               } else { #it's not an image, so output it raw
                                         $s .= "{$prefix}[[$link|$text";
                                         # note: no $trail, because without an end, there *is* no trail
                                         wfProfileOut( __METHOD__."-might_be_img" );
@@ -1796,7 +1771,7 @@ class Parser
                         }
  
                         # Self-link checking
-                       if( $nt->getFragment() === '' && $ns !== NS_SPECIAL ) {
+                       if( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
                                 if( in_array( $nt->getPrefixedText(), $selflink, true ) ) {
                                         $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
                                         continue;
@@ -1916,7 +1891,7 @@ class Parser
          */
         /* private */ function closeParagraph() {
                 $result = '';
-               if ( $this->mLastSection !== '' ) {
+               if ( $this->mLastSection != '' ) {
                         $result = '</' . $this->mLastSection  . ">\n";
                 }
                 $this->mInPre = false;
@@ -1932,7 +1907,7 @@ class Parser
                 if ( $fl < $shorter ) { $shorter = $fl; }
  
                 for ( $i = 0; $i < $shorter; ++$i ) {
-                       if ( $st1{$i} !== $st2{$i} ) { break; }
+                       if ( $st1{$i} != $st2{$i} ) { break; }
                 }
                 return $i;
         }
@@ -2105,7 +2080,7 @@ class Parser
                                         '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
                                 if ( $openmatch or $closematch ) {
                                         $paragraphStack = false;
-                                       # TODO bug 5718: paragraph closed
+                                       # TODO bug 5718: paragraph closed
                                         $output .= $this->closeParagraph();
                                         if ( $preOpenMatch and !$preCloseMatch ) {
                                                 $this->mInPre = true;
@@ -2115,8 +2090,8 @@ class Parser
                                         } else {
                                                 $inBlockElem = true;
                                         }
-                               } elseif ( !$inBlockElem && !$this->mInPre ) {
-                                       if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) !== '' ) ) {
+                               } else if ( !$inBlockElem && !$this->mInPre ) {
+                                       if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
                                                 // pre
                                                 if ($this->mLastSection !== 'pre') {
                                                         $paragraphStack = false;
@@ -2145,7 +2120,7 @@ class Parser
                                                                 $output .= $paragraphStack;
                                                                 $paragraphStack = false;
                                                                 $this->mLastSection = 'p';
-                                                       } elseif ($this->mLastSection !== 'p') {
+                                                       } else if ($this->mLastSection !== 'p') {
                                                                 $output .= $this->closeParagraph().'<p>';
                                                                 $this->mLastSection = 'p';
                                                         }
@@ -2166,7 +2141,7 @@ class Parser
                         $output .= $this->closeList( $prefix2[$prefixLength-1] );
                         --$prefixLength;
                 }
-               if ( $this->mLastSection !== '' ) {
+               if ( $this->mLastSection != '' ) {
                         $output .= '</' . $this->mLastSection . '>';
                         $this->mLastSection = '';
                 }
@@ -2972,7 +2947,7 @@ class Parser
                                                 $isHTML = true;
                                                 $this->disableCache();
                                         }
-                               } elseif ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
+                               } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
                                         $found = false; //access denied
                                         wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() );
                                 } else {
@@ -3585,7 +3560,7 @@ class Parser
                         if (preg_match("/^$markerRegex/", $headline, $markerMatches)) {
                                 $serial = $markerMatches[1];
                                 list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
-                               $isTemplate = ($titleText !== $baseTitleText);
+                               $isTemplate = ($titleText != $baseTitleText);
                                 $headline = preg_replace("/^$markerRegex/", "", $headline);
                         }
  
@@ -3701,7 +3676,7 @@ class Parser
                                 if ( $legacyHeadline == $safeHeadline ) {
                                         # No reason to have both (in fact, we can't)
                                         $legacyHeadline = false;
-                               } elseif ( $legacyHeadline !== Sanitizer::escapeId(
+                               } elseif ( $legacyHeadline != Sanitizer::escapeId(
                                 $legacyHeadline, 'xml' ) ) {
                                         # The legacy id is invalid XML.  We used to allow this, but
                                         # there's no reason to do so anymore.  Backward
@@ -3875,8 +3850,8 @@ class Parser
                                 else
                                         continue;
                         }
-                       if ( $s['index'] !== $section ||
-                                       $s['fromtitle'] !== $titletext ) {
+                       if ( $s['index'] != $section ||
+                                       $s['fromtitle'] != $titletext ) {
                                 self::incrementNumbering( $numbering,
                                         $s['toclevel'], $lastLevel );
  
@@ -3927,7 +3902,7 @@ class Parser
         private static function incrementNumbering( &$number, $level, $lastLevel ) {
                 if ( $level > $lastLevel )
                         $number[$level - 1] = 1;
-               elseif ( $level < $lastLevel ) {
+               else if ( $level < $lastLevel ) {
                         foreach ( $number as $key => $unused )
                                 if ( $key >= $level )
                                         unset( $number[$key] );
@@ -4037,7 +4012,7 @@ class Parser
                 $m = array();
                 if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
-               } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" !== '' ) {
+               } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
                 } else {
                         # if there's no context, don't bother duplicating the title
@@ -4876,7 +4851,7 @@ class Parser
                         if ( $node->getName() === 'h' ) {
                                 $bits = $node->splitHeading();
                                 $curLevel = $bits['level'];
-                               if ( $bits['i'] !== $sectionIndex && $curLevel <= $targetLevel ) {
+                               if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
                                         break;
                                 }
                         }
@@ -4892,7 +4867,7 @@ class Parser
                         // Add two newlines on -- trailing whitespace in $newText is conventionally
                         // stripped by the editor, so we need both newlines to restore the paragraph gap
                         // Only add trailing whitespace if there is newText
-                       if($newText !== "") {
+                       if($newText != "") {
                                 $outText .= $newText . "\n\n";
                         }
  
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt

index fedbb85..6695973 100644 (file)
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -116,7 +116,7 @@ Italics and bold
  </li><li> plain<b><i>bold-italic</i>bold</b>plain
  </li><li> plain<i>italic<b>bold-italic</b></i>plain
  </li><li> plain<b>bold<i>bold-italic</i></b>plain
-</li><li> plain l&#39;<i>italic</i>plain
+</li><li> plain l'<i>italic</i>plain
  </li><li> plain l'<b>bold</b> plain
  </li></ul>
  
@@ -5253,17 +5253,19 @@ New wiki paragraph
  </p>
  !! end
  
-# This was the original html, but it has also been
-# <p>'<i>bold'</i><b>bold<i>bolditalics</i></b>
+# Original result was this:
+# <p><b>bold</b><b>bold<i>bolditalics</i></b>
  # </p>
-# See bug 18765.
+# While that might be marginally more intuitive, maybe, the six-apostrophe
+# construct is clearly pathological and the result stated here (which is what
+# the parser actually does) is about as reasonable as anything.
  !!test
  Mixing markup for italics and bold
  !! options
  !! input
  '''bold''''''bold''bolditalics'''''
  !! result
-<p><b>bold</b><b>bold<i>bolditalics</i></b>
+<p>'<i>bold'</i><b>bold<i>bolditalics</i></b>
  </p>
  !! end
  
@@ -6415,7 +6417,7 @@ Another italics / bold test
  !! input
   ''' ''x'
  !! result
-<pre>&#39;<i> </i>x'
+<pre>'<i> </i>x'
  </pre>
  !!end
  
@@ -7558,82 +7560,6 @@ title=[[Main Page]]
  <a href="/wiki/Main_Page#section" title="Main Page">#section</a>
  !! end
  
-!! test
-Bold/italic markup handled differently depending on leading whitespace (bug 18765)
-!!input
-'''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<!-- Comment -->'''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<span> '''Look at ''this edit'''s complicated bold/italic markup!'''</span>
-
-<nowiki></nowiki> '''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<!-- Hello world---> '''Look at ''this edit'''s complicated bold/italic markup!'''
-
-{|
-| '''Look at ''this edit'''s complicated bold/italic markup!'''
-|}
-
-'''This was Italic'' this was plain''' and this was bold'''
-but '''This is bold'' this is bold italic''' and this is bold'''
-
-<!-- Wishlist: Breaking because <span> and | are treated as text 
-<span>'''Look at ''this edit'''s complicated bold/italic markup!'''</span> 
-{|
-|'''Look at ''this edit'''s complicated bold/italic markup!'''
-|}
--->
-!! result
-<p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p><p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p><p><span> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b></span>
-</p><p> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p>
-<pre><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</pre>
-<table>
-<tr>
-<td> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</td></tr></table>
-<p><b>This was Italic<i> this was plain&#39;</i> and this was bold</b>
-but <b>This is bold<i> this is bold italic&#39;</i> and this is bold</b>
-</p><p><br />
-</p>
-!! end
-
-!! test
-Six quotes
-!!input
-''Italic''''''Bold
-
-'''Bold''BoldItalic''''''Normal
-
-''Italic'''BoldItalic''''''Normal'''''
-
-'''''BoldItalic''''''MoreBoldItalic''
-
-''''''Normal
-!!result
-<p><i>Italic'</i><b>Bold</b>
-</p><p><b>Bold<i>BoldItalic'</i></b>Normal
-</p><p><i>Italic<b>BoldItalic'</b></i>Normal
-</p><p><i><b>BoldItalic</b><b>MoreBoldItalic</b></i>
-</p><p>Normal
-</p>
-!!end
-
-
-!! test
-Too many quotes
-!!input
-I '''like'''''quotes''''''''''' 
-!! result
-<p>I <b>like</b><i>quotes''''''</i><b> </b>
-</p>
-!! end
-
-
  Note: some elements used in these Microdata examples don't work, like <img>
  and <time>.
  !! test
diff --git a/tests/preg_split_test.php b/tests/preg_split_test.php

deleted file mode 100644 (file)

index 69c977f..0000000
--- a/tests/preg_split_test.php
+++ /dev/null
@@ -1,24 +0,0 @@
-<?php
-include "../includes/StringUtils.php";
-
-$pattern = "/('')+/";
-$subject = str_repeat("'' ", 1024*1024 + 7);
-
-$m = memory_get_usage();
-
-$ps1 = preg_split($pattern, $subject);
-
-$r = "";
-foreach ($ps1 as $c) {
-       $r .= $c . "|";
-}
-echo "Original preg_split: " . md5($r) . "  " . (memory_get_usage()-$m) . "\n";
-
-unset($ps1);
-
-$r = "";
-$ps2 = StringUtils::preg_split($pattern, $subject);
-foreach ($ps2 as $c) {
-       $r .= $c . "|";
-}
-echo "StringUtils preg_split: " . md5($r) . "  " . (memory_get_usage()-$m) . "\n";
author	Tim Starling <tstarling@users.mediawiki.org>
	Wed, 27 Jan 2010 02:41:22 +0000 (02:41 +0000)
committer	Tim Starling <tstarling@users.mediawiki.org>
	Wed, 27 Jan 2010 02:41:22 +0000 (02:41 +0000)
RELEASE-NOTES		patch | blob | history
includes/StringUtils.php		patch | blob | history
includes/parser/Parser.php		patch | blob | history
maintenance/parserTests.txt		patch | blob | history
tests/preg_split_test.php	[deleted file]	patch | blob | history