Revert r61528, r61527, r61526, r61525, r61519, r61515, r61053, r61052 (Parser::doQuot...
author Tim Starling <tstarling@users.mediawiki.org>
Wed, 27 Jan 2010 02:41:22 +0000 (02:41 +0000)
committer Tim Starling <tstarling@users.mediawiki.org>
Wed, 27 Jan 2010 02:41:22 +0000 (02:41 +0000)
RELEASE-NOTES
includes/StringUtils.php
includes/parser/Parser.php
maintenance/parserTests.txt
tests/preg_split_test.php [deleted file]

index 7daff4b..3230382 100644 (file)
@@ -711,8 +711,6 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * (bug 9794) User rights log entries for foreign user now links to the foreign
   user's page if possible
 * (bug 14717) Don't load nonexistent CSS fix files for non-Monobook skins
-* (bug 18765) Increased consistency of bold-italic markup for unbalanced quotes.
-  Improved representation of six quotes (may break existing markup).
 * (bug 22034) Use wfClientAcceptsGzip() in wfGzipHandler instead of
   reimplementing it.
 * (bug 19226) First line renders differently on many UI messages.
index bab9be4..c437b3c 100644 (file)
@@ -179,14 +179,6 @@ class StringUtils {
                        return new ArrayIterator( explode( $separator, $subject ) );
                }
        }
-
-       /**
-        * Workalike for preg_split() with limited memory usage.
-        * Returns an Iterator
-        */
-       static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) {
-               return new PregSplitIterator( $pattern, $subject, $limit, $flags );
-       }
 }
 
 /**
@@ -417,82 +409,3 @@ class ExplodeIterator implements Iterator {
        }
 }
 
-
-/**
- * An iterator which works exactly like:
- * 
- * foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) {
- *    ...
- * }
- *
- * Except it doesn't use huge amounts of memory when $limit is -1
- *
- * The flag PREG_SPLIT_OFFSET_CAPTURE isn't supported.
- */
-class PregSplitIterator implements Iterator {
-       // The subject string
-       var $pattern, $subject, $originalLimit, $flags;
-
-       // The last extracted group of items.
-       var $smallArray;
-
-       // The position on the iterator.
-       var $curPos;
-
-       const MAX_LIMIT = 100;
-
-       /** 
-        * Construct a PregSplitIterator
-        */
-       function __construct( $pattern, $s, $limit, $flags) {
-               $this->pattern = $pattern;
-               $this->subject = $s;
-               $this->originalLimit = $limit;
-               $this->flags = $flags;
-
-               $this->rewind();
-       }
-
-       private function effectiveLimit() {
-               if ($this->originalLimit == -1) {
-                       return self::MAX_LIMIT + 1;
-               } else if ($this->limit > self::MAX_LIMIT) {
-                       $this->limit -= self::MAX_LIMIT;
-                       return self::MAX_LIMIT + 1;
-               } else {
-                       $old = $this->limit;
-                       $this->limit = 0;
-                       return $old;
-               }
-       }
-
-       function rewind() {
-               $this->curPos = 0;
-               $this->limit =  $this->originalLimit;
-               if ($this->limit == -1) $this->limit = self::MAX_LIMIT;
-               $this->smallArray = preg_split( $this->pattern, $this->subject, $this->effectiveLimit(), $this->flags);
-       }
-
-       function current() {
-               return $this->smallArray[$this->curPos % self::MAX_LIMIT];
-       }
-
-       function key() {
-               return $this->curPos;
-       }
-
-       function next() {
-               $this->curPos++;
-               if ( $this->curPos % self::MAX_LIMIT == 0 ) {
-                       # Last item contains the rest unsplitted.
-                       if ($this->limit > 0) {
-                               $this->smallArray = preg_split( $this->pattern, $this->smallArray[self::MAX_LIMIT], $this->effectiveLimit(), $this->flags);
-                       }
-               }
-               return;
-       }
-
-       function valid() {
-               return $this->curPos % self::MAX_LIMIT < count($this->smallArray);
-       }
-}
index 50e15dc..ee7383d 100644 (file)
@@ -213,7 +213,7 @@ class Parser
                 * Must not consist of all title characters, or else it will change
                 * the behaviour of <nowiki> in a link.
                 */
-               # $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
+               #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
                # Changed to \x7f to allow XML double-parsing -- TS
                $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
 
@@ -338,7 +338,7 @@ class Parser
                        '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2',
                        # french spaces, Guillemet-right
                        '/(\\302\\253) /' => '\\1&nbsp;',
-                       '/&nbsp;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
+                       '/&nbsp;(!\s*important)/' => ' \\1', #Beware of CSS magic word !important, bug #11874.
                );
                $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 
@@ -556,7 +556,7 @@ class Parser
                $taglist = implode( '|', $elements );
                $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";
 
-               while ( $text !== '' ) {
+               while ( $text != '' ) {
                        $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
                        $stripped .= $p[0];
                        if( count( $p ) < 5 ) {
@@ -723,11 +723,11 @@ class Parser
                                array_push( $tr_history , false );
                                array_push( $tr_attributes , '' );
                                array_push( $has_opened_tr , false );
-                       } elseif ( count ( $td_history ) == 0 ) {
+                       } else if ( count ( $td_history ) == 0 ) {
                                // Don't do any of the following
                                $out .= $outLine."\n";
                                continue;
-                       } elseif ( substr ( $line , 0 , 2 ) === '|}' ) {
+                       } else if ( substr ( $line , 0 , 2 ) === '|}' ) {
                                // We are ending a table
                                $line = '</table>' . substr ( $line , 2 );
                                $last_tag = array_pop ( $last_tag_history );
@@ -745,7 +745,7 @@ class Parser
                                }
                                array_pop ( $tr_attributes );
                                $outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
-                       } elseif ( substr ( $line , 0 , 2 ) === '|-' ) {
+                       } else if ( substr ( $line , 0 , 2 ) === '|-' ) {
                                // Now we have a table row
                                $line = preg_replace( '#^\|-+#', '', $line );
 
@@ -773,7 +773,7 @@ class Parser
                                array_push ( $td_history , false );
                                array_push ( $last_tag_history , '' );
                        }
-                       elseif ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 )  === '|+' ) {
+                       else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 )  === '|+' ) {
                                // This might be cell elements, td, th or captions
                                if ( substr ( $line , 0 , 2 ) === '|+' ) {
                                        $first_character = '+';
@@ -818,9 +818,9 @@ class Parser
 
                                        if ( $first_character === '|' ) {
                                                $last_tag = 'td';
-                                       } elseif ( $first_character === '!' ) {
+                                       } else if ( $first_character === '!' ) {
                                                $last_tag = 'th';
-                                       } elseif ( $first_character === '+' ) {
+                                       } else if ( $first_character === '+' ) {
                                                $last_tag = 'caption';
                                        } else {
                                                $last_tag = '';
@@ -835,7 +835,7 @@ class Parser
                                        // be mistaken as delimiting cell parameters
                                        if ( strpos( $cell_data[0], '[[' ) !== false ) {
                                                $cell = "{$previous}<{$last_tag}>{$cell}";
-                                       } elseif ( count ( $cell_data ) == 1 )
+                                       } else if ( count ( $cell_data ) == 1 )
                                                $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
                                        else {
                                                $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
@@ -1108,59 +1108,100 @@ class Parser
        }
 
        /**
-        * Processes bolds and italics on a single line.
         * Helper function for doAllQuotes()
         */
        public function doQuotes( $text ) {
-               # Counts the number of occurrences of bold and italics mark-ups.
-               self::countBoldAndItalic($text, $numbold, $numitalics);
-               
-               if ( ( $numbold == 0 ) && ( $numitalics == 0 ) )
+               $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+               if ( count( $arr ) == 1 )
                        return $text;
                else
                {
+                       # First, do some preliminary work. This may shift some apostrophes from
+                       # being mark-up to being text. It also counts the number of occurrences
+                       # of bold and italics mark-ups.
+                       $i = 0;
+                       $numbold = 0;
+                       $numitalics = 0;
+                       foreach ( $arr as $r )
+                       {
+                               if ( ( $i % 2 ) == 1 )
+                               {
+                                       # If there are ever four apostrophes, assume the first is supposed to
+                                       # be text, and the remaining three constitute mark-up for bold text.
+                                       if ( strlen( $arr[$i] ) == 4 )
+                                       {
+                                               $arr[$i-1] .= "'";
+                                               $arr[$i] = "'''";
+                                       }
+                                       # If there are more than 5 apostrophes in a row, assume they're all
+                                       # text except for the last 5.
+                                       else if ( strlen( $arr[$i] ) > 5 )
+                                       {
+                                               $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
+                                               $arr[$i] = "'''''";
+                                       }
+                                       # Count the number of occurrences of bold and italics mark-ups.
+                                       # We are not counting sequences of five apostrophes.
+                                       if ( strlen( $arr[$i] ) == 2 )      { $numitalics++;             }
+                                       else if ( strlen( $arr[$i] ) == 3 ) { $numbold++;                }
+                                       else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
+                               }
+                               $i++;
+                       }
+
                        # If there is an odd number of both bold and italics, it is likely
                        # that one of the bold ones was meant to be an apostrophe followed
                        # by italics. Which one we cannot know for certain, but it is more
                        # likely to be one that has a single-letter word before it.
                        if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
                        {
+                               $i = 0;
+                               $firstsingleletterword = -1;
+                               $firstmultiletterword = -1;
+                               $firstspace = -1;
+                               foreach ( $arr as $r )
+                               {
+                                       if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
+                                       {
+                                               $x1 = substr ($arr[$i-1], -1);
+                                               $x2 = substr ($arr[$i-1], -2, 1);
+                                               if ($x1 === ' ') {
+                                                       if ($firstspace == -1) $firstspace = $i;
+                                               } else if ($x2 === ' ') {
+                                                       if ($firstsingleletterword == -1) $firstsingleletterword = $i;
+                                               } else {
+                                                       if ($firstmultiletterword == -1) $firstmultiletterword = $i;
+                                               }
+                                       }
+                                       $i++;
+                               }
 
-                               # This algorithm moves the literal quote at the 
-                               # right of a single word, at the right of a 
-                               # multiletter word or at the right of a space.
-                               # Otherwise, it does nothing.
-                               #
-                               # The original if-based version can be found at
-                               # http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?revision=61519&view=markup
-                               #
-                               # Unlike the original one, here we convert the 
-                               # texty quotes to &#39; which shouldn't matter.
-
-                               $quoteBalancerReplacements = array( 
-                                                                                               "/(?<= [^ ])'''(?!')/"=>"&#39;''", 
-                                                                                               "/(?<=[^ '])'''(?!')/"=>"&#39;''", 
-                                                                                               "/(^|(?<=[^'])) '''(?!')/"=>" &#39;''");
-
-                               foreach( $quoteBalancerReplacements as $k => $v) {
-                                       $text = preg_replace($k, $v, $text, 1, $count);
-                                       if ($count != 0)
-                                               break;
+                               # If there is a single-letter word, use it!
+                               if ($firstsingleletterword > -1)
+                               {
+                                       $arr [ $firstsingleletterword ] = "''";
+                                       $arr [ $firstsingleletterword-1 ] .= "'";
+                               }
+                               # If not, but there's a multi-letter word, use that one.
+                               else if ($firstmultiletterword > -1)
+                               {
+                                       $arr [ $firstmultiletterword ] = "''";
+                                       $arr [ $firstmultiletterword-1 ] .= "'";
+                               }
+                               # ... otherwise use the first one that has neither.
+                               # (notice that it is possible for all three to be -1 if, for example,
+                               # there is only one pentuple-apostrophe in the line)
+                               else if ($firstspace > -1)
+                               {
+                                       $arr [ $firstspace ] = "''";
+                                       $arr [ $firstspace-1 ] .= "'";
                                }
                        }
 
-                       # Split in groups of 2, 3, 5 or 6 apostrophes.
-                       # If there are ever four apostrophes, assume the first is supposed to
-                       # be text, and the remaining three constitute mark-up for bold text.
-                       # If there are more than 6 apostrophes in a row, assume they're all
-                       # text except for the last 6.           
-                       $arr = Stringutils::preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
-
-
                        # Now let's actually convert our apostrophic mush to HTML!
-                       $output = ''; # Processed text
-                       $buffer = ''; # Content if $state is 'both'
-                       $state = '';  # Flags with the order of open tags: '|b|i|bi|ib|both'
+                       $output = '';
+                       $buffer = '';
+                       $state = '';
                        $i = 0;
                        foreach ($arr as $r)
                        {
@@ -1177,58 +1218,43 @@ class Parser
                                        {
                                                if ($state === 'i')
                                                { $output .= '</i>'; $state = ''; }
-                                               elseif ($state === 'bi')
+                                               else if ($state === 'bi')
                                                { $output .= '</i>'; $state = 'b'; }
-                                               elseif ($state === 'ib')
+                                               else if ($state === 'ib')
                                                { $output .= '</b></i><b>'; $state = 'b'; }
-                                               elseif ($state === 'both')
+                                               else if ($state === 'both')
                                                { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
                                                else # $state can be 'b' or ''
                                                { $output .= '<i>'; $state .= 'i'; }
                                        }
-                                       elseif (strlen ($r) == 3)
+                                       else if (strlen ($r) == 3)
                                        {
                                                if ($state === 'b')
                                                { $output .= '</b>'; $state = ''; }
-                                               elseif ($state === 'bi')
+                                               else if ($state === 'bi')
                                                { $output .= '</i></b><i>'; $state = 'i'; }
-                                               elseif ($state === 'ib')
+                                               else if ($state === 'ib')
                                                { $output .= '</b>'; $state = 'i'; }
-                                               elseif ($state === 'both')
+                                               else if ($state === 'both')
                                                { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
                                                else # $state can be 'i' or ''
                                                { $output .= '<b>'; $state .= 'b'; }
                                        }
-                                       elseif (strlen ($r) == 5)
+                                       else if (strlen ($r) == 5)
                                        {
                                                if ($state === 'b')
                                                { $output .= '</b><i>'; $state = 'i'; }
-                                               elseif ($state === 'i')
+                                               else if ($state === 'i')
                                                { $output .= '</i><b>'; $state = 'b'; }
-                                               elseif ($state === 'bi')
+                                               else if ($state === 'bi')
                                                { $output .= '</i></b>'; $state = ''; }
-                                               elseif ($state === 'ib')
+                                               else if ($state === 'ib')
                                                { $output .= '</b></i>'; $state = ''; }
-                                               elseif ($state === 'both')
+                                               else if ($state === 'both')
                                                { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
                                                else # ($state == '')
                                                { $buffer = ''; $state = 'both'; }
                                        }
-                                       elseif (strlen ($r) == 6)
-                                       {
-                                               if ($state === 'b')
-                                               { $output .= '</b><b>'; $state = 'b'; }
-                                               elseif ($state === 'i')
-                                               { $output .= '\'</i><b>'; $state = 'b'; }
-                                               elseif ($state === 'bi')
-                                               { $output .= '\'</i></b>'; $state = ''; }
-                                               elseif ($state === 'ib')
-                                               { $output .= '\'</b></i>'; $state = ''; }
-                                               elseif ($state === 'both')
-                                               { $output .= '<i><b>'.$buffer.'</b><b>'; $state = 'ib'; }
-                                               else # ($state == '')
-                                               { $buffer = ''; $state = ''; }
-                                       }
                                }
                                $i++;
                        }
@@ -1246,57 +1272,6 @@ class Parser
                }
        }
 
-       /**
-        * Counts the number of bold and italic items from a line of text.
-        * Helper function for doQuotes()
-        */
-       private static function countBoldAndItalic($text, &$numBold, &$numItalics) {
-               $numBold = 0;
-               $numItalics = 0;
-               $offset = 0;
-
-               do {
-                       $offset = strpos($text, "'", $offset);
-                       if ($offset === false)
-                               return;
-
-                       $quoteLen = strspn($text, "'", $offset);
-                       $offset += $quoteLen;
-
-                       switch ($quoteLen) {
-                               case 0:
-                               case 1:
-                                       break;
-
-                               case 2:
-                                       $numItalics++;
-                                       break;
-
-                               case 3:
-                                       $numBold++;
-                                       break;
-
-                               case 4:
-                                       # If there are ever four apostrophes, assume the first is supposed to
-                                       # be text, and the remaining three constitute mark-up for bold text.
-                                       $numBold++;
-                                       $numItalics++;
-                                       break;
-
-                               case 5:
-                                       $numItalics++;
-                                       $numBold++;
-                                       break;
-
-                               case 6:
-                               default:
-                                       # If there are more than 6 apostrophes in a row, assume they're all
-                                       # text except for the last 6.
-                                       $numBold+=2;
-                       }
-               } while (true);
-       }
-
        /**
         * Replace external links (REL)
         *
@@ -1538,9 +1513,9 @@ class Parser
                $sk = $this->mOptions->getSkin();
                $holders = new LinkHolderArray( $this );
 
-               # split the entire text string on occurences of [[
+               #split the entire text string on occurences of [[
                $a = StringUtils::explode( '[[', ' ' . $s );
-               # get the first element (all text up to first [[), and remove the space we added
+               #get the first element (all text up to first [[), and remove the space we added
                $s = $a->current();
                $a->next();
                $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
@@ -1685,10 +1660,10 @@ class Parser
 
                        if ( $might_be_img ) { # if this is actually an invalid link
                                wfProfileIn( __METHOD__."-might_be_img" );
-                               if ( $ns == NS_FILE && $noforce ) { # but might be an image
+                               if ( $ns == NS_FILE && $noforce ) { #but might be an image
                                        $found = false;
                                        while ( true ) {
-                                               # look at the next 'line' to see if we can close it there
+                                               #look at the next 'line' to see if we can close it there
                                                $a->next();
                                                $next_line = $a->current();
                                                if ( $next_line === false || $next_line === null ) {
@@ -1702,24 +1677,24 @@ class Parser
                                                        $trail = $m[2];
                                                        break;
                                                } elseif ( count( $m ) == 2 ) {
-                                                       # if there's exactly one ]] that's fine, we'll keep looking
+                                                       #if there's exactly one ]] that's fine, we'll keep looking
                                                        $text .= "[[{$m[0]}]]{$m[1]}";
                                                } else {
-                                                       # if $next_line is invalid too, we need look no further
+                                                       #if $next_line is invalid too, we need look no further
                                                        $text .= '[[' . $next_line;
                                                        break;
                                                }
                                        }
                                        if ( !$found ) {
                                                # we couldn't find the end of this imageLink, so output it raw
-                                               # but don't ignore what might be perfectly normal links in the text we've examined
+                                               #but don't ignore what might be perfectly normal links in the text we've examined
                                                $holders->merge( $this->replaceInternalLinks2( $text ) );
                                                $s .= "{$prefix}[[$link|$text";
                                                # note: no $trail, because without an end, there *is* no trail
                                                wfProfileOut( __METHOD__."-might_be_img" );
                                                continue;
                                        }
-                               } else { # it's not an image, so output it raw
+                               } else { #it's not an image, so output it raw
                                        $s .= "{$prefix}[[$link|$text";
                                        # note: no $trail, because without an end, there *is* no trail
                                        wfProfileOut( __METHOD__."-might_be_img" );
@@ -1796,7 +1771,7 @@ class Parser
                        }
 
                        # Self-link checking
-                       if( $nt->getFragment() === '' && $ns !== NS_SPECIAL ) {
+                       if( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
                                if( in_array( $nt->getPrefixedText(), $selflink, true ) ) {
                                        $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
                                        continue;
@@ -1916,7 +1891,7 @@ class Parser
         */
        /* private */ function closeParagraph() {
                $result = '';
-               if ( $this->mLastSection !== '' ) {
+               if ( $this->mLastSection != '' ) {
                        $result = '</' . $this->mLastSection  . ">\n";
                }
                $this->mInPre = false;
@@ -1932,7 +1907,7 @@ class Parser
                if ( $fl < $shorter ) { $shorter = $fl; }
 
                for ( $i = 0; $i < $shorter; ++$i ) {
-                       if ( $st1{$i} !== $st2{$i} ) { break; }
+                       if ( $st1{$i} != $st2{$i} ) { break; }
                }
                return $i;
        }
@@ -2105,7 +2080,7 @@ class Parser
                                        '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
                                if ( $openmatch or $closematch ) {
                                        $paragraphStack = false;
-                                       # TODO bug 5718: paragraph closed
+                                       # TODO bug 5718: paragraph closed
                                        $output .= $this->closeParagraph();
                                        if ( $preOpenMatch and !$preCloseMatch ) {
                                                $this->mInPre = true;
@@ -2115,8 +2090,8 @@ class Parser
                                        } else {
                                                $inBlockElem = true;
                                        }
-                               } elseif ( !$inBlockElem && !$this->mInPre ) {
-                                       if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) !== '' ) ) {
+                               } else if ( !$inBlockElem && !$this->mInPre ) {
+                                       if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
                                                // pre
                                                if ($this->mLastSection !== 'pre') {
                                                        $paragraphStack = false;
@@ -2145,7 +2120,7 @@ class Parser
                                                                $output .= $paragraphStack;
                                                                $paragraphStack = false;
                                                                $this->mLastSection = 'p';
-                                                       } elseif ($this->mLastSection !== 'p') {
+                                                       } else if ($this->mLastSection !== 'p') {
                                                                $output .= $this->closeParagraph().'<p>';
                                                                $this->mLastSection = 'p';
                                                        }
@@ -2166,7 +2141,7 @@ class Parser
                        $output .= $this->closeList( $prefix2[$prefixLength-1] );
                        --$prefixLength;
                }
-               if ( $this->mLastSection !== '' ) {
+               if ( $this->mLastSection != '' ) {
                        $output .= '</' . $this->mLastSection . '>';
                        $this->mLastSection = '';
                }
@@ -2972,7 +2947,7 @@ class Parser
                                                $isHTML = true;
                                                $this->disableCache();
                                        }
-                               } elseif ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
+                               } else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
                                        $found = false; //access denied
                                        wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() );
                                } else {
@@ -3585,7 +3560,7 @@ class Parser
                        if (preg_match("/^$markerRegex/", $headline, $markerMatches)) {
                                $serial = $markerMatches[1];
                                list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
-                               $isTemplate = ($titleText !== $baseTitleText);
+                               $isTemplate = ($titleText != $baseTitleText);
                                $headline = preg_replace("/^$markerRegex/", "", $headline);
                        }
 
@@ -3701,7 +3676,7 @@ class Parser
                                if ( $legacyHeadline == $safeHeadline ) {
                                        # No reason to have both (in fact, we can't)
                                        $legacyHeadline = false;
-                               } elseif ( $legacyHeadline !== Sanitizer::escapeId(
+                               } elseif ( $legacyHeadline != Sanitizer::escapeId(
                                $legacyHeadline, 'xml' ) ) {
                                        # The legacy id is invalid XML.  We used to allow this, but
                                        # there's no reason to do so anymore.  Backward
@@ -3875,8 +3850,8 @@ class Parser
                                else
                                        continue;
                        }
-                       if ( $s['index'] !== $section ||
-                                       $s['fromtitle'] !== $titletext ) {
+                       if ( $s['index'] != $section ||
+                                       $s['fromtitle'] != $titletext ) {
                                self::incrementNumbering( $numbering,
                                        $s['toclevel'], $lastLevel );
 
@@ -3927,7 +3902,7 @@ class Parser
        private static function incrementNumbering( &$number, $level, $lastLevel ) {
                if ( $level > $lastLevel )
                        $number[$level - 1] = 1;
-               elseif ( $level < $lastLevel ) {
+               else if ( $level < $lastLevel ) {
                        foreach ( $number as $key => $unused )
                                if ( $key >= $level )
                                        unset( $number[$key] );
@@ -4037,7 +4012,7 @@ class Parser
                $m = array();
                if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
                        $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
-               } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" !== '' ) {
+               } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
                        $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
                } else {
                        # if there's no context, don't bother duplicating the title
@@ -4876,7 +4851,7 @@ class Parser
                        if ( $node->getName() === 'h' ) {
                                $bits = $node->splitHeading();
                                $curLevel = $bits['level'];
-                               if ( $bits['i'] !== $sectionIndex && $curLevel <= $targetLevel ) {
+                               if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
                                        break;
                                }
                        }
@@ -4892,7 +4867,7 @@ class Parser
                        // Add two newlines on -- trailing whitespace in $newText is conventionally
                        // stripped by the editor, so we need both newlines to restore the paragraph gap
                        // Only add trailing whitespace if there is newText
-                       if($newText !== "") {
+                       if($newText != "") {
                                $outText .= $newText . "\n\n";
                        }
 
index fedbb85..6695973 100644 (file)
@@ -116,7 +116,7 @@ Italics and bold
 </li><li> plain<b><i>bold-italic</i>bold</b>plain
 </li><li> plain<i>italic<b>bold-italic</b></i>plain
 </li><li> plain<b>bold<i>bold-italic</i></b>plain
-</li><li> plain l&#39;<i>italic</i>plain
+</li><li> plain l'<i>italic</i>plain
 </li><li> plain l'<b>bold</b> plain
 </li></ul>
 
@@ -5253,17 +5253,19 @@ New wiki paragraph
 </p>
 !! end
 
-# This was the original html, but it has also been
-# <p>'<i>bold'</i><b>bold<i>bolditalics</i></b>
+# Original result was this:
+# <p><b>bold</b><b>bold<i>bolditalics</i></b>
 # </p>
-# See bug 18765.
+# While that might be marginally more intuitive, maybe, the six-apostrophe
+# construct is clearly pathological and the result stated here (which is what
+# the parser actually does) is about as reasonable as anything.
 !!test
 Mixing markup for italics and bold
 !! options
 !! input
 '''bold''''''bold''bolditalics'''''
 !! result
-<p><b>bold</b><b>bold<i>bolditalics</i></b>
+<p>'<i>bold'</i><b>bold<i>bolditalics</i></b>
 </p>
 !! end
 
@@ -6415,7 +6417,7 @@ Another italics / bold test
 !! input
  ''' ''x'
 !! result
-<pre>&#39;<i> </i>x'
+<pre>'<i> </i>x'
 </pre>
 !!end
 
@@ -7558,82 +7560,6 @@ title=[[Main Page]]
 <a href="/wiki/Main_Page#section" title="Main Page">#section</a>
 !! end
 
-!! test
-Bold/italic markup handled differently depending on leading whitespace (bug 18765)
-!!input
-'''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<!-- Comment -->'''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<span> '''Look at ''this edit'''s complicated bold/italic markup!'''</span>
-
-<nowiki></nowiki> '''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<!-- Hello world---> '''Look at ''this edit'''s complicated bold/italic markup!'''
-
-{|
-| '''Look at ''this edit'''s complicated bold/italic markup!'''
-|}
-
-'''This was Italic'' this was plain''' and this was bold'''
-but '''This is bold'' this is bold italic''' and this is bold'''
-
-<!-- Wishlist: Breaking because <span> and | are treated as text 
-<span>'''Look at ''this edit'''s complicated bold/italic markup!'''</span> 
-{|
-|'''Look at ''this edit'''s complicated bold/italic markup!'''
-|}
--->
-!! result
-<p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p><p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p><p><span> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b></span>
-</p><p> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p>
-<pre><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</pre>
-<table>
-<tr>
-<td> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</td></tr></table>
-<p><b>This was Italic<i> this was plain&#39;</i> and this was bold</b>
-but <b>This is bold<i> this is bold italic&#39;</i> and this is bold</b>
-</p><p><br />
-</p>
-!! end
-
-!! test
-Six quotes
-!!input
-''Italic''''''Bold
-
-'''Bold''BoldItalic''''''Normal
-
-''Italic'''BoldItalic''''''Normal'''''
-
-'''''BoldItalic''''''MoreBoldItalic''
-
-''''''Normal
-!!result
-<p><i>Italic'</i><b>Bold</b>
-</p><p><b>Bold<i>BoldItalic'</i></b>Normal
-</p><p><i>Italic<b>BoldItalic'</b></i>Normal
-</p><p><i><b>BoldItalic</b><b>MoreBoldItalic</b></i>
-</p><p>Normal
-</p>
-!!end
-
-
-!! test
-Too many quotes
-!!input
-I '''like'''''quotes''''''''''' 
-!! result
-<p>I <b>like</b><i>quotes''''''</i><b> </b>
-</p>
-!! end
-
-
 Note: some elements used in these Microdata examples don't work, like <img>
 and <time>.
 !! test
diff --git a/tests/preg_split_test.php b/tests/preg_split_test.php
deleted file mode 100644 (file)
index 69c977f..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-<?php
-include "../includes/StringUtils.php";
-
-$pattern = "/('')+/";
-$subject = str_repeat("'' ", 1024*1024 + 7);
-
-$m = memory_get_usage();
-
-$ps1 = preg_split($pattern, $subject);
-
-$r = "";
-foreach ($ps1 as $c) {
-       $r .= $c . "|";
-}
-echo "Original preg_split: " . md5($r) . "  " . (memory_get_usage()-$m) . "\n";
-
-unset($ps1);
-
-$r = "";
-$ps2 = StringUtils::preg_split($pattern, $subject);
-foreach ($ps2 as $c) {
-       $r .= $c . "|";
-}
-echo "StringUtils preg_split: " . md5($r) . "  " . (memory_get_usage()-$m) . "\n";