From 8b9bedbad7e74849e420c26381d3e58f6cd712f9 Mon Sep 17 00:00:00 2001
From: Tim Starling <tstarling@users.mediawiki.org>
Date: Wed, 27 Jan 2010 02:41:22 +0000
Subject: [PATCH] Revert r61528, r61527, r61526, r61525, r61519, r61515,
 r61053, r61052 (Parser::doQuotes() rewrite). Lots of issues to discuss, needs
 more review than I have time to give it pre-1.16. I'll split it out to a
 branch.

---
 RELEASE-NOTES               |   2 -
 includes/StringUtils.php    |  87 ------------
 includes/parser/Parser.php  | 273 ++++++++++++++++--------------------
 maintenance/parserTests.txt |  90 ++----------
 tests/preg_split_test.php   |  24 ----
 5 files changed, 132 insertions(+), 344 deletions(-)
 delete mode 100644 tests/preg_split_test.php

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index 7daff4b4eb..3230382630 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -711,8 +711,6 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
 * (bug 9794) User rights log entries for foreign user now links to the foreign
   user's page if possible
 * (bug 14717) Don't load nonexistent CSS fix files for non-Monobook skins
-* (bug 18765) Increased consistency of bold-italic markup for unbalanced quotes.
-  Improved representation of six quotes (may break existing markup).
 * (bug 22034) Use wfClientAcceptsGzip() in wfGzipHandler instead of
   reimplementing it.
 * (bug 19226) First line renders differently on many UI messages.
diff --git a/includes/StringUtils.php b/includes/StringUtils.php
index bab9be4bbe..c437b3c19e 100644
--- a/includes/StringUtils.php
+++ b/includes/StringUtils.php
@@ -179,14 +179,6 @@ class StringUtils {
 			return new ArrayIterator( explode( $separator, $subject ) );
 		}
 	}
-
-	/**
-	 * Workalike for preg_split() with limited memory usage.
-	 * Returns an Iterator
-	 */
-	static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) {
-		return new PregSplitIterator( $pattern, $subject, $limit, $flags );
-	}
 }
 
 /**
@@ -417,82 +409,3 @@ class ExplodeIterator implements Iterator {
 	}
 }
 
-
-/**
- * An iterator which works exactly like:
- * 
- * foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) {
- *    ...
- * }
- *
- * Except it doesn't use huge amounts of memory when $limit is -1
- *
- * The flag PREG_SPLIT_OFFSET_CAPTURE isn't supported.
- */
-class PregSplitIterator implements Iterator {
-	// The subject string
-	var $pattern, $subject, $originalLimit, $flags;
-
-	// The last extracted group of items.
-	var $smallArray;
-
-	// The position on the iterator.
-	var $curPos;
-
-	const MAX_LIMIT = 100;
-
-	/** 
-	 * Construct a PregSplitIterator
-	 */
-	function __construct( $pattern, $s, $limit, $flags) {
-		$this->pattern = $pattern;
-		$this->subject = $s;
-		$this->originalLimit = $limit;
-		$this->flags = $flags;
-
-		$this->rewind();
-	}
-
-	private function effectiveLimit() {
-		if ($this->originalLimit == -1) {
-			return self::MAX_LIMIT + 1;
-		} else if ($this->limit > self::MAX_LIMIT) {
-			$this->limit -= self::MAX_LIMIT;
-			return self::MAX_LIMIT + 1;
-		} else {
-			$old = $this->limit;
-			$this->limit = 0;
-			return $old;
-		}
-	}
-
-	function rewind() {
-		$this->curPos = 0;
-		$this->limit = 	$this->originalLimit;
-		if ($this->limit == -1) $this->limit = self::MAX_LIMIT;
-		$this->smallArray = preg_split( $this->pattern, $this->subject, $this->effectiveLimit(), $this->flags);
-	}
-
-	function current() {
-		return $this->smallArray[$this->curPos % self::MAX_LIMIT];
-	}
-
-	function key() {
-		return $this->curPos;
-	}
-
-	function next() {
-		$this->curPos++;
-		if ( $this->curPos % self::MAX_LIMIT == 0 ) {
-			# Last item contains the rest unsplitted.
-			if ($this->limit > 0) {
-				$this->smallArray = preg_split( $this->pattern, $this->smallArray[self::MAX_LIMIT], $this->effectiveLimit(), $this->flags);
-			}
-		}
-		return;
-	}
-
-	function valid() {
-		return $this->curPos % self::MAX_LIMIT < count($this->smallArray);
-	}
-}
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 50e15dc4f8..ee7383dc2f 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -213,7 +213,7 @@ class Parser
 		 * Must not consist of all title characters, or else it will change
 		 * the behaviour of <nowiki> in a link.
 		 */
-		# $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
+		#$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
 		# Changed to \x7f to allow XML double-parsing -- TS
 		$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
 
@@ -338,7 +338,7 @@ class Parser
 			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&nbsp;\\2',
 			# french spaces, Guillemet-right
 			'/(\\302\\253) /' => '\\1&nbsp;',
-			'/&nbsp;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
+			'/&nbsp;(!\s*important)/' => ' \\1', #Beware of CSS magic word !important, bug #11874.
 		);
 		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 
@@ -556,7 +556,7 @@ class Parser
 		$taglist = implode( '|', $elements );
 		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";
 
-		while ( $text !== '' ) {
+		while ( $text != '' ) {
 			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
 			$stripped .= $p[0];
 			if( count( $p ) < 5 ) {
@@ -723,11 +723,11 @@ class Parser
 				array_push( $tr_history , false );
 				array_push( $tr_attributes , '' );
 				array_push( $has_opened_tr , false );
-			} elseif ( count ( $td_history ) == 0 ) {
+			} else if ( count ( $td_history ) == 0 ) {
 				// Don't do any of the following
 				$out .= $outLine."\n";
 				continue;
-			} elseif ( substr ( $line , 0 , 2 ) === '|}' ) {
+			} else if ( substr ( $line , 0 , 2 ) === '|}' ) {
 				// We are ending a table
 				$line = '</table>' . substr ( $line , 2 );
 				$last_tag = array_pop ( $last_tag_history );
@@ -745,7 +745,7 @@ class Parser
 				}
 				array_pop ( $tr_attributes );
 				$outLine = $line . str_repeat( '</dd></dl>' , $indent_level );
-			} elseif ( substr ( $line , 0 , 2 ) === '|-' ) {
+			} else if ( substr ( $line , 0 , 2 ) === '|-' ) {
 				// Now we have a table row
 				$line = preg_replace( '#^\|-+#', '', $line );
 
@@ -773,7 +773,7 @@ class Parser
 				array_push ( $td_history , false );
 				array_push ( $last_tag_history , '' );
 			}
-			elseif ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 )  === '|+' ) {
+			else if ( $first_character === '|' || $first_character === '!' || substr ( $line , 0 , 2 )  === '|+' ) {
 				// This might be cell elements, td, th or captions
 				if ( substr ( $line , 0 , 2 ) === '|+' ) {
 					$first_character = '+';
@@ -818,9 +818,9 @@ class Parser
 
 					if ( $first_character === '|' ) {
 						$last_tag = 'td';
-					} elseif ( $first_character === '!' ) {
+					} else if ( $first_character === '!' ) {
 						$last_tag = 'th';
-					} elseif ( $first_character === '+' ) {
+					} else if ( $first_character === '+' ) {
 						$last_tag = 'caption';
 					} else {
 						$last_tag = '';
@@ -835,7 +835,7 @@ class Parser
 					// be mistaken as delimiting cell parameters
 					if ( strpos( $cell_data[0], '[[' ) !== false ) {
 						$cell = "{$previous}<{$last_tag}>{$cell}";
-					} elseif ( count ( $cell_data ) == 1 )
+					} else if ( count ( $cell_data ) == 1 )
 						$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
 					else {
 						$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
@@ -1108,59 +1108,100 @@ class Parser
 	}
 
 	/**
-	 * Processes bolds and italics on a single line.
 	 * Helper function for doAllQuotes()
 	 */
 	public function doQuotes( $text ) {
-		# Counts the number of occurrences of bold and italics mark-ups.
-		self::countBoldAndItalic($text, $numbold, $numitalics);
-		
-		if ( ( $numbold == 0 ) && ( $numitalics == 0 ) )
+		$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+		if ( count( $arr ) == 1 )
 			return $text;
 		else
 		{
+			# First, do some preliminary work. This may shift some apostrophes from
+			# being mark-up to being text. It also counts the number of occurrences
+			# of bold and italics mark-ups.
+			$i = 0;
+			$numbold = 0;
+			$numitalics = 0;
+			foreach ( $arr as $r )
+			{
+				if ( ( $i % 2 ) == 1 )
+				{
+					# If there are ever four apostrophes, assume the first is supposed to
+					# be text, and the remaining three constitute mark-up for bold text.
+					if ( strlen( $arr[$i] ) == 4 )
+					{
+						$arr[$i-1] .= "'";
+						$arr[$i] = "'''";
+					}
+					# If there are more than 5 apostrophes in a row, assume they're all
+					# text except for the last 5.
+					else if ( strlen( $arr[$i] ) > 5 )
+					{
+						$arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
+						$arr[$i] = "'''''";
+					}
+					# Count the number of occurrences of bold and italics mark-ups.
+					# We are not counting sequences of five apostrophes.
+					if ( strlen( $arr[$i] ) == 2 )      { $numitalics++;             }
+					else if ( strlen( $arr[$i] ) == 3 ) { $numbold++;                }
+					else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
+				}
+				$i++;
+			}
+
 			# If there is an odd number of both bold and italics, it is likely
 			# that one of the bold ones was meant to be an apostrophe followed
 			# by italics. Which one we cannot know for certain, but it is more
 			# likely to be one that has a single-letter word before it.
 			if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
 			{
+				$i = 0;
+				$firstsingleletterword = -1;
+				$firstmultiletterword = -1;
+				$firstspace = -1;
+				foreach ( $arr as $r )
+				{
+					if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
+					{
+						$x1 = substr ($arr[$i-1], -1);
+						$x2 = substr ($arr[$i-1], -2, 1);
+						if ($x1 === ' ') {
+							if ($firstspace == -1) $firstspace = $i;
+						} else if ($x2 === ' ') {
+							if ($firstsingleletterword == -1) $firstsingleletterword = $i;
+						} else {
+							if ($firstmultiletterword == -1) $firstmultiletterword = $i;
+						}
+					}
+					$i++;
+				}
 
-				# This algorithm moves the literal quote at the 
-				# right of a single word, at the right of a 
-				# multiletter word or at the right of a space.
-				# Otherwise, it does nothing.
-				#
-				# The original if-based version can be found at
-				# http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?revision=61519&view=markup
-				#
-				# Unlike the original one, here we convert the 
-				# texty quotes to &#39; which shouldn't matter.
-
-				$quoteBalancerReplacements = array( 
-												"/(?<= [^ ])'''(?!')/"=>"&#39;''", 
-												"/(?<=[^ '])'''(?!')/"=>"&#39;''", 
-												"/(^|(?<=[^'])) '''(?!')/"=>" &#39;''");
-
-				foreach( $quoteBalancerReplacements as $k => $v) {
-					$text = preg_replace($k, $v, $text, 1, $count);
-					if ($count != 0)
-						break;
+				# If there is a single-letter word, use it!
+				if ($firstsingleletterword > -1)
+				{
+					$arr [ $firstsingleletterword ] = "''";
+					$arr [ $firstsingleletterword-1 ] .= "'";
+				}
+				# If not, but there's a multi-letter word, use that one.
+				else if ($firstmultiletterword > -1)
+				{
+					$arr [ $firstmultiletterword ] = "''";
+					$arr [ $firstmultiletterword-1 ] .= "'";
+				}
+				# ... otherwise use the first one that has neither.
+				# (notice that it is possible for all three to be -1 if, for example,
+				# there is only one pentuple-apostrophe in the line)
+				else if ($firstspace > -1)
+				{
+					$arr [ $firstspace ] = "''";
+					$arr [ $firstspace-1 ] .= "'";
 				}
 			}
 
-			# Split in groups of 2, 3, 5 or 6 apostrophes.
-			# If there are ever four apostrophes, assume the first is supposed to
-			# be text, and the remaining three constitute mark-up for bold text.
-			# If there are more than 6 apostrophes in a row, assume they're all
-			# text except for the last 6.		
-			$arr = Stringutils::preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
-
-
 			# Now let's actually convert our apostrophic mush to HTML!
-			$output = ''; # Processed text
-			$buffer = ''; # Content if $state is 'both'
-			$state = '';  # Flags with the order of open tags: '|b|i|bi|ib|both'
+			$output = '';
+			$buffer = '';
+			$state = '';
 			$i = 0;
 			foreach ($arr as $r)
 			{
@@ -1177,58 +1218,43 @@ class Parser
 					{
 						if ($state === 'i')
 						{ $output .= '</i>'; $state = ''; }
-						elseif ($state === 'bi')
+						else if ($state === 'bi')
 						{ $output .= '</i>'; $state = 'b'; }
-						elseif ($state === 'ib')
+						else if ($state === 'ib')
 						{ $output .= '</b></i><b>'; $state = 'b'; }
-						elseif ($state === 'both')
+						else if ($state === 'both')
 						{ $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
 						else # $state can be 'b' or ''
 						{ $output .= '<i>'; $state .= 'i'; }
 					}
-					elseif (strlen ($r) == 3)
+					else if (strlen ($r) == 3)
 					{
 						if ($state === 'b')
 						{ $output .= '</b>'; $state = ''; }
-						elseif ($state === 'bi')
+						else if ($state === 'bi')
 						{ $output .= '</i></b><i>'; $state = 'i'; }
-						elseif ($state === 'ib')
+						else if ($state === 'ib')
 						{ $output .= '</b>'; $state = 'i'; }
-						elseif ($state === 'both')
+						else if ($state === 'both')
 						{ $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
 						else # $state can be 'i' or ''
 						{ $output .= '<b>'; $state .= 'b'; }
 					}
-					elseif (strlen ($r) == 5)
+					else if (strlen ($r) == 5)
 					{
 						if ($state === 'b')
 						{ $output .= '</b><i>'; $state = 'i'; }
-						elseif ($state === 'i')
+						else if ($state === 'i')
 						{ $output .= '</i><b>'; $state = 'b'; }
-						elseif ($state === 'bi')
+						else if ($state === 'bi')
 						{ $output .= '</i></b>'; $state = ''; }
-						elseif ($state === 'ib')
+						else if ($state === 'ib')
 						{ $output .= '</b></i>'; $state = ''; }
-						elseif ($state === 'both')
+						else if ($state === 'both')
 						{ $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
 						else # ($state == '')
 						{ $buffer = ''; $state = 'both'; }
 					}
-					elseif (strlen ($r) == 6)
-					{
-						if ($state === 'b')
-						{ $output .= '</b><b>'; $state = 'b'; }
-						elseif ($state === 'i')
-						{ $output .= '\'</i><b>'; $state = 'b'; }
-						elseif ($state === 'bi')
-						{ $output .= '\'</i></b>'; $state = ''; }
-						elseif ($state === 'ib')
-						{ $output .= '\'</b></i>'; $state = ''; }
-						elseif ($state === 'both')
-						{ $output .= '<i><b>'.$buffer.'</b><b>'; $state = 'ib'; }
-						else # ($state == '')
-						{ $buffer = ''; $state = ''; }
-					}
 				}
 				$i++;
 			}
@@ -1246,57 +1272,6 @@ class Parser
 		}
 	}
 
-	/**
-	 * Counts the number of bold and italic items from a line of text.
-	 * Helper function for doQuotes()
-	 */
-	private static function countBoldAndItalic($text, &$numBold, &$numItalics) {
-		$numBold = 0;
-		$numItalics = 0;
-		$offset = 0;
-
-		do {
-			$offset = strpos($text, "'", $offset);
-			if ($offset === false)
-				return;
-
-			$quoteLen = strspn($text, "'", $offset);
-			$offset += $quoteLen;
-
-			switch ($quoteLen) {
-				case 0:
-				case 1:
-					break;
-
-				case 2:
-					$numItalics++;
-					break;
-
-				case 3:
-					$numBold++;
-					break;
-
-				case 4:
-					# If there are ever four apostrophes, assume the first is supposed to
-					# be text, and the remaining three constitute mark-up for bold text.
-					$numBold++;
-					$numItalics++;
-					break;
-
-				case 5:
-					$numItalics++;
-					$numBold++;
-					break;
-
-				case 6:
-				default:
-					# If there are more than 6 apostrophes in a row, assume they're all
-					# text except for the last 6.
-					$numBold+=2;
-			}
-		} while (true);
-	}
-
 	/**
 	 * Replace external links (REL)
 	 *
@@ -1538,9 +1513,9 @@ class Parser
 		$sk = $this->mOptions->getSkin();
 		$holders = new LinkHolderArray( $this );
 
-		# split the entire text string on occurences of [[
+		#split the entire text string on occurrences of [[
 		$a = StringUtils::explode( '[[', ' ' . $s );
-		# get the first element (all text up to first [[), and remove the space we added
+		#get the first element (all text up to first [[), and remove the space we added
 		$s = $a->current();
 		$a->next();
 		$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
@@ -1685,10 +1660,10 @@ class Parser
 
 			if ( $might_be_img ) { # if this is actually an invalid link
 				wfProfileIn( __METHOD__."-might_be_img" );
-				if ( $ns == NS_FILE && $noforce ) { # but might be an image
+				if ( $ns == NS_FILE && $noforce ) { #but might be an image
 					$found = false;
 					while ( true ) {
-						# look at the next 'line' to see if we can close it there
+						#look at the next 'line' to see if we can close it there
 						$a->next();
 						$next_line = $a->current();
 						if ( $next_line === false || $next_line === null ) {
@@ -1702,24 +1677,24 @@ class Parser
 							$trail = $m[2];
 							break;
 						} elseif ( count( $m ) == 2 ) {
-							# if there's exactly one ]] that's fine, we'll keep looking
+							#if there's exactly one ]] that's fine, we'll keep looking
 							$text .= "[[{$m[0]}]]{$m[1]}";
 						} else {
-							# if $next_line is invalid too, we need look no further
+							#if $next_line is invalid too, we need look no further
 							$text .= '[[' . $next_line;
 							break;
 						}
 					}
 					if ( !$found ) {
 						# we couldn't find the end of this imageLink, so output it raw
-						# but don't ignore what might be perfectly normal links in the text we've examined
+						#but don't ignore what might be perfectly normal links in the text we've examined
 						$holders->merge( $this->replaceInternalLinks2( $text ) );
 						$s .= "{$prefix}[[$link|$text";
 						# note: no $trail, because without an end, there *is* no trail
 						wfProfileOut( __METHOD__."-might_be_img" );
 						continue;
 					}
-				} else { # it's not an image, so output it raw
+				} else { #it's not an image, so output it raw
 					$s .= "{$prefix}[[$link|$text";
 					# note: no $trail, because without an end, there *is* no trail
 					wfProfileOut( __METHOD__."-might_be_img" );
@@ -1796,7 +1771,7 @@ class Parser
 			}
 
 			# Self-link checking
-			if( $nt->getFragment() === '' && $ns !== NS_SPECIAL ) {
+			if( $nt->getFragment() === '' && $ns != NS_SPECIAL ) {
 				if( in_array( $nt->getPrefixedText(), $selflink, true ) ) {
 					$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
 					continue;
@@ -1916,7 +1891,7 @@ class Parser
 	 */
 	/* private */ function closeParagraph() {
 		$result = '';
-		if ( $this->mLastSection !== '' ) {
+		if ( $this->mLastSection != '' ) {
 			$result = '</' . $this->mLastSection  . ">\n";
 		}
 		$this->mInPre = false;
@@ -1932,7 +1907,7 @@ class Parser
 		if ( $fl < $shorter ) { $shorter = $fl; }
 
 		for ( $i = 0; $i < $shorter; ++$i ) {
-			if ( $st1{$i} !== $st2{$i} ) { break; }
+			if ( $st1{$i} != $st2{$i} ) { break; }
 		}
 		return $i;
 	}
@@ -2105,7 +2080,7 @@ class Parser
 					'<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
 				if ( $openmatch or $closematch ) {
 					$paragraphStack = false;
-					# TODO bug 5718: paragraph closed
+					# TODO bug 5718: paragraph closed
 					$output .= $this->closeParagraph();
 					if ( $preOpenMatch and !$preCloseMatch ) {
 						$this->mInPre = true;
@@ -2115,8 +2090,8 @@ class Parser
 					} else {
 						$inBlockElem = true;
 					}
-				} elseif ( !$inBlockElem && !$this->mInPre ) {
-					if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) !== '' ) ) {
+				} else if ( !$inBlockElem && !$this->mInPre ) {
+					if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection === 'pre' or trim($t) != '' ) ) {
 						// pre
 						if ($this->mLastSection !== 'pre') {
 							$paragraphStack = false;
@@ -2145,7 +2120,7 @@ class Parser
 								$output .= $paragraphStack;
 								$paragraphStack = false;
 								$this->mLastSection = 'p';
-							} elseif ($this->mLastSection !== 'p') {
+							} else if ($this->mLastSection !== 'p') {
 								$output .= $this->closeParagraph().'<p>';
 								$this->mLastSection = 'p';
 							}
@@ -2166,7 +2141,7 @@ class Parser
 			$output .= $this->closeList( $prefix2[$prefixLength-1] );
 			--$prefixLength;
 		}
-		if ( $this->mLastSection !== '' ) {
+		if ( $this->mLastSection != '' ) {
 			$output .= '</' . $this->mLastSection . '>';
 			$this->mLastSection = '';
 		}
@@ -2972,7 +2947,7 @@ class Parser
 						$isHTML = true;
 						$this->disableCache();
 					}
-				} elseif ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
+				} else if ( $wgNonincludableNamespaces && in_array( $title->getNamespace(), $wgNonincludableNamespaces ) ) {
 					$found = false; //access denied
 					wfDebug( __METHOD__.": template inclusion denied for " . $title->getPrefixedDBkey() );
 				} else {
@@ -3585,7 +3560,7 @@ class Parser
 			if (preg_match("/^$markerRegex/", $headline, $markerMatches)) {
 				$serial = $markerMatches[1];
 				list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
-				$isTemplate = ($titleText !== $baseTitleText);
+				$isTemplate = ($titleText != $baseTitleText);
 				$headline = preg_replace("/^$markerRegex/", "", $headline);
 			}
 
@@ -3701,7 +3676,7 @@ class Parser
 				if ( $legacyHeadline == $safeHeadline ) {
 					# No reason to have both (in fact, we can't)
 					$legacyHeadline = false;
-				} elseif ( $legacyHeadline !== Sanitizer::escapeId(
+				} elseif ( $legacyHeadline != Sanitizer::escapeId(
 				$legacyHeadline, 'xml' ) ) {
 					# The legacy id is invalid XML.  We used to allow this, but
 					# there's no reason to do so anymore.  Backward
@@ -3875,8 +3850,8 @@ class Parser
 				else
 					continue;
 			}
-			if ( $s['index'] !== $section ||
-					$s['fromtitle'] !== $titletext ) {
+			if ( $s['index'] != $section ||
+					$s['fromtitle'] != $titletext ) {
 				self::incrementNumbering( $numbering,
 					$s['toclevel'], $lastLevel );
 
@@ -3927,7 +3902,7 @@ class Parser
 	private static function incrementNumbering( &$number, $level, $lastLevel ) {
 		if ( $level > $lastLevel )
 			$number[$level - 1] = 1;
-		elseif ( $level < $lastLevel ) {
+		else if ( $level < $lastLevel ) {
 			foreach ( $number as $key => $unused )
 				if ( $key >= $level )
 					unset( $number[$key] );
@@ -4037,7 +4012,7 @@ class Parser
 		$m = array();
 		if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
 			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
-		} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" !== '' ) {
+		} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
 			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
 		} else {
 			# if there's no context, don't bother duplicating the title
@@ -4876,7 +4851,7 @@ class Parser
 			if ( $node->getName() === 'h' ) {
 				$bits = $node->splitHeading();
 				$curLevel = $bits['level'];
-				if ( $bits['i'] !== $sectionIndex && $curLevel <= $targetLevel ) {
+				if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
 					break;
 				}
 			}
@@ -4892,7 +4867,7 @@ class Parser
 			// Add two newlines on -- trailing whitespace in $newText is conventionally
 			// stripped by the editor, so we need both newlines to restore the paragraph gap
 			// Only add trailing whitespace if there is newText
-			if($newText !== "") {
+			if($newText != "") {
 				$outText .= $newText . "\n\n";
 			}
 
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt
index fedbb85c1c..6695973adf 100644
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -116,7 +116,7 @@ Italics and bold
 </li><li> plain<b><i>bold-italic</i>bold</b>plain
 </li><li> plain<i>italic<b>bold-italic</b></i>plain
 </li><li> plain<b>bold<i>bold-italic</i></b>plain
-</li><li> plain l&#39;<i>italic</i>plain
+</li><li> plain l'<i>italic</i>plain
 </li><li> plain l'<b>bold</b> plain
 </li></ul>
 
@@ -5253,17 +5253,19 @@ New wiki paragraph
 </p>
 !! end
 
-# This was the original html, but it has also been
-# <p>'<i>bold'</i><b>bold<i>bolditalics</i></b>
+# Original result was this:
+# <p><b>bold</b><b>bold<i>bolditalics</i></b>
 # </p>
-# See bug 18765.
+# While that might be marginally more intuitive, maybe, the six-apostrophe
+# construct is clearly pathological and the result stated here (which is what
+# the parser actually does) is about as reasonable as anything.
 !!test
 Mixing markup for italics and bold
 !! options
 !! input
 '''bold''''''bold''bolditalics'''''
 !! result
-<p><b>bold</b><b>bold<i>bolditalics</i></b>
+<p>'<i>bold'</i><b>bold<i>bolditalics</i></b>
 </p>
 !! end
 
@@ -6415,7 +6417,7 @@ Another italics / bold test
 !! input
  ''' ''x'
 !! result
-<pre>&#39;<i> </i>x'
+<pre>'<i> </i>x'
 </pre>
 !!end
 
@@ -7558,82 +7560,6 @@ title=[[Main Page]]
 <a href="/wiki/Main_Page#section" title="Main Page">#section</a>
 !! end
 
-!! test
-Bold/italic markup handled differently depending on leading whitespace (bug 18765)
-!!input
-'''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<!-- Comment -->'''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<span> '''Look at ''this edit'''s complicated bold/italic markup!'''</span>
-
-<nowiki></nowiki> '''Look at ''this edit'''s complicated bold/italic markup!'''
-
-<!-- Hello world---> '''Look at ''this edit'''s complicated bold/italic markup!'''
-
-{|
-| '''Look at ''this edit'''s complicated bold/italic markup!'''
-|}
-
-'''This was Italic'' this was plain''' and this was bold'''
-but '''This is bold'' this is bold italic''' and this is bold'''
-
-<!-- Wishlist: Breaking because <span> and | are treated as text 
-<span>'''Look at ''this edit'''s complicated bold/italic markup!'''</span> 
-{|
-|'''Look at ''this edit'''s complicated bold/italic markup!'''
-|}
--->
-!! result
-<p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p><p><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p><p><span> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b></span>
-</p><p> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</p>
-<pre><b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</pre>
-<table>
-<tr>
-<td> <b>Look at <i>this edit&#39;</i>s complicated bold/italic markup!</b>
-</td></tr></table>
-<p><b>This was Italic<i> this was plain&#39;</i> and this was bold</b>
-but <b>This is bold<i> this is bold italic&#39;</i> and this is bold</b>
-</p><p><br />
-</p>
-!! end
-
-!! test
-Six quotes
-!!input
-''Italic''''''Bold
-
-'''Bold''BoldItalic''''''Normal
-
-''Italic'''BoldItalic''''''Normal'''''
-
-'''''BoldItalic''''''MoreBoldItalic''
-
-''''''Normal
-!!result
-<p><i>Italic'</i><b>Bold</b>
-</p><p><b>Bold<i>BoldItalic'</i></b>Normal
-</p><p><i>Italic<b>BoldItalic'</b></i>Normal
-</p><p><i><b>BoldItalic</b><b>MoreBoldItalic</b></i>
-</p><p>Normal
-</p>
-!!end
-
-
-!! test
-Too many quotes
-!!input
-I '''like'''''quotes''''''''''' 
-!! result
-<p>I <b>like</b><i>quotes''''''</i><b> </b>
-</p>
-!! end
-
-
 Note: some elements used in these Microdata examples don't work, like <img>
 and <time>.
 !! test
diff --git a/tests/preg_split_test.php b/tests/preg_split_test.php
deleted file mode 100644
index 69c977f774..0000000000
--- a/tests/preg_split_test.php
+++ /dev/null
@@ -1,24 +0,0 @@
-<?php
-include "../includes/StringUtils.php";
-
-$pattern = "/('')+/";
-$subject = str_repeat("'' ", 1024*1024 + 7);
-
-$m = memory_get_usage();
-
-$ps1 = preg_split($pattern, $subject);
-
-$r = "";
-foreach ($ps1 as $c) {
-	$r .= $c . "|";
-}
-echo "Original preg_split: " . md5($r) . "  " . (memory_get_usage()-$m) . "\n";
-
-unset($ps1);
-
-$r = "";
-$ps2 = StringUtils::preg_split($pattern, $subject);
-foreach ($ps2 as $c) {
-	$r .= $c . "|";
-}
-echo "StringUtils preg_split: " . md5($r) . "  " . (memory_get_usage()-$m) . "\n";
-- 
2.20.1