From 40e3893b518560d858a48f74a7096223ce4c7b5f Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 29 Apr 2004 06:16:21 +0000 Subject: [PATCH] Partial fix for #595869 ": in URL after ; parsed as list item" Also put sensible names on many variables in doBlockLevels() and added some comments. This partial fix requires there to be a space prior to the post-; : to get parsed as the definition: ; def title : def text thus it doesn't trigger on a URL or namespace properly written: ; def title [http://link] : def text However the regexp will fail and break things if there's an actual space before a : in the link, which is possible in piped text etc. It may be desirable to deal with this at the tokenizer level, to keep it clear of links, tags, etc. --- includes/Parser.php | 147 ++++++++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 67 deletions(-) diff --git a/includes/Parser.php b/includes/Parser.php index 942621c1eb..2750a1c89e 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -1050,81 +1050,94 @@ class Parser return $text."\n"; } - /* private */ function doBlockLevels( $text, $linestart ) - { + /* private */ function doBlockLevels( $text, $linestart ) { $fname = "Parser::doBlockLevels"; wfProfileIn( $fname ); + # Parsing through the text line by line. The main thing # happening here is handling of block-level elements p, pre, # and making lists from lines starting with * # : etc. # - $a = explode( "\n", $text ); + $textLines = explode( "\n", $text ); - $lastPref = $text = $lastLine = ''; + $lastPrefix = $output = $lastLine = ''; $this->mDTopen = $inBlockElem = false; - $npl = 0; - $pstack = false; - - if ( ! $linestart ) { $text .= array_shift( $a ); } - foreach ( $a as $t ) { - $oLine = $t; - $opl = strlen( $lastPref ); - $preCloseMatch = preg_match("/<\\/pre/i", $t ); - $preOpenMatch = preg_match("/
mInPre) {
 				$this->mInPre = !empty($preOpenMatch);
 			}
 			if ( !$this->mInPre ) {
-				$npl = strspn( $t, "*#:;" );
-				$pref = substr( $t, 0, $npl );
+				# Multiple prefixes may abut each other for nested lists.
+				$prefixLength = strspn( $oLine, "*#:;" );
+				$pref = substr( $oLine, 0, $prefixLength );
+				
+				# eh?
 				$pref2 = str_replace( ";", ":", $pref );
-				$t = substr( $t, $npl );
+				$t = substr( $oLine, $prefixLength );
 			} else {
-				$npl = 0;
+				# Don't interpret any other prefixes in preformatted text
+				$prefixLength = 0;
 				$pref = $pref2 = '';
+				$t = $oLine;
 			}
 
-			// list generation
-			if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
-				$text .= $this->nextItem( substr( $pref, -1 ) );
-				if ( $pstack ) { $pstack = false; }
+			# List generation
+			if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
+				# Same as the last item, so no need to deal with nesting or opening stuff
+				$output .= $this->nextItem( substr( $pref, -1 ) );
+				$paragraphStack = false;
 
 				if ( ";" == substr( $pref, -1 ) ) {
-					$cpos = strpos( $t, ":" );
-					if ( false !== $cpos ) {
-						$term = substr( $t, 0, $cpos );
-						$text .= $term . $this->nextItem( ":" );
-						$t = substr( $t, $cpos + 1 );
+					# The one nasty exception: definition lists work like this:
+					# ; title : definition text
+					# So we check for : in the remainder text to split up the
+					# title and definition, without b0rking links.
+					# FIXME: This is not foolproof. Something better in Tokenizer might help.
+					if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+						$term = $match[1];
+						$output .= $term . $this->nextItem( ":" );
+						$t = $match[2];
 					}
 				}
-			} else if (0 != $npl || 0 != $opl) {
-				$cpl = $this->getCommon( $pref, $lastPref );
-				if ( $pstack ) { $pstack = false; }
-
-				while ( $cpl < $opl ) {
-					$text .= $this->closeList( $lastPref{$opl-1} );
-					--$opl;
+			} elseif( $prefixLength || $lastPrefixLength ) {
+				# Either open or close a level...
+				$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
+				$paragraphStack = false;
+
+				while( $commonPrefixLength < $lastPrefixLength ) {
+					$output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
+					--$lastPrefixLength;
 				}
-				if ( $npl <= $cpl && $cpl > 0 ) {
-					$text .= $this->nextItem( $pref{$cpl-1} );
+				if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
+					$output .= $this->nextItem( $pref{$commonPrefixLength-1} );
 				}
-				while ( $npl > $cpl ) {
-					$char = substr( $pref, $cpl, 1 );
-					$text .= $this->openList( $char );
+				while ( $prefixLength > $commonPrefixLength ) {
+					$char = substr( $pref, $commonPrefixLength, 1 );
+					$output .= $this->openList( $char );
 
 					if ( ";" == $char ) {
-						$cpos = strpos( $t, ":" );
-						if ( ! ( false === $cpos ) ) {
-							$term = substr( $t, 0, $cpos );
-							$text .= $term . $this->nextItem( ":" );
-							$t = substr( $t, $cpos + 1 );
+						# FIXME: This is dupe of code above
+						if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+							$term = $match[1];
+							$output .= $term . $this->nextItem( ":" );
+							$t = $match[2];
 						}
 					}
-					++$cpl;
+					++$commonPrefixLength;
 				}
-				$lastPref = $pref2;
+				$lastPrefix = $pref2;
 			}
-			if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
+			if( 0 == $prefixLength ) {
+				# No prefix (not in list)--go to paragraph mode
 				$uniq_prefix = UNIQ_PREFIX;
 				// XXX: use a stack for nestable elements like span, table and div
 				$openmatch = preg_match("/(closeParagraph();
+					$paragraphStack = false;
+					$output .= $this->closeParagraph();
 					if($preOpenMatch and !$preCloseMatch) {
 						$this->mInPre = true;	
 					}
@@ -1146,54 +1159,54 @@ class Parser
 					if ( " " == $t{0} ) {
 						// pre
 						if ($this->mLastSection != 'pre') {
-							$pstack = false;
-							$text .= $this->closeParagraph().'
';
+							$paragraphStack = false;
+							$output .= $this->closeParagraph().'
';
 							$this->mLastSection = 'pre';
 						}
 					} else {
 						// paragraph
 						if ( '' == trim($t) ) {
-							if ( $pstack ) {
-								$text .= $pstack.'
'; - $pstack = false; + if ( $paragraphStack ) { + $output .= $paragraphStack.'
'; + $paragraphStack = false; $this->mLastSection = 'p'; } else { if ($this->mLastSection != 'p' ) { - $text .= $this->closeParagraph(); + $output .= $this->closeParagraph(); $this->mLastSection = ''; - $pstack = "

"; + $paragraphStack = "

"; } else { - $pstack = '

'; + $paragraphStack = '

'; } } } else { - if ( $pstack ) { - $text .= $pstack; - $pstack = false; + if ( $paragraphStack ) { + $output .= $paragraphStack; + $paragraphStack = false; $this->mLastSection = 'p'; } else if ($this->mLastSection != 'p') { - $text .= $this->closeParagraph().'

'; + $output .= $this->closeParagraph().'

'; $this->mLastSection = 'p'; } } } } } - if ($pstack === false) { - $text .= $t."\n"; + if ($paragraphStack === false) { + $output .= $t."\n"; } } - while ( $npl ) { - $text .= $this->closeList( $pref2{$npl-1} ); - --$npl; + while ( $prefixLength ) { + $output .= $this->closeList( $pref2{$prefixLength-1} ); + --$prefixLength; } if ( "" != $this->mLastSection ) { - $text .= "mLastSection . ">"; + $output .= "mLastSection . ">"; $this->mLastSection = ""; } wfProfileOut( $fname ); - return $text; + return $output; } function getVariableValue( $index ) { -- 2.20.1