From 9b019e519352cee66b5078eb8534a0f7a08e29a1 Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Fri, 16 Aug 2013 17:08:00 -0400 Subject: [PATCH] Clean up Parser::doQuotes(). Yes, I'm a perfectionistic masochist. But there are 1-2 extra levels of indentation here that could go away. Touch up some comments while we're at it. Change-Id: Iab2c2c8a565186eb7e7cf02ba6094f762a1b0a99 --- includes/parser/Parser.php | 305 ++++++++++++++++++------------------- 1 file changed, 151 insertions(+), 154 deletions(-) diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index ba5c8decc8..379456c2e4 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1416,172 +1416,169 @@ class Parser { $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); if ( count( $arr ) == 1 ) { return $text; - } else { - # First, do some preliminary work. This may shift some apostrophes from - # being mark-up to being text. It also counts the number of occurrences - # of bold and italics mark-ups. - $numbold = 0; - $numitalics = 0; - for ( $i = 0; $i < count( $arr ); $i++ ) { - if ( ( $i % 2 ) == 1 ) { - # If there are ever four apostrophes, assume the first is supposed to - # be text, and the remaining three constitute mark-up for bold text. - if ( strlen( $arr[$i] ) == 4 ) { - $arr[$i - 1] .= "'"; - $arr[$i] = "'''"; - } elseif ( strlen( $arr[$i] ) > 5 ) { - # If there are more than 5 apostrophes in a row, assume they're all - # text except for the last 5. - $arr[$i - 1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); - $arr[$i] = "'''''"; - } - # Count the number of occurrences of bold and italics mark-ups. - # We are not counting sequences of five apostrophes. - if ( strlen( $arr[$i] ) == 2 ) { - $numitalics++; - } elseif ( strlen( $arr[$i] ) == 3 ) { - $numbold++; - } elseif ( strlen( $arr[$i] ) == 5 ) { - $numitalics++; - $numbold++; - } - } - } + } - # If there is an odd number of both bold and italics, it is likely - # that one of the bold ones was meant to be an apostrophe followed - # by italics. Which one we cannot know for certain, but it is more - # likely to be one that has a single-letter word before it. - if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { - $i = 0; - $firstsingleletterword = -1; - $firstmultiletterword = -1; - $firstspace = -1; - foreach ( $arr as $r ) { - if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) { - $x1 = substr( $arr[$i - 1], -1 ); - $x2 = substr( $arr[$i - 1], -2, 1 ); - if ( $x1 === ' ' ) { - if ( $firstspace == -1 ) { - $firstspace = $i; - } - } elseif ( $x2 === ' ' ) { - if ( $firstsingleletterword == -1 ) { - $firstsingleletterword = $i; - } - } else { - if ( $firstmultiletterword == -1 ) { - $firstmultiletterword = $i; - } + // First, do some preliminary work. This may shift some apostrophes from + // being mark-up to being text. It also counts the number of occurrences + // of bold and italics mark-ups. + $numbold = 0; + $numitalics = 0; + for ( $i = 1; $i < count( $arr ); $i+=2 ) { + // If there are ever four apostrophes, assume the first is supposed to + // be text, and the remaining three constitute mark-up for bold text. + // (bug 13227: ''''foo'''' turns into ' ''' foo ' ''') + if ( strlen( $arr[$i] ) == 4 ) { + $arr[$i - 1] .= "'"; + $arr[$i] = "'''"; + } elseif ( strlen( $arr[$i] ) > 5 ) { + // If there are more than 5 apostrophes in a row, assume they're all + // text except for the last 5. + // (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''') + $arr[$i - 1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); + $arr[$i] = "'''''"; + } + // Count the number of occurrences of bold and italics mark-ups. + if ( strlen( $arr[$i] ) == 2 ) { + $numitalics++; + } elseif ( strlen( $arr[$i] ) == 3 ) { + $numbold++; + } elseif ( strlen( $arr[$i] ) == 5 ) { + $numitalics++; + $numbold++; + } + } + + // If there is an odd number of both bold and italics, it is likely + // that one of the bold ones was meant to be an apostrophe followed + // by italics. Which one we cannot know for certain, but it is more + // likely to be one that has a single-letter word before it. + if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { + $firstsingleletterword = -1; + $firstmultiletterword = -1; + $firstspace = -1; + for ( $i = 1; $i < count( $arr ); $i+=2 ) { + if ( strlen( $arr[$i] ) == 3 ) { + $x1 = substr( $arr[$i - 1], -1 ); + $x2 = substr( $arr[$i - 1], -2, 1 ); + if ( $x1 === ' ' ) { + if ( $firstspace == -1 ) { + $firstspace = $i; + } + } elseif ( $x2 === ' ' ) { + if ( $firstsingleletterword == -1 ) { + $firstsingleletterword = $i; + } + } else { + if ( $firstmultiletterword == -1 ) { + $firstmultiletterword = $i; } } - $i++; } + } - # If there is a single-letter word, use it! - if ( $firstsingleletterword > -1 ) { - $arr[$firstsingleletterword] = "''"; - $arr[$firstsingleletterword - 1] .= "'"; - } elseif ( $firstmultiletterword > -1 ) { - # If not, but there's a multi-letter word, use that one. - $arr[$firstmultiletterword] = "''"; - $arr[$firstmultiletterword - 1] .= "'"; - } elseif ( $firstspace > -1 ) { - # ... otherwise use the first one that has neither. - # (notice that it is possible for all three to be -1 if, for example, - # there is only one pentuple-apostrophe in the line) - $arr[$firstspace] = "''"; - $arr[$firstspace - 1] .= "'"; - } + // If there is a single-letter word, use it! + if ( $firstsingleletterword > -1 ) { + $arr[$firstsingleletterword] = "''"; + $arr[$firstsingleletterword - 1] .= "'"; + } elseif ( $firstmultiletterword > -1 ) { + // If not, but there's a multi-letter word, use that one. + $arr[$firstmultiletterword] = "''"; + $arr[$firstmultiletterword - 1] .= "'"; + } elseif ( $firstspace > -1 ) { + // ... otherwise use the first one that has neither. + // (notice that it is possible for all three to be -1 if, for example, + // there is only one pentuple-apostrophe in the line) + $arr[$firstspace] = "''"; + $arr[$firstspace - 1] .= "'"; } + } - # Now let's actually convert our apostrophic mush to HTML! - $output = ''; - $buffer = ''; - $state = ''; - $i = 0; - foreach ( $arr as $r ) { - if ( ( $i % 2 ) == 0 ) { - if ( $state === 'both' ) { - $buffer .= $r; - } else { - $output .= $r; - } + // Now let's actually convert our apostrophic mush to HTML! + $output = ''; + $buffer = ''; + $state = ''; + $i = 0; + foreach ( $arr as $r ) { + if ( ( $i % 2 ) == 0 ) { + if ( $state === 'both' ) { + $buffer .= $r; } else { - if ( strlen( $r ) == 2 ) { - if ( $state === 'i' ) { - $output .= ''; - $state = ''; - } elseif ( $state === 'bi' ) { - $output .= ''; - $state = 'b'; - } elseif ( $state === 'ib' ) { - $output .= ''; - $state = 'b'; - } elseif ( $state === 'both' ) { - $output .= '' . $buffer . ''; - $state = 'b'; - } else { # $state can be 'b' or '' - $output .= ''; - $state .= 'i'; - } - } elseif ( strlen( $r ) == 3 ) { - if ( $state === 'b' ) { - $output .= ''; - $state = ''; - } elseif ( $state === 'bi' ) { - $output .= ''; - $state = 'i'; - } elseif ( $state === 'ib' ) { - $output .= ''; - $state = 'i'; - } elseif ( $state === 'both' ) { - $output .= '' . $buffer . ''; - $state = 'i'; - } else { # $state can be 'i' or '' - $output .= ''; - $state .= 'b'; - } - } elseif ( strlen( $r ) == 5 ) { - if ( $state === 'b' ) { - $output .= ''; - $state = 'i'; - } elseif ( $state === 'i' ) { - $output .= ''; - $state = 'b'; - } elseif ( $state === 'bi' ) { - $output .= ''; - $state = ''; - } elseif ( $state === 'ib' ) { - $output .= ''; - $state = ''; - } elseif ( $state === 'both' ) { - $output .= '' . $buffer . ''; - $state = ''; - } else { # ($state == '') - $buffer = ''; - $state = 'both'; - } + $output .= $r; + } + } else { + if ( strlen( $r ) == 2 ) { + if ( $state === 'i' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'bi' ) { + $output .= ''; + $state = 'b'; + } elseif ( $state === 'ib' ) { + $output .= ''; + $state = 'b'; + } elseif ( $state === 'both' ) { + $output .= '' . $buffer . ''; + $state = 'b'; + } else { // $state can be 'b' or '' + $output .= ''; + $state .= 'i'; + } + } elseif ( strlen( $r ) == 3 ) { + if ( $state === 'b' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'bi' ) { + $output .= ''; + $state = 'i'; + } elseif ( $state === 'ib' ) { + $output .= ''; + $state = 'i'; + } elseif ( $state === 'both' ) { + $output .= '' . $buffer . ''; + $state = 'i'; + } else { // $state can be 'i' or '' + $output .= ''; + $state .= 'b'; + } + } elseif ( strlen( $r ) == 5 ) { + if ( $state === 'b' ) { + $output .= ''; + $state = 'i'; + } elseif ( $state === 'i' ) { + $output .= ''; + $state = 'b'; + } elseif ( $state === 'bi' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'ib' ) { + $output .= ''; + $state = ''; + } elseif ( $state === 'both' ) { + $output .= '' . $buffer . ''; + $state = ''; + } else { // ($state == '') + $buffer = ''; + $state = 'both'; } } - $i++; - } - # Now close all remaining tags. Notice that the order is important. - if ( $state === 'b' || $state === 'ib' ) { - $output .= ''; - } - if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { - $output .= ''; } - if ( $state === 'bi' ) { - $output .= ''; - } - # There might be lonely ''''', so make sure we have a buffer - if ( $state === 'both' && $buffer ) { - $output .= '' . $buffer . ''; - } - return $output; + $i++; + } + // Now close all remaining tags. Notice that the order is important. + if ( $state === 'b' || $state === 'ib' ) { + $output .= ''; } + if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) { + $output .= ''; + } + if ( $state === 'bi' ) { + $output .= ''; + } + // There might be lonely ''''', so make sure we have a buffer + if ( $state === 'both' && $buffer ) { + $output .= '' . $buffer . ''; + } + return $output; } /** -- 2.20.1