From: Platonides Date: Tue, 26 Jan 2010 18:56:50 +0000 (+0000) Subject: Step 3: Balance the quotes directly on $text X-Git-Tag: 1.31.0-rc.0~38099 X-Git-Url: http://git.cyclocoop.org/%22%2C%20generer_url_ecrire%28?a=commitdiff_plain;h=68f525870196403b870290776f15555582b61197;p=lhc%2Fweb%2Fwiklou.git Step 3: Balance the quotes directly on $text Side effect: Some ' are converted to &#39; on output. --- diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 647b6b949f..e419a30df4 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1119,70 +1119,44 @@ class Parser return $text; else { - # Split in groups of 2, 3, 5 or 6 apostrophes. - # If there are ever four apostrophes, assume the first is supposed to - # be text, and the remaining three constitute mark-up for bold text. - # If there are more than 6 apostrophes in a row, assume they're all - # text except for the last 6. - $arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); - - # If there is an odd number of both bold and italics, it is likely # that one of the bold ones was meant to be an apostrophe followed # by italics. Which one we cannot know for certain, but it is more # likely to be one that has a single-letter word before it. if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { - $i = 0; - - # These are indexes to the /next/ array entry than the - # one holding the text matching the condition which gives name - # to the variable. - $firstsingleletterword = -1; - $firstmultiletterword = -1; - $firstspace = -1; - - foreach ( $arr as $r ) - { - # Filter the "'''". Separators are on odd positions. - # $arr[0] will be an empty string if needed.
- if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) - { - $x1 = substr ($arr[$i-1], -1); - $x2 = substr ($arr[$i-1], -2, 1); - if ($x1 === ' ') { - if ($firstspace == -1) $firstspace = $i; - } elseif ($x2 === ' ') { - if ($firstsingleletterword == -1) $firstsingleletterword = $i; - } elseif ($arr[$i-1] !== "") { - if ($firstmultiletterword == -1) $firstmultiletterword = $i; - } - } - $i++; - } - # If there is a single-letter word, use it! - if ($firstsingleletterword > -1) - { - $arr [ $firstsingleletterword ] = "''"; - $arr [ $firstsingleletterword-1 ] .= "'"; - } - # If not, but there's a multi-letter word, use that one. - elseif ($firstmultiletterword > -1) - { - $arr [ $firstmultiletterword ] = "''"; - $arr [ $firstmultiletterword-1 ] .= "'"; - } - # ... otherwise use the first one that has neither. - # (notice that it is possible for all three to be -1 if, for example, - # there is only one pentuple-apostrophe in the line) - elseif ($firstspace > -1) - { - $arr [ $firstspace ] = "''"; - $arr [ $firstspace-1 ] .= "'"; + # This algorithm moves the literal quote at the + # right of a single word, at the right of a + # multiletter word or at the right of a space. + # Otherwise, it does nothing. + # + # The original if-based version can be found at + # http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?revision=61519&view=markup + # + # Unlike the original one, here we convert the + # texty quotes to &#39; which shouldn't matter. + + $quoteBalancerReplacements = array( + "/(?<= [^ ])'''(?!')/"=>"&#39;''", + "/(?<=[^ '])'''(?!')/"=>"&#39;''", + "/(^|(?<=[^'])) '''(?!')/"=>" &#39;''"); + + foreach( $quoteBalancerReplacements as $k => $v) { + $text = preg_replace($k, $v, $text, 1, $count); + if ($count != 0) + break; } } + # Split in groups of 2, 3, 5 or 6 apostrophes. + # If there are ever four apostrophes, assume the first is supposed to + # be text, and the remaining three constitute mark-up for bold text.
+ # If there are more than 6 apostrophes in a row, assume they're all + # text except for the last 6. + $arr = preg_split( "/('{2,3}(?:''')?)(?!')/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); + + # Now let's actually convert our apostrophic mush to HTML! $output = ''; # Processed text $buffer = ''; # Content if $state is 'both' diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index fabff60109..fedbb85c1c 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -116,7 +116,7 @@ Italics and bold
  • plainbold-italicboldplain
  • plainitalicbold-italicplain
  • plainboldbold-italicplain -
  • plain l'italicplain +
  • plain l'italicplain
  • plain l'bold plain
  • @@ -6415,7 +6415,7 @@ Another italics / bold test !! input ''' ''x' !! result -
    ' x'
    +
    ' x'
     
    !!end @@ -7585,19 +7585,19 @@ but '''This is bold'' this is bold italic''' and this is bold''' |} --> !! result -

    Look at this edit's complicated bold/italic markup! -

    Look at this edit's complicated bold/italic markup! -

    Look at this edit's complicated bold/italic markup! -

    Look at this edit's complicated bold/italic markup! +

    Look at this edit's complicated bold/italic markup! +

    Look at this edit's complicated bold/italic markup! +

    Look at this edit's complicated bold/italic markup! +

    Look at this edit's complicated bold/italic markup!

    -
    Look at this edit's complicated bold/italic markup!
    +
    Look at this edit's complicated bold/italic markup!
     
    -
    Look at this edit's complicated bold/italic markup! + Look at this edit's complicated bold/italic markup!
    -

    This was Italic this was plain' and this was bold -but This is bold this is bold italic' and this is bold +

    This was Italic this was plain' and this was bold +but This is bold this is bold italic' and this is bold


    !! end