From 4cd1269d6e525c6964dfbcaadbeca5747da9521c Mon Sep 17 00:00:00 2001 From: Platonides Date: Thu, 14 Jan 2010 16:18:29 +0000 Subject: [PATCH] * (bug 18765) Increased consistency of bold-italic markup for unbalanced quotes. The representation of six quotes is now improved (changes the meaning of some dubious markup). --- RELEASE-NOTES | 2 + includes/parser/Parser.php | 41 +++++++-- maintenance/parserTests.txt | 168 ++++++++++++++++++++++++++---------- 3 files changed, 155 insertions(+), 56 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 6378abe265..2af828f242 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -704,6 +704,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * (bug 9794) User rights log entries for foreign user now links to the foreign user's page if possible * (bug 14717) Don't load nonexistent CSS fix files for non-Monobook skins +* (bug 18765) Increased consistency of bold-italic markup for unbalanced quotes. + Improved representation of six quotes (may break existing markup). == API changes in 1.16 == diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index dea1cda66f..47a6b828e8 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1114,6 +1114,7 @@ class Parser } /** + * Processes bolds and italics on a single line. * Helper function for doAllQuotes() */ public function doQuotes( $text ) { @@ -1139,18 +1140,19 @@ class Parser $arr[$i-1] .= "'"; $arr[$i] = "'''"; } - # If there are more than 5 apostrophes in a row, assume they're all - # text except for the last 5. - else if ( strlen( $arr[$i] ) > 5 ) + # If there are more than 6 apostrophes in a row, assume they're all + # text except for the last 6. + else if ( strlen( $arr[$i] ) > 6 ) { - $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 ); - $arr[$i] = "'''''"; + $arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 6 ); + $arr[$i] = "''''''"; } # Count the number of occurrences of bold and italics mark-ups. # We are not counting sequences of five apostrophes. if ( strlen( $arr[$i] ) == 2 ) { $numitalics++; } else if ( strlen( $arr[$i] ) == 3 ) { $numbold++; } else if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; } + else if ( strlen( $arr[$i] ) == 6 ) { $numbold+=2; } } $i++; } @@ -1162,11 +1164,17 @@ class Parser if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) { $i = 0; + + #These are indexes to the /next/ array entry than the + #one holding the text matching the condition. $firstsingleletterword = -1; $firstmultiletterword = -1; $firstspace = -1; + foreach ( $arr as $r ) { + #Filter the "'''". Separators are on odd positions. + #$arr[0] will be an empty string if needed. if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) ) { $x1 = substr ($arr[$i-1], -1); @@ -1175,7 +1183,7 @@ class Parser if ($firstspace == -1) $firstspace = $i; } else if ($x2 === ' ') { if ($firstsingleletterword == -1) $firstsingleletterword = $i; - } else { + } else if ($arr[$i-1] != "") { if ($firstmultiletterword == -1) $firstmultiletterword = $i; } } @@ -1205,9 +1213,9 @@ class Parser } # Now let's actually convert our apostrophic mush to HTML! - $output = ''; - $buffer = ''; - $state = ''; + $output = ''; #Processed text + $buffer = ''; #Content if $state is 'both' + $state = ''; #Flags with the order of open tags: '|b|i|bi|ib|both' $i = 0; foreach ($arr as $r) { @@ -1261,6 +1269,21 @@ class Parser else # ($state == '') { $buffer = ''; $state = 'both'; } } + else if (strlen ($r) == 6) + { + if ($state === 'b') + { $output .= ''; $state = 'b'; } + else if ($state === 'i') + { $output .= '\''; $state = 'b'; } + else if ($state === 'bi') + { $output .= '\''; $state = ''; } + else if ($state === 'ib') + { $output .= '\''; $state = ''; } + else if ($state === 'both') + { $output .= ''.$buffer.''; $state = 'ib'; } + else # ($state == '') + { $buffer = ''; $state = ''; } + } } $i++; } diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index b8f44d45a9..c191f83205 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -1635,8 +1635,8 @@ Interwiki link encoding conversion (bug 1636) *[[Wikipedia:ro:Olteniţa]] *[[Wikipedia:ro:Olteniţa]] !! result -
  • Wikipedia:ro:Olteniţa -
  • Wikipedia:ro:Olteniţa + !! end @@ -3394,7 +3394,7 @@ BUG 499: Alt text should have Ӓ, not &1234; !! input [[Image:foobar.jpg|♀]] !! result -

    ♀ +

    ?

    !! end @@ -3537,7 +3537,7 @@ ill [[fr:Nourriture]] [[zh:食品]] !! result -es:Alimento fr:Nourriture zh:食品 +es:Alimento fr:Nourriture zh:?? !! end ### @@ -5252,19 +5252,17 @@ New wiki paragraph

    !! end -# Original result was this: -#

    boldboldbolditalics +# This was the original html, but it has also been +#

    'bold'boldbolditalics #

    -# While that might be marginally more intuitive, maybe, the six-apostrophe -# construct is clearly pathological and the result stated here (which is what -# the parser actually does) is about as reasonable as anything. +# See bug 18765. !!test Mixing markup for italics and bold !! options !! input '''bold''''''bold''bolditalics''''' !! result -

    'bold'boldbolditalics +

    boldboldbolditalics

    !! end @@ -6746,9 +6744,9 @@ Self-link in language variants !! options title=[[Dunav]] language=sr !! input -Both [[Dunav]] and [[Дунав]] are names for this river. +Both [[Dunav]] and [[?????]] are names for this river. !! result -

    Both Dunav and Дунав are names for this river. +

    Both Dunav and ????? are names for this river.

    !!end @@ -6758,9 +6756,9 @@ Link to pages in language variants !! options language=sr !! input -Main Page can be written as [[Маин Паге]] +Main Page can be written as [[???? ????]] !! result -

    Main Page can be written as Маин Паге +

    Main Page can be written as ???? ????

    !!end @@ -6770,9 +6768,9 @@ Multiple links to pages in language variants !! options language=sr !! input -[[Main Page]] can be written as [[Маин Паге]] same as [[Маин Паге]]. +[[Main Page]] can be written as [[???? ????]] same as [[???? ????]]. !! result -

    Main Page can be written as Маин Паге same as Маин Паге. +

    Main Page can be written as ???? ???? same as ???? ????.

    !!end @@ -6782,7 +6780,7 @@ Simple template in language variants !! options language=sr !! input -{{тест}} +{{????}} !! result

    This is a test template

    @@ -6794,7 +6792,7 @@ Template with explicit namespace in language variants !! options language=sr !! input -{{Template:тест}} +{{Template:????}} !! result

    This is a test template

    @@ -6806,7 +6804,7 @@ Basic test for template parameter in language variants !! options language=sr !! input -{{парамтест|param=foo}} +{{?????????|param=foo}} !! result

    This is a test template with parameter foo

    @@ -6818,9 +6816,9 @@ Simple category in language variants !! options language=sr cat !! input -[[Category:МедиаWики Усер'с Гуиде]] +[[Category:?????W??? ????'? ?????]] !! result -MediaWiki User's Guide +MediaWiki User's Guide !! end @@ -6843,7 +6841,7 @@ language=sr variant=sr-ec !! input Latinski: -{Ne nuntium necare}- !! result -

    Латински: Ne nuntium necare +

    ????????: Ne nuntium necare

    !! end @@ -6855,7 +6853,7 @@ language=sr variant=sr-ec !! input Latinski: -{Ne nuntium necare}- !! result -

    Латински: Ne nuntium necare +

    ????????: Ne nuntium necare

    !! end @@ -6879,7 +6877,7 @@ language=sr variant=sr-ec !! input == -{Naslov}- == !! result -

    [уреди] Naslov

    +

    [?????] Naslov

    !! end @@ -6979,7 +6977,7 @@ language=sr variant=sr-ec !! input Fridrih IV je car. !! result -

    Фридрих IV је цар. +

    ??????? IV ?? ???.

    !! end @@ -7056,23 +7054,23 @@ Case insensitivity of parser functions for non-ASCII characters (bug 8143) language=cs title=[[Main Page]] !! input -{{PRVNÍVELKÉ:ěščř}} -{{prvnívelké:ěščř}} -{{PRVNÍMALÉ:ěščř}} -{{prvnímalé:ěščř}} -{{MALÁ:ěščř}} -{{malá:ěščř}} -{{VELKÁ:ěščř}} -{{velká:ěščř}} +{{PRVNÍVELKÉ:ešcr}} +{{prvnívelké:ešcr}} +{{PRVNÍMALÉ:ešcr}} +{{prvnímalé:ešcr}} +{{MALÁ:ešcr}} +{{malá:ešcr}} +{{VELKÁ:ešcr}} +{{velká:ešcr}} !! result -

    Ěščř -Ěščř -ěščř -ěščř -ěščř -ěščř -ĚŠČŘ -ĚŠČŘ +

    Ešcr +Ešcr +ešcr +ešcr +ešcr +ešcr +EŠCR +EŠCR

    !! end @@ -7339,7 +7337,7 @@ language=fa !! input [http://en.wikipedia.org/] !! result -

    [Û±] +

    [?]

    !! end @@ -7466,7 +7464,7 @@ comment title=[[Main Page]] !! input /* External links */ removed bogus entries !! result -→External links: removed bogus entries +?External links: removed bogus entries !!end !! test @@ -7476,7 +7474,7 @@ comment local title=[[Main Page]] !! input /* External links */ removed bogus entries !! result -→External links: removed bogus entries +?External links: removed bogus entries !!end !! test @@ -7537,10 +7535,86 @@ title=[[Main Page]] #section !! end +!! test +Bold/italic markup handled differently depending on leading whitespace (bug 18765) +!!input +'''Look at ''this edit'''s complicated bold/italic markup!''' + +'''Look at ''this edit'''s complicated bold/italic markup!''' + + '''Look at ''this edit'''s complicated bold/italic markup!''' + + '''Look at ''this edit'''s complicated bold/italic markup!''' + + '''Look at ''this edit'''s complicated bold/italic markup!''' + +{| +| '''Look at ''this edit'''s complicated bold/italic markup!''' +|} + +'''This was Italic'' this was plain''' and this was bold''' +but '''This is bold'' this is bold italic''' and this is bold''' + + +!! result +

    Look at this edit's complicated bold/italic markup! +

    Look at this edit's complicated bold/italic markup! +

    Look at this edit's complicated bold/italic markup! +

    Look at this edit's complicated bold/italic markup! +

    +
    Look at this edit's complicated bold/italic markup!
    +
    + + +
    Look at this edit's complicated bold/italic markup! +
    +

    This was Italic this was plain' and this was bold +but This is bold this is bold italic' and this is bold +


    +

    +!! end + +!! test +Six quotes +!!input +''Italic''''''Bold + +'''Bold''BoldItalic''''''Normal + +''Italic'''BoldItalic''''''Normal''''' + +'''''BoldItalic''''''MoreBoldItalic'' + +''''''Normal +!!result +

    Italic'Bold +

    BoldBoldItalic'Normal +

    ItalicBoldItalic'Normal +

    BoldItalicMoreBoldItalic +

    Normal +

    +!!end + + +!! test +Too many quotes +!!input +I '''like'''''quotes''''''''''' +!! result +

    I likequotes'''''' +

    +!! end + + + + TODO: more images more tables math -character entities -and much more -Try for 100% code coverage +char \ No newline at end of file -- 2.20.1