* Added truncate() & truncateHTML() tests
author Aaron Schulz <aaron@users.mediawiki.org>
Tue, 28 Jun 2011 01:09:02 +0000 (01:09 +0000)
committer Aaron Schulz <aaron@users.mediawiki.org>
Tue, 28 Jun 2011 01:09:02 +0000 (01:09 +0000)
* Some fixes/changes to truncateHTML() based on tests
** Something like "<span>hello</span>" now ends up as "<span>...</span>" instead of just "..." (for the relevant cases)
** If we get something like "<span></span>" as input, just return it unchanged instead of ""
* Renamed $dispLength -> $dispLen in truncateHTML()

languages/Language.php
tests/phpunit/languages/LanguageTest.php

index 7939713..06b39bd 100644 (file)
@@ -2770,31 +2770,33 @@ class Language {
                        return $text; // string short enough even *with* HTML (short-circuit)
                }
 
-               $displayLen = 0; // innerHTML legth so far
+               $dispLen = 0; // innerHTML length so far
                $testingEllipsis = false; // checking if ellipses will make string longer/equal?
                $tagType = 0; // 0-open, 1-close
                $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
                $entityState = 0; // 0-not entity, 1-entity
-               $tag = $ret = $pRet = ''; // accumulated tag name, accumulated result string
+               $tag = $ret = ''; // accumulated tag name, accumulated result string
                $openTags = array(); // open tag stack
-               $pOpenTags = array();
+               $maybeState = null; // possible truncation state
 
                $textLen = strlen( $text );
                $neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
                for ( $pos = 0; true; ++$pos ) {
                        # Consider truncation once the display length has reached the maximim.
+                       # We check if $dispLen > 0 to grab tags for the $neLength = 0 case.
                        # Check that we're not in the middle of a bracket/entity...
-                       if ( $displayLen >= $neLength && $bracketState == 0 && $entityState == 0 ) {
+                       if ( $dispLen && $dispLen >= $neLength && $bracketState == 0 && !$entityState ) {
                                if ( !$testingEllipsis ) {
                                        $testingEllipsis = true;
                                        # Save where we are; we will truncate here unless there turn out to
                                        # be so few remaining characters that truncation is not necessary.
-                                       $pOpenTags = $openTags; // save state
-                                       $pRet = $ret; // save state
-                               } elseif ( $displayLen > $length && $displayLen > strlen( $ellipsis ) ) {
+                                       if ( !$maybeState ) { // already saved? ($neLength = 0 case)
+                                               $maybeState = array( $ret, $openTags ); // save state
+                                       }
+                               } elseif ( $dispLen > $length && $dispLen > strlen( $ellipsis ) ) {
                                        # String in fact does need truncation, the truncation point was OK.
-                                       $openTags = $pOpenTags; // reload state
-                                       $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
+                                       list( $ret, $openTags ) = $maybeState; // reload state
+                                       $ret = $this->removeBadCharLast( $ret ); // multi-byte char fix
                                        $ret .= $ellipsis; // add ellipsis
                                        break;
                                }
@@ -2832,25 +2834,27 @@ class Language {
                                if ( $entityState ) {
                                        if ( $ch == ';' ) {
                                                $entityState = 0;
-                                               $displayLen++; // entity is one displayed char
+                                               $dispLen++; // entity is one displayed char
                                        }
                                } else {
+                                       if ( $neLength == 0 && !$maybeState ) {
+                                               // Save state without $ch. We want to *hit* the first
+                                               // display char (to get tags) but not *use* it if truncating.
+                                               $maybeState = array( substr( $ret, 0, -1 ), $openTags );
+                                       }
                                        if ( $ch == '&' ) {
                                                $entityState = 1; // entity found, (e.g. "&#160;")
                                        } else {
-                                               $displayLen++; // this char is displayed
+                                               $dispLen++; // this char is displayed
                                                // Add the next $max display text chars after this in one swoop...
-                                               $max = ( $testingEllipsis ? $length : $neLength ) - $displayLen;
+                                               $max = ( $testingEllipsis ? $length : $neLength ) - $dispLen;
                                                $skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
-                                               $displayLen += $skipped;
+                                               $dispLen += $skipped;
                                                $pos += $skipped;
                                        }
                                }
                        }
                }
-               if ( $displayLen == 0 ) {
-                       return ''; // no text shown, nothing to format
-               }
                // Close the last tag if left unclosed by bad HTML
                $this->truncate_endBracket( $tag, $text[$textLen - 1], $tagType, $openTags );
                while ( count( $openTags ) > 0 ) {
index 085420e..34baea8 100644 (file)
@@ -112,6 +112,108 @@ class LanguageTest extends MediaWikiTestCase {
                );
        }
 
+       function testTruncate() {
+               $this->assertEquals(
+                       "XXX",
+                       $this->lang->truncate( "1234567890", 0, 'XXX' ),
+                       'truncate prefix, len 0, small ellipsis'
+               );
+
+               $this->assertEquals(
+                       "12345XXX",
+                       $this->lang->truncate( "1234567890", 8, 'XXX' ),
+                       'truncate prefix, small ellipsis'
+               );
+
+               $this->assertEquals(
+                       "123456789",
+                       $this->lang->truncate( "123456789", 5, 'XXXXXXXXXXXXXXX' ),
+                       'truncate prefix, large ellipsis'
+               );
+
+               $this->assertEquals(
+                       "XXX67890",
+                       $this->lang->truncate( "1234567890", -8, 'XXX' ),
+                       'truncate suffix, small ellipsis'
+               );
+
+               $this->assertEquals(
+                       "123456789",
+                       $this->lang->truncate( "123456789", -5, 'XXXXXXXXXXXXXXX' ),
+                       'truncate suffix, large ellipsis'
+               );
+       }
+
+       /**
+       * @dataProvider provideHTMLTruncateData()
+       */
+       function testTruncateHTML( $len, $ellipsis, $input, $expected ) {
+               // Actual HTML...
+               $this->assertEquals(
+                       $expected,
+                       $this->lang->truncateHTML( $input, $len, $ellipsis )
+               );
+       }
+
+       /**
+        * Array format is ($len, $ellipsis, $input, $expected)
+        */
+       function provideHTMLTruncateData() {
+               return array(
+                       array( 0, 'XXX', "1234567890", "XXX" ),
+                       array( 8, 'XXX', "1234567890", "12345XXX" ),
+                       array( 5, 'XXXXXXXXXXXXXXX', '1234567890', "1234567890" ),
+                       array( 2, '***',
+                               '<p><span style="font-weight:bold;"></span></p>',
+                               '<p><span style="font-weight:bold;"></span></p>',
+                       ),
+                       array( 2, '***',
+                               '<p><span style="font-weight:bold;">123456789</span></p>',
+                               '<p><span style="font-weight:bold;">***</span></p>',
+                       ),
+                       array( 2, '***',
+                               '<p><span style="font-weight:bold;">&nbsp;23456789</span></p>',
+                               '<p><span style="font-weight:bold;">***</span></p>',
+                       ),
+                       array( 3, '***',
+                               '<p><span style="font-weight:bold;">123456789</span></p>',
+                               '<p><span style="font-weight:bold;">***</span></p>',
+                       ),
+                       array( 4, '***',
+                               '<p><span style="font-weight:bold;">123456789</span></p>',
+                               '<p><span style="font-weight:bold;">1***</span></p>',
+                       ),
+                       array( 5, '***',
+                               '<tt><span style="font-weight:bold;">123456789</span></tt>',
+                               '<tt><span style="font-weight:bold;">12***</span></tt>',
+                       ),
+                       array( 6, '***',
+                               '<p><a href="www.mediawiki.org">123456789</a></p>',
+                               '<p><a href="www.mediawiki.org">123***</a></p>',
+                       ),
+                       array( 6, '***',
+                               '<p><a href="www.mediawiki.org">12&nbsp;456789</a></p>',
+                               '<p><a href="www.mediawiki.org">12&nbsp;***</a></p>',
+                       ),
+                       array( 7, '***',
+                               '<small><span style="font-weight:bold;">123<p id="#moo">456</p>789</span></small>',
+                               '<small><span style="font-weight:bold;">123<p id="#moo">4***</p></span></small>',
+                       ),
+                       array( 8, '***',
+                               '<div><span style="font-weight:bold;">123<span>4</span>56789</span></div>',
+                               '<div><span style="font-weight:bold;">123<span>4</span>5***</span></div>',
+                       ),
+                       array( 9, '***',
+                               '<p><table style="font-weight:bold;"><tr><td>123456789</td></tr></table></p>',
+                               '<p><table style="font-weight:bold;"><tr><td>123456789</td></tr></table></p>',
+                       ),
+                       array( 10, '***',
+                               '<p><font style="font-weight:bold;">123456789</font></p>',
+                               '<p><font style="font-weight:bold;">123456789</font></p>',
+                       ),
+               );
+       }
+
        /**
         * Test Language::isValidBuiltInCode()
         * @dataProvider provideLanguageCodes