if( $usePrefs ) {
$datePreference = $wgUser->getDatePreference();
} else {
- $options = User::getDefaultOptions();
- $datePreference = (string)$options['date'];
+ $datePreference = (string)User::getDefaultOption( 'date' );
}
} else {
$datePreference = (string)$usePrefs;
* @param $string String
* @return String
*/
- function wordSegmentation( $string ) {
+ function segmentByWord( $string ) {
return $string; // hands the input back untouched
}
* @return String
*/
function normalizeForSearch( $string ) {
- return $string;
+ return self::convertDoubleWidth($string); // now routed through convertDoubleWidth() instead of returning the input as-is
}
/**
* Convert full-width digits and unaccented Latin letters to their
* ordinary (half-width) ASCII counterparts.
* range: ff00-ff5f ~= 0020-007f
*
* Only 0-9, A-Z and a-z are mapped; every other byte sequence is left alone.
* This file must be saved as UTF-8: each full-width character in the
* literal below occupies three bytes, which is why it is split in chunks of 3.
*
* @param $string String
* @return String
*/
protected static function convertDoubleWidth( $string ) {
- $string = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $string );
- $string = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $string );
+ static $full = null;
+ static $half = null;
+
+ if( $full === null ) {
+ # Build the two parallel lookup tables once per request.
+ # $fullWidth holds U+FF10..U+FF19, U+FF21..U+FF3A, U+FF41..U+FF5A.
+ $fullWidth = "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ";
+ $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+ $full = str_split( $fullWidth, 3 ); // one 3-byte UTF-8 sequence per entry
+ $half = str_split( $halfWidth ); // one ASCII byte per entry, same order
+ }
+
+ $string = str_replace( $full, $half, $string );
return $string;
}
*/
function truncate( $string, $length, $ellipsis = '...' ) {
# Use the localized ellipsis character
- if( $ellipsis == '...' ) {
+ if ( $ellipsis == '...' ) {
$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
}
-
- if( $length == 0 ) {
+ # Check if there is no need to truncate
+ if ( $length == 0 ) {
return $ellipsis;
- }
- if ( strlen( $string ) <= abs( $length ) ) {
+ } elseif ( strlen( $string ) <= abs( $length ) ) {
return $string; // already fits within |$length| bytes
}
$stringOriginal = $string; // kept for the length comparison at the end
- if( $length > 0 ) {
- $string = substr( $string, 0, $length );
- $char = ord( $string[strlen( $string ) - 1] );
- $m = array();
- if ($char >= 0xc0) {
- # We got the first byte only of a multibyte char; remove it.
- $string = substr( $string, 0, -1 );
- } elseif( $char >= 0x80 &&
- preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
- '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
- # We chopped in the middle of a character; remove it
- $string = $m[1];
- }
+ if ( $length > 0 ) {
+ $string = substr( $string, 0, $length ); // xyz...
+ $string = $this->removeBadCharLast( $string ); // drop a multi-byte char cut off at the end
$string = $string . $ellipsis;
-
} else { // negative $length: keep the tail of the string instead of the head
- $string = substr( $string, $length );
- $char = ord( $string[0] );
- if( $char >= 0x80 && $char < 0xc0 ) {
- # We chopped in the middle of a character; remove the whole thing
- $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
- }
+ $string = substr( $string, $length ); // ...xyz
+ $string = $this->removeBadCharFirst( $string ); // drop a multi-byte char cut off at the start
$string = $ellipsis . $string;
}
- # Do not truncate if the ellipsis actually make the string longer. Bug 22181
+ # Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
if ( strlen( $string ) < strlen( $stringOriginal ) ) {
return $string;
} else { // NOTE(review): the body of this branch is not visible in this excerpt
}
}
+ /**
+  * Remove bytes that represent an incomplete Unicode character
+  * at the end of string (e.g. bytes of the char are missing)
+  *
+  * An empty string is returned as-is. Only the trailing partial
+  * sequence is removed; everything before it, including any
+  * newlines, is preserved.
+  *
+  * @param $string String
+  * @return string
+  */
+ protected function removeBadCharLast( $string ) {
+ 	if ( strlen( $string ) == 0 ) {
+ 		# Nothing to inspect; also avoids reading offset -1 below
+ 		return $string;
+ 	}
+ 	$char = ord( $string[strlen( $string ) - 1] );
+ 	$m = array();
+ 	if ( $char >= 0xc0 ) {
+ 		# We got the first byte only of a multibyte char; remove it.
+ 		$string = substr( $string, 0, -1 );
+ 	} elseif ( $char >= 0x80 &&
+ 		# The /s modifier (PCRE_DOTALL) lets (.*) span newlines; without
+ 		# it the pattern can never match multi-line input and the broken
+ 		# trailing bytes would be left in place.
+ 		preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
+ 			'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) )
+ 	{
+ 		# We chopped in the middle of a character; remove it
+ 		$string = $m[1];
+ 	}
+ 	return $string;
+ }
+
+ /**
+  * Remove bytes that represent an incomplete Unicode character
+  * at the start of string (e.g. bytes of the char are missing)
+  *
+  * An empty string is returned as-is.
+  *
+  * @param $string String
+  * @return string
+  */
+ protected function removeBadCharFirst( $string ) {
+ 	if ( strlen( $string ) == 0 ) {
+ 		# Nothing to inspect; also avoids reading offset 0 of an empty string
+ 		return $string;
+ 	}
+ 	$char = ord( $string[0] );
+ 	if ( $char >= 0x80 && $char < 0xc0 ) {
+ 		# We chopped in the middle of a character; remove the whole thing
+ 		# (0x80-0xbf are UTF-8 continuation bytes with no lead byte before them)
+ 		$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
+ 	}
+ 	return $string;
+ }
+
+ /**
+  * Truncate a string of valid HTML to a specified length in bytes,
+  * appending an optional string (e.g. for ellipses), and return valid HTML
+  *
+  * This is only intended for styled/linked text, such as HTML with
+  * tags like <span> and <a>, where the tags are self-contained (valid HTML)
+  *
+  * Note: tries to fix broken HTML with MWTidy
+  *
+  * The length limit is applied to the displayed text: bytes inside tags
+  * are not counted and a character entity counts as one. Tags still open
+  * at the cut point are closed in the returned string.
+  *
+  * @param string $text String to truncate
+  * @param int $length (zero/positive) Maximum length (excluding ellipses)
+  * @param string $ellipsis String to append to the truncated text
+  * @return string
+  */
+ function truncateHtml( $text, $length, $ellipsis = '...' ) {
+ 	# Use the localized ellipsis character
+ 	if ( $ellipsis == '...' ) {
+ 		$ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
+ 	}
+ 	# Check if there is no need to truncate
+ 	if ( $length <= 0 ) {
+ 		return $ellipsis; // no text shown, nothing to format
+ 	} elseif ( strlen($text) <= $length ) {
+ 		return $text; // string short enough even *with* HTML
+ 	}
+ 	$text = MWTidy::tidy( $text ); // fix tags
+ 	$displayLen = 0; // innerHTML length so far
+ 	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
+ 	$tagType = 0; // 0-open, 1-close
+ 	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
+ 	$entityState = 0; // 0-not entity, 1-entity
+ 	$tag = $ret = $ch = '';
+ 	$openTags = array();
+ 	$textLen = strlen($text);
+ 	for( $pos = 0; $pos < $textLen; ++$pos ) {
+ 		$ch = $text[$pos];
+ 		$lastCh = $pos ? $text[$pos-1] : '';
+ 		$ret .= $ch; // add to result string
+ 		if ( $ch == '<' ) {
+ 			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
+ 			$entityState = 0; // for bad HTML
+ 			$bracketState = 1; // tag started (checking for slash)
+ 		} elseif ( $ch == '>' ) {
+ 			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
+ 			$entityState = 0; // for bad HTML
+ 			$bracketState = 0; // out of brackets
+ 		} elseif ( $bracketState == 1 ) {
+ 			if ( $ch == '/' ) {
+ 				$tagType = 1; // close tag (e.g. "</span>")
+ 			} else {
+ 				$tagType = 0; // open tag (e.g. "<span>")
+ 				$tag .= $ch;
+ 			}
+ 			$bracketState = 2; // building tag name
+ 		} elseif ( $bracketState == 2 ) {
+ 			if ( $ch != ' ' ) {
+ 				$tag .= $ch;
+ 			} else {
+ 				// Name found (e.g. "<a href=..."), add on tag attributes...
+ 				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
+ 			}
+ 		} elseif ( $bracketState == 0 ) {
+ 			if ( $entityState ) {
+ 				if ( $ch == ';' ) {
+ 					$entityState = 0;
+ 					$displayLen++; // entity is one displayed char
+ 				}
+ 			} else {
+ 				if ( $ch == '&' ) {
+ 					$entityState = 1; // entity found, (e.g. "&#160;")
+ 				} else {
+ 					$displayLen++; // this char is displayed
+ 					// Add on the other display text after this...
+ 					$skipped = $this->truncate_skip(
+ 						$ret, $text, "<>&", $pos + 1, $length - $displayLen );
+ 					$displayLen += $skipped;
+ 					$pos += $skipped;
+ 				}
+ 			}
+ 		}
+ 		# Consider truncation once the display length has reached the maximum.
+ 		# Double-check that we're not in the middle of a bracket/entity...
+ 		if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
+ 			if ( !$testingEllipsis ) {
+ 				$testingEllipsis = true;
+ 				# Save where we are; we will truncate here unless
+ 				# the ellipsis actually makes the string longer.
+ 				$pOpenTags = $openTags; // save state
+ 				$pRet = $ret; // save state
+ 			} elseif ( $displayLen > ($length + strlen($ellipsis)) ) {
+ 				# Ellipsis won't make string longer/equal, the truncation point was OK.
+ 				$openTags = $pOpenTags; // reload state
+ 				$ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
+ 				$ret .= $ellipsis; // add ellipsis
+ 				break;
+ 			}
+ 		}
+ 	}
+ 	if ( $displayLen == 0 ) {
+ 		return ''; // no text shown, nothing to format
+ 	}
+ 	# Flush a tag that was still being read when the text ran out (bad HTML).
+ 	# Argument order must match truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ).
+ 	$this->truncate_endBracket( $tag, $tagType, $text[$textLen-1], $openTags );
+ 	while ( count( $openTags ) > 0 ) {
+ 		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
+ 	}
+ 	return $ret;
+ }
+
+ // truncateHtml() helper function
+ // like strcspn() but adds the skipped chars to $ret
+ //
+ // $ret    (by reference) string the skipped bytes are appended to
+ // $text   string being scanned
+ // $search bytes that stop the scan (none of them is consumed)
+ // $start  byte offset in $text where the scan begins
+ // $len    maximum number of bytes to examine; null (default) means no limit,
+ //         zero or a negative value means "examine nothing"
+ // Returns the number of bytes skipped (and appended to $ret).
+ private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
+ 	$skipCount = 0;
+ 	if ( $start < strlen( $text ) ) {
+ 		if ( $len === null ) {
+ 			// No budget given: scan to the end of $text. The length argument is
+ 			// omitted because strcspn() reads a negative length as "stop that
+ 			// many bytes before the end", not as "unlimited".
+ 			$skipCount = strcspn( $text, $search, $start );
+ 		} else {
+ 			// An exhausted (negative) budget must not be reinterpreted by strcspn()
+ 			$skipCount = strcspn( $text, $search, $start, max( 0, $len ) );
+ 		}
+ 		$ret .= substr( $text, $start, $skipCount );
+ 	}
+ 	return $skipCount;
+ }
+
+ // truncateHtml() helper function
+ // Called when a tag boundary is reached. If a tag name has been collected:
+ // (a) an opening tag that is not self-closing is pushed onto $openTags;
+ //     a closing tag matching the top of $openTags pops it
+ // (b) $tag is reset to the empty string
+ // When no tag name is pending, only leading whitespace is trimmed from $tag.
+ private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
+ 	$tag = ltrim( $tag );
+ 	if ( $tag == '' ) {
+ 		return; // no pending tag name, nothing to record
+ 	}
+ 	$opensNewElement = ( $tagType == 0 && $lastCh != '/' );
+ 	if ( $opensNewElement ) {
+ 		$openTags[] = $tag; // tag opened (didn't close itself)
+ 	} elseif ( $tagType == 1 && $openTags && $tag == $openTags[count($openTags)-1] ) {
+ 		array_pop( $openTags ); // tag closed
+ 	}
+ 	$tag = '';
+ }
+
/**
* Grammatical transformations, needed for inflected languages
* Invoked by putting {{grammar:case|word}} in a message
function formatTimePeriod( $seconds ) {
if ( $seconds < 10 ) { // under 10 s: one decimal place
- return $this->formatNum( sprintf( "%.1f", $seconds ) ) . wfMsg( 'seconds-abbrev' );
+ return $this->formatNum( sprintf( "%.1f", $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' );
} elseif ( $seconds < 60 ) { // under a minute: rounded whole seconds
- return $this->formatNum( round( $seconds ) ) . wfMsg( 'seconds-abbrev' );
+ return $this->formatNum( round( $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' );
} elseif ( $seconds < 3600 ) { // under an hour: minutes, then seconds
// NOTE(review): round() can yield 60 for the seconds part (e.g. 119.7 s gives 1 min 60 s) — confirm intended
- return $this->formatNum( floor( $seconds / 60 ) ) . wfMsg( 'minutes-abbrev' ) .
- $this->formatNum( round( fmod( $seconds, 60 ) ) ) . wfMsg( 'seconds-abbrev' );
+ return $this->formatNum( floor( $seconds / 60 ) ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' .
+ $this->formatNum( round( fmod( $seconds, 60 ) ) ) . ' ' . wfMsg( 'seconds-abbrev' );
} else { // an hour or more: hours, minutes, then seconds
$hours = floor( $seconds / 3600 );
$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 ); // remainder after whole hours and minutes
- return $this->formatNum( $hours ) . wfMsg( 'hours-abbrev' ) .
- $this->formatNum( $minutes ) . wfMsg( 'minutes-abbrev' ) .
- $this->formatNum( $secondsPart ) . wfMsg( 'seconds-abbrev' );
+ return $this->formatNum( $hours ) . ' ' . wfMsg( 'hours-abbrev' ) . ' ' .
+ $this->formatNum( $minutes ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' .
+ $this->formatNum( $secondsPart ) . ' ' . wfMsg( 'seconds-abbrev' );
}
}