* Split the byte length logic into a separate method to allow it to be called directly on a string (easier to test and easier to re-use)
* Added basic unit tests for it.
'scripts' => 'resources/jquery/jquery.autoEllipsis.js',
'dependencies' => 'jquery.highlightText',
),
+ 'jquery.byteLength' => array(
+ 'scripts' => 'resources/jquery/jquery.byteLength.js',
+ ),
'jquery.byteLimit' => array(
'scripts' => 'resources/jquery/jquery.byteLimit.js',
+ 'dependencies' => 'jquery.byteLength',
),
'jquery.checkboxShiftClick' => array(
'scripts' => 'resources/jquery/jquery.checkboxShiftClick.js',
--- /dev/null
+/**
+ * jQuery.byteLength
+ *
+ * Calculate the byte length of a string (accounting for UTF-8).
+ *
+ * @author Jan Paul Posma
+ */
+jQuery.byteLength = function( str ) {
+
+ // This basically figures out how many bytes a UTF-16 string (which is what js sees)
+ // will take in UTF-8 by replacing a 2 byte character with 2 *'s, etc, and counting that.
+ // Note, surrogate (\uD800-\uDFFF) characters are counted as 2 bytes, since there's two of them
+ // and the actual character takes 4 bytes in UTF-8 (2*2=4). Might not work perfectly in
+ // edge cases such as illegal sequences, but that should never happen.
+ return str
+ .replace( /[\u0080-\u07FF\uD800-\uDFFF]/g, '**' )
+ .replace( /[\u0800-\uD7FF\uE000-\uFFFF]/g, '***' )
+ .length;
+}
/**
* jQuery byteLimit
+ *
+ * @author Jan Paul Posma
*/
( function( $ ) {
this.attr( 'maxLength', limit );
}
- // Nothing passed and/or empty attribute, return this for further chaining.
+ // Nothing passed and/or empty attribute, return without binding an event.
if ( limit == null ) {
return this;
}
return true; //a special key (backspace, etc) so don't interfere.
}
- // This basically figures out how many bytes a UTF-16 string (which is what js sees)
- // will take in UTF-8 by replacing a 2 byte character with 2 *'s, etc, and counting that.
- // Note, surrogate (\uD800-\uDFFF) characters are counted as 2 bytes, since there's two of them
- // and the actual character takes 4 bytes in UTF-8 (2*2=4). Might not work perfectly in
- // edge cases such as illegal sequences, but that should never happen.
-
- var len = this.value
- .replace( /[\u0080-\u07FF\uD800-\uDFFF]/g, '**' )
- .replace( /[\u0800-\uD7FF\uE000-\uFFFF]/g, '***' )
- .length;
+ var len = $.byteLength( this.value );
// limit-3 as this doesn't count the character about to be inserted.
if ( len > ( limit-3 ) ) {
<!-- MW: Non-default modules -->
<script src="../../resources/jquery/jquery.autoEllipsis.js"></script>
+ <script src="../../resources/jquery/jquery.byteLength.js"></script>
<script src="../../resources/jquery/jquery.colorUtil.js"></script>
<script src="../../resources/jquery/jquery.tabIndex.js"></script>
<script src="../../resources/jquery/jquery.tablesorter.js"></script>
<script src="suites/resources/mediawiki/mediawiki.util.js"></script>
<script src="suites/resources/jquery/jquery.autoEllipsis.js"></script>
+ <script src="suites/resources/jquery/jquery.byteLength.js"></script>
<script src="suites/resources/jquery/jquery.colorUtil.js"></script>
<script src="suites/resources/jquery/jquery.tabIndex.js"></script>
<script src="suites/resources/jquery/jquery.tablesorter.test.js" charset="UTF-8"></script>
--- /dev/null
+module( 'jquery.byteLength.js' );
+
+test( '-- Initial check', function() {
+ expect(1);
+ ok( $.byteLength, 'jQuery.byteLength defined' );
+} );
+
+test( 'Simple text', function() {
+ expect(5);
+
+ var azLc = 'abcdefghijklmnopqrstuvwxyz',
+ azUc = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
+ num = '0123456789',
+ x = '*',
+ space = ' ';
+
+ equal( $.byteLength( azLc ), 26, 'Lowercase a-z' );
+ equal( $.byteLength( azUc ), 26, 'Uppercase A-Z' );
+ equal( $.byteLength( num ), 10, 'Numbers 0-9' );
+ equal( $.byteLength( x ), 1, 'An asterisk' );
+ equal( $.byteLength( space ), 3, '3 spaces' );
+
+} );
+
+test( 'Special text', window.foo = function() {
+ expect(4);
+
+ // http://en.wikipedia.org/wiki/UTF-8
+ var U_0024 = '\u0024',
+ U_00A2 = '\u00A2',
+ U_20AC = '\u20AC',
+ U_024B62 = '\u024B62';
+
+ strictEqual( $.byteLength( U_0024 ), 1, 'U+0024: 1 byte (dollar sign) $' );
+ strictEqual( $.byteLength( U_00A2 ), 2, 'U+00A2: 2 bytes (cent sign) ¢' );
+ strictEqual( $.byteLength( U_20AC ), 3, 'U+20AC: 3 bytes (euro sign) €' );
+ strictEqual( $.byteLength( U_024B62 ), 4, 'U+024B62: 4 bytes 𤭢 \U00024B62 ' );
+} );