2 var byteLength
= require( 'mediawiki.String' ).byteLength
;
// Register the QUnit module that groups the byteLength tests, passing the
// environment object returned by QUnit.newMwEnvironment().
QUnit.module( 'mediawiki.String.byteLength', QUnit.newMwEnvironment() );
6 QUnit
.test( 'Simple text', function ( assert
) {
7 var azLc
= 'abcdefghijklmnopqrstuvwxyz',
8 azUc
= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
13 assert
.strictEqual( byteLength( azLc
), 26, 'Lowercase a-z' );
14 assert
.strictEqual( byteLength( azUc
), 26, 'Uppercase A-Z' );
15 assert
.strictEqual( byteLength( num
), 10, 'Numbers 0-9' );
16 assert
.strictEqual( byteLength( x
), 1, 'An asterisk' );
17 assert
.strictEqual( byteLength( space
), 3, '3 spaces' );
21 QUnit
.test( 'Special text', function ( assert
) {
22 // https://en.wikipedia.org/wiki/UTF-8
// Character \U00024B62 (Han script) can't be represented in JavaScript as a single UTF-16
// code unit; instead it is encoded as a surrogate pair of two separate code units.
30 // http://codepoints.net/U+24B62
31 // http://www.fileformat.info/info/unicode/char/24B62/index.htm
32 u024B62
= '\uD852\uDF62';
34 assert
.strictEqual( byteLength( u0024
), 1, 'U+0024' );
35 assert
.strictEqual( byteLength( u00A2
), 2, 'U+00A2' );
36 assert
.strictEqual( byteLength( u20AC
), 3, 'U+20AC' );
37 assert
.strictEqual( byteLength( u024B62
), 4, 'U+024B62 (surrogate pair: \\uD852\\uDF62)' );