* @return array
*/
private function getSummaryInputAttributes( array $inputAttrs = null ) {
- // Note: the maxlength is overridden in JS to 255 and to make it use UTF-8 bytes, not characters.
+ $conf = $this->context->getConfig();
+ $oldCommentSchema = $conf->get( 'CommentTableSchemaMigrationStage' ) === MIGRATION_OLD;
+ // HTML maxlength uses "UTF-16 code units", which means that characters outside BMP
+ // (e.g. emojis) count for two each. This limit is overridden in JS to instead count
+ // Unicode codepoints (or 255 UTF-8 bytes for old schema).
return ( is_array( $inputAttrs ) ? $inputAttrs : [] ) + [
'id' => 'wpSummary',
'name' => 'wpSummary',
- 'maxlength' => '200',
+ 'maxlength' => $oldCommentSchema ? 200 : CommentStore::COMMENT_CHARACTER_LIMIT,
'tabindex' => 1,
'size' => 60,
'spellcheck' => 'true',
$user = $context->getUser();
$output = $context->getOutput();
$lang = $context->getLanguage();
- $cascadingRestrictionLevels = $context->getConfig()->get( 'CascadingRestrictionLevels' );
+ $conf = $context->getConfig();
+ $cascadingRestrictionLevels = $conf->get( 'CascadingRestrictionLevels' );
+ $oldCommentSchema = $conf->get( 'CommentTableSchemaMigrationStage' ) === MIGRATION_OLD;
$out = '';
if ( !$this->disabled ) {
$output->addModules( 'mediawiki.legacy.protect' );
$this->mReasonSelection,
'mwProtect-reason', 4 );
+ // HTML maxlength uses "UTF-16 code units", which means that characters outside BMP
+ // (e.g. emojis) count for two each. This limit is overridden in JS to instead count
+ // Unicode codepoints (or 180 UTF-8 bytes for old schema).
+ // Subtract arbitrary 75 to leave some space for the autogenerated null edit's summary
+ // and other texts chosen by dropdown menus on this page.
+ $maxlength = $oldCommentSchema ? 180 : CommentStore::COMMENT_CHARACTER_LIMIT - 75;
+
$out .= Xml::openElement( 'table', [ 'id' => 'mw-protect-table3' ] ) .
Xml::openElement( 'tbody' );
$out .= "
</td>
<td class='mw-input'>" .
Xml::input( 'mwProtect-reason', 60, $this->mReason, [ 'type' => 'text',
- 'id' => 'mwProtect-reason', 'maxlength' => 180 ] ) .
- // Limited maxlength as the database trims at 255 bytes and other texts
- // chosen by dropdown menus on this page are also included in this database field.
- // The byte limit of 180 bytes is enforced in javascript
+ 'id' => 'mwProtect-reason', 'maxlength' => $maxlength ] ) .
"</td>
</tr>";
# Disallow watching is user is not logged in
}
$illegalFileChars = $conf->get( 'IllegalFileChars' );
+ $oldCommentSchema = $conf->get( 'CommentTableSchemaMigrationStage' ) === MIGRATION_OLD;
// Build list of variables
$vars = [
'wgResourceLoaderStorageEnabled' => $conf->get( 'ResourceLoaderStorageEnabled' ),
'wgForeignUploadTargets' => $conf->get( 'ForeignUploadTargets' ),
'wgEnableUploads' => $conf->get( 'EnableUploads' ),
+ 'wgCommentByteLimit' => $oldCommentSchema ? 255 : null,
+ 'wgCommentCodePointLimit' => $oldCommentSchema ? null : CommentStore::COMMENT_CHARACTER_LIMIT,
];
Hooks::run( 'ResourceLoaderGetConfigVars', [ &$vars ] );
'focus.lengthLimit',
'blur.lengthLimit'
].join( ' ' ),
- trimByteLength = require( 'mediawiki.String' ).trimByteLength;
+ trimByteLength = require( 'mediawiki.String' ).trimByteLength,
+ trimCodePointLength = require( 'mediawiki.String' ).trimCodePointLength;
/**
* Utility function to trim down a string, based on byteLimit
* function, if none, pass empty string.
* @param {string} newVal New value that may have to be trimmed down.
* @param {number} byteLimit Number of bytes the value may be in size.
- * @param {Function} [fn] See jQuery#byteLimit.
+ * @param {Function} [filterFn] See jQuery#byteLimit.
* @return {Object}
* @return {string} return.newVal
* @return {boolean} return.trimmed
mw.log.deprecate( $, 'trimByteLength', trimByteLength,
'Use require( \'mediawiki.String\' ).trimByteLength instead.', '$.trimByteLength' );
- /**
- * Enforces a byte limit on an input field, so that UTF-8 entries are counted as well,
- * when, for example, a database field has a byte limit rather than a character limit.
- * Plugin rationale: Browser has native maxlength for number of characters, this plugin
- * exists to limit number of bytes instead.
- *
- * Can be called with a custom limit (to use that limit instead of the maxlength attribute
- * value), a filter function (in case the limit should apply to something other than the
- * exact input value), or both. Order of parameters is important!
- *
- * @param {number} [limit] Limit to enforce, fallsback to maxLength-attribute,
- * called with fetched value as argument.
- * @param {Function} [fn] Function to call on the string before assessing the length.
- * @return {jQuery}
- * @chainable
- */
- $.fn.byteLimit = function ( limit, fn ) {
+ function lengthLimit( trimFn, limit, filterFn ) {
+ var allowNativeMaxlength = trimFn === trimByteLength;
+
// If the first argument is the function,
- // set fn to the first argument's value and ignore the second argument.
+ // set filterFn to the first argument's value and ignore the second argument.
if ( $.isFunction( limit ) ) {
- fn = limit;
+ filterFn = limit;
limit = undefined;
// Either way, verify it is a function so we don't have to call
// isFunction again after this.
- } else if ( !fn || !$.isFunction( fn ) ) {
- fn = undefined;
+ } else if ( !filterFn || !$.isFunction( filterFn ) ) {
+ filterFn = undefined;
}
// The following is specific to each element in the collection.
$el = $( el );
- // If no limit was passed to byteLimit(), use the maxlength value.
+ // If no limit was passed to lengthLimit(), use the maxlength value.
// Can't re-use 'limit' variable because it's in the higher scope
// that would affect the next each() iteration as well.
// Note that we use attribute to read the value instead of property,
return;
}
- if ( fn ) {
+ if ( filterFn ) {
// Save function for reference
- $el.data( 'byteLimit.callback', fn );
+ $el.data( 'lengthLimit.callback', filterFn );
}
// Remove old event handlers (if there are any)
- $el.off( '.byteLimit' );
+ $el.off( '.lengthLimit' );
- if ( fn ) {
+ if ( filterFn || !allowNativeMaxlength ) {
// Disable the native maxLength (if there is any), because it interferes
- // with the (differently calculated) byte limit.
- // Aside from being differently calculated (average chars with byteLimit
- // is lower), we also support a callback which can make it to allow longer
+ // with the (differently calculated) character/byte limit.
+ // Aside from being differently calculated,
+ // we also support a callback which can make it to allow longer
// values (e.g. count "Foo" from "User:Foo").
// maxLength is a strange property. Removing or setting the property to
// undefined directly doesn't work. Instead, it can only be unset internally
$el.removeAttr( 'maxlength' );
} else {
- // If we don't have a callback the bytelimit can only be lower than the charlimit
+ // For $.byteLimit only, if we don't have a callback,
+ // the byteLimit can only be lower than the native maxLength limit
// (that is, there are no characters less than 1 byte in size). So lets (re-)enforce
// the native limit for efficiency when possible (it will make the while-loop below
- // faster by there being less left to interate over).
+ // faster by there being less left to iterate over). This does not work for $.codePointLimit
+ // (code units for surrogates represent half a character each).
$el.attr( 'maxlength', elLimit );
}
// See https://www.w3.org/TR/DOM-Level-3-Events/#events-keyboard-event-order for
// the order and characteristics of the key events.
$el.on( eventKeys, function () {
- var res = trimByteLength(
+ var res = trimFn(
prevSafeVal,
this.value,
elLimit,
- fn
+ filterFn
);
// Only set value property if it was trimmed, because whenever the
$el.trigger( 'change' );
}
// Always adjust prevSafeVal to reflect the input value. Not doing this could cause
- // trimByteLength to compare the new value to an empty string instead of the
+ // trimFn to compare the new value to an empty string instead of the
// old value, resulting in trimming always from the end (T42850).
prevSafeVal = res.newVal;
} );
} );
+ }
+
+ /**
+ * Enforces a byte limit on an input field, assuming UTF-8 encoding, for situations
+ * when, for example, a database field has a byte limit rather than a character limit.
+ * Plugin rationale: Browser has native maxlength for number of characters (technically,
+ * UTF-16 code units), this plugin exists to limit number of bytes instead.
+ *
+ * Can be called with a custom limit (to use that limit instead of the maxlength attribute
+ * value), a filter function (in case the limit should apply to something other than the
+ * exact input value), or both. Order of parameters is important! If a function is passed
+ * as the first argument, it is used as the filter function and the second argument is ignored.
+ *
+ * This is a thin wrapper that calls lengthLimit() with trimByteLength as the trimming function.
+ *
+ * @param {number} [limit] Limit to enforce, falls back to the maxlength attribute,
+ * called with fetched value as argument.
+ * @param {Function} [filterFn] Function to call on the string before assessing the length.
+ * @return {jQuery}
+ * @chainable
+ */
+ $.fn.byteLimit = function ( limit, filterFn ) {
+ return lengthLimit.call( this, trimByteLength, limit, filterFn );
+ };
+
+ /**
+ * Enforces a codepoint (character) limit on an input field.
+ *
+ * For unfortunate historical reasons, browsers' native maxlength counts [the number of UTF-16
+ * code units rather than Unicode codepoints] [1], which means that codepoints outside the Basic
+ * Multilingual Plane (e.g. many emojis) count as 2 characters each. This plugin exists to
+ * correct this.
+ *
+ * [1]: https://www.w3.org/TR/html5/sec-forms.html#limiting-user-input-length-the-maxlength-attribute
+ *
+ * Can be called with a custom limit (to use that limit instead of the maxlength attribute
+ * value), a filter function (in case the limit should apply to something other than the
+ * exact input value), or both. Order of parameters is important! If a function is passed
+ * as the first argument, it is used as the filter function and the second argument is ignored.
+ *
+ * This is a thin wrapper that calls lengthLimit() with trimCodePointLength as the trimming
+ * function.
+ *
+ * @param {number} [limit] Limit to enforce, falls back to the maxlength attribute,
+ * called with fetched value as argument.
+ * @param {Function} [filterFn] Function to call on the string before assessing the length.
+ * @return {jQuery}
+ * @chainable
+ */
+ $.fn.codePointLimit = function ( limit, filterFn ) {
+ return lengthLimit.call( this, trimCodePointLength, limit, filterFn );
};
/**
$( function () {
var editBox, scrollTop, $editForm,
- // TODO T6714: Once this can be adjusted, read this from config.
- summaryByteLimit = 255,
+ summaryCodePointLimit = mw.config.get( 'wgCommentCodePointLimit' ),
+ summaryByteLimit = mw.config.get( 'wgCommentByteLimit' ),
wpSummary = OO.ui.infuse( $( '#wpSummaryWidget' ) );
// Show a byte-counter to users with how many bytes are left for their edit summary.
// TODO: This looks a bit weird, as there is no unit in the UI, just numbers; showing
// 'bytes' confused users in testing, and showing 'chars' would be a lie. See T42035.
- mw.widgets.visibleByteLimit( wpSummary, summaryByteLimit );
+ // (Showing 'chars' is still confusing with the code point limit, since it's not obvious
+ // that e.g. combining diacritics or zero-width punctuation count as characters.)
+ if ( summaryCodePointLimit ) {
+ mw.widgets.visibleCodePointLimit( wpSummary, summaryCodePointLimit );
+ } else if ( summaryByteLimit ) {
+ mw.widgets.visibleByteLimit( wpSummary, summaryByteLimit );
+ }
// Restore the edit box scroll state following a preview operation,
// and set up a form submission handler to remember this state.
( function ( mw, $ ) {
+ var ProtectionForm,
+ reasonCodePointLimit = mw.config.get( 'wgCommentCodePointLimit' ),
+ reasonByteLimit = mw.config.get( 'wgCommentByteLimit' );
- var ProtectionForm = window.ProtectionForm = {
+ ProtectionForm = window.ProtectionForm = {
/**
* Set up the protection chaining interface (i.e. "unlock move permissions" checkbox)
* on the protection form
this.toggleUnchainedInputs( !this.areAllTypesMatching() );
}
- $( '#mwProtect-reason' ).byteLimit( 180 );
+ // Arbitrary 75 to leave some space for the autogenerated null edit's summary
+ if ( reasonCodePointLimit ) {
+ $( '#mwProtect-reason' ).codePointLimit( reasonCodePointLimit - 75 );
+ } else if ( reasonByteLimit ) {
+ $( '#mwProtect-reason' ).byteLimit( reasonByteLimit - 75 );
+ }
this.updateCascadeCheckbox();
return true;
( function ( mw ) {
- var byteLength = require( 'mediawiki.String' ).byteLength;
+ var byteLength = require( 'mediawiki.String' ).byteLength,
+ codePointLength = require( 'mediawiki.String' ).codePointLength;
/**
* @class mw.widgets
textInputWidget.$input.byteLimit( limit );
};
+ /**
+ * Add a visible codepoint (character) limit label to a TextInputWidget.
+ *
+ * The label shows how many codepoints remain (the limit minus the codepoint length of the
+ * current value) and is refreshed on every 'change' event of the widget.
+ *
+ * Uses jQuery#codePointLimit to enforce the limit.
+ *
+ * @param {OO.ui.TextInputWidget} textInputWidget Text input widget
+ * @param {number} [limit] Codepoint limit, defaults to $input's maxlength
+ * (a falsy value such as 0 also falls back to the maxlength attribute)
+ */
+ mw.widgets.visibleCodePointLimit = function ( textInputWidget, limit ) {
+ limit = limit || +textInputWidget.$input.attr( 'maxlength' );
+
+ function updateCount() {
+ textInputWidget.setLabel( ( limit - codePointLength( textInputWidget.getValue() ) ).toString() );
+ }
+ textInputWidget.on( 'change', updateCount );
+ // Initialise the label with the count for the current value
+ updateCount();
+
+ // Actually enforce limit
+ textInputWidget.$input.codePointLimit( limit );
+ };
+
}( mediaWiki ) );
.length;
}
+ /**
+ * Calculate the character length of a string (accounting for UTF-16 surrogates).
+ *
+ * Each well-formed surrogate pair is collapsed into a single placeholder character before
+ * the length is taken, so it counts as one. Unpaired surrogates are not matched by the
+ * pattern and therefore still count as one each.
+ *
+ * @param {string} str
+ * @return {number}
+ */
+ function codePointLength( str ) {
+ return str
+ // High surrogate + low surrogate pairs represent one character (codepoint) each
+ .replace( /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, '*' )
+ .length;
+ }
+
// Like String#charAt, but return the pair of UTF-16 surrogates for characters outside of BMP.
function codePointAt( string, offset, backwards ) {
// We don't need to check for offsets at the beginning or end of string,
}
}
- /**
- * Utility function to trim down a string, based on byteLimit
- * and given a safe start position. It supports insertion anywhere
- * in the string, so "foo" to "fobaro" if limit is 4 will result in
- * "fobo", not "foba". Basically emulating the native maxlength by
- * reconstructing where the insertion occurred.
- *
- * @param {string} safeVal Known value that was previously returned by this
- * function, if none, pass empty string.
- * @param {string} newVal New value that may have to be trimmed down.
- * @param {number} byteLimit Number of bytes the value may be in size.
- * @param {Function} [fn] Function to call on the string before assessing the length.
- * @return {Object}
- * @return {string} return.newVal
- * @return {boolean} return.trimmed
- */
- function trimByteLength( safeVal, newVal, byteLimit, fn ) {
+ function trimLength( safeVal, newVal, length, lengthFn ) {
var startMatches, endMatches, matchesLen, inpParts, chopOff, oldChar, newChar,
oldVal = safeVal;
// Run the hook if one was provided, but only on the length
// assessment. The value itself is not to be affected by the hook.
- if ( byteLength( fn ? fn( newVal ) : newVal ) <= byteLimit ) {
+ if ( lengthFn( newVal ) <= length ) {
// Limit was not reached, just remember the new value
// and let the user continue.
return {
// Chop off characters from the end of the "inserted content" string
// until the limit is statisfied.
- if ( fn ) {
- // stop, when there is nothing to slice - T43450
- while ( byteLength( fn( inpParts.join( '' ) ) ) > byteLimit && inpParts[ 1 ].length > 0 ) {
- // Do not chop off halves of surrogate pairs
- chopOff = /[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( inpParts[ 1 ] ) ? 2 : 1;
- inpParts[ 1 ] = inpParts[ 1 ].slice( 0, -chopOff );
- }
- } else {
- while ( byteLength( inpParts.join( '' ) ) > byteLimit ) {
- // Do not chop off halves of surrogate pairs
- chopOff = /[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( inpParts[ 1 ] ) ? 2 : 1;
- inpParts[ 1 ] = inpParts[ 1 ].slice( 0, -chopOff );
- }
+ // Make sure to stop when there is nothing to slice (T43450).
+ while ( lengthFn( inpParts.join( '' ) ) > length && inpParts[ 1 ].length > 0 ) {
+ // Do not chop off halves of surrogate pairs
+ chopOff = /[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( inpParts[ 1 ] ) ? 2 : 1;
+ inpParts[ 1 ] = inpParts[ 1 ].slice( 0, -chopOff );
}
return {
newVal: inpParts.join( '' ),
- // For pathological fn() that always returns a value longer than the limit, we might have
+ // For pathological lengthFn() that always returns a length greater than the limit, we might have
// ended up not trimming - check for this case to avoid infinite loops
trimmed: newVal !== inpParts.join( '' )
};
}
+ /**
+ * Utility function to trim down a string, based on byteLimit
+ * and given a safe start position. It supports insertion anywhere
+ * in the string, so "foo" to "fobaro" if limit is 4 will result in
+ * "fobo", not "foba". Basically emulating the native maxlength by
+ * reconstructing where the insertion occurred.
+ *
+ * The actual trimming is delegated to trimLength(); this wrapper only chooses how the
+ * length is measured: byteLength() of the value, or of filterFn( value ) when a filter
+ * function is given.
+ *
+ * @param {string} safeVal Known value that was previously returned by this
+ * function, if none, pass empty string.
+ * @param {string} newVal New value that may have to be trimmed down.
+ * @param {number} byteLimit Number of bytes the value may be in size.
+ * @param {Function} [filterFn] Function to call on the string before assessing the length.
+ * It is only applied when measuring the length.
+ * @return {Object}
+ * @return {string} return.newVal
+ * @return {boolean} return.trimmed
+ */
+ function trimByteLength( safeVal, newVal, byteLimit, filterFn ) {
+ var lengthFn;
+ if ( filterFn ) {
+ lengthFn = function ( val ) {
+ return byteLength( filterFn( val ) );
+ };
+ } else {
+ lengthFn = byteLength;
+ }
+
+ return trimLength( safeVal, newVal, byteLimit, lengthFn );
+ }
+
+ /**
+ * Utility function to trim down a string, based on codePointLimit
+ * and given a safe start position. It supports insertion anywhere
+ * in the string, so "foo" to "fobaro" if limit is 4 will result in
+ * "fobo", not "foba". Basically emulating the native maxlength by
+ * reconstructing where the insertion occurred.
+ *
+ * The actual trimming is delegated to trimLength(); this wrapper only chooses how the
+ * length is measured: codePointLength() of the value, or of filterFn( value ) when a
+ * filter function is given.
+ *
+ * @param {string} safeVal Known value that was previously returned by this
+ * function, if none, pass empty string.
+ * @param {string} newVal New value that may have to be trimmed down.
+ * @param {number} codePointLimit Number of codepoints (characters) the value may be in size.
+ * @param {Function} [filterFn] Function to call on the string before assessing the length.
+ * It is only applied when measuring the length.
+ * @return {Object}
+ * @return {string} return.newVal
+ * @return {boolean} return.trimmed
+ */
+ function trimCodePointLength( safeVal, newVal, codePointLimit, filterFn ) {
+ var lengthFn;
+ if ( filterFn ) {
+ lengthFn = function ( val ) {
+ return codePointLength( filterFn( val ) );
+ };
+ } else {
+ lengthFn = codePointLength;
+ }
+
+ return trimLength( safeVal, newVal, codePointLimit, lengthFn );
+ }
+
module.exports = {
byteLength: byteLength,
- trimByteLength: trimByteLength
+ codePointLength: codePointLength,
+ trimByteLength: trimByteLength,
+ trimCodePointLength: trimCodePointLength
};
}() );