2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
15 slice
= Array
.prototype.slice
,
17 // Magic words and their expansions. Server-side data is added to this below.
19 PAGENAME
: mw
.config
.get( 'wgPageName' ),
20 PAGENAMEE
: mw
.util
.wikiUrlencode( mw
.config
.get( 'wgPageName' ) )
22 // Whitelist for allowed HTML elements in wikitext.
23 // Self-closing tags are not currently supported.
24 // Filled in with server-side data below
25 allowedHtmlElements
: [],
26 // Key tag name, value allowed attributes for that tag.
27 // See Sanitizer::setupAttributeWhitelist
28 allowedHtmlCommonAttributes
: [
41 // Attributes allowed for specific elements.
42 // Key is element name in lower case
43 // Value is array of allowed attributes for that element
44 allowedHtmlAttributesByElement
: {},
45 messages
: mw
.messages
,
46 language
: mw
.language
,
48 // Same meaning as in mediawiki.js.
50 // Only 'text', 'parse', and 'escaped' are supported, and the
51 // actual escaping for 'escaped' is done by other code (generally
52 // through mediawiki.js).
54 // However, note that this default only
55 // applies to direct calls to jqueryMsg. The default for mediawiki.js itself
56 // is 'text', including when it uses jqueryMsg.
60 // Add in server-side data (allowedHtmlElements and magic words)
61 $.extend( true, parserDefaults
, require( './parserDefaults.json' ) );
64 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
65 * convert what it detects as an htmlString to an element.
67 * If our own HtmlEmitter jQuery object is given, its children will be unwrapped and appended to
70 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
73 * @param {jQuery} $parent Parent node wrapped by jQuery
74 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
75 * @return {jQuery} $parent
/**
 * Append children to a jQuery-wrapped parent without letting jQuery interpret
 * strings as HTML.
 *
 * Every child whose `typeof` is not 'object' (strings, numbers, booleans,
 * undefined, functions) is converted to a TextNode first, so `append` cannot
 * treat it as an htmlString. A jQuery object carrying the
 * 'mediaWiki_htmlEmitter' class is replaced by its contents. Any other object
 * (jQuery, HTMLElement, TextNode, null, ...) is passed through unchanged.
 *
 * The `children` argument is never modified: converted nodes are collected in
 * a new array, so a caller-owned array keeps its original elements.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append; a non-array value is
 *  treated as a one-element list
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		list = Array.isArray( children ) ? children : [ children ],
		nodes = [];

	// Index loop (not Array#map) so that holes and explicit undefined entries
	// are visited and converted as well.
	for ( i = 0, len = list.length; i < len; i++ ) {
		child = list[ i ];
		if ( typeof child !== 'object' ) {
			child = document.createTextNode( child );
		}
		if ( child instanceof $ && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			// Unwrap the synthetic emitter span; only its contents are appended.
			child = child.contents();
		}
		nodes.push( child );
	}

	return $parent.append( nodes );
}
97 * Decodes the main HTML entities, those encoded by mw.html.escape.
100 * @param {string} encoded Encoded string
101 * @return {string} String with those entities decoded
/**
 * Decode the five HTML entities that mw.html.escape produces: the numeric
 * entity for the apostrophe (#039) and the named entities quot, lt, gt and amp.
 *
 * Decoding is done in a single pass, so text that was escaped twice is
 * unescaped only one level (an escaped ampersand followed by "lt;" yields the
 * entity text, not the angle bracket).
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	// Keyed by the entity body (the part between the ampersand and the semicolon).
	var entityChars = {
		'#039': '\'',
		quot: '"',
		lt: '<',
		gt: '>',
		amp: '&'
	};

	return encoded.replace( /&(#039|quot|lt|gt|amp);/g, function ( match, name ) {
		return entityChars[ name ];
	} );
}
113 * Turn input into a string.
116 * @param {string|jQuery} input
117 * @return {string} Textual value of input
/**
 * Coerce a message argument to a string.
 *
 * A jQuery object contributes its text content (via `.text()`); any other
 * value is passed to `String()` as is.
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	var value = input instanceof $ ? input.text() : input;
	return String( value );
}
127 * Given parser options, return a function that parses a key and replacements, returning jQuery object
129 * Try to parse a key and optional replacements, returning a jQuery object that may be a tree of jQuery nodes.
130 * If there was an error parsing, return the key and the error message (wrapped in jQuery). This should put the error right into
131 * the interface, without causing the page to halt script execution, and it hopefully should be clearer how to fix it.
134 * @param {Object} options Parser options
136 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
137 * @return {jQuery} return.return
139 function getFailableParserFn( options
) {
140 return function ( args
) {
142 parser
= new mw
.jqueryMsg
.Parser( options
),
144 argsArray
= Array
.isArray( args
[ 1 ] ) ? args
[ 1 ] : slice
.call( args
, 1 );
146 return parser
.parse( key
, argsArray
);
148 fallback
= parser
.settings
.messages
.get( key
);
149 mw
.log
.warn( 'mediawiki.jqueryMsg: ' + key
+ ': ' + e
.message
);
150 mw
.track( 'mediawiki.jqueryMsg.error', {
152 errorMessage
: e
.message
154 return $( '<span>' ).text( fallback
);
162 * Initialize parser defaults.
164 * ResourceLoaderJqueryMsgModule calls this to provide default values from
165 * Sanitizer.php for allowed HTML elements. To override this data for individual
166 * parsers, pass the relevant options to mw.jqueryMsg.Parser.
169 * @param {Object} data New data to extend parser defaults with
170 * @param {boolean} [deep=false] Whether the extend is done recursively (deep)
/**
 * Extend the shared parser defaults with new data.
 *
 * To override this data for an individual parser instead, pass the relevant
 * options to mw.jqueryMsg.Parser.
 *
 * @private
 * @param {Object} data New data to extend parser defaults with
 * @param {boolean} [deep=false] Whether the extend is done recursively (deep)
 */
mw.jqueryMsg.setParserDefaults = function ( data, deep ) {
	// jQuery.extend merges recursively only when its first argument is `true`.
	var extendArgs = deep ?
		[ true, parserDefaults, data ] :
		[ parserDefaults, data ];

	$.extend.apply( $, extendArgs );
};
181 * Get current parser defaults.
183 * Primarily used for the unit test. Returns a copy.
/**
 * Get the current parser defaults.
 *
 * Returns a new object with the top-level properties of the defaults copied
 * onto it (a shallow copy: nested arrays and objects are shared with the
 * live defaults).
 *
 * @private
 * @return {Object} Copy of the parser defaults
 */
mw.jqueryMsg.getParserDefaults = function () {
	var snapshot = {};
	$.extend( snapshot, parserDefaults );
	return snapshot;
};
193 * Returns a function suitable for static use, to construct strings from a message key (and optional replacements).
197 * var format = mediaWiki.jqueryMsg.getMessageFunction( options );
198 * $( '#example' ).text( format( 'hello-user', username ) );
200 * This returns only strings, so it destroys any bindings. If you want to preserve bindings, use the
201 * jQuery plugin version instead. This was originally created to ease migration from `window.gM()`,
202 * from a time when the parser used by `mw.message` was not extendable.
204 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
205 * somefunction( a, b, c, d )
207 * somefunction( a, [b, c, d] )
209 * @param {Object} options parser options
210 * @return {Function} Function The message formatter
211 * @return {string} return.key Message key.
212 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
213 * @return {string} return.return Rendered HTML.
215 mw
.jqueryMsg
.getMessageFunction = function ( options
) {
216 var failableParserFn
, format
;
218 if ( options
&& options
.format
!== undefined ) {
219 format
= options
.format
;
221 format
= parserDefaults
.format
;
226 if ( !failableParserFn
) {
227 failableParserFn
= getFailableParserFn( options
);
229 failableResult
= failableParserFn( arguments
);
230 if ( format
=== 'text' || format
=== 'escaped' ) {
231 return failableResult
.text();
233 return failableResult
.html();
239 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
240 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
243 * $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
244 * var $userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
245 * $( 'p#headline' ).msg( 'hello-user', $userlink );
247 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
248 * somefunction( a, b, c, d )
250 * somefunction( a, [b, c, d] )
252 * We append to 'this', which in a jQuery plugin context will be the selected elements.
254 * @param {Object} options Parser options
255 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
256 * @return {string} return.key Message key.
257 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
258 * @return {jQuery} return.return
260 mw
.jqueryMsg
.getPlugin = function ( options
) {
261 var failableParserFn
;
265 if ( !failableParserFn
) {
266 failableParserFn
= getFailableParserFn( options
);
268 $target
= this.empty();
269 appendWithoutParsing( $target
, failableParserFn( arguments
) );
276 * Describes an object, whose primary duty is to .parse() message keys.
280 * @param {Object} options
282 mw
.jqueryMsg
.Parser = function ( options
) {
283 this.settings
= $.extend( {}, parserDefaults
, options
);
284 this.settings
.onlyCurlyBraceTransform
= ( this.settings
.format
=== 'text' || this.settings
.format
=== 'escaped' );
287 this.emitter
= new mw
.jqueryMsg
.HtmlEmitter( this.settings
.language
, this.settings
.magic
);
289 // Backwards-compatible alias
290 // @deprecated since 1.31
291 mw
.jqueryMsg
.parser
= mw
.jqueryMsg
.Parser
;
293 mw
.jqueryMsg
.Parser
.prototype = {
295 * Where the magic happens.
296 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
297 * If an error is thrown, returns original key, and logs the error
299 * @param {string} key Message key.
300 * @param {Array} replacements Variable replacements for $1, $2... $n
303 parse: function ( key
, replacements
) {
304 var ast
= this.getAst( key
, replacements
);
305 return this.emitter
.emit( ast
, replacements
);
309 * Fetch the message string associated with a key, return parsed structure. Memoized.
310 * Note that we pass '⧼' + key + '⧽' back for a missing message here.
312 * @param {string} key
313 * @param {Array} replacements Variable replacements for $1, $2... $n
314 * @return {string|Array} string of '⧼key⧽' if message missing, simple string if possible, array of arrays if needs parsing
316 getAst: function ( key
, replacements
) {
319 if ( !Object
.prototype.hasOwnProperty
.call( this.astCache
, key
) ) {
320 wikiText
= this.settings
.messages
.get( key
);
322 mw
.config
.get( 'wgUserLanguage' ) === 'qqx' &&
323 ( !wikiText
|| wikiText
=== '(' + key
+ ')' )
325 wikiText
= '(' + key
+ '$*)';
326 } else if ( typeof wikiText
!== 'string' ) {
327 wikiText
= '⧼' + key
+ '⧽';
329 wikiText
= mw
.internalDoTransformFormatForQqx( wikiText
, replacements
);
330 this.astCache
[ key
] = this.wikiTextToAst( wikiText
);
332 return this.astCache
[ key
];
336 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
338 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
339 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
341 * @param {string} input Message string wikitext
343 * @return {Mixed} abstract syntax tree
345 wikiTextToAst: function ( input
) {
347 regularLiteral
, regularLiteralWithoutBar
, regularLiteralWithoutSpace
, regularLiteralWithSquareBrackets
,
348 doubleQuote
, singleQuote
, backslash
, anyCharacter
, asciiAlphabetLiteral
,
349 escapedOrLiteralWithoutSpace
, escapedOrLiteralWithoutBar
, escapedOrRegularLiteral
,
350 whitespace
, dollar
, digits
, htmlDoubleQuoteAttributeValue
, htmlSingleQuoteAttributeValue
,
351 htmlAttributeEquals
, openHtmlStartTag
, optionalForwardSlash
, openHtmlEndTag
, closeHtmlTag
,
352 openExtlink
, closeExtlink
, wikilinkContents
, openWikilink
, closeWikilink
, templateName
, pipe
, colon
,
353 templateContents
, openTemplate
, closeTemplate
,
354 nonWhitespaceExpression
, paramExpression
, expression
, curlyBraceTransformExpression
, result
,
355 settings
= this.settings
,
356 concat
= Array
.prototype.concat
;
358 // Indicates current position in input as we parse through it.
359 // Shared among all parsing functions below.
362 // =========================================================
363 // parsing combinators - could be a library on its own
364 // =========================================================
367 * Try parsers until one works, if none work return null
370 * @param {Function[]} ps
371 * @return {string|null}
373 function choice( ps
) {
376 for ( i
= 0; i
< ps
.length
; i
++ ) {
378 if ( result
!== null ) {
387 * Try several ps in a row, all must succeed or return null.
388 * This is the only eager one.
391 * @param {Function[]} ps
392 * @return {string|null}
394 function sequence( ps
) {
398 for ( i
= 0; i
< ps
.length
; i
++ ) {
400 if ( res
=== null ) {
410 * Run the same parser over and over until it fails.
411 * Must succeed a minimum of n times or return null.
415 * @param {Function} p
416 * @return {string|null}
418 function nOrMore( n
, p
) {
420 var originalPos
= pos
,
423 while ( parsed
!== null ) {
424 result
.push( parsed
);
427 if ( result
.length
< n
) {
436 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
438 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
439 * May be some scoping issue
442 * @param {Function} p
443 * @param {Function} fn
444 * @return {string|null}
446 function transform( p
, fn
) {
449 return result
=== null ? null : fn( result
);
454 * Just make parsers out of simpler JS builtin types
459 * @return {string} return.return
461 function makeStringParser( s
) {
465 if ( input
.substr( pos
, len
) === s
) {
474 * Makes a regex parser, given a RegExp object.
475 * The regex being passed in should start with a ^ to anchor it to the start
479 * @param {RegExp} regex anchored regex
480 * @return {Function} function to parse input based on the regex
482 function makeRegexParser( regex
) {
484 var matches
= input
.slice( pos
).match( regex
);
485 if ( matches
=== null ) {
488 pos
+= matches
[ 0 ].length
;
493 // ===================================================================
494 // General patterns above this line -- wikitext specific parsers below
495 // ===================================================================
497 // Parsing functions follow. All parsing functions work like this:
498 // They don't accept any arguments.
499 // Instead, they just operate non destructively on the string 'input'
500 // As they can consume parts of the string, they advance the shared variable pos,
501 // and return tokens (or whatever else they want to return).
502 // some things are defined as closures and other things as ordinary functions
503 // converting everything to a closure makes it a lot harder to debug... errors pop up
504 // but some debuggers can't tell you exactly where they come from. Also the mutually
505 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
506 // This may be because, to save code, memoization was removed
508 /* eslint-disable no-useless-escape */
509 regularLiteral
= makeRegexParser( /^[^{}\[\]$<\\]/ );
510 regularLiteralWithoutBar
= makeRegexParser( /^[^{}\[\]$\\|]/ );
511 regularLiteralWithoutSpace
= makeRegexParser( /^[^{}\[\]$\s]/ );
512 regularLiteralWithSquareBrackets
= makeRegexParser( /^[^{}$\\]/ );
513 /* eslint-enable no-useless-escape */
515 backslash
= makeStringParser( '\\' );
516 doubleQuote
= makeStringParser( '"' );
517 singleQuote
= makeStringParser( '\'' );
518 anyCharacter
= makeRegexParser( /^./ );
520 openHtmlStartTag
= makeStringParser( '<' );
521 optionalForwardSlash
= makeRegexParser( /^\/?/ );
522 openHtmlEndTag
= makeStringParser( '</' );
523 htmlAttributeEquals
= makeRegexParser( /^\s*=\s*/ );
524 closeHtmlTag
= makeRegexParser( /^\s*>/ );
526 function escapedLiteral() {
527 var result
= sequence( [
531 return result
=== null ? null : result
[ 1 ];
533 escapedOrLiteralWithoutSpace
= choice( [
535 regularLiteralWithoutSpace
537 escapedOrLiteralWithoutBar
= choice( [
539 regularLiteralWithoutBar
541 escapedOrRegularLiteral
= choice( [
545 // Used to define "literals" without spaces, in space-delimited situations
546 function literalWithoutSpace() {
547 var result
= nOrMore( 1, escapedOrLiteralWithoutSpace
)();
548 return result
=== null ? null : result
.join( '' );
550 // Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
551 // it is not a literal in the parameter
552 function literalWithoutBar() {
553 var result
= nOrMore( 1, escapedOrLiteralWithoutBar
)();
554 return result
=== null ? null : result
.join( '' );
558 var result
= nOrMore( 1, escapedOrRegularLiteral
)();
559 return result
=== null ? null : result
.join( '' );
562 function curlyBraceTransformExpressionLiteral() {
563 var result
= nOrMore( 1, regularLiteralWithSquareBrackets
)();
564 return result
=== null ? null : result
.join( '' );
567 asciiAlphabetLiteral
= makeRegexParser( /^[A-Za-z]+/ );
568 htmlDoubleQuoteAttributeValue
= makeRegexParser( /^[^"]*/ );
569 htmlSingleQuoteAttributeValue
= makeRegexParser( /^[^']*/ );
571 whitespace
= makeRegexParser( /^\s+/ );
572 dollar
= makeStringParser( '$' );
573 digits
= makeRegexParser( /^\d+/ );
575 function replacement() {
576 var result
= sequence( [
580 if ( result
=== null ) {
583 return [ 'REPLACE', parseInt( result
[ 1 ], 10 ) - 1 ];
585 openExtlink
= makeStringParser( '[' );
586 closeExtlink
= makeStringParser( ']' );
587 // this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
589 var result
, parsedResult
, target
;
591 parsedResult
= sequence( [
593 nOrMore( 1, nonWhitespaceExpression
),
595 nOrMore( 1, expression
),
598 if ( parsedResult
!== null ) {
599 // When the entire link target is a single parameter, we can't use CONCAT, as we allow
600 // passing fancy parameters (like a whole jQuery object or a function) to use for the
601 // link. Check only if it's a single match, since we can either do CONCAT or not for
602 // singles with the same effect.
603 target
= parsedResult
[ 1 ].length
=== 1 ?
604 parsedResult
[ 1 ][ 0 ] :
605 [ 'CONCAT' ].concat( parsedResult
[ 1 ] );
609 [ 'CONCAT' ].concat( parsedResult
[ 3 ] )
614 openWikilink
= makeStringParser( '[[' );
615 closeWikilink
= makeStringParser( ']]' );
616 pipe
= makeStringParser( '|' );
618 function template() {
619 var result
= sequence( [
624 return result
=== null ? null : result
[ 1 ];
627 function pipedWikilink() {
628 var result
= sequence( [
629 nOrMore( 1, paramExpression
),
631 nOrMore( 1, expression
)
633 return result
=== null ? null : [
634 [ 'CONCAT' ].concat( result
[ 0 ] ),
635 [ 'CONCAT' ].concat( result
[ 2 ] )
639 function unpipedWikilink() {
640 var result
= sequence( [
641 nOrMore( 1, paramExpression
)
643 return result
=== null ? null : [
644 [ 'CONCAT' ].concat( result
[ 0 ] )
648 wikilinkContents
= choice( [
653 function wikilink() {
654 var result
, parsedResult
, parsedLinkContents
;
657 parsedResult
= sequence( [
662 if ( parsedResult
!== null ) {
663 parsedLinkContents
= parsedResult
[ 1 ];
664 result
= [ 'WIKILINK' ].concat( parsedLinkContents
);
669 // TODO: Support data- if appropriate
670 function doubleQuotedHtmlAttributeValue() {
671 var parsedResult
= sequence( [
673 htmlDoubleQuoteAttributeValue
,
676 return parsedResult
=== null ? null : parsedResult
[ 1 ];
679 function singleQuotedHtmlAttributeValue() {
680 var parsedResult
= sequence( [
682 htmlSingleQuoteAttributeValue
,
685 return parsedResult
=== null ? null : parsedResult
[ 1 ];
688 function htmlAttribute() {
689 var parsedResult
= sequence( [
691 asciiAlphabetLiteral
,
694 doubleQuotedHtmlAttributeValue
,
695 singleQuotedHtmlAttributeValue
698 return parsedResult
=== null ? null : [ parsedResult
[ 1 ], parsedResult
[ 3 ] ];
702 * Checks if HTML is allowed
704 * @param {string} startTagName HTML start tag name
705 * @param {string} endTagName HTML end tag name
706 * @param {Object} attributes array of consecutive key value pairs,
707 * with index 2 * n being a name and 2 * n + 1 the associated value
708 * @return {boolean} true if this HTML is allowed, false otherwise
710 function isAllowedHtml( startTagName
, endTagName
, attributes
) {
711 var i
, len
, attributeName
;
713 startTagName
= startTagName
.toLowerCase();
714 endTagName
= endTagName
.toLowerCase();
715 if ( startTagName
!== endTagName
|| settings
.allowedHtmlElements
.indexOf( startTagName
) === -1 ) {
719 for ( i
= 0, len
= attributes
.length
; i
< len
; i
+= 2 ) {
720 attributeName
= attributes
[ i
];
721 if ( settings
.allowedHtmlCommonAttributes
.indexOf( attributeName
) === -1 &&
722 ( settings
.allowedHtmlAttributesByElement
[ startTagName
] || [] ).indexOf( attributeName
) === -1 ) {
730 function htmlAttributes() {
731 var parsedResult
= nOrMore( 0, htmlAttribute
)();
732 // Un-nest attributes array due to structure of jQueryMsg operations (see emit).
733 return concat
.apply( [ 'HTMLATTRIBUTES' ], parsedResult
);
736 // Subset of allowed HTML markup.
737 // Most elements and many attributes allowed on the server are not supported yet.
739 var parsedOpenTagResult
, parsedHtmlContents
, parsedCloseTagResult
,
740 wrappedAttributes
, attributes
, startTagName
, endTagName
, startOpenTagPos
,
741 startCloseTagPos
, endOpenTagPos
, endCloseTagPos
,
744 // Break into three sequence calls. That should allow accurate reconstruction of the original HTML, and requiring an exact tag name match.
745 // 1. open through closeHtmlTag
747 // 3. openHtmlEnd through close
748 // This will allow recording the positions to reconstruct if HTML is to be treated as text.
750 startOpenTagPos
= pos
;
751 parsedOpenTagResult
= sequence( [
753 asciiAlphabetLiteral
,
755 optionalForwardSlash
,
759 if ( parsedOpenTagResult
=== null ) {
764 startTagName
= parsedOpenTagResult
[ 1 ];
766 parsedHtmlContents
= nOrMore( 0, expression
)();
768 startCloseTagPos
= pos
;
769 parsedCloseTagResult
= sequence( [
771 asciiAlphabetLiteral
,
775 if ( parsedCloseTagResult
=== null ) {
776 // Closing tag failed. Return the start tag and contents.
777 return [ 'CONCAT', input
.slice( startOpenTagPos
, endOpenTagPos
) ]
778 .concat( parsedHtmlContents
);
781 endCloseTagPos
= pos
;
782 endTagName
= parsedCloseTagResult
[ 1 ];
783 wrappedAttributes
= parsedOpenTagResult
[ 2 ];
784 attributes
= wrappedAttributes
.slice( 1 );
785 if ( isAllowedHtml( startTagName
, endTagName
, attributes
) ) {
786 result
= [ 'HTMLELEMENT', startTagName
, wrappedAttributes
]
787 .concat( parsedHtmlContents
);
789 // HTML is not allowed, so contents will remain how
790 // it was, while HTML markup at this level will be
792 // E.g. assuming script tags are not allowed:
794 // <script>[[Foo|bar]]</script>
796 // results in '<script>' and '</script>'
797 // (not treated as an HTML tag), surrounding a fully
800 // Concatenate everything from the tag, flattening the contents.
801 result
= [ 'CONCAT', input
.slice( startOpenTagPos
, endOpenTagPos
) ]
802 .concat( parsedHtmlContents
, input
.slice( startCloseTagPos
, endCloseTagPos
) );
808 // <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed.
810 var parsedResult
, plainText
,
813 parsedResult
= sequence( [
814 makeStringParser( '<nowiki>' ),
815 // We use a greedy non-backtracking parser, so we must ensure here that we don't take too much
816 makeRegexParser( /^.*?(?=<\/nowiki>)/ ),
817 makeStringParser( '</nowiki>' )
819 if ( parsedResult
!== null ) {
820 plainText
= parsedResult
[ 1 ];
821 result
= [ 'CONCAT' ].concat( plainText
);
827 templateName
= transform(
828 // see $wgLegalTitleChars
829 // not allowing : due to the need to catch "PLURAL:$1"
830 makeRegexParser( /^[ !"$&'()*,./0-9;=?@A
-Z
^_
`a-z~\x80-\xFF+-]+/ ),
831 function ( result ) { return result.toString(); }
833 function templateParam() {
837 nOrMore( 0, paramExpression )
839 if ( result === null ) {
843 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
844 return expr.length > 1 ? [ 'CONCAT' ].concat( expr ) : expr[ 0 ];
847 function templateWithReplacement() {
848 var result = sequence( [
853 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
855 function templateWithOutReplacement() {
856 var result = sequence( [
861 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
863 function templateWithOutFirstParameter() {
864 var result = sequence( [
868 return result === null ? null : [ result[ 0 ], '' ];
870 colon = makeStringParser( ':' );
871 templateContents = choice( [
873 var res = sequence( [
874 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
875 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
876 choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
877 nOrMore( 0, templateParam )
879 return res === null ? null : res[ 0 ].concat( res[ 1 ] );
882 var res = sequence( [
884 nOrMore( 0, templateParam )
886 if ( res === null ) {
889 return [ res[ 0 ] ].concat( res[ 1 ] );
892 openTemplate = makeStringParser( '{{' );
893 closeTemplate = makeStringParser( '}}' );
894 nonWhitespaceExpression = choice( [
901 paramExpression = choice( [
909 expression = choice( [
919 // Used when only {{-transformation is wanted, for 'text'
920 // or 'escaped' formats
921 curlyBraceTransformExpression = choice( [
924 curlyBraceTransformExpressionLiteral
930 * @param {Function} rootExpression Root parse function
931 * @return {Array|null}
933 function start( rootExpression ) {
934 var result = nOrMore( 0, rootExpression )();
935 if ( result === null ) {
938 return [ 'CONCAT' ].concat( result );
940 // everything above this point is supposed to be stateless/static, but
941 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
942 // finally let's do some actual work...
944 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
947 * For success, the p must have gotten to the end of the input
948 * and returned a non-null.
949 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
951 if ( result === null || pos !== input.length ) {
952 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
960 * Class that primarily exists to emit HTML from parser ASTs.
964 * @param {Object} language
965 * @param {Object} magic
967 mw.jqueryMsg.HtmlEmitter = function ( language, magic ) {
969 this.language = language;
970 // eslint-disable-next-line no-jquery/no-each-util
971 $.each( magic, function ( key, val ) {
972 jmsg[ key.toLowerCase() ] = function () {
978 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
979 * Walk entire node structure, applying replacements and template functions when appropriate
981 * @param {Mixed} node Abstract syntax tree (top node or subnode)
982 * @param {Array} replacements for $1, $2, ... $n
983 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
985 this.emit = function ( node, replacements ) {
986 var ret, subnodes, operation,
988 switch ( typeof node ) {
993 // typeof returns object for arrays
995 // node is an array of nodes
996 // eslint-disable-next-line no-jquery/no-map-util
997 subnodes = $.map( node.slice( 1 ), function ( n ) {
998 return jmsg.emit( n, replacements );
1000 operation = node[ 0 ].toLowerCase();
1001 if ( typeof jmsg[ operation ] === 'function' ) {
1002 ret = jmsg[ operation ]( subnodes, replacements );
1004 throw new Error( 'Unknown operation "' + operation + '"' );
1008 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
1009 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
1010 // The logical thing is probably to return the empty string here when we encounter undefined.
1014 throw new Error( 'Unexpected type in AST: ' + typeof node );
1020 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
1021 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
1022 // If you have 'magic words' then configure the parser to have them upon creation.
1024 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
1025 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
1026 mw.jqueryMsg.HtmlEmitter.prototype = {
1028 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
1029 * Must return a single node to parents -- a jQuery with synthetic span
1030 * However, unwrap any other synthetic spans in our children and pass them upwards
1032 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
1035 concat: function ( nodes ) {
1036 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
1037 // eslint-disable-next-line no-jquery/no-each-util
1038 $.each( nodes, function ( i, node ) {
1039 // Let jQuery append nodes, arrays of nodes and jQuery objects
1040 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1041 appendWithoutParsing( $span, node );
1047 * Return escaped replacement of correct index, or string if unavailable.
1048 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1049 * if the specified parameter is not found return the same string
1050 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1052 * TODO: Throw error if nodes.length > 1 ?
1054 * @param {Array} nodes List of one element, integer, n >= 0
1055 * @param {Array} replacements List of at least n strings
1056 * @return {string|jQuery} replacement
1058 replace: function ( nodes, replacements ) {
1059 var index = parseInt( nodes[ 0 ], 10 );
1061 if ( index < replacements.length ) {
1062 return replacements[ index ];
1064 // index not found, fallback to displaying variable
1065 return '$' + ( index + 1 );
1070 * Transform wiki-link
1073 * It only handles basic cases, either no pipe, or a pipe with an explicit
1076 * It does not attempt to handle features like the pipe trick.
1077 * However, the pipe trick should usually not be present in wikitext retrieved
1078 * from the server, since the replacement is done at save time.
1079 * It may, though, if the wikitext appears in extension-controlled content.
1081 * @param {string[]} nodes
1084 wikilink: function ( nodes ) {
1085 var page, anchor, url, $el;
1087 page = textify( nodes[ 0 ] );
1088 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1089 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1090 if ( page.charAt( 0 ) === ':' ) {
1091 page = page.slice( 1 );
1093 url = mw.util.getUrl( page );
1095 if ( nodes.length === 1 ) {
1096 // [[Some Page]] or [[Namespace:Some Page]]
1099 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1100 anchor = nodes[ 1 ];
1103 $el = $( '<a>' ).attr( {
1107 return appendWithoutParsing( $el, anchor );
1111 * Converts array of HTML element key value pairs to object
1113 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1114 * name and 2 * n + 1 the associated value
1115 * @return {Object} Object mapping attribute name to attribute value
1117 htmlattributes: function ( nodes ) {
1118 var i, len, mapping = {};
1119 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1120 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1126 * Handles an (already-validated) HTML element.
1128 * @param {Array} nodes Nodes to process when creating element
1131 htmlelement: function ( nodes ) {
1132 var tagName, attributes, contents, $element;
1134 tagName = nodes.shift();
1135 attributes = nodes.shift();
1137 $element = $( document.createElement( tagName ) ).attr( attributes );
1138 return appendWithoutParsing( $element, contents );
1142 * Transform parsed structure into external link.
1144 * The "href" can be:
1145 * - a jQuery object, treat it as "enclosing" the link text.
1146 * - a function, treat it as the click handler.
1147 * - a string, or our HtmlEmitter jQuery object, treat it as a URI after stringifying.
1149 * TODO: throw an error if nodes.length > 2 ?
1151 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
1154 extlink: function ( nodes ) {
1157 contents = nodes[ 1 ];
1158 if ( arg instanceof $ && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1162 if ( typeof arg === 'function' ) {
1166 } ).on( 'click keypress', function ( e ) {
1168 e.type === 'click' ||
1169 e.type === 'keypress' && e.which === 13
1171 arg.call( this, e );
1175 $el.attr( 'href', textify( arg ) );
1178 return appendWithoutParsing( $el.empty(), contents );
1182 * Transform parsed structure into pluralization
1183 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1184 * So convert it back with the current language's convertNumber.
1186 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1187 * @return {string|jQuery} selected pluralized form according to current language
1189 plural: function ( nodes ) {
1190 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1191 explicitPluralForms = {};
1193 count = parseFloat( this.language.convertNumber( textify( nodes[ 0 ] ), true ) );
1194 forms = nodes.slice( 1 );
1195 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1196 form = forms[ formIndex ];
1198 if ( form instanceof $ && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1199 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1200 firstChild = form.contents().get( 0 );
1201 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1202 firstChildText = firstChild.textContent;
1203 if ( /^\d+=/.test( firstChildText ) ) {
1204 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1205 // Use the digit part as key and rest of first text node and
1206 // rest of child nodes as value.
1207 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1208 explicitPluralForms[ explicitPluralFormNumber ] = form;
1209 forms[ formIndex ] = undefined;
1212 } else if ( /^\d+=/.test( form ) ) {
1213 // Simple explicit plural forms like 12=a dozen
1214 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
1215 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1216 forms[ formIndex ] = undefined;
1220 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1221 // eslint-disable-next-line no-jquery/no-map-util
1222 forms = $.map( forms, function ( form ) {
1226 return this.language.convertPlural( count, forms, explicitPluralForms );
1230 * Transform parsed structure according to gender.
1232 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1234 * The first node must be one of:
1235 * - the mw.user object (or a compatible one)
1236 * - an empty string - indicating the current user, same effect as passing the mw.user object
1237 * - a gender string ('male', 'female' or 'unknown')
1239 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1240 * @return {string|jQuery} Selected gender form according to current language
1242 gender: function ( nodes ) {
1244 maybeUser = nodes[ 0 ],
1245 forms = nodes.slice( 1 );
1247 if ( maybeUser === '' ) {
1248 maybeUser = mw.user;
1251 // If we are passed a mw.user-like object, check their gender.
1252 // Otherwise, assume the gender string itself was passed .
1253 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1254 gender = maybeUser.options.get( 'gender' );
1256 gender = textify( maybeUser );
1259 return this.language.gender( gender, forms );
1263 * Transform parsed structure into grammar conversion.
1264 * Invoked by putting `{{grammar
:form
|word
}}` in a message
1266 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1267 * @return {string|jQuery} selected grammatical form according to current language
1269 grammar: function ( nodes ) {
1270 var form = nodes[ 0 ],
1272 // These could be jQuery objects (passed as message parameters),
1273 // in which case we can't transform them (like rawParams() in PHP).
1274 if ( typeof form === 'string' && typeof word === 'string' ) {
1275 return this.language.convertGrammar( word, form );
1281 * Tranform parsed structure into a int: (interface language) message include
1282 * Invoked by putting `{{int:othermessage
}}` into a message
1284 * TODO Syntax in the included message is not parsed, this seems like a bug?
1286 * @param {Array} nodes List of nodes
1287 * @return {string} Other message
1289 int: function ( nodes ) {
1290 var msg = textify( nodes[ 0 ] );
1291 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1295 * Get localized namespace name from canonical name or namespace number.
1296 * Invoked by putting `{{ns
:foo
}}` into a message
1298 * @param {Array} nodes List of nodes
1299 * @return {string} Localized namespace name
1301 ns: function ( nodes ) {
1302 var ns = textify( nodes[ 0 ] ).trim();
1303 if ( !/^\d+$/.test( ns ) ) {
1304 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
1306 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1311 * Takes an unformatted number (arab, no group separators and . as decimal separator)
1312 * and outputs it in the localized digit script and formatted with decimal
1313 * separator, according to the current language.
1315 * @param {Array} nodes List of nodes
1316 * @return {number|string|jQuery} Formatted number
1318 formatnum: function ( nodes ) {
1319 var isInteger = !!nodes[ 1 ] && nodes[ 1 ] === 'R',
1320 number = nodes[ 0 ];
1322 // These could be jQuery objects (passed as message parameters),
1323 // in which case we can't transform them (like rawParams() in PHP).
1324 if ( typeof number === 'string' || typeof number === 'number' ) {
1325 return this.language.convertNumber( number, isInteger );
1333 * @param {Array} nodes List of nodes
1334 * @return {string} The given text, all in lowercase
1336 lc: function ( nodes ) {
1337 return textify( nodes[ 0 ] ).toLowerCase();
1343 * @param {Array} nodes List of nodes
1344 * @return {string} The given text, all in uppercase
1346 uc: function ( nodes ) {
1347 return textify( nodes[ 0 ] ).toUpperCase();
1351 * Lowercase first letter of input, leaving the rest unchanged
1353 * @param {Array} nodes List of nodes
1354 * @return {string} The given text, with the first character in lowercase
1356 lcfirst: function ( nodes ) {
1357 var text = textify( nodes[ 0 ] );
1358 return text.charAt( 0 ).toLowerCase() + text.slice( 1 );
1362 * Uppercase first letter of input, leaving the rest unchanged
1364 * @param {Array} nodes List of nodes
1365 * @return {string} The given text, with the first character in uppercase
1367 ucfirst: function ( nodes ) {
1368 var text = textify( nodes[ 0 ] );
1369 return text.charAt( 0 ).toUpperCase() + text.slice( 1 );
/**
 * jQuery plugin method, as returned by mw.jqueryMsg.getPlugin().
 *
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();

// Replace the default message parser with jqueryMsg
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	// Fall back to mw.msg's simple parser where possible
	if (
		// Plain text output always uses the simple parser
		this.format === 'plain' ||
		(
			// jqueryMsg parser is needed for messages containing wikitext
			!/\{\{|[<>[&]/.test( this.map.get( this.key ) ) &&
			// jqueryMsg parser is needed when jQuery objects or DOM nodes are passed in as parameters
			!this.parameters.some( function ( param ) {
				return param instanceof $ || ( param && param.nodeType !== undefined );
			} )
		)
	) {
		return oldParser.apply( this );
	}

	// Build the jqueryMsg message function for this format once and keep it on the
	// message map under the format name, so later calls with the same format reuse it.
	if ( !Object.prototype.hasOwnProperty.call( this.map, this.format ) ) {
		this.map[ this.format ] = mw.jqueryMsg.getMessageFunction( {
			messages: this.map,
			// For format 'escaped', escaping part is handled by mediawiki.js
			format: this.format
		} );
	}
	return this.map[ this.format ]( this.key, this.parameters );
};
1410 * Parse the message to DOM nodes, rather than HTML string like #parse.
1412 * This method is only available when jqueryMsg is loaded.
1416 * @member mw.Message
1419 mw.Message.prototype.parseDom = ( function () {
1420 var $wrapper = $( '<div>' );
1421 return function () {
1422 return $wrapper.msg( this.key, this.parameters ).contents().detach();