2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
15 slice
= Array
.prototype.slice
,
17 // Magic words and their expansions. Server-side data is added to this below.
19 PAGENAME
: mw
.config
.get( 'wgPageName' ),
20 PAGENAMEE
: mw
.util
.wikiUrlencode( mw
.config
.get( 'wgPageName' ) )
22 // Whitelist for allowed HTML elements in wikitext.
23 // Self-closing tags are not currently supported.
24 // Filled in with server-side data below
25 allowedHtmlElements
: [],
26 // Key tag name, value allowed attributes for that tag.
27 // See Sanitizer::setupAttributeWhitelist
28 allowedHtmlCommonAttributes
: [
41 // Attributes allowed for specific elements.
42 // Key is element name in lower case
43 // Value is array of allowed attributes for that element
44 allowedHtmlAttributesByElement
: {},
45 messages
: mw
.messages
,
46 language
: mw
.language
,
48 // Same meaning as in mediawiki.js.
50 // Only 'text', 'parse', and 'escaped' are supported, and the
51 // actual escaping for 'escaped' is done by other code (generally
52 // through mediawiki.js).
54 // However, note that this default only
55 // applies to direct calls to jqueryMsg. The default for mediawiki.js itself
56 // is 'text', including when it uses jqueryMsg.
60 // Add in server-side data (allowedHtmlElements and magic words)
61 $.extend( true, parserDefaults
, require( './parserDefaults.json' ) );
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * If our own HtmlEmitter jQuery object (class 'mediaWiki_htmlEmitter') is given, it is
 * replaced by its contents, so its children are appended rather than the wrapper itself.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) are left as is.
 *
 * The caller's `children` array is never modified; a new array is built and appended.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		items = Array.isArray( children ) ? children : [ children ],
		prepared = [];

	for ( i = 0, len = items.length; i < len; i++ ) {
		child = items[ i ];
		// Anything that is not an object (string, number, ...) becomes a text node so that
		// jQuery never interprets it as an HTML string.
		if ( typeof child !== 'object' ) {
			child = document.createTextNode( child );
		}
		// Unwrap our own synthetic wrapper so that only its contents are appended.
		if ( child instanceof $ && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			child = child.contents();
		}
		prepared.push( child );
	}

	return $parent.append( prepared );
}
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape.
 *
 * Handles &#039; &quot; &lt; &gt; and &amp; only. The ampersand is decoded last so that
 * a doubly-escaped sequence such as "&amp;lt;" yields "&lt;" rather than "<".
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	return encoded
		.replace( /&#039;/g, '\'' )
		.replace( /&quot;/g, '"' )
		.replace( /&lt;/g, '<' )
		.replace( /&gt;/g, '>' )
		.replace( /&amp;/g, '&' );
}
/**
 * Turn input into a string.
 *
 * A jQuery object is reduced to its text content; anything else is passed to String().
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	return String( input instanceof $ ? input.text() : input );
}
127 * Given parser options, return a function that parses a key and replacements, returning jQuery object
129 * Try to parse a key and optional replacements, returning a jQuery object that may be a tree of jQuery nodes.
130 * If there was an error parsing, return the key and the error message (wrapped in jQuery). This should put the error right into
131 * the interface, without causing the page to halt script execution, and it hopefully should be clearer how to fix it.
134 * @param {Object} options Parser options
136 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
137 * @return {jQuery} return.return
139 function getFailableParserFn( options
) {
140 return function ( args
) {
142 parser
= new mw
.jqueryMsg
.Parser( options
),
144 argsArray
= Array
.isArray( args
[ 1 ] ) ? args
[ 1 ] : slice
.call( args
, 1 );
146 return parser
.parse( key
, argsArray
);
148 fallback
= parser
.settings
.messages
.get( key
);
149 mw
.log
.warn( 'mediawiki.jqueryMsg: ' + key
+ ': ' + e
.message
);
150 mw
.track( 'mediawiki.jqueryMsg.error', {
152 errorMessage
: e
.message
154 return $( '<span>' ).text( fallback
);
/**
 * Initialize parser defaults.
 *
 * ResourceLoaderJqueryMsgModule calls this to provide default values from
 * Sanitizer.php for allowed HTML elements. To override this data for individual
 * parsers, pass the relevant options to mw.jqueryMsg.Parser.
 *
 * @private
 * @param {Object} data New data to extend parser defaults with
 * @param {boolean} [deep=false] Whether the extend is done recursively (deep)
 */
mw.jqueryMsg.setParserDefaults = function ( data, deep ) {
	// Exactly one merge is performed: a recursive one when `deep` is truthy, otherwise a
	// shallow one that replaces top-level keys of parserDefaults wholesale.
	if ( deep ) {
		$.extend( true, parserDefaults, data );
	} else {
		$.extend( parserDefaults, data );
	}
};
/**
 * Get current parser defaults.
 *
 * Primarily used for the unit test. Returns a shallow copy: the returned object is new,
 * but its property values are the same references held by the defaults.
 *
 * @private
 * @return {Object}
 */
mw.jqueryMsg.getParserDefaults = function () {
	var snapshot = {};
	$.extend( snapshot, parserDefaults );
	return snapshot;
};
193 * Returns a function suitable for static use, to construct strings from a message key (and optional replacements).
197 * var format = mediaWiki.jqueryMsg.getMessageFunction( options );
198 * $( '#example' ).text( format( 'hello-user', username ) );
200 * This returns only strings, so it destroys any bindings. If you want to preserve bindings, use the
201 * jQuery plugin version instead. This was originally created to ease migration from `window.gM()`,
202 * from a time when the parser used by `mw.message` was not extendable.
204 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
205 * somefunction( a, b, c, d )
207 * somefunction( a, [b, c, d] )
209 * @param {Object} options parser options
210 * @return {Function} Function The message formatter
211 * @return {string} return.key Message key.
212 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
213 * @return {string} return.return Rendered HTML.
215 mw
.jqueryMsg
.getMessageFunction = function ( options
) {
216 var failableParserFn
, format
;
218 if ( options
&& options
.format
!== undefined ) {
219 format
= options
.format
;
221 format
= parserDefaults
.format
;
226 if ( !failableParserFn
) {
227 failableParserFn
= getFailableParserFn( options
);
229 failableResult
= failableParserFn( arguments
);
230 if ( format
=== 'text' || format
=== 'escaped' ) {
231 return failableResult
.text();
233 return failableResult
.html();
239 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
240 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
243 * $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
244 * var $userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
245 * $( 'p#headline' ).msg( 'hello-user', $userlink );
247 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
248 * somefunction( a, b, c, d )
250 * somefunction( a, [b, c, d] )
252 * We append to 'this', which in a jQuery plugin context will be the selected elements.
254 * @param {Object} options Parser options
255 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
256 * @return {string} return.key Message key.
257 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
258 * @return {jQuery} return.return
260 mw
.jqueryMsg
.getPlugin = function ( options
) {
261 var failableParserFn
;
265 if ( !failableParserFn
) {
266 failableParserFn
= getFailableParserFn( options
);
268 $target
= this.empty();
269 appendWithoutParsing( $target
, failableParserFn( arguments
) );
276 * Describes an object, whose primary duty is to .parse() message keys.
280 * @param {Object} options
282 mw
.jqueryMsg
.Parser = function ( options
) {
283 this.settings
= $.extend( {}, parserDefaults
, options
);
284 this.settings
.onlyCurlyBraceTransform
= ( this.settings
.format
=== 'text' || this.settings
.format
=== 'escaped' );
287 this.emitter
= new mw
.jqueryMsg
.HtmlEmitter( this.settings
.language
, this.settings
.magic
);
289 // Backwards-compatible alias
290 // @deprecated since 1.31
291 mw
.jqueryMsg
.parser
= mw
.jqueryMsg
.Parser
;
293 mw
.jqueryMsg
.Parser
.prototype = {
295 * Where the magic happens.
296 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
297 * If an error is thrown, returns original key, and logs the error
299 * @param {string} key Message key.
300 * @param {Array} replacements Variable replacements for $1, $2... $n
303 parse: function ( key
, replacements
) {
304 var ast
= this.getAst( key
, replacements
);
305 return this.emitter
.emit( ast
, replacements
);
309 * Fetch the message string associated with a key, return parsed structure. Memoized.
310 * Note that we pass '⧼' + key + '⧽' back for a missing message here.
312 * @param {string} key
313 * @param {Array} replacements Variable replacements for $1, $2... $n
314 * @return {string|Array} string of '⧼key⧽' if message missing, simple string if possible, array of arrays if needs parsing
316 getAst: function ( key
, replacements
) {
319 if ( !Object
.prototype.hasOwnProperty
.call( this.astCache
, key
) ) {
320 if ( mw
.config
.get( 'wgUserLanguage' ) === 'qqx' ) {
321 wikiText
= '(' + key
+ '$*)';
323 wikiText
= this.settings
.messages
.get( key
);
324 if ( typeof wikiText
!== 'string' ) {
325 wikiText
= '⧼' + key
+ '⧽';
328 wikiText
= mw
.internalDoTransformFormatForQqx( wikiText
, replacements
);
329 this.astCache
[ key
] = this.wikiTextToAst( wikiText
);
331 return this.astCache
[ key
];
335 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
337 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
338 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
340 * @param {string} input Message string wikitext
342 * @return {Mixed} abstract syntax tree
344 wikiTextToAst: function ( input
) {
346 regularLiteral
, regularLiteralWithoutBar
, regularLiteralWithoutSpace
, regularLiteralWithSquareBrackets
,
347 doubleQuote
, singleQuote
, backslash
, anyCharacter
, asciiAlphabetLiteral
,
348 escapedOrLiteralWithoutSpace
, escapedOrLiteralWithoutBar
, escapedOrRegularLiteral
,
349 whitespace
, dollar
, digits
, htmlDoubleQuoteAttributeValue
, htmlSingleQuoteAttributeValue
,
350 htmlAttributeEquals
, openHtmlStartTag
, optionalForwardSlash
, openHtmlEndTag
, closeHtmlTag
,
351 openExtlink
, closeExtlink
, wikilinkContents
, openWikilink
, closeWikilink
, templateName
, pipe
, colon
,
352 templateContents
, openTemplate
, closeTemplate
,
353 nonWhitespaceExpression
, paramExpression
, expression
, curlyBraceTransformExpression
, result
,
354 settings
= this.settings
,
355 concat
= Array
.prototype.concat
;
357 // Indicates current position in input as we parse through it.
358 // Shared among all parsing functions below.
361 // =========================================================
362 // parsing combinators - could be a library on its own
363 // =========================================================
366 * Try parsers until one works, if none work return null
369 * @param {Function[]} ps
370 * @return {string|null}
372 function choice( ps
) {
375 for ( i
= 0; i
< ps
.length
; i
++ ) {
377 if ( result
!== null ) {
386 * Try several ps in a row, all must succeed or return null.
387 * This is the only eager one.
390 * @param {Function[]} ps
391 * @return {string|null}
393 function sequence( ps
) {
397 for ( i
= 0; i
< ps
.length
; i
++ ) {
399 if ( res
=== null ) {
409 * Run the same parser over and over until it fails.
410 * Must succeed a minimum of n times or return null.
414 * @param {Function} p
415 * @return {string|null}
417 function nOrMore( n
, p
) {
419 var originalPos
= pos
,
422 while ( parsed
!== null ) {
423 result
.push( parsed
);
426 if ( result
.length
< n
) {
435 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
437 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
438 * May be some scoping issue
441 * @param {Function} p
442 * @param {Function} fn
443 * @return {string|null}
445 function transform( p
, fn
) {
448 return result
=== null ? null : fn( result
);
453 * Just make parsers out of simpler JS builtin types
458 * @return {string} return.return
460 function makeStringParser( s
) {
464 if ( input
.substr( pos
, len
) === s
) {
473 * Makes a regex parser, given a RegExp object.
474 * The regex being passed in should start with a ^ to anchor it to the start
478 * @param {RegExp} regex anchored regex
479 * @return {Function} function to parse input based on the regex
481 function makeRegexParser( regex
) {
483 var matches
= input
.slice( pos
).match( regex
);
484 if ( matches
=== null ) {
487 pos
+= matches
[ 0 ].length
;
492 // ===================================================================
493 // General patterns above this line -- wikitext specific parsers below
494 // ===================================================================
496 // Parsing functions follow. All parsing functions work like this:
497 // They don't accept any arguments.
498 // Instead, they just operate non destructively on the string 'input'
499 // As they can consume parts of the string, they advance the shared variable pos,
500 // and return tokens (or whatever else they want to return).
501 // some things are defined as closures and other things as ordinary functions
502 // converting everything to a closure makes it a lot harder to debug... errors pop up
503 // but some debuggers can't tell you exactly where they come from. Also the mutually
504 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
505 // This may be because, to save code, memoization was removed
507 /* eslint-disable no-useless-escape */
508 regularLiteral
= makeRegexParser( /^[^{}\[\]$<\\]/ );
509 regularLiteralWithoutBar
= makeRegexParser( /^[^{}\[\]$\\|]/ );
510 regularLiteralWithoutSpace
= makeRegexParser( /^[^{}\[\]$\s]/ );
511 regularLiteralWithSquareBrackets
= makeRegexParser( /^[^{}$\\]/ );
512 /* eslint-enable no-useless-escape */
514 backslash
= makeStringParser( '\\' );
515 doubleQuote
= makeStringParser( '"' );
516 singleQuote
= makeStringParser( '\'' );
517 anyCharacter
= makeRegexParser( /^./ );
519 openHtmlStartTag
= makeStringParser( '<' );
520 optionalForwardSlash
= makeRegexParser( /^\/?/ );
521 openHtmlEndTag
= makeStringParser( '</' );
522 htmlAttributeEquals
= makeRegexParser( /^\s*=\s*/ );
523 closeHtmlTag
= makeRegexParser( /^\s*>/ );
525 function escapedLiteral() {
526 var result
= sequence( [
530 return result
=== null ? null : result
[ 1 ];
532 escapedOrLiteralWithoutSpace
= choice( [
534 regularLiteralWithoutSpace
536 escapedOrLiteralWithoutBar
= choice( [
538 regularLiteralWithoutBar
540 escapedOrRegularLiteral
= choice( [
544 // Used to define "literals" without spaces, in space-delimited situations
545 function literalWithoutSpace() {
546 var result
= nOrMore( 1, escapedOrLiteralWithoutSpace
)();
547 return result
=== null ? null : result
.join( '' );
549 // Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
550 // it is not a literal in the parameter
551 function literalWithoutBar() {
552 var result
= nOrMore( 1, escapedOrLiteralWithoutBar
)();
553 return result
=== null ? null : result
.join( '' );
557 var result
= nOrMore( 1, escapedOrRegularLiteral
)();
558 return result
=== null ? null : result
.join( '' );
561 function curlyBraceTransformExpressionLiteral() {
562 var result
= nOrMore( 1, regularLiteralWithSquareBrackets
)();
563 return result
=== null ? null : result
.join( '' );
566 asciiAlphabetLiteral
= makeRegexParser( /^[A-Za-z]+/ );
567 htmlDoubleQuoteAttributeValue
= makeRegexParser( /^[^"]*/ );
568 htmlSingleQuoteAttributeValue
= makeRegexParser( /^[^']*/ );
570 whitespace
= makeRegexParser( /^\s+/ );
571 dollar
= makeStringParser( '$' );
572 digits
= makeRegexParser( /^\d+/ );
574 function replacement() {
575 var result
= sequence( [
579 if ( result
=== null ) {
582 return [ 'REPLACE', parseInt( result
[ 1 ], 10 ) - 1 ];
584 openExtlink
= makeStringParser( '[' );
585 closeExtlink
= makeStringParser( ']' );
586 // this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
588 var result
, parsedResult
, target
;
590 parsedResult
= sequence( [
592 nOrMore( 1, nonWhitespaceExpression
),
594 nOrMore( 1, expression
),
597 if ( parsedResult
!== null ) {
598 // When the entire link target is a single parameter, we can't use CONCAT, as we allow
599 // passing fancy parameters (like a whole jQuery object or a function) to use for the
600 // link. Check only if it's a single match, since we can either do CONCAT or not for
601 // singles with the same effect.
602 target
= parsedResult
[ 1 ].length
=== 1 ?
603 parsedResult
[ 1 ][ 0 ] :
604 [ 'CONCAT' ].concat( parsedResult
[ 1 ] );
608 [ 'CONCAT' ].concat( parsedResult
[ 3 ] )
613 openWikilink
= makeStringParser( '[[' );
614 closeWikilink
= makeStringParser( ']]' );
615 pipe
= makeStringParser( '|' );
617 function template() {
618 var result
= sequence( [
623 return result
=== null ? null : result
[ 1 ];
626 function pipedWikilink() {
627 var result
= sequence( [
628 nOrMore( 1, paramExpression
),
630 nOrMore( 1, expression
)
632 return result
=== null ? null : [
633 [ 'CONCAT' ].concat( result
[ 0 ] ),
634 [ 'CONCAT' ].concat( result
[ 2 ] )
638 function unpipedWikilink() {
639 var result
= sequence( [
640 nOrMore( 1, paramExpression
)
642 return result
=== null ? null : [
643 [ 'CONCAT' ].concat( result
[ 0 ] )
647 wikilinkContents
= choice( [
652 function wikilink() {
653 var result
, parsedResult
, parsedLinkContents
;
656 parsedResult
= sequence( [
661 if ( parsedResult
!== null ) {
662 parsedLinkContents
= parsedResult
[ 1 ];
663 result
= [ 'WIKILINK' ].concat( parsedLinkContents
);
668 // TODO: Support data- if appropriate
669 function doubleQuotedHtmlAttributeValue() {
670 var parsedResult
= sequence( [
672 htmlDoubleQuoteAttributeValue
,
675 return parsedResult
=== null ? null : parsedResult
[ 1 ];
678 function singleQuotedHtmlAttributeValue() {
679 var parsedResult
= sequence( [
681 htmlSingleQuoteAttributeValue
,
684 return parsedResult
=== null ? null : parsedResult
[ 1 ];
687 function htmlAttribute() {
688 var parsedResult
= sequence( [
690 asciiAlphabetLiteral
,
693 doubleQuotedHtmlAttributeValue
,
694 singleQuotedHtmlAttributeValue
697 return parsedResult
=== null ? null : [ parsedResult
[ 1 ], parsedResult
[ 3 ] ];
701 * Checks if HTML is allowed
703 * @param {string} startTagName HTML start tag name
704 * @param {string} endTagName HTML end tag name
705 * @param {Object} attributes array of consecutive key value pairs,
706 * with index 2 * n being a name and 2 * n + 1 the associated value
707 * @return {boolean} true if this HTML is allowed, false otherwise
709 function isAllowedHtml( startTagName
, endTagName
, attributes
) {
710 var i
, len
, attributeName
;
712 startTagName
= startTagName
.toLowerCase();
713 endTagName
= endTagName
.toLowerCase();
714 if ( startTagName
!== endTagName
|| settings
.allowedHtmlElements
.indexOf( startTagName
) === -1 ) {
718 for ( i
= 0, len
= attributes
.length
; i
< len
; i
+= 2 ) {
719 attributeName
= attributes
[ i
];
720 if ( settings
.allowedHtmlCommonAttributes
.indexOf( attributeName
) === -1 &&
721 ( settings
.allowedHtmlAttributesByElement
[ startTagName
] || [] ).indexOf( attributeName
) === -1 ) {
729 function htmlAttributes() {
730 var parsedResult
= nOrMore( 0, htmlAttribute
)();
731 // Un-nest attributes array due to structure of jQueryMsg operations (see emit).
732 return concat
.apply( [ 'HTMLATTRIBUTES' ], parsedResult
);
735 // Subset of allowed HTML markup.
736 // Most elements and many attributes allowed on the server are not supported yet.
738 var parsedOpenTagResult
, parsedHtmlContents
, parsedCloseTagResult
,
739 wrappedAttributes
, attributes
, startTagName
, endTagName
, startOpenTagPos
,
740 startCloseTagPos
, endOpenTagPos
, endCloseTagPos
,
743 // Break into three sequence calls. That should allow accurate reconstruction of the original HTML, and requiring an exact tag name match.
744 // 1. open through closeHtmlTag
746 // 3. openHtmlEnd through close
747 // This will allow recording the positions to reconstruct if HTML is to be treated as text.
749 startOpenTagPos
= pos
;
750 parsedOpenTagResult
= sequence( [
752 asciiAlphabetLiteral
,
754 optionalForwardSlash
,
758 if ( parsedOpenTagResult
=== null ) {
763 startTagName
= parsedOpenTagResult
[ 1 ];
765 parsedHtmlContents
= nOrMore( 0, expression
)();
767 startCloseTagPos
= pos
;
768 parsedCloseTagResult
= sequence( [
770 asciiAlphabetLiteral
,
774 if ( parsedCloseTagResult
=== null ) {
775 // Closing tag failed. Return the start tag and contents.
776 return [ 'CONCAT', input
.slice( startOpenTagPos
, endOpenTagPos
) ]
777 .concat( parsedHtmlContents
);
780 endCloseTagPos
= pos
;
781 endTagName
= parsedCloseTagResult
[ 1 ];
782 wrappedAttributes
= parsedOpenTagResult
[ 2 ];
783 attributes
= wrappedAttributes
.slice( 1 );
784 if ( isAllowedHtml( startTagName
, endTagName
, attributes
) ) {
785 result
= [ 'HTMLELEMENT', startTagName
, wrappedAttributes
]
786 .concat( parsedHtmlContents
);
788 // HTML is not allowed, so contents will remain how
789 // it was, while HTML markup at this level will be
791 // E.g. assuming script tags are not allowed:
793 // <script>[[Foo|bar]]</script>
795 // results in '<script>' and '</script>'
796 // (not treated as an HTML tag), surrounding a fully
799 // Concatenate everything from the tag, flattening the contents.
800 result
= [ 'CONCAT', input
.slice( startOpenTagPos
, endOpenTagPos
) ]
801 .concat( parsedHtmlContents
, input
.slice( startCloseTagPos
, endCloseTagPos
) );
807 // <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed.
809 var parsedResult
, plainText
,
812 parsedResult
= sequence( [
813 makeStringParser( '<nowiki>' ),
814 // We use a greedy non-backtracking parser, so we must ensure here that we don't take too much
815 makeRegexParser( /^.*?(?=<\/nowiki>)/ ),
816 makeStringParser( '</nowiki>' )
818 if ( parsedResult
!== null ) {
819 plainText
= parsedResult
[ 1 ];
820 result
= [ 'CONCAT' ].concat( plainText
);
826 templateName
= transform(
827 // see $wgLegalTitleChars
828 // not allowing : due to the need to catch "PLURAL:$1"
829 makeRegexParser( /^[ !"$&'()*,./0-9;=?@A
-Z
^_
`a-z~\x80-\xFF+-]+/ ),
830 function ( result ) { return result.toString(); }
832 function templateParam() {
836 nOrMore( 0, paramExpression )
838 if ( result === null ) {
842 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
843 return expr.length > 1 ? [ 'CONCAT' ].concat( expr ) : expr[ 0 ];
846 function templateWithReplacement() {
847 var result = sequence( [
852 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
854 function templateWithOutReplacement() {
855 var result = sequence( [
860 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
862 function templateWithOutFirstParameter() {
863 var result = sequence( [
867 return result === null ? null : [ result[ 0 ], '' ];
869 colon = makeStringParser( ':' );
870 templateContents = choice( [
872 var res = sequence( [
873 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
874 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
875 choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
876 nOrMore( 0, templateParam )
878 return res === null ? null : res[ 0 ].concat( res[ 1 ] );
881 var res = sequence( [
883 nOrMore( 0, templateParam )
885 if ( res === null ) {
888 return [ res[ 0 ] ].concat( res[ 1 ] );
891 openTemplate = makeStringParser( '{{' );
892 closeTemplate = makeStringParser( '}}' );
893 nonWhitespaceExpression = choice( [
900 paramExpression = choice( [
908 expression = choice( [
918 // Used when only {{-transformation is wanted, for 'text'
919 // or 'escaped' formats
920 curlyBraceTransformExpression = choice( [
923 curlyBraceTransformExpressionLiteral
929 * @param {Function} rootExpression Root parse function
930 * @return {Array|null}
932 function start( rootExpression ) {
933 var result = nOrMore( 0, rootExpression )();
934 if ( result === null ) {
937 return [ 'CONCAT' ].concat( result );
939 // everything above this point is supposed to be stateless/static, but
940 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
941 // finally let's do some actual work...
943 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
946 * For success, the p must have gotten to the end of the input
947 * and returned a non-null.
948 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
950 if ( result === null || pos !== input.length ) {
951 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
959 * Class that primarily exists to emit HTML from parser ASTs.
963 * @param {Object} language
964 * @param {Object} magic
966 mw.jqueryMsg.HtmlEmitter = function ( language, magic ) {
968 this.language = language;
969 // eslint-disable-next-line no-jquery/no-each-util
970 $.each( magic, function ( key, val ) {
971 jmsg[ key.toLowerCase() ] = function () {
977 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
978 * Walk entire node structure, applying replacements and template functions when appropriate
980 * @param {Mixed} node Abstract syntax tree (top node or subnode)
981 * @param {Array} replacements for $1, $2, ... $n
982 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
984 this.emit = function ( node, replacements ) {
985 var ret, subnodes, operation,
987 switch ( typeof node ) {
992 // typeof returns object for arrays
994 // node is an array of nodes
995 // eslint-disable-next-line no-jquery/no-map-util
996 subnodes = $.map( node.slice( 1 ), function ( n ) {
997 return jmsg.emit( n, replacements );
999 operation = node[ 0 ].toLowerCase();
1000 if ( typeof jmsg[ operation ] === 'function' ) {
1001 ret = jmsg[ operation ]( subnodes, replacements );
1003 throw new Error( 'Unknown operation "' + operation + '"' );
1007 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
1008 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
1009 // The logical thing is probably to return the empty string here when we encounter undefined.
1013 throw new Error( 'Unexpected type in AST: ' + typeof node );
1019 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
1020 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
1021 // If you have 'magic words' then configure the parser to have them upon creation.
1023 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
1024 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
1025 mw.jqueryMsg.HtmlEmitter.prototype = {
1027 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
1028 * Must return a single node to parents -- a jQuery with synthetic span
1029 * However, unwrap any other synthetic spans in our children and pass them upwards
1031 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
1034 concat: function ( nodes ) {
1035 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
1036 // eslint-disable-next-line no-jquery/no-each-util
1037 $.each( nodes, function ( i, node ) {
1038 // Let jQuery append nodes, arrays of nodes and jQuery objects
1039 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1040 appendWithoutParsing( $span, node );
1046 * Return escaped replacement of correct index, or string if unavailable.
1047 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1048 * if the specified parameter is not found return the same string
1049 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1051 * TODO: Throw error if nodes.length > 1 ?
1053 * @param {Array} nodes List of one element, integer, n >= 0
1054 * @param {Array} replacements List of at least n strings
1055 * @return {string|jQuery} replacement
1057 replace: function ( nodes, replacements ) {
1058 var index = parseInt( nodes[ 0 ], 10 );
1060 if ( index < replacements.length ) {
1061 return replacements[ index ];
1063 // index not found, fallback to displaying variable
1064 return '$' + ( index + 1 );
1069 * Transform wiki-link
1072 * It only handles basic cases, either no pipe, or a pipe with an explicit
1075 * It does not attempt to handle features like the pipe trick.
1076 * However, the pipe trick should usually not be present in wikitext retrieved
1077 * from the server, since the replacement is done at save time.
1078 * It may, though, if the wikitext appears in extension-controlled content.
1080 * @param {string[]} nodes
1083 wikilink: function ( nodes ) {
1084 var page, anchor, url, $el;
1086 page = textify( nodes[ 0 ] );
1087 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1088 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1089 if ( page.charAt( 0 ) === ':' ) {
1090 page = page.slice( 1 );
1092 url = mw.util.getUrl( page );
1094 if ( nodes.length === 1 ) {
1095 // [[Some Page]] or [[Namespace:Some Page]]
1098 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1099 anchor = nodes[ 1 ];
1102 $el = $( '<a>' ).attr( {
1106 return appendWithoutParsing( $el, anchor );
1110 * Converts array of HTML element key value pairs to object
1112 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1113 * name and 2 * n + 1 the associated value
1114 * @return {Object} Object mapping attribute name to attribute value
1116 htmlattributes: function ( nodes ) {
1117 var i, len, mapping = {};
1118 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1119 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1125 * Handles an (already-validated) HTML element.
1127 * @param {Array} nodes Nodes to process when creating element
1130 htmlelement: function ( nodes ) {
1131 var tagName, attributes, contents, $element;
1133 tagName = nodes.shift();
1134 attributes = nodes.shift();
1136 $element = $( document.createElement( tagName ) ).attr( attributes );
1137 return appendWithoutParsing( $element, contents );
1141 * Transform parsed structure into external link.
1143 * The "href" can be:
1144 * - a jQuery object, treat it as "enclosing" the link text.
1145 * - a function, treat it as the click handler.
1146 * - a string, or our HtmlEmitter jQuery object, treat it as a URI after stringifying.
1148 * TODO: throw an error if nodes.length > 2 ?
1150 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
1153 extlink: function ( nodes ) {
1156 contents = nodes[ 1 ];
// A jQuery object is handled specially only if it does not carry the
// 'mediaWiki_htmlEmitter' class, i.e. it is not one of our own emitter wrappers.
1157 if ( arg instanceof $ && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
// A function argument is wired up as an event handler below.
1161 if ( typeof arg === 'function' ) {
1165 } ).on( 'click keypress', function ( e ) {
// Run the handler for clicks, and for keypress events only when the key code is 13 (Enter).
1167 e.type === 'click' ||
1168 e.type === 'keypress' && e.which === 13
// The handler is invoked with the same `this` and event object that jQuery supplied.
1170 arg.call( this, e );
// The argument is stringified with textify and used as the href.
1174 $el.attr( 'href', textify( arg ) );
// empty() removes any existing children of $el before the contents are appended.
1177 return appendWithoutParsing( $el.empty(), contents );
1181 * Transform parsed structure into pluralization
1182 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1183 * So convert it back with the current language's convertNumber.
1185 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1186 * @return {string|jQuery} selected pluralized form according to current language
1188 plural: function ( nodes ) {
1189 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1190 explicitPluralForms = {};
// Normalize the first node to a number: stringify it with textify, run it through
// the language's convertNumber (second argument true), then parseFloat the result.
1192 count = parseFloat( this.language.convertNumber( textify( nodes[ 0 ] ), true ) );
// Every node after the first is a candidate plural form.
1193 forms = nodes.slice( 1 );
1194 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1195 form = forms[ formIndex ];
// Forms wrapped in our own emitter element are inspected through their first child node.
1197 if ( form instanceof $ && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1198 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1199 firstChild = form.contents().get( 0 );
1200 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1201 firstChildText = firstChild.textContent;
// Only a leading run of digits immediately followed by '=' counts as an explicit form.
1202 if ( /^\d+=/.test( firstChildText ) ) {
1203 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1204 // Use the digit part as key and rest of first text node and
1205 // rest of child nodes as value.
1206 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
// Store the (now prefix-free) form under its number and clear its slot in the regular list.
1207 explicitPluralForms[ explicitPluralFormNumber ] = form;
1208 forms[ formIndex ] = undefined;
1211 } else if ( /^\d+=/.test( form ) ) {
1212 // Simple explicit plural forms like 12=a dozen
1213 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
// Everything after the first '=' is the text of the explicit form.
1214 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1215 forms[ formIndex ] = undefined;
1219 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1220 // eslint-disable-next-line no-jquery/no-map-util
1221 forms = $.map( forms, function ( form ) {
// The language object picks the form; regular forms and explicit N= forms are passed separately.
1225 return this.language.convertPlural( count, forms, explicitPluralForms );
1229 * Transform parsed structure according to gender.
1231 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1233 * The first node must be one of:
1234 * - the mw.user object (or a compatible one)
1235 * - an empty string - indicating the current user, same effect as passing the mw.user object
1236 * - a gender string ('male', 'female' or 'unknown')
1238 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1239 * @return {string|jQuery} Selected gender form according to current language
1241 gender: function ( nodes ) {
1243 maybeUser = nodes[ 0 ],
1244 forms = nodes.slice( 1 );
// An empty-string first node is replaced by mw.user.
1246 if ( maybeUser === '' ) {
1247 maybeUser = mw.user;
1250 // If we are passed a mw.user-like object, check their gender.
1251 // Otherwise, assume the gender string itself was passed.
// "mw.user-like" is detected by the object having an `options` property that is an mw.Map.
1252 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1253 gender = maybeUser.options.get( 'gender' );
1255 gender = textify( maybeUser );
// The language object selects among the remaining nodes based on the resolved gender.
1258 return this.language.gender( gender, forms );
1262 * Transform parsed structure into grammar conversion.
1263 * Invoked by putting `{{grammar:form|word}}` in a message
1265 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1266 * @return {string|jQuery} selected grammatical form according to current language
1268 grammar: function ( nodes ) {
1269 var form = nodes[ 0 ],
1271 // These could be jQuery objects (passed as message parameters),
1272 // in which case we can't transform them (like rawParams() in PHP).
1273 if ( typeof form === 'string' && typeof word === 'string' ) {
// Note the argument order: convertGrammar receives the word first, then the form.
1274 return this.language.convertGrammar( word, form );
1280 * Transform parsed structure into an int: (interface language) message include
1281 * Invoked by putting `{{int:othermessage}}` into a message
1283 * TODO Syntax in the included message is not parsed, this seems like a bug?
1285 * @param {Array} nodes List of nodes
1286 * @return {string} Other message
1288 int: function ( nodes ) {
// Only the first node is used as the message key.
1289 var msg = textify( nodes[ 0 ] );
// The key's first character is lowercased and the rest left as written; the lookup
// goes through a message function obtained with no options.
1290 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1294 * Get localized namespace name from canonical name or namespace number.
1295 * Invoked by putting `{{ns:foo}}` into a message
1297 * @param {Array} nodes List of nodes
1298 * @return {string} Localized namespace name
1300 ns: function ( nodes ) {
1301 var ns = textify( nodes[ 0 ] ).trim();
// Anything that is not purely digits is treated as a name: spaces become underscores,
// the result is lowercased, and it is mapped to an ID through wgNamespaceIds.
1302 if ( !/^\d+$/.test( ns ) ) {
1303 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
// The ID is then looked up in wgFormattedNamespaces; an unknown name or ID yields undefined here.
1305 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1310 * Takes an unformatted number (Arabic digits, no group separators, and "." as the decimal separator)
1311 * and outputs it in the localized digit script and formatted with decimal
1312 * separator, according to the current language.
1314 * @param {Array} nodes List of nodes
1315 * @return {number|string|jQuery} Formatted number
1317 formatnum: function ( nodes ) {
// isInteger is true only when the second node is exactly the string 'R';
// it is passed on as convertNumber's second argument.
1318 var isInteger = !!nodes[ 1 ] && nodes[ 1 ] === 'R',
1319 number = nodes[ 0 ];
1321 // These could be jQuery objects (passed as message parameters),
1322 // in which case we can't transform them (like rawParams() in PHP).
1323 if ( typeof number === 'string' || typeof number === 'number' ) {
1324 return this.language.convertNumber( number, isInteger );
1332 * @param {Array} nodes List of nodes
1333 * @return {string} The given text, all in lowercase
1335 lc: function ( nodes ) {
// Only the first node is used; it is passed through textify and the result is lowercased.
1336 return textify( nodes[ 0 ] ).toLowerCase();
1342 * @param {Array} nodes List of nodes
1343 * @return {string} The given text, all in uppercase
1345 uc: function ( nodes ) {
// Only the first node is used; it is passed through textify and the result is uppercased.
1346 return textify( nodes[ 0 ] ).toUpperCase();
1350 * Lowercase first letter of input, leaving the rest unchanged
1352 * @param {Array} nodes List of nodes
1353 * @return {string} The given text, with the first character in lowercase
1355 lcfirst: function ( nodes ) {
1356 var text = textify( nodes[ 0 ] );
// charAt( 0 ) is '' for an empty string, so empty input comes back as ''.
1357 return text.charAt( 0 ).toLowerCase() + text.slice( 1 );
1361 * Uppercase first letter of input, leaving the rest unchanged
1363 * @param {Array} nodes List of nodes
1364 * @return {string} The given text, with the first character in uppercase
1366 ucfirst: function ( nodes ) {
1367 var text = textify( nodes[ 0 ] );
// charAt( 0 ) is '' for an empty string, so empty input comes back as ''.
1368 return text.charAt( 0 ).toUpperCase() + text.slice( 1 );
1375 * @see mw.jqueryMsg#getPlugin
1377 $.fn.msg = mw.jqueryMsg.getPlugin(); // installs the returned function as the jQuery method .msg()
1379 // Replace the default message parser with jqueryMsg
1380 oldParser = mw.Message.prototype.parser;
// The original parser is kept in oldParser so the replacement below can delegate to it.
1381 mw.Message.prototype.parser = function () {
1382 // Fall back to mw.msg's simple parser where possible
1384 // Plain text output always uses the simple parser
1385 this.format === 'plain' ||
1387 // jqueryMsg parser is needed for messages containing wikitext
// The regex matches '{{' or any one of the characters < > [ & in the raw message text.
1388 !/\{\{|[<>[&]/.test( this.map.get( this.key ) ) &&
1389 // jqueryMsg parser is needed when jQuery objects or DOM nodes are passed in as parameters
1390 !this.parameters.some( function ( param ) {
// A DOM node is recognised by having a defined nodeType property.
1391 return param instanceof $ || ( param && param.nodeType !== undefined );
1395 return oldParser.apply( this );
// The message function for a format is created once and stored on the map object
// itself under the format name; later calls with the same format reuse it.
1398 if ( !Object.prototype.hasOwnProperty.call( this.map, this.format ) ) {
1399 this.map[ this.format ] = mw.jqueryMsg.getMessageFunction( {
1401 // For format 'escaped', escaping part is handled by mediawiki.js
1405 return this.map[ this.format ]( this.key, this.parameters );
1409 * Parse the message to DOM nodes, rather than HTML string like #parse.
1411 * This method is only available when jqueryMsg is loaded.
1415 * @member mw.Message
1418 mw.Message.prototype.parseDom = ( function () {
1419 var $wrapper = $( '<div>' );
1420 return function () {
1421 return $wrapper.msg( this.key, this.parameters ).contents().detach();