2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
15 slice
= Array
.prototype.slice
,
18 'SITENAME': mw
.config
.get( 'wgSiteName' )
20 // This is a whitelist like Sanitizer.php.
21 // Self-closing tags are not currently supported.
22 // The simplified default here is overridden below by data supplied
23 // by the mediawiki.jqueryMsg.data module.
24 allowedHtmlElements
: [
28 // Key tag name, value allowed attributes for that tag.
29 // See Sanitizer::setupAttributeWhitelist
30 allowedHtmlCommonAttributes
: [
43 // Attributes allowed for specific elements.
44 // Key is element name in lower case
45 // Value is array of allowed attributes for that element
46 allowedHtmlAttributesByElement
: {},
47 messages
: mw
.messages
,
48 language
: mw
.language
,
50 // Same meaning as in mediawiki.js.
52 // Only 'text', 'parse', and 'escaped' are supported, and the
53 // actual escaping for 'escaped' is done by other code (generally
54 // through mediawiki.js).
56 // However, note that this default only
57 // applies to direct calls to jqueryMsg. The default for mediawiki.js itself
58 // is 'text', including when it uses jqueryMsg.
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The conversion is done on a fresh list, so an array passed in by the caller is never
 * modified (its strings and numbers stay strings and numbers).
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		list = $.isArray( children ) ? children : [ children ],
		nodes = [];

	for ( i = 0, len = list.length; i < len; i++ ) {
		child = list[ i ];
		// Anything that is not an object (string, number, ...) becomes a text node,
		// so jQuery never gets the chance to interpret it as HTML.
		nodes.push( typeof child === 'object' ? child : document.createTextNode( child ) );
	}

	return $parent.append( nodes );
}
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape.
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	// &amp; is handled last so that an escaped entity such as "&amp;lt;" decodes
	// to the text "&lt;" instead of being decoded a second time.
	return encoded
		.replace( /&#039;/g, '\'' )
		.replace( /&quot;/g, '"' )
		.replace( /&lt;/g, '<' )
		.replace( /&gt;/g, '>' )
		.replace( /&amp;/g, '&' );
}
/**
 * Given parser options, return a function that parses a key and replacements, returning jQuery object
 *
 * The returned function never throws on a parse failure. Instead the error is logged through
 * mw.log.warn and the message as stored in the parser's message map is returned as the
 * text of a <span>, so a broken message shows up in the interface without halting script execution.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	var parser = new mw.jqueryMsg.parser( options );

	return function ( args ) {
		var rawText,
			messageKey = args[ 0 ],
			// Replacements come either as one array in second position or as all remaining arguments
			params = $.isArray( args[ 1 ] ) ? args[ 1 ] : slice.call( args, 1 );

		try {
			return parser.parse( messageKey, params );
		} catch ( e ) {
			rawText = parser.settings.messages.get( messageKey );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + messageKey + ': ' + e.message );
			return $( '<span>' ).text( rawText );
		}
	};
}
// When the mediawiki.jqueryMsg.data module has supplied a list of allowed
// HTML elements, it takes the place of the built-in default list.
if ( mw.jqueryMsg && mw.jqueryMsg.data && mw.jqueryMsg.data.allowedHtmlElements ) {
	parserDefaults.allowedHtmlElements = mw.jqueryMsg.data.allowedHtmlElements;
}
/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *       window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * The output format is options.format when given, otherwise parserDefaults.format. For 'text' and
 * 'escaped' the parsed result's .text() is returned; for any other format its .html().
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var parse = getFailableParserFn( options ),
		hasExplicitFormat = options && options.format !== undefined,
		format = hasExplicitFormat ? options.format : parserDefaults.format;

	return function () {
		var $rendered = parse( arguments ),
			wantsPlainText = ( format === 'text' || format === 'escaped' );

		return wantsPlainText ? $rendered.text() : $rendered.html();
	};
};
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *       $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *       var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *       $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * We append to 'this', which in a jQuery plugin context will be the selected elements.
 * The selection is emptied before the parsed nodes are appended.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var parse = getFailableParserFn( options );

	return function () {
		var $target = this.empty();
		// TODO: Simply appendWithoutParsing( $target, parse( arguments ).contents() )
		// or Simply appendWithoutParsing( $target, parse( arguments ) )
		parse( arguments ).contents().each( function ( i, node ) {
			appendWithoutParsing( $target, node );
		} );
		return $target;
	};
};
/**
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * The effective settings are parserDefaults overlaid with the given options. An HTML emitter
 * is created from the configured language and magic words.
 *
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options ),
		format = settings.format;

	// For the 'text' and 'escaped' formats only {{-transformation is wanted
	settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

	this.settings = settings;
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
237 mw
.jqueryMsg
.parser
.prototype = {
239 * Cache mapping MediaWiki message keys and the value onlyCurlyBraceTransform, to the AST of the message.
241 * In most cases, the message is a string so this is identical.
242 * (This is why we would like to move this functionality server-side).
244 * The two parts of the key are separated by colon. For example:
246 * "message-key:true": ast
248 * if the key is "message-key" and onlyCurlyBraceTransform is true.
250 * This cache is shared by all instances of mw.jqueryMsg.parser.
252 * NOTE: We promise, it's static - when you create this empty object
253 * in the prototype, each new instance of the class gets a reference
254 * to the same object.
262 * Where the magic happens.
263 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
264 * If an error is thrown, returns original key, and logs the error
265 * @param {string} key Message key.
266 * @param {Array} replacements Variable replacements for $1, $2... $n
269 parse: function ( key
, replacements
) {
270 return this.emitter
.emit( this.getAst( key
), replacements
);
274 * Fetch the message string associated with a key, return parsed structure. Memoized.
275 * Note that we pass '[' + key + ']' back for a missing message here.
276 * @param {string} key
277 * @return {string|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
279 getAst: function ( key
) {
281 cacheKey
= [key
, this.settings
.onlyCurlyBraceTransform
].join( ':' );
283 if ( this.astCache
[ cacheKey
] === undefined ) {
284 wikiText
= this.settings
.messages
.get( key
);
285 if ( typeof wikiText
!== 'string' ) {
286 wikiText
= '\\[' + key
+ '\\]';
288 this.astCache
[ cacheKey
] = this.wikiTextToAst( wikiText
);
290 return this.astCache
[ cacheKey
];
294 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
296 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
297 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
299 * @param {string} input Message string wikitext
301 * @return {Mixed} abstract syntax tree
303 wikiTextToAst: function ( input
) {
305 regularLiteral
, regularLiteralWithoutBar
, regularLiteralWithoutSpace
, regularLiteralWithSquareBrackets
,
306 doubleQuote
, singleQuote
, backslash
, anyCharacter
, asciiAlphabetLiteral
,
307 escapedOrLiteralWithoutSpace
, escapedOrLiteralWithoutBar
, escapedOrRegularLiteral
,
308 whitespace
, dollar
, digits
, htmlDoubleQuoteAttributeValue
, htmlSingleQuoteAttributeValue
,
309 htmlAttributeEquals
, openHtmlStartTag
, optionalForwardSlash
, openHtmlEndTag
, closeHtmlTag
,
310 openExtlink
, closeExtlink
, wikilinkPage
, wikilinkContents
, openWikilink
, closeWikilink
, templateName
, pipe
, colon
,
311 templateContents
, openTemplate
, closeTemplate
,
312 nonWhitespaceExpression
, paramExpression
, expression
, curlyBraceTransformExpression
, result
,
313 settings
= this.settings
,
314 concat
= Array
.prototype.concat
;
316 // Indicates current position in input as we parse through it.
317 // Shared among all parsing functions below.
320 // =========================================================
321 // parsing combinators - could be a library on its own
322 // =========================================================
325 * Try parsers until one works, if none work return null
327 * @param {Function[]} ps
328 * @return {string|null}
330 function choice( ps
) {
333 for ( i
= 0; i
< ps
.length
; i
++ ) {
335 if ( result
!== null ) {
344 * Try several ps in a row, all must succeed or return null.
345 * This is the only eager one.
347 * @param {Function[]} ps
348 * @return {string|null}
350 function sequence( ps
) {
354 for ( i
= 0; i
< ps
.length
; i
++ ) {
356 if ( res
=== null ) {
366 * Run the same parser over and over until it fails.
367 * Must succeed a minimum of n times or return null.
370 * @param {Function} p
371 * @return {string|null}
373 function nOrMore( n
, p
) {
375 var originalPos
= pos
,
378 while ( parsed
!== null ) {
379 result
.push( parsed
);
382 if ( result
.length
< n
) {
391 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
393 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
394 * May be some scoping issue
397 * @param {Function} p
398 * @param {Function} fn
399 * @return {string|null}
401 function transform( p
, fn
) {
404 return result
=== null ? null : fn( result
);
409 * Just make parsers out of simpler JS builtin types
413 * @return {string} return.return
415 function makeStringParser( s
) {
419 if ( input
.substr( pos
, len
) === s
) {
428 * Makes a regex parser, given a RegExp object.
429 * The regex being passed in should start with a ^ to anchor it to the start
433 * @param {RegExp} regex anchored regex
434 * @return {Function} function to parse input based on the regex
436 function makeRegexParser( regex
) {
438 var matches
= input
.slice( pos
).match( regex
);
439 if ( matches
=== null ) {
442 pos
+= matches
[0].length
;
447 // ===================================================================
448 // General patterns above this line -- wikitext specific parsers below
449 // ===================================================================
451 // Parsing functions follow. All parsing functions work like this:
452 // They don't accept any arguments.
453 // Instead, they just operate non destructively on the string 'input'
454 // As they can consume parts of the string, they advance the shared variable pos,
455 // and return tokens (or whatever else they want to return).
456 // some things are defined as closures and other things as ordinary functions
457 // converting everything to a closure makes it a lot harder to debug... errors pop up
458 // but some debuggers can't tell you exactly where they come from. Also the mutually
459 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
460 // This may be because, to save code, memoization was removed
462 regularLiteral
= makeRegexParser( /^[^{}\[\]$<\\]/ );
463 regularLiteralWithoutBar
= makeRegexParser( /^[^{}\[\]$\\|]/ );
464 regularLiteralWithoutSpace
= makeRegexParser( /^[^{}\[\]$\s]/ );
465 regularLiteralWithSquareBrackets
= makeRegexParser( /^[^{}$\\]/ );
467 backslash
= makeStringParser( '\\' );
468 doubleQuote
= makeStringParser( '"' );
469 singleQuote
= makeStringParser( '\'' );
470 anyCharacter
= makeRegexParser( /^./ );
472 openHtmlStartTag
= makeStringParser( '<' );
473 optionalForwardSlash
= makeRegexParser( /^\/?/ );
474 openHtmlEndTag
= makeStringParser( '</' );
475 htmlAttributeEquals
= makeRegexParser( /^\s*=\s*/ );
476 closeHtmlTag
= makeRegexParser( /^\s*>/ );
478 function escapedLiteral() {
479 var result
= sequence( [
483 return result
=== null ? null : result
[1];
485 escapedOrLiteralWithoutSpace
= choice( [
487 regularLiteralWithoutSpace
489 escapedOrLiteralWithoutBar
= choice( [
491 regularLiteralWithoutBar
493 escapedOrRegularLiteral
= choice( [
// Used to define "literals" without spaces, in space-delimited situations.
// Joins one or more matched characters into a single string; null if nothing matched.
function literalWithoutSpace() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutSpace )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
// Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
// it is not a literal in the parameter.
// Joins one or more matched characters into a single string; null if nothing matched.
function literalWithoutBar() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutBar )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
// Used for wikilink page names. Like literalWithoutBar, but
// without allowing escapes.
// Joins one or more matched characters into a single string; null if nothing matched.
function unescapedLiteralWithoutBar() {
	var chars = nOrMore( 1, regularLiteralWithoutBar )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
517 var result
= nOrMore( 1, escapedOrRegularLiteral
)();
518 return result
=== null ? null : result
.join( '' );
// Literal text for the curly-brace-only mode: one or more characters accepted by
// regularLiteralWithSquareBrackets, joined into a single string; null if nothing matched.
function curlyBraceTransformExpressionLiteral() {
	var chars = nOrMore( 1, regularLiteralWithSquareBrackets )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
// Every regex handed to makeRegexParser must start with ^ so that it can only match at the
// current parse position. Without the anchor, asciiAlphabetLiteral could match letters
// further along in the input while the position is advanced as if they had been next.
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );

whitespace = makeRegexParser( /^\s+/ );
dollar = makeStringParser( '$' );
digits = makeRegexParser( /^\d+/ );
534 function replacement() {
535 var result
= sequence( [
539 if ( result
=== null ) {
542 return [ 'REPLACE', parseInt( result
[1], 10 ) - 1 ];
544 openExtlink
= makeStringParser( '[' );
545 closeExtlink
= makeStringParser( ']' );
546 // this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
548 var result
, parsedResult
;
550 parsedResult
= sequence( [
552 nonWhitespaceExpression
,
554 nOrMore( 1, expression
),
557 if ( parsedResult
!== null ) {
558 result
= [ 'EXTLINK', parsedResult
[1] ];
559 // TODO (mattflaschen, 2013-03-22): Clean this up if possible.
560 // It's avoiding CONCAT for single nodes, so they at least don't get the htmlEmitter span.
561 if ( parsedResult
[3].length
=== 1 ) {
562 result
.push( parsedResult
[3][0] );
564 result
.push( ['CONCAT'].concat( parsedResult
[3] ) );
569 // this is the same as the above extlink, except that the url is being passed on as a parameter
570 function extLinkParam() {
571 var result
= sequence( [
579 if ( result
=== null ) {
582 return [ 'EXTLINKPARAM', parseInt( result
[2], 10 ) - 1, result
[4] ];
584 openWikilink
= makeStringParser( '[[' );
585 closeWikilink
= makeStringParser( ']]' );
586 pipe
= makeStringParser( '|' );
588 function template() {
589 var result
= sequence( [
594 return result
=== null ? null : result
[1];
597 wikilinkPage
= choice( [
598 unescapedLiteralWithoutBar
,
602 function pipedWikilink() {
603 var result
= sequence( [
608 return result
=== null ? null : [ result
[0], result
[2] ];
611 wikilinkContents
= choice( [
613 wikilinkPage
// unpiped link
616 function wikilink() {
617 var result
, parsedResult
, parsedLinkContents
;
620 parsedResult
= sequence( [
625 if ( parsedResult
!== null ) {
626 parsedLinkContents
= parsedResult
[1];
627 result
= [ 'WIKILINK' ].concat( parsedLinkContents
);
632 // TODO: Support data- if appropriate
633 function doubleQuotedHtmlAttributeValue() {
634 var parsedResult
= sequence( [
636 htmlDoubleQuoteAttributeValue
,
639 return parsedResult
=== null ? null : parsedResult
[1];
642 function singleQuotedHtmlAttributeValue() {
643 var parsedResult
= sequence( [
645 htmlSingleQuoteAttributeValue
,
648 return parsedResult
=== null ? null : parsedResult
[1];
651 function htmlAttribute() {
652 var parsedResult
= sequence( [
654 asciiAlphabetLiteral
,
657 doubleQuotedHtmlAttributeValue
,
658 singleQuotedHtmlAttributeValue
661 return parsedResult
=== null ? null : [parsedResult
[1], parsedResult
[3]];
/**
 * Checks if HTML is allowed
 *
 * The tag names are compared case-insensitively and must match each other, the element must
 * be listed in settings.allowedHtmlElements, and every attribute name must be listed either
 * in settings.allowedHtmlCommonAttributes or in the per-element list for this tag.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {Object} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var idx, count, name,
		tag = startTagName.toLowerCase();

	if ( tag !== endTagName.toLowerCase() || $.inArray( tag, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	// Names sit at the even indexes; the values in between are not inspected here
	for ( idx = 0, count = attributes.length; idx < count; idx += 2 ) {
		name = attributes[ idx ];
		if ( $.inArray( name, settings.allowedHtmlCommonAttributes ) === -1 &&
			$.inArray( name, settings.allowedHtmlAttributesByElement[ tag ] || [] ) === -1 ) {
			return false;
		}
	}

	return true;
}
// Parses zero or more HTML attributes into one flat array headed by 'HTMLATTRIBUTES'.
function htmlAttributes() {
	var pairs = nOrMore( 0, htmlAttribute )();
	// Un-nest attributes array due to structure of jQueryMsg operations (see emit).
	return concat.apply( [ 'HTMLATTRIBUTES' ], pairs );
}
699 // Subset of allowed HTML markup.
700 // Most elements and many attributes allowed on the server are not supported yet.
702 var parsedOpenTagResult
, parsedHtmlContents
, parsedCloseTagResult
,
703 wrappedAttributes
, attributes
, startTagName
, endTagName
, startOpenTagPos
,
704 startCloseTagPos
, endOpenTagPos
, endCloseTagPos
,
707 // Break into three sequence calls. That should allow accurate reconstruction of the original HTML, and requiring an exact tag name match.
708 // 1. open through closeHtmlTag
710 // 3. openHtmlEnd through close
711 // This will allow recording the positions to reconstruct if HTML is to be treated as text.
713 startOpenTagPos
= pos
;
714 parsedOpenTagResult
= sequence( [
716 asciiAlphabetLiteral
,
718 optionalForwardSlash
,
722 if ( parsedOpenTagResult
=== null ) {
727 startTagName
= parsedOpenTagResult
[1];
729 parsedHtmlContents
= nOrMore( 0, expression
)();
731 startCloseTagPos
= pos
;
732 parsedCloseTagResult
= sequence( [
734 asciiAlphabetLiteral
,
738 if ( parsedCloseTagResult
=== null ) {
739 // Closing tag failed. Return the start tag and contents.
740 return [ 'CONCAT', input
.slice( startOpenTagPos
, endOpenTagPos
) ]
741 .concat( parsedHtmlContents
);
744 endCloseTagPos
= pos
;
745 endTagName
= parsedCloseTagResult
[1];
746 wrappedAttributes
= parsedOpenTagResult
[2];
747 attributes
= wrappedAttributes
.slice( 1 );
748 if ( isAllowedHtml( startTagName
, endTagName
, attributes
) ) {
749 result
= [ 'HTMLELEMENT', startTagName
, wrappedAttributes
]
750 .concat( parsedHtmlContents
);
752 // HTML is not allowed, so contents will remain how
753 // it was, while HTML markup at this level will be
755 // E.g. assuming script tags are not allowed:
757 // <script>[[Foo|bar]]</script>
759 // results in '<script>' and '</script>'
760 // (not treated as an HTML tag), surrounding a fully
763 // Concatenate everything from the tag, flattening the contents.
764 result
= [ 'CONCAT', input
.slice( startOpenTagPos
, endOpenTagPos
) ]
765 .concat( parsedHtmlContents
, input
.slice( startCloseTagPos
, endCloseTagPos
) );
771 templateName
= transform(
772 // see $wgLegalTitleChars
773 // not allowing : due to the need to catch "PLURAL:$1"
774 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
775 function ( result
) { return result
.toString(); }
777 function templateParam() {
781 nOrMore( 0, paramExpression
)
783 if ( result
=== null ) {
787 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
788 return expr
.length
> 1 ? [ 'CONCAT' ].concat( expr
) : expr
[0];
791 function templateWithReplacement() {
792 var result
= sequence( [
797 return result
=== null ? null : [ result
[0], result
[2] ];
799 function templateWithOutReplacement() {
800 var result
= sequence( [
805 return result
=== null ? null : [ result
[0], result
[2] ];
807 function templateWithOutFirstParameter() {
808 var result
= sequence( [
812 return result
=== null ? null : [ result
[0], '' ];
814 colon
= makeStringParser( ':' );
815 templateContents
= choice( [
817 var res
= sequence( [
818 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
819 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
820 choice( [ templateWithReplacement
, templateWithOutReplacement
, templateWithOutFirstParameter
] ),
821 nOrMore( 0, templateParam
)
823 return res
=== null ? null : res
[0].concat( res
[1] );
826 var res
= sequence( [
828 nOrMore( 0, templateParam
)
830 if ( res
=== null ) {
833 return [ res
[0] ].concat( res
[1] );
836 openTemplate
= makeStringParser( '{{' );
837 closeTemplate
= makeStringParser( '}}' );
838 nonWhitespaceExpression
= choice( [
846 paramExpression
= choice( [
855 expression
= choice( [
865 // Used when only {{-transformation is wanted, for 'text'
866 // or 'escaped' formats
867 curlyBraceTransformExpression
= choice( [
870 curlyBraceTransformExpressionLiteral
/**
 * Runs the root parse function zero or more times over the input and wraps
 * whatever it produced in a CONCAT node.
 *
 * @param {Function} rootExpression root parse function
 * @return {Array|null} [ 'CONCAT', ...nodes ], or null if the repetition itself returned null
 */
function start( rootExpression ) {
	var nodes = nOrMore( 0, rootExpression )();
	return nodes === null ? null : [ 'CONCAT' ].concat( nodes );
}
885 // everything above this point is supposed to be stateless/static, but
886 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
887 // finally let's do some actual work...
889 // If you add another possible rootExpression, you must update the astCache key scheme.
890 result
= start( this.settings
.onlyCurlyBraceTransform
? curlyBraceTransformExpression
: expression
);
893 * For success, the p must have gotten to the end of the input
894 * and returned a non-null.
895 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
897 if ( result
=== null || pos
!== input
.length
) {
898 throw new Error( 'Parse error at position ' + pos
.toString() + ' in input: ' + input
);
906 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
908 mw
.jqueryMsg
.htmlEmitter = function ( language
, magic
) {
909 this.language
= language
;
911 $.each( magic
, function ( key
, val
) {
912 jmsg
[ key
.toLowerCase() ] = function () {
918 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
919 * Walk entire node structure, applying replacements and template functions when appropriate
920 * @param {Mixed} node Abstract syntax tree (top node or subnode)
921 * @param {Array} replacements for $1, $2, ... $n
922 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
924 this.emit = function ( node
, replacements
) {
925 var ret
, subnodes
, operation
,
927 switch ( typeof node
) {
932 // typeof returns object for arrays
934 // node is an array of nodes
935 subnodes
= $.map( node
.slice( 1 ), function ( n
) {
936 return jmsg
.emit( n
, replacements
);
938 operation
= node
[0].toLowerCase();
939 if ( typeof jmsg
[operation
] === 'function' ) {
940 ret
= jmsg
[ operation
]( subnodes
, replacements
);
942 throw new Error( 'Unknown operation "' + operation
+ '"' );
946 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
947 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
948 // The logical thing is probably to return the empty string here when we encounter undefined.
952 throw new Error( 'Unexpected type in AST: ' + typeof node
);
958 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
959 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
960 // If you have 'magic words' then configure the parser to have them upon creation.
962 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
963 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
964 mw
.jqueryMsg
.htmlEmitter
.prototype = {
966 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
967 * Must return a single node to parents -- a jQuery with synthetic span
968 * However, unwrap any other synthetic spans in our children and pass them upwards
969 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
972 concat: function ( nodes
) {
973 var $span
= $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
974 $.each( nodes
, function ( i
, node
) {
975 if ( node
instanceof jQuery
&& node
.hasClass( 'mediaWiki_htmlEmitter' ) ) {
976 $.each( node
.contents(), function ( j
, childNode
) {
977 appendWithoutParsing( $span
, childNode
);
980 // Let jQuery append nodes, arrays of nodes and jQuery objects
981 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
982 appendWithoutParsing( $span
, node
);
989 * Return escaped replacement of correct index, or string if unavailable.
990 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
991 * if the specified parameter is not found return the same string
992 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
994 * TODO: Throw error if nodes.length > 1 ?
996 * @param {Array} nodes List of one element, integer, n >= 0
997 * @param {Array} replacements List of at least n strings
998 * @return {String} replacement
1000 replace: function ( nodes
, replacements
) {
1001 var index
= parseInt( nodes
[0], 10 );
1003 if ( index
< replacements
.length
) {
1004 return replacements
[index
];
1006 // index not found, fallback to displaying variable
1007 return '$' + ( index
+ 1 );
1012 * Transform wiki-link
1015 * It only handles basic cases, either no pipe, or a pipe with an explicit
1018 * It does not attempt to handle features like the pipe trick.
1019 * However, the pipe trick should usually not be present in wikitext retrieved
1020 * from the server, since the replacement is done at save time.
1021 * It may, though, if the wikitext appears in extension-controlled content.
1025 wikilink: function ( nodes
) {
1026 var page
, anchor
, url
;
1029 url
= mw
.util
.getUrl( page
);
1031 if ( nodes
.length
=== 1 ) {
1032 // [[Some Page]] or [[Namespace:Some Page]]
1035 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1039 return $( '<a>' ).attr( {
1046 * Converts array of HTML element key value pairs to object
1048 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1049 * name and 2 * n + 1 the associated value
1050 * @return {Object} Object mapping attribute name to attribute value
1052 htmlattributes: function ( nodes
) {
1053 var i
, len
, mapping
= {};
1054 for ( i
= 0, len
= nodes
.length
; i
< len
; i
+= 2 ) {
1055 mapping
[nodes
[i
]] = decodePrimaryHtmlEntities( nodes
[i
+ 1] );
1061 * Handles an (already-validated) HTML element.
1063 * @param {Array} nodes Nodes to process when creating element
1064 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1066 htmlelement: function ( nodes
) {
1067 var tagName
, attributes
, contents
, $element
;
1069 tagName
= nodes
.shift();
1070 attributes
= nodes
.shift();
1072 $element
= $( document
.createElement( tagName
) ).attr( attributes
);
1073 return appendWithoutParsing( $element
, contents
);
1077 * Transform parsed structure into external link
1078 * If the href is a jQuery object, treat it as "enclosing" the link text.
1080 * - ... function, treat it as the click handler.
1081 * - ... string, treat it as a URI.
1083 * TODO: throw an error if nodes.length > 2 ?
1085 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {String}
1088 extlink: function ( nodes
) {
1091 contents
= nodes
[1];
1092 if ( arg
instanceof jQuery
) {
1096 if ( typeof arg
=== 'function' ) {
1097 $el
.attr( 'href', '#' )
1098 .click( function ( e
) {
1103 $el
.attr( 'href', arg
.toString() );
1106 return appendWithoutParsing( $el
, contents
);
1110 * This is basically use a combination of replace + external link (link with parameter
1111 * as url), but we don't want to run the regular replace here-on: inserting a
1112 * url as href-attribute of a link will automatically escape it already, so
1113 * we don't want replace to (manually) escape it as well.
1115 * TODO: throw error if nodes.length > 1 ?
1117 * @param {Array} nodes List of one element, integer, n >= 0
1118 * @param {Array} replacements List of at least n strings
1119 * @return {string} replacement
1121 extlinkparam: function ( nodes
, replacements
) {
1123 index
= parseInt( nodes
[0], 10 );
1124 if ( index
< replacements
.length
) {
1125 replacement
= replacements
[index
];
1127 replacement
= '$' + ( index
+ 1 );
1129 return this.extlink( [ replacement
, nodes
[1] ] );
1133 * Transform parsed structure into pluralization
1134 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1135 * So convert it back with the current language's convertNumber.
1136 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1137 * @return {string} selected pluralized form according to current language
1139 plural: function ( nodes
) {
1140 var forms
, firstChild
, firstChildText
, explicitPluralFormNumber
, formIndex
, form
, count
,
1141 explicitPluralForms
= {};
1143 count
= parseFloat( this.language
.convertNumber( nodes
[0], true ) );
1144 forms
= nodes
.slice( 1 );
1145 for ( formIndex
= 0; formIndex
< forms
.length
; formIndex
++ ) {
1146 form
= forms
[formIndex
];
1148 if ( form
.jquery
&& form
.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1149 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1150 firstChild
= form
.contents().get( 0 );
1151 if ( firstChild
&& firstChild
.nodeType
=== Node
.TEXT_NODE
) {
1152 firstChildText
= firstChild
.textContent
;
1153 if ( /^\d+=/.test( firstChildText
) ) {
1154 explicitPluralFormNumber
= parseInt( firstChildText
.split( /=/ )[0], 10 );
1155 // Use the digit part as key and rest of first text node and
1156 // rest of child nodes as value.
1157 firstChild
.textContent
= firstChildText
.slice( firstChildText
.indexOf( '=' ) + 1 );
1158 explicitPluralForms
[explicitPluralFormNumber
] = form
;
1159 forms
[formIndex
] = undefined;
1162 } else if ( /^\d+=/.test( form
) ) {
1163 // Simple explicit plural forms like 12=a dozen
1164 explicitPluralFormNumber
= parseInt( form
.split( /=/ )[0], 10 );
1165 explicitPluralForms
[explicitPluralFormNumber
] = form
.slice( form
.indexOf( '=' ) + 1 );
1166 forms
[formIndex
] = undefined;
1170 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1171 forms
= $.map( forms
, function ( form
) {
1175 return this.language
.convertPlural( count
, forms
, explicitPluralForms
);
1179 * Transform parsed structure according to gender.
1181 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1183 * The first node must be one of:
1184 * - the mw.user object (or a compatible one)
1185 * - an empty string - indicating the current user, same effect as passing the mw.user object
1186 * - a gender string ('male', 'female' or 'unknown')
1188 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1189 * @return {string} Selected gender form according to current language
1191 gender: function ( nodes
) {
1193 maybeUser
= nodes
[0],
1194 forms
= nodes
.slice( 1 );
1196 if ( maybeUser
=== '' ) {
1197 maybeUser
= mw
.user
;
1200 // If we are passed a mw.user-like object, check their gender.
1201 // Otherwise, assume the gender string itself was passed .
1202 if ( maybeUser
&& maybeUser
.options
instanceof mw
.Map
) {
1203 gender
= maybeUser
.options
.get( 'gender' );
1208 return this.language
.gender( gender
, forms
);
1212 * Transform parsed structure into grammar conversion.
1213 * Invoked by putting `{{grammar:form|word}}` in a message
1214 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1215 * @return {string} selected grammatical form according to current language
1217 grammar: function ( nodes
) {
1218 var form
= nodes
[0],
1220 return word
&& form
&& this.language
.convertGrammar( word
, form
);
1224 * Transform parsed structure into an int: (interface language) message include
1225 * Invoked by putting `{{int:othermessage}}` into a message
1226 * @param {Array} nodes List of nodes
1227 * @return {string} Other message
1229 'int': function ( nodes
) {
1230 return mw
.jqueryMsg
.getMessageFunction()( nodes
[0].toLowerCase() );
1234 * Takes an unformatted number (Arabic digits, no group separators and . as decimal separator)
1235 * and outputs it in the localized digit script and formatted with decimal
1236 * separator, according to the current language.
1237 * @param {Array} nodes List of nodes
1238 * @return {number|string} Formatted number
1240 formatnum: function ( nodes
) {
1241 var isInteger
= ( nodes
[1] && nodes
[1] === 'R' ) ? true : false,
1244 return this.language
.convertNumber( number
, isInteger
);
1248 // Deprecated! don't rely on gM existing.
1249 // The window.gM ought not to be required - or if required, not required here.
1250 // But moving it to extensions breaks it (?!)
1251 // Need to fix plugin so it could do attributes as well, then will be okay to remove this.
1252 // @deprecated since 1.23
mw.log.deprecate(
	window,
	'gM',
	mw.jqueryMsg.getMessageFunction(),
	'Use mw.message( ... ).parse() instead.'
);
1258 * @see mw.jqueryMsg#getPlugin
$.fn.msg = mw.jqueryMsg.getPlugin();
1262 // Replace the default message parser with jqueryMsg
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	// Do not use mw.jqueryMsg unless required: only non-plain formats whose
	// raw message contains "{{", "[", "<" or ">" need the advanced parser.
	// The message is only fetched from the map when the format is not 'plain'.
	var needsJqueryMsg = this.format !== 'plain' &&
		/\{\{|[\[<>]/.test( this.map.get( this.key ) );

	if ( !needsJqueryMsg ) {
		// Fall back to mw.msg's simple parser
		return oldParser.apply( this );
	}

	// TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
	// Caching is somewhat problematic, because we do need different message functions for different maps, so
	// we'd have to cache the parser as a member of this.map, which sounds a bit ugly.
	return mw.jqueryMsg.getMessageFunction( {
		'messages': this.map,
		// For format 'escaped', escaping part is handled by mediawiki.js
		'format': this.format
	} )( this.key, this.parameters );
};
1284 }( mediaWiki
, jQuery
) );