2 * Experimental advanced wikitext parser-emitter.
3 * See: http://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
9 slice
= Array
.prototype.slice
,
12 'SITENAME' : mw
.config
.get( 'wgSiteName' )
14 messages
: mw
.messages
,
15 language
: mw
.language
19 * Given parser options, return a function that parses a key and replacements, returning jQuery object
20 * @param {Object} parser options
21 * @return {Function} accepting ( String message key, String replacement1, String replacement2 ... ) and returning {jQuery}
23 function getFailableParserFn( options
) {
24 var parser
= new mw
.jqueryMsg
.parser( options
);
26 * Try to parse a key and optional replacements, returning a jQuery object that may be a tree of jQuery nodes.
27 * If there was an error parsing, return the key and the error message (wrapped in jQuery). This should put the error right into
28 * the interface, without causing the page to halt script execution, and it hopefully should be clearer how to fix it.
30 * @param {Array} first element is the key, replacements may be in array in 2nd element, or remaining elements.
33 return function ( args
) {
35 argsArray
= $.isArray( args
[1] ) ? args
[1] : slice
.call( args
, 1 );
37 return parser
.parse( key
, argsArray
);
39 return $( '<span>' ).append( key
+ ': ' + e
.message
);
48 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
50 * window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
51 * $( 'p#headline' ).html( gM( 'hello-user', username ) );
53 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
54 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
56 * @param {Object} parser options
57 * @return {Function} function suitable for assigning to window.gM
59 mw
.jqueryMsg
.getMessageFunction = function ( options
) {
60 var failableParserFn
= getFailableParserFn( options
);
62 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
63 * somefunction(a, b, c, d)
65 * somefunction(a, [b, c, d])
67 * @param {string} key Message key.
68 * @param {Array|mixed} replacements Optional variable replacements (variadically or an array).
69 * @return {string} Rendered HTML.
72 return failableParserFn( arguments
).html();
78 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
79 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
81 * $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
82 * var userlink = $( '<a>' ).click( function () { alert( "hello!!") } );
83 * $( 'p#headline' ).msg( 'hello-user', userlink );
85 * @param {Object} parser options
86 * @return {Function} function suitable for assigning to jQuery plugin, such as $.fn.msg
88 mw
.jqueryMsg
.getPlugin = function ( options
) {
89 var failableParserFn
= getFailableParserFn( options
);
91 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
92 * somefunction(a, b, c, d)
94 * somefunction(a, [b, c, d])
96 * We append to 'this', which in a jQuery plugin context will be the selected elements.
97 * @param {string} key Message key.
98 * @param {Array|mixed} replacements Optional variable replacements (variadically or an array).
99 * @return {jQuery} this
102 var $target
= this.empty();
103 // TODO: Simply $target.append( failableParserFn( arguments ).contents() )
104 // or Simply $target.append( failableParserFn( arguments ) )
105 $.each( failableParserFn( arguments
).contents(), function ( i
, node
) {
106 $target
.append( node
);
114 * Describes an object, whose primary duty is to .parse() message keys.
115 * @param {Object} options
117 mw
.jqueryMsg
.parser = function ( options
) {
118 this.settings
= $.extend( {}, parserDefaults
, options
);
119 this.emitter
= new mw
.jqueryMsg
.htmlEmitter( this.settings
.language
, this.settings
.magic
);
122 mw
.jqueryMsg
.parser
.prototype = {
123 // cache, map of mediaWiki message key to the AST of the message. In most cases, the message is a string so this is identical.
124 // (This is why we would like to move this functionality server-side).
128 * Where the magic happens.
129 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
130 * If an error is thrown, returns original key, and logs the error
131 * @param {String} key Message key.
132 * @param {Array} replacements Variable replacements for $1, $2... $n
135 parse: function ( key
, replacements
) {
136 return this.emitter
.emit( this.getAst( key
), replacements
);
139 * Fetch the message string associated with a key, return parsed structure. Memoized.
140 * Note that we pass '[' + key + ']' back for a missing message here.
141 * @param {String} key
142 * @return {String|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
144 getAst: function ( key
) {
145 if ( this.astCache
[ key
] === undefined ) {
146 var wikiText
= this.settings
.messages
.get( key
);
147 if ( typeof wikiText
!== 'string' ) {
148 wikiText
= '\\[' + key
+ '\\]';
150 this.astCache
[ key
] = this.wikiTextToAst( wikiText
);
152 return this.astCache
[ key
];
155 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
157 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
158 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
160 * @param {String} message string wikitext
162 * @return {Mixed} abstract syntax tree
164 wikiTextToAst: function ( input
) {
166 regularLiteral
, regularLiteralWithoutBar
, regularLiteralWithoutSpace
, backslash
, anyCharacter
,
167 escapedOrLiteralWithoutSpace
, escapedOrLiteralWithoutBar
, escapedOrRegularLiteral
,
168 whitespace
, dollar
, digits
,
169 openExtlink
, closeExtlink
, wikilinkPage
, wikilinkContents
, openLink
, closeLink
, templateName
, pipe
, colon
,
170 templateContents
, openTemplate
, closeTemplate
,
171 nonWhitespaceExpression
, paramExpression
, expression
, result
;
173 // Indicates current position in input as we parse through it.
174 // Shared among all parsing functions below.
177 // =========================================================
178 // parsing combinators - could be a library on its own
179 // =========================================================
180 // Try parsers until one works, if none work return null
181 function choice( ps
) {
184 for ( i
= 0; i
< ps
.length
; i
++ ) {
186 if ( result
!== null ) {
193 // try several ps in a row, all must succeed or return null
194 // this is the only eager one
195 function sequence( ps
) {
199 for ( i
= 0; i
< ps
.length
; i
++ ) {
201 if ( res
=== null ) {
209 // run the same parser over and over until it fails.
210 // must succeed a minimum of n times or return null
211 function nOrMore( n
, p
) {
213 var originalPos
= pos
,
216 while ( parsed
!== null ) {
217 result
.push( parsed
);
220 if ( result
.length
< n
) {
227 // There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
228 // But using this as a combinator seems to cause problems when combined with nOrMore().
229 // May be some scoping issue
230 function transform( p
, fn
) {
233 return result
=== null ? null : fn( result
);
236 // Helpers -- just make ps out of simpler JS builtin types
237 function makeStringParser( s
) {
241 if ( input
.substr( pos
, len
) === s
) {
248 function makeRegexParser( regex
) {
250 var matches
= input
.substr( pos
).match( regex
);
251 if ( matches
=== null ) {
254 pos
+= matches
[0].length
;
260 * ===================================================================
261 * General patterns above this line -- wikitext specific parsers below
262 * ===================================================================
264 // Parsing functions follow. All parsing functions work like this:
265 // They don't accept any arguments.
266 // Instead, they just operate non destructively on the string 'input'
267 // As they can consume parts of the string, they advance the shared variable pos,
268 // and return tokens (or whatever else they want to return).
269 // some things are defined as closures and other things as ordinary functions
270 // converting everything to a closure makes it a lot harder to debug... errors pop up
271 // but some debuggers can't tell you exactly where they come from. Also the mutually
272 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
273 // This may be because, to save code, memoization was removed
274 regularLiteral
= makeRegexParser( /^[^{}\[\]$\\]/ );
275 regularLiteralWithoutBar
= makeRegexParser(/^[^{}\[\]$\\|]/);
276 regularLiteralWithoutSpace
= makeRegexParser(/^[^{}\[\]$\s]/);
277 backslash
= makeStringParser( '\\' );
278 anyCharacter
= makeRegexParser( /^./ );
279 function escapedLiteral() {
280 var result
= sequence( [
284 return result
=== null ? null : result
[1];
286 escapedOrLiteralWithoutSpace
= choice( [
288 regularLiteralWithoutSpace
290 escapedOrLiteralWithoutBar
= choice( [
292 regularLiteralWithoutBar
294 escapedOrRegularLiteral
= choice( [
298 // Used to define "literals" without spaces, in space-delimited situations
299 function literalWithoutSpace() {
300 var result
= nOrMore( 1, escapedOrLiteralWithoutSpace
)();
301 return result
=== null ? null : result
.join('');
303 // Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
304 // it is not a literal in the parameter
305 function literalWithoutBar() {
306 var result
= nOrMore( 1, escapedOrLiteralWithoutBar
)();
307 return result
=== null ? null : result
.join('');
310 // Used for wikilink page names. Like literalWithoutBar, but
311 // without allowing escapes.
312 function unescapedLiteralWithoutBar() {
313 var result
= nOrMore( 1, regularLiteralWithoutBar
)();
314 return result
=== null ? null : result
.join('');
318 var result
= nOrMore( 1, escapedOrRegularLiteral
)();
319 return result
=== null ? null : result
.join('');
321 whitespace
= makeRegexParser( /^\s+/ );
322 dollar
= makeStringParser( '$' );
323 digits
= makeRegexParser( /^\d+/ );
325 function replacement() {
326 var result
= sequence( [
330 if ( result
=== null ) {
333 return [ 'REPLACE', parseInt( result
[1], 10 ) - 1 ];
335 openExtlink
= makeStringParser( '[' );
336 closeExtlink
= makeStringParser( ']' );
337 // this extlink MUST have inner text, e.g. [foo] not allowed; [foo bar] is allowed
339 var result
, parsedResult
;
341 parsedResult
= sequence( [
343 nonWhitespaceExpression
,
348 if ( parsedResult
!== null ) {
349 result
= [ 'LINK', parsedResult
[1], parsedResult
[3] ];
353 // this is the same as the above extlink, except that the url is being passed on as a parameter
354 function extLinkParam() {
355 var result
= sequence( [
363 if ( result
=== null ) {
366 return [ 'LINKPARAM', parseInt( result
[2], 10 ) - 1, result
[4] ];
368 openLink
= makeStringParser( '[[' );
369 closeLink
= makeStringParser( ']]' );
370 pipe
= makeStringParser( '|' );
372 function template() {
373 var result
= sequence( [
378 return result
=== null ? null : result
[1];
381 wikilinkPage
= choice( [
382 unescapedLiteralWithoutBar
,
386 function pipedWikilink() {
387 var result
= sequence( [
392 return result
=== null ? null : [ result
[0], result
[2] ];
395 wikilinkContents
= choice( [
397 wikilinkPage
// unpiped link
401 var result
, parsedResult
, parsedLinkContents
;
404 parsedResult
= sequence( [
409 if ( parsedResult
!== null ) {
410 parsedLinkContents
= parsedResult
[1];
411 result
= [ 'WLINK' ].concat( parsedLinkContents
);
415 templateName
= transform(
416 // see $wgLegalTitleChars
417 // not allowing : due to the need to catch "PLURAL:$1"
418 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
419 function ( result
) { return result
.toString(); }
421 function templateParam() {
425 nOrMore( 0, paramExpression
)
427 if ( result
=== null ) {
431 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
432 return expr
.length
> 1 ? [ 'CONCAT' ].concat( expr
) : expr
[0];
435 function templateWithReplacement() {
436 var result
= sequence( [
441 return result
=== null ? null : [ result
[0], result
[2] ];
443 function templateWithOutReplacement() {
444 var result
= sequence( [
449 return result
=== null ? null : [ result
[0], result
[2] ];
451 colon
= makeStringParser(':');
452 templateContents
= choice( [
454 var res
= sequence( [
455 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
456 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}}
457 choice( [ templateWithReplacement
, templateWithOutReplacement
] ),
458 nOrMore( 0, templateParam
)
460 return res
=== null ? null : res
[0].concat( res
[1] );
463 var res
= sequence( [
465 nOrMore( 0, templateParam
)
467 if ( res
=== null ) {
470 return [ res
[0] ].concat( res
[1] );
473 openTemplate
= makeStringParser('{{');
474 closeTemplate
= makeStringParser('}}');
475 nonWhitespaceExpression
= choice( [
483 paramExpression
= choice( [
492 expression
= choice( [
502 var result
= nOrMore( 0, expression
)();
503 if ( result
=== null ) {
506 return [ 'CONCAT' ].concat( result
);
508 // everything above this point is supposed to be stateless/static, but
509 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
510 // finally let's do some actual work...
514 * For success, the p must have gotten to the end of the input
515 * and returned a non-null.
516 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
518 if ( result
=== null || pos
!== input
.length
) {
519 throw new Error( 'Parse error at position ' + pos
.toString() + ' in input: ' + input
);
526 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
528 mw
.jqueryMsg
.htmlEmitter = function ( language
, magic
) {
529 this.language
= language
;
531 $.each( magic
, function ( key
, val
) {
532 jmsg
[ key
.toLowerCase() ] = function () {
537 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
538 * Walk entire node structure, applying replacements and template functions when appropriate
539 * @param {Mixed} abstract syntax tree (top node or subnode)
540 * @param {Array} replacements for $1, $2, ... $n
541 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
543 this.emit = function ( node
, replacements
) {
544 var ret
, subnodes
, operation
,
546 switch ( typeof node
) {
551 // typeof returns object for arrays
553 // node is an array of nodes
554 subnodes
= $.map( node
.slice( 1 ), function ( n
) {
555 return jmsg
.emit( n
, replacements
);
557 operation
= node
[0].toLowerCase();
558 if ( typeof jmsg
[operation
] === 'function' ) {
559 ret
= jmsg
[ operation
]( subnodes
, replacements
);
561 throw new Error( 'Unknown operation "' + operation
+ '"' );
565 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
566 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
567 // The logical thing is probably to return the empty string here when we encounter undefined.
571 throw new Error( 'Unexpected type in AST: ' + typeof node
);
576 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
577 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
578 // If you have 'magic words' then configure the parser to have them upon creation.
580 // An emitter method takes the array of (already emitted) subnodes and the array of replacements (the values that $1, $2... should translate to).
581 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
582 mw
.jqueryMsg
.htmlEmitter
.prototype = {
584 * Parsing has been applied depth-first, so we can assume that all nodes here are single nodes
585 * Must return a single node to parents -- a jQuery with synthetic span
586 * However, unwrap any other synthetic spans in our children and pass them upwards
587 * @param {Array} nodes - mixed, some single nodes, some arrays of nodes
590 concat: function ( nodes
) {
591 var $span
= $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
592 $.each( nodes
, function ( i
, node
) {
593 if ( node
instanceof jQuery
&& node
.hasClass( 'mediaWiki_htmlEmitter' ) ) {
594 $.each( node
.contents(), function ( j
, childNode
) {
595 $span
.append( childNode
);
598 // Let jQuery append nodes, arrays of nodes and jQuery objects
599 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
600 $span
.append( $.type( node
) === 'object' ? node
: document
.createTextNode( node
) );
607 * Return escaped replacement of correct index, or string if unavailable.
608 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
609 * if the specified parameter is not found return the same string
610 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
611 * TODO: Throw error if nodes.length > 1 ?
612 * @param {Array} of one element, integer, n >= 0
613 * @return {String} replacement
615 replace: function ( nodes
, replacements
) {
616 var index
= parseInt( nodes
[0], 10 );
618 if ( index
< replacements
.length
) {
619 return replacements
[index
];
621 // index not found, fallback to displaying variable
622 return '$' + ( index
+ 1 );
627 * Transform wiki-link
630 * It only handles basic cases, either no pipe, or a pipe with an explicit
633 * It does not attempt to handle features like the pipe trick.
634 * However, the pipe trick should usually not be present in wikitext retrieved
635 * from the server, since the replacement is done at save time.
636 * It may, though, if the wikitext appears in extension-controlled content.
640 wlink: function ( nodes
) {
641 var page
, anchor
, url
;
644 url
= mw
.util
.wikiGetlink( page
);
646 // [[Some Page]] or [[Namespace:Some Page]]
647 if ( nodes
.length
=== 1 ) {
652 * [[Some Page|anchor text]] or
653 * [[Namespace:Some Page|anchor]]
659 return $( '<a />' ).attr( {
666 * Transform parsed structure into external link
667 * If the href is a jQuery object, treat it as "enclosing" the link text.
668 * ... function, treat it as the click handler
669 * ... string, treat it as a URI
670 * TODO: throw an error if nodes.length > 2 ?
671 * @param {Array} of two elements, {jQuery|Function|String} and {String}
674 link: function ( nodes
) {
678 if ( arg
instanceof jQuery
) {
682 if ( typeof arg
=== 'function' ) {
683 $el
.click( arg
).attr( 'href', '#' );
685 $el
.attr( 'href', arg
.toString() );
688 $el
.append( contents
);
693 * This basically uses a combination of replace + link (link with parameter
694 * as url), but we don't want to run the regular replace here-on: inserting a
695 * url as href-attribute of a link will automatically escape it already, so
696 * we don't want replace to (manually) escape it as well.
697 * TODO throw error if nodes.length > 1 ?
698 * @param {Array} of one element, integer, n >= 0
699 * @return {String} replacement
701 linkparam: function ( nodes
, replacements
) {
703 index
= parseInt( nodes
[0], 10 );
704 if ( index
< replacements
.length
) {
705 replacement
= replacements
[index
];
707 replacement
= '$' + ( index
+ 1 );
709 return this.link( [ replacement
, nodes
[1] ] );
713 * Transform parsed structure into pluralization
714 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
715 * So convert it back with the current language's convertNumber.
716 * @param {Array} of nodes, [ {String|Number}, {String}, {String} ... ]
717 * @return {String} selected pluralized form according to current language
719 plural: function ( nodes
) {
721 count
= parseFloat( this.language
.convertNumber( nodes
[0], true ) );
722 forms
= nodes
.slice(1);
723 return forms
.length
? this.language
.convertPlural( count
, forms
) : '';
727 * Transform parsed structure according to gender.
728 * Usage {{gender:[ gender | mw.user object ] | masculine form|feminine form|neutral form}}.
729 * The first node is either a string, which can be "male" or "female",
730 * or a User object (not a username).
732 * @param {Array} of nodes, [ {String|mw.User}, {String}, {String}, {String} ]
733 * @return {String} selected gender form according to current language
735 gender: function ( nodes
) {
738 if ( nodes
[0] && nodes
[0].options
instanceof mw
.Map
) {
739 gender
= nodes
[0].options
.get( 'gender' );
744 forms
= nodes
.slice( 1 );
746 return this.language
.gender( gender
, forms
);
750 * Transform parsed structure into grammar conversion.
751 * Invoked by putting {{grammar:form|word}} in a message
752 * @param {Array} of nodes [{Grammar case eg: genitive}, {String word}]
753 * @return {String} selected grammatical form according to current language
755 grammar: function ( nodes
) {
758 return word
&& form
&& this.language
.convertGrammar( word
, form
);
762 * Transform parsed structure into an int: (interface language) message include
763 * Invoked by putting {{int:othermessage}} into a message
764 * @param {Array} of nodes
765 * @return {string} Other message
767 int: function ( nodes
) {
768 return mw
.jqueryMsg
.getMessageFunction()( nodes
[0].toLowerCase() );
771 // Deprecated! don't rely on gM existing.
772 // The window.gM ought not to be required - or if required, not required here.
773 // But moving it to extensions breaks it (?!)
774 // Need to fix plugin so it could do attributes as well, then will be okay to remove this.
775 window
.gM
= mw
.jqueryMsg
.getMessageFunction();
776 $.fn
.msg
= mw
.jqueryMsg
.getPlugin();
778 // Replace the default message parser with jqueryMsg
779 oldParser
= mw
.Message
.prototype.parser
;
780 mw
.Message
.prototype.parser = function () {
781 // TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
782 // Caching is somewhat problematic, because we do need different message functions for different maps, so
783 // we'd have to cache the parser as a member of this.map, which sounds a bit ugly.
784 // Do not use mw.jqueryMsg unless required
785 if ( !/\{\{|\[/.test(this.map
.get( this.key
) ) ) {
786 // Fall back to mw.msg's simple parser
787 return oldParser
.apply( this );
789 var messageFunction
= mw
.jqueryMsg
.getMessageFunction( { 'messages': this.map
} );
790 return messageFunction( this.key
, this.parameters
);
793 }( mediaWiki
, jQuery
) );