mediawiki.jqueryMsg: Implement `<nowiki>` support
[lhc/web/wiklou.git] / resources / src / mediawiki / mediawiki.jqueryMsg.js
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
4 *
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
9 /**
10 * @class mw.jqueryMsg
11 * @singleton
12 */
13
// NOTE(review): oldParser is only declared here; no use of it is visible in this part of the file.
var oldParser;

// Shorthand used to turn array-likes (such as `arguments`) into real arrays.
var slice = Array.prototype.slice;

// Default settings for every parser; individual parsers may override them
// through the options passed to mw.jqueryMsg.parser.
var parserDefaults = {
	// Magic words, keyed by name, with the value they expand to.
	magic: {
		SITENAME: mw.config.get( 'wgSiteName' )
	},

	// Whitelist for allowed HTML elements in wikitext.
	// Self-closing tags are not currently supported.
	// Can be populated via setPrivateData().
	allowedHtmlElements: [],

	// Attributes allowed on any whitelisted element.
	// See Sanitizer::setupAttributeWhitelist
	allowedHtmlCommonAttributes: [
		// HTML
		'id',
		'class',
		'style',
		'lang',
		'dir',
		'title',

		// WAI-ARIA
		'role'
	],

	// Attributes allowed for specific elements.
	// Key is element name in lower case
	// Value is array of allowed attributes for that element
	allowedHtmlAttributesByElement: {},

	// Message store and language helper used by the parser and emitter.
	messages: mw.messages,
	language: mw.language,

	// Same meaning as in mediawiki.js.
	//
	// Only 'text', 'parse', and 'escaped' are supported, and the
	// actual escaping for 'escaped' is done by other code (generally
	// through mediawiki.js).
	//
	// However, note that this default only
	// applies to direct calls to jqueryMsg. The default for mediawiki.js itself
	// is 'text', including when it uses jqueryMsg.
	format: 'parse'
};
57
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * If our own htmlEmitter jQuery object is given (one carrying the 'mediaWiki_htmlEmitter'
 * class), its contents are appended instead of the wrapper itself.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The children argument is never modified: when an array is passed, the conversion is done
 * on a shallow copy, so a caller can safely reuse the same array afterwards.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		// Private working list; a caller-owned array must not be rewritten in place.
		nodes = $.isArray( children ) ? children.slice() : [ children ];

	for ( i = 0, len = nodes.length; i < len; i++ ) {
		child = nodes[ i ];
		if ( typeof child !== 'object' ) {
			// Strings, numbers, etc. become text nodes so jQuery cannot parse them as HTML.
			child = document.createTextNode( child );
		}
		if ( child instanceof jQuery && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			// Unwrap the synthetic span produced by the emitter.
			child = child.contents();
		}
		nodes[ i ] = child;
	}

	return $parent.append( nodes );
}
90
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape.
 *
 * The replacements run one after another, with `&amp;` last, so an escaped
 * ampersand is never decoded twice.
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	var step,
		decoded = encoded,
		// Order matters: '&amp;' must stay at the end.
		table = [
			[ /&#039;/g, '\'' ],
			[ /&quot;/g, '"' ],
			[ /&lt;/g, '<' ],
			[ /&gt;/g, '>' ],
			[ /&amp;/g, '&' ]
		];

	for ( step = 0; step < table.length; step++ ) {
		decoded = decoded.replace( table[ step ][ 0 ], table[ step ][ 1 ] );
	}
	return decoded;
}
106
/**
 * Turn input into a string.
 *
 * A jQuery object contributes its text content; anything else is passed
 * through String().
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	return String( input instanceof jQuery ? input.text() : input );
}
120
/**
 * Given parser options, return a function that parses a key and replacements, returning jQuery object
 *
 * The returned function creates a fresh parser for each call. If parsing throws, the
 * error is logged through mw.log.warn and the raw message (as returned by the parser's
 * message store) is returned as the text of a new span, so that the problem shows up in
 * the interface without halting script execution.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	return function ( args ) {
		var replacements, rawMessage,
			messageKey = args[ 0 ],
			parser = new mw.jqueryMsg.parser( options );

		// Replacements are either one array in the second slot, or everything after the key.
		if ( $.isArray( args[ 1 ] ) ) {
			replacements = args[ 1 ];
		} else {
			replacements = slice.call( args, 1 );
		}

		try {
			return parser.parse( messageKey, replacements );
		} catch ( e ) {
			rawMessage = parser.settings.messages.get( messageKey );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + messageKey + ': ' + e.message );
			return $( '<span>' ).text( rawMessage );
		}
	};
}
149
150 mw.jqueryMsg = {};
151
/**
 * Initialize parser defaults.
 *
 * Merges the given properties into the shared defaults object, overwriting
 * entries with the same name. ResourceLoaderJqueryMsgModule calls this to
 * provide default values from Sanitizer.php for allowed HTML elements. To
 * override this data for individual parsers, pass the relevant options to
 * mw.jqueryMsg.parser.
 *
 * @private
 * @param {Object} data Properties to merge into the defaults
 */
mw.jqueryMsg.setParserDefaults = function ( data ) {
	// Mutates parserDefaults in place; the return value of $.extend is not needed.
	$.extend( parserDefaults, data );
};
165
/**
 * Get current parser defaults.
 *
 * Primarily used for the unit test. Returns a shallow copy: the top-level
 * object is new, but nested arrays and objects are shared with the defaults.
 *
 * @private
 * @return {Object}
 */
mw.jqueryMsg.getParserDefaults = function () {
	var snapshot = {};
	$.extend( snapshot, parserDefaults );
	return snapshot;
};
177
/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *     window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *     $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * The output format is fixed when this function is called (options.format, or the default
 * format at that moment); the parser function itself is only created on first use.
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var lazyParse,
		format = ( options && options.format !== undefined ) ?
			options.format :
			parserDefaults.format,
		// 'text' and 'escaped' yield the text content, everything else the inner HTML.
		wantsText = format === 'text' || format === 'escaped';

	return function () {
		var $rendered;

		if ( !lazyParse ) {
			lazyParse = getFailableParserFn( options );
		}
		$rendered = lazyParse( arguments );
		return wantsText ? $rendered.text() : $rendered.html();
	};
};
220
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *     $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *     var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *     $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * The selected elements ('this' in a jQuery plugin context) are emptied first, then the
 * parsed message is appended to them.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	// Created on first use and then shared by every call of the plugin.
	var lazyParse;

	return function () {
		var $target;

		if ( !lazyParse ) {
			lazyParse = getFailableParserFn( options );
		}
		$target = this.empty();
		appendWithoutParsing( $target, lazyParse( arguments ) );
		return $target;
	};
};
255
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * Settings are the parser defaults overlaid with the given options. For the
 * 'text' and 'escaped' formats only curly-brace transformation is performed.
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options ),
		format = settings.format;

	settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

	this.settings = settings;
	// Parsed ASTs, keyed by message key (filled by getAst).
	this.astCache = {};
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
271
272 mw.jqueryMsg.parser.prototype = {
273 /**
274 * Where the magic happens.
275 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
276 * If an error is thrown, returns original key, and logs the error
277 *
278 * @param {string} key Message key.
279 * @param {Array} replacements Variable replacements for $1, $2... $n
280 * @return {jQuery}
281 */
282 parse: function ( key, replacements ) {
283 var ast = this.getAst( key );
284 return this.emitter.emit( ast, replacements );
285 },
286
287 /**
288 * Fetch the message string associated with a key, return parsed structure. Memoized.
289 * Note that we pass '[' + key + ']' back for a missing message here.
290 *
291 * @param {string} key
292 * @return {string|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
293 */
294 getAst: function ( key ) {
295 var wikiText;
296
297 if ( !this.astCache.hasOwnProperty( key ) ) {
298 wikiText = this.settings.messages.get( key );
299 if ( typeof wikiText !== 'string' ) {
300 wikiText = '\\[' + key + '\\]';
301 }
302 this.astCache[ key ] = this.wikiTextToAst( wikiText );
303 }
304 return this.astCache[ key ];
305 },
306
307 /**
308 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
309 *
310 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
311 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
312 *
313 * @param {string} input Message string wikitext
314 * @throws Error
315 * @return {Mixed} abstract syntax tree
316 */
317 wikiTextToAst: function ( input ) {
318 var pos,
319 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
320 doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
321 escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
322 whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
323 htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
324 openExtlink, closeExtlink, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
325 templateContents, openTemplate, closeTemplate,
326 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result,
327 settings = this.settings,
328 concat = Array.prototype.concat;
329
330 // Indicates current position in input as we parse through it.
331 // Shared among all parsing functions below.
332 pos = 0;
333
334 // =========================================================
335 // parsing combinators - could be a library on its own
336 // =========================================================
337
/**
 * Build a parser that tries each of the given parsers in order and returns
 * the first result that is not null. If every parser returns null (or the
 * list is empty), the built parser returns null.
 *
 * @private
 * @param {Function[]} ps
 * @return {Function} Parser returning the first non-null result, or null
 */
function choice( ps ) {
	return function () {
		var outcome = null,
			idx = 0;
		while ( outcome === null && idx < ps.length ) {
			outcome = ps[ idx ]();
			idx++;
		}
		return outcome;
	};
}
357
/**
 * Try several ps in a row, all must succeed or return null.
 * This is the only eager one: it runs immediately instead of returning a parser.
 *
 * On failure the shared position is reset to where it was on entry.
 *
 * @private
 * @param {Function[]} ps
 * @return {Array|null} Results of all parsers in order, or null if any failed
 */
function sequence( ps ) {
	var piece,
		startPos = pos,
		collected = [],
		idx = 0;

	while ( idx < ps.length ) {
		piece = ps[ idx ]();
		if ( piece === null ) {
			// Undo whatever the earlier parsers consumed.
			pos = startPos;
			return null;
		}
		collected.push( piece );
		idx++;
	}
	return collected;
}
380
/**
 * Build a parser that runs p over and over until it returns null.
 * It must succeed a minimum of n times; otherwise the shared position is
 * restored and null is returned.
 *
 * @private
 * @param {number} n
 * @param {Function} p
 * @return {Function} Parser returning the array of results, or null
 */
function nOrMore( n, p ) {
	return function () {
		var item,
			startPos = pos,
			matches = [];

		for ( item = p(); item !== null; item = p() ) {
			matches.push( item );
		}
		if ( matches.length >= n ) {
			return matches;
		}
		pos = startPos;
		return null;
	};
}
406
/**
 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
 *
 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
 * May be some scoping issue
 *
 * @private
 * @param {Function} p Parser to run
 * @param {Function} fn Applied to the result of p when it is not null
 * @return {Function} Parser returning fn( result ), or null when p returned null
 */
function transform( p, fn ) {
	return function () {
		var parsed = p();
		if ( parsed === null ) {
			return null;
		}
		return fn( parsed );
	};
}
424
/**
 * Make a parser that matches one fixed string at the current position.
 *
 * @private
 * @param {string} s String to match
 * @return {Function} Parser that consumes and returns s, or returns null
 *  (leaving the position untouched) when the input does not continue with s
 */
function makeStringParser( s ) {
	var len = s.length;
	return function () {
		if ( input.substr( pos, len ) !== s ) {
			return null;
		}
		pos += len;
		return s;
	};
}
444
/**
 * Makes a regex parser, given a RegExp object.
 * The regex being passed in should start with a ^ to anchor it to the start
 * of the string: it is applied to the input remaining after the current
 * position, and the position advances by the length of the match only.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 */
function makeRegexParser( regex ) {
	return function () {
		var matched,
			found = input.slice( pos ).match( regex );

		if ( found !== null ) {
			matched = found[ 0 ];
			pos += matched.length;
			return matched;
		}
		return null;
	};
}
464
465 // ===================================================================
466 // General patterns above this line -- wikitext specific parsers below
467 // ===================================================================
468
469 // Parsing functions follow. All parsing functions work like this:
470 // They don't accept any arguments.
471 // Instead, they just operate non destructively on the string 'input'
472 // As they can consume parts of the string, they advance the shared variable pos,
473 // and return tokens (or whatever else they want to return).
474 // some things are defined as closures and other things as ordinary functions
475 // converting everything to a closure makes it a lot harder to debug... errors pop up
476 // but some debuggers can't tell you exactly where they come from. Also the mutually
477 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
478 // This may be because, to save code, memoization was removed
479
480 regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
481 regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
482 regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
483 regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
484
485 backslash = makeStringParser( '\\' );
486 doubleQuote = makeStringParser( '"' );
487 singleQuote = makeStringParser( '\'' );
488 anyCharacter = makeRegexParser( /^./ );
489
490 openHtmlStartTag = makeStringParser( '<' );
491 optionalForwardSlash = makeRegexParser( /^\/?/ );
492 openHtmlEndTag = makeStringParser( '</' );
493 htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
494 closeHtmlTag = makeRegexParser( /^\s*>/ );
495
// A backslash followed by any single character; yields that character without the backslash.
function escapedLiteral() {
	var pair = sequence( [
		backslash,
		anyCharacter
	] );
	if ( pair === null ) {
		return null;
	}
	return pair[ 1 ];
}
503 escapedOrLiteralWithoutSpace = choice( [
504 escapedLiteral,
505 regularLiteralWithoutSpace
506 ] );
507 escapedOrLiteralWithoutBar = choice( [
508 escapedLiteral,
509 regularLiteralWithoutBar
510 ] );
511 escapedOrRegularLiteral = choice( [
512 escapedLiteral,
513 regularLiteral
514 ] );
// Used to define "literals" without spaces, in space-delimited situations.
// Joins one or more (escaped or regular) non-space characters into a single string.
function literalWithoutSpace() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutSpace )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
// Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
// it is not a literal in the parameter.
// Joins one or more (escaped or regular) characters into a single string.
function literalWithoutBar() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutBar )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
526
// General literal text: one or more escaped or regular characters, joined into a single string.
function literal() {
	var chars = nOrMore( 1, escapedOrRegularLiteral )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
531
// Literal text for the curly-brace-only mode: square brackets count as ordinary characters here.
function curlyBraceTransformExpressionLiteral() {
	var chars = nOrMore( 1, regularLiteralWithSquareBrackets )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
536
// One or more ASCII letters (used for HTML tag and attribute names).
// The regex must be anchored with ^ like every other regex parser: an unanchored
// pattern would match letters further along in the input, while the position only
// advances by the match length, so e.g. '<1>a' would be read as a start tag named 'a'.
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
// Attribute values: everything up to (not including) the closing quote; may be empty.
htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
540
541 whitespace = makeRegexParser( /^\s+/ );
542 dollar = makeStringParser( '$' );
543 digits = makeRegexParser( /^\d+/ );
544
// A placeholder such as $1. Yields [ 'REPLACE', index ] where index is zero-based ($1 -> 0).
function replacement() {
	var parts = sequence( [
		dollar,
		digits
	] );
	return parts === null ?
		null :
		[ 'REPLACE', parseInt( parts[ 1 ], 10 ) - 1 ];
}
555 openExtlink = makeStringParser( '[' );
556 closeExtlink = makeStringParser( ']' );
// External link: '[' target whitespace contents ']'.
// This extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed.
// Yields [ 'EXTLINK', target, [ 'CONCAT', ...contents ] ].
function extlink() {
	var target, targetNodes,
		parts = sequence( [
			openExtlink,
			nOrMore( 1, nonWhitespaceExpression ),
			whitespace,
			nOrMore( 1, expression ),
			closeExtlink
		] );

	if ( parts === null ) {
		return null;
	}

	targetNodes = parts[ 1 ];
	// When the entire link target is a single parameter, we can't use CONCAT, as we allow
	// passing fancy parameters (like a whole jQuery object or a function) to use for the
	// link. Check only if it's a single match, since we can either do CONCAT or not for
	// singles with the same effect.
	if ( targetNodes.length === 1 ) {
		target = targetNodes[ 0 ];
	} else {
		target = [ 'CONCAT' ].concat( targetNodes );
	}

	return [
		'EXTLINK',
		target,
		[ 'CONCAT' ].concat( parts[ 3 ] )
	];
}
584 openWikilink = makeStringParser( '[[' );
585 closeWikilink = makeStringParser( ']]' );
586 pipe = makeStringParser( '|' );
587
// A template: '{{' contents '}}'. Yields the parsed contents only.
function template() {
	var parts = sequence( [
		openTemplate,
		templateContents,
		closeTemplate
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
596
// Wikilink contents with a pipe: target '|' anchor.
// Yields two CONCAT nodes, one for the target and one for the anchor.
function pipedWikilink() {
	var parts = sequence( [
		nOrMore( 1, paramExpression ),
		pipe,
		nOrMore( 1, expression )
	] );
	if ( parts === null ) {
		return null;
	}
	return [
		[ 'CONCAT' ].concat( parts[ 0 ] ),
		[ 'CONCAT' ].concat( parts[ 2 ] )
	];
}
608
// Wikilink contents without a pipe. Yields a one-element list holding the CONCAT node for the target.
function unpipedWikilink() {
	var parts = sequence( [
		nOrMore( 1, paramExpression )
	] );
	if ( parts === null ) {
		return null;
	}
	return [
		[ 'CONCAT' ].concat( parts[ 0 ] )
	];
}
617
618 wikilinkContents = choice( [
619 pipedWikilink,
620 unpipedWikilink
621 ] );
622
// Internal link: '[[' contents ']]'. Yields [ 'WIKILINK', target ] or [ 'WIKILINK', target, anchor ].
function wikilink() {
	var parts = sequence( [
		openWikilink,
		wikilinkContents,
		closeWikilink
	] );

	if ( parts === null ) {
		return null;
	}
	return [ 'WIKILINK' ].concat( parts[ 1 ] );
}
638
// TODO: Support data- if appropriate
// Attribute value wrapped in double quotes; yields the value without the quotes.
function doubleQuotedHtmlAttributeValue() {
	var parts = sequence( [
		doubleQuote,
		htmlDoubleQuoteAttributeValue,
		doubleQuote
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
648
// Attribute value wrapped in single quotes; yields the value without the quotes.
function singleQuotedHtmlAttributeValue() {
	var parts = sequence( [
		singleQuote,
		htmlSingleQuoteAttributeValue,
		singleQuote
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
657
// One HTML attribute: whitespace, name, '=', quoted value. Yields [ name, value ].
function htmlAttribute() {
	var quotedValue = choice( [
			doubleQuotedHtmlAttributeValue,
			singleQuotedHtmlAttributeValue
		] ),
		parts = sequence( [
			whitespace,
			asciiAlphabetLiteral,
			htmlAttributeEquals,
			quotedValue
		] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 1 ], parts[ 3 ] ];
}
670
/**
 * Checks if HTML is allowed
 *
 * The element is allowed when the start and end tag names match (ignoring
 * case), the tag is in settings.allowedHtmlElements, and every attribute name
 * is either a common allowed attribute or allowed for this specific element.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {Array} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var idx, count, name;

	startTagName = startTagName.toLowerCase();
	endTagName = endTagName.toLowerCase();

	if ( startTagName !== endTagName ) {
		return false;
	}
	if ( $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	// Names sit at the even indexes; values (odd indexes) are not inspected.
	for ( idx = 0, count = attributes.length; idx < count; idx += 2 ) {
		name = attributes[ idx ];
		if ( $.inArray( name, settings.allowedHtmlCommonAttributes ) !== -1 ) {
			continue;
		}
		if ( $.inArray( name, settings.allowedHtmlAttributesByElement[ startTagName ] || [] ) !== -1 ) {
			continue;
		}
		return false;
	}

	return true;
}
699
// Zero or more HTML attributes.
// Yields a flat list [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ].
function htmlAttributes() {
	var pairs = nOrMore( 0, htmlAttribute )();
	// Un-nest attributes array due to structure of jQueryMsg operations (see emit):
	// each [ name, value ] pair is spread into the result by concat.
	return Array.prototype.concat.apply( [ 'HTMLATTRIBUTES' ], pairs );
}
705
// Subset of allowed HTML markup.
// Most elements and many attributes allowed on the server are not supported yet.
//
// Parsing is done in three steps so the original markup can be reconstructed
// from recorded positions when it has to be treated as text:
// 1. start tag, through its closing '>'
// 2. contents (zero or more expressions)
// 3. end tag
function html() {
	var openTag, contents, closeTag, wrappedAttributes,
		startTagName, endTagName,
		openTagStart, openTagEnd, closeTagStart, closeTagEnd;

	openTagStart = pos;
	openTag = sequence( [
		openHtmlStartTag,
		asciiAlphabetLiteral,
		htmlAttributes,
		optionalForwardSlash,
		closeHtmlTag
	] );
	if ( openTag === null ) {
		return null;
	}
	openTagEnd = pos;
	startTagName = openTag[ 1 ];

	contents = nOrMore( 0, expression )();

	closeTagStart = pos;
	closeTag = sequence( [
		openHtmlEndTag,
		asciiAlphabetLiteral,
		closeHtmlTag
	] );
	if ( closeTag === null ) {
		// Closing tag failed. Return the start tag (as text) and contents.
		return [ 'CONCAT', input.slice( openTagStart, openTagEnd ) ]
			.concat( contents );
	}
	closeTagEnd = pos;
	endTagName = closeTag[ 1 ];

	// First entry of the wrapped list is the 'HTMLATTRIBUTES' marker; the rest are name/value pairs.
	wrappedAttributes = openTag[ 2 ];

	if ( !isAllowedHtml( startTagName, endTagName, wrappedAttributes.slice( 1 ) ) ) {
		// HTML is not allowed, so contents will remain how
		// it was, while HTML markup at this level will be
		// treated as text
		// E.g. assuming script tags are not allowed:
		//
		// <script>[[Foo|bar]]</script>
		//
		// results in '&lt;script&gt;' and '&lt;/script&gt;'
		// (not treated as an HTML tag), surrounding a fully
		// parsed HTML link.
		//
		// Concatenate everything from the tag, flattening the contents.
		return [ 'CONCAT', input.slice( openTagStart, openTagEnd ) ]
			.concat( contents, input.slice( closeTagStart, closeTagEnd ) );
	}

	return [ 'HTMLELEMENT', startTagName, wrappedAttributes ]
		.concat( contents );
}
777
// <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed,
// as [ 'CONCAT', contents ]. Returns null (consuming nothing) when the input does not
// continue with a complete <nowiki>...</nowiki> pair.
function nowiki() {
	var parsedResult = sequence( [
		makeStringParser( '<nowiki>' ),
		// We use a greedy non-backtracking parser, so we must ensure here that we don't take too much:
		// the lazy match stops right before the first closing tag.
		// [\s\S] is used instead of '.', which would not match line breaks and would make
		// multi-line contents fall through to the HTML parser (and get parsed after all).
		makeRegexParser( /^[\s\S]*?(?=<\/nowiki>)/ ),
		makeStringParser( '</nowiki>' )
	] );

	if ( parsedResult === null ) {
		return null;
	}
	return [ 'CONCAT' ].concat( parsedResult[ 1 ] );
}
796
797 templateName = transform(
798 // see $wgLegalTitleChars
799 // not allowing : due to the need to catch "PLURAL:$1"
800 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
801 function ( result ) { return result.toString(); }
802 );
// One template parameter: '|' followed by zero or more param expressions.
// Yields a CONCAT node when there are several nodes, otherwise the single node itself
// (undefined for an empty parameter).
function templateParam() {
	var nodes,
		parts = sequence( [
			pipe,
			nOrMore( 0, paramExpression )
		] );

	if ( parts === null ) {
		return null;
	}
	nodes = parts[ 1 ];
	if ( nodes.length > 1 ) {
		return [ 'CONCAT' ].concat( nodes );
	}
	return nodes[ 0 ];
}
816
// Template head of the form NAME:$n. Yields [ name, replacementNode ].
function templateWithReplacement() {
	var parts = sequence( [
		templateName,
		colon,
		replacement
	] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}
// Template head of the form NAME:param, where param is any param expression.
// Yields [ name, paramNode ].
function templateWithOutReplacement() {
	var parts = sequence( [
		templateName,
		colon,
		paramExpression
	] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}
// Template head of the form NAME: with nothing after the colon.
// Yields [ name, '' ], i.e. an empty first parameter.
function templateWithOutFirstParameter() {
	var parts = sequence( [
		templateName,
		colon
	] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], '' ];
}
840 colon = makeStringParser( ':' );
841 templateContents = choice( [
842 function () {
843 var res = sequence( [
844 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
845 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
846 choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
847 nOrMore( 0, templateParam )
848 ] );
849 return res === null ? null : res[ 0 ].concat( res[ 1 ] );
850 },
851 function () {
852 var res = sequence( [
853 templateName,
854 nOrMore( 0, templateParam )
855 ] );
856 if ( res === null ) {
857 return null;
858 }
859 return [ res[ 0 ] ].concat( res[ 1 ] );
860 }
861 ] );
862 openTemplate = makeStringParser( '{{' );
863 closeTemplate = makeStringParser( '}}' );
864 nonWhitespaceExpression = choice( [
865 template,
866 wikilink,
867 extlink,
868 replacement,
869 literalWithoutSpace
870 ] );
871 paramExpression = choice( [
872 template,
873 wikilink,
874 extlink,
875 replacement,
876 literalWithoutBar
877 ] );
878
879 expression = choice( [
880 template,
881 wikilink,
882 extlink,
883 replacement,
884 nowiki,
885 html,
886 literal
887 ] );
888
889 // Used when only {{-transformation is wanted, for 'text'
890 // or 'escaped' formats
891 curlyBraceTransformExpression = choice( [
892 template,
893 replacement,
894 curlyBraceTransformExpressionLiteral
895 ] );
896
/**
 * Starts the parse
 *
 * Applies the root parser zero or more times and wraps everything it
 * produced in a single CONCAT node.
 *
 * @param {Function} rootExpression root parse function
 * @return {Array|null} [ 'CONCAT', ...nodes ], or null if the repetition itself returned null
 */
function start( rootExpression ) {
	var nodes = nOrMore( 0, rootExpression )();
	return nodes === null ? null : [ 'CONCAT' ].concat( nodes );
}
909 // everything above this point is supposed to be stateless/static, but
910 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
911 // finally let's do some actual work...
912
913 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
914
915 /*
916 * For success, the p must have gotten to the end of the input
917 * and returned a non-null.
918 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
919 */
920 if ( result === null || pos !== input.length ) {
921 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
922 }
923 return result;
924 }
925
926 };
927
/**
 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
 *
 * Each magic word becomes a method (named after the lower-cased key) that
 * returns the configured value.
 *
 * @param {Object} language Language helper, stored as this.language
 * @param {Object} magic Magic words, keyed by name
 */
mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
	var emitter = this;

	this.language = language;

	$.each( magic, function ( key, val ) {
		emitter[ key.toLowerCase() ] = function () {
			return val;
		};
	} );

	/**
	 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
	 * Walk entire node structure, applying replacements and template functions when appropriate
	 *
	 * @param {Mixed} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
	 * @throws {Error} For an unknown operation or an unexpected node type
	 */
	this.emit = function ( node, replacements ) {
		var subnodes, operation,
			self = this,
			nodeType = typeof node;

		if ( nodeType === 'string' || nodeType === 'number' ) {
			return node;
		}
		if ( nodeType === 'undefined' ) {
			// Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
			// Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
			// The logical thing is probably to return the empty string here when we encounter undefined.
			return '';
		}
		if ( nodeType !== 'object' ) {
			throw new Error( 'Unexpected type in AST: ' + nodeType );
		}

		// typeof returns object for arrays: node is an array whose first entry
		// names the operation and whose remaining entries are its operands.
		// Operands are emitted first (depth-first).
		subnodes = $.map( node.slice( 1 ), function ( n ) {
			return self.emit( n, replacements );
		} );
		operation = node[ 0 ].toLowerCase();
		if ( typeof self[ operation ] !== 'function' ) {
			throw new Error( 'Unknown operation "' + operation + '"' );
		}
		return self[ operation ]( subnodes, replacements );
	};
};
981
982 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
983 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
984 // If you have 'magic words' then configure the parser to have them upon creation.
985 //
986 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
987 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
988 mw.jqueryMsg.htmlEmitter.prototype = {
989 /**
990 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
991 * Must return a single node to parents -- a jQuery with synthetic span
992 * However, unwrap any other synthetic spans in our children and pass them upwards
993 *
994 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
995 * @return {jQuery}
996 */
997 concat: function ( nodes ) {
998 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
999 $.each( nodes, function ( i, node ) {
1000 // Let jQuery append nodes, arrays of nodes and jQuery objects
1001 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1002 appendWithoutParsing( $span, node );
1003 } );
1004 return $span;
1005 },
1006
1007 /**
1008 * Return escaped replacement of correct index, or string if unavailable.
1009 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1010 * if the specified parameter is not found return the same string
1011 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1012 *
1013 * TODO: Throw error if nodes.length > 1 ?
1014 *
1015 * @param {Array} nodes List of one element, integer, n >= 0
1016 * @param {Array} replacements List of at least n strings
1017 * @return {string} replacement
1018 */
1019 replace: function ( nodes, replacements ) {
1020 var index = parseInt( nodes[ 0 ], 10 );
1021
1022 if ( index < replacements.length ) {
1023 return replacements[ index ];
1024 } else {
1025 // index not found, fallback to displaying variable
1026 return '$' + ( index + 1 );
1027 }
1028 },
1029
1030 /**
1031 * Transform wiki-link
1032 *
1033 * TODO:
1034 * It only handles basic cases, either no pipe, or a pipe with an explicit
1035 * anchor.
1036 *
1037 * It does not attempt to handle features like the pipe trick.
1038 * However, the pipe trick should usually not be present in wikitext retrieved
1039 * from the server, since the replacement is done at save time.
1040 * It may, though, if the wikitext appears in extension-controlled content.
1041 *
1042 * @param {string[]} nodes
1043 */
1044 wikilink: function ( nodes ) {
1045 var page, anchor, url, $el;
1046
1047 page = textify( nodes[ 0 ] );
1048 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1049 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1050 if ( page.charAt( 0 ) === ':' ) {
1051 page = page.slice( 1 );
1052 }
1053 url = mw.util.getUrl( page );
1054
1055 if ( nodes.length === 1 ) {
1056 // [[Some Page]] or [[Namespace:Some Page]]
1057 anchor = page;
1058 } else {
1059 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1060 anchor = nodes[ 1 ];
1061 }
1062
1063 $el = $( '<a>' ).attr( {
1064 title: page,
1065 href: url
1066 } );
1067 return appendWithoutParsing( $el, anchor );
1068 },
1069
1070 /**
1071 * Converts array of HTML element key value pairs to object
1072 *
1073 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1074 * name and 2 * n + 1 the associated value
1075 * @return {Object} Object mapping attribute name to attribute value
1076 */
1077 htmlattributes: function ( nodes ) {
1078 var i, len, mapping = {};
1079 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1080 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1081 }
1082 return mapping;
1083 },
1084
1085 /**
1086 * Handles an (already-validated) HTML element.
1087 *
1088 * @param {Array} nodes Nodes to process when creating element
1089 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1090 */
1091 htmlelement: function ( nodes ) {
1092 var tagName, attributes, contents, $element;
1093
1094 tagName = nodes.shift();
1095 attributes = nodes.shift();
1096 contents = nodes;
1097 $element = $( document.createElement( tagName ) ).attr( attributes );
1098 return appendWithoutParsing( $element, contents );
1099 },
1100
1101 /**
1102 * Transform parsed structure into external link.
1103 *
1104 * The "href" can be:
1105 * - a jQuery object, treat it as "enclosing" the link text.
1106 * - a function, treat it as the click handler.
1107 * - a string, or our htmlEmitter jQuery object, treat it as a URI after stringifying.
1108 *
1109 * TODO: throw an error if nodes.length > 2 ?
1110 *
1111 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
1112 * @return {jQuery}
1113 */
1114 extlink: function ( nodes ) {
1115 var $el,
1116 arg = nodes[ 0 ],
1117 contents = nodes[ 1 ];
1118 if ( arg instanceof jQuery && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1119 $el = arg;
1120 } else {
1121 $el = $( '<a>' );
1122 if ( typeof arg === 'function' ) {
1123 $el.attr( 'href', '#' )
1124 .click( function ( e ) {
1125 e.preventDefault();
1126 } )
1127 .click( arg );
1128 } else {
1129 $el.attr( 'href', textify( arg ) );
1130 }
1131 }
1132 return appendWithoutParsing( $el.empty(), contents );
1133 },
1134
1135 /**
1136 * Transform parsed structure into pluralization
1137 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1138 * So convert it back with the current language's convertNumber.
1139 *
1140 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1141 * @return {string} selected pluralized form according to current language
1142 */
1143 plural: function ( nodes ) {
1144 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1145 explicitPluralForms = {};
1146
1147 count = parseFloat( this.language.convertNumber( nodes[ 0 ], true ) );
1148 forms = nodes.slice( 1 );
1149 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1150 form = forms[ formIndex ];
1151
1152 if ( form instanceof jQuery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1153 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1154 firstChild = form.contents().get( 0 );
1155 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1156 firstChildText = firstChild.textContent;
1157 if ( /^\d+=/.test( firstChildText ) ) {
1158 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1159 // Use the digit part as key and rest of first text node and
1160 // rest of child nodes as value.
1161 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1162 explicitPluralForms[ explicitPluralFormNumber ] = form;
1163 forms[ formIndex ] = undefined;
1164 }
1165 }
1166 } else if ( /^\d+=/.test( form ) ) {
1167 // Simple explicit plural forms like 12=a dozen
1168 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
1169 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1170 forms[ formIndex ] = undefined;
1171 }
1172 }
1173
1174 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1175 forms = $.map( forms, function ( form ) {
1176 return form;
1177 } );
1178
1179 return this.language.convertPlural( count, forms, explicitPluralForms );
1180 },
1181
1182 /**
1183 * Transform parsed structure according to gender.
1184 *
1185 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1186 *
1187 * The first node must be one of:
1188 * - the mw.user object (or a compatible one)
1189 * - an empty string - indicating the current user, same effect as passing the mw.user object
1190 * - a gender string ('male', 'female' or 'unknown')
1191 *
1192 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1193 * @return {string} Selected gender form according to current language
1194 */
1195 gender: function ( nodes ) {
1196 var gender,
1197 maybeUser = nodes[ 0 ],
1198 forms = nodes.slice( 1 );
1199
1200 if ( maybeUser === '' ) {
1201 maybeUser = mw.user;
1202 }
1203
1204 // If we are passed a mw.user-like object, check their gender.
1205 // Otherwise, assume the gender string itself was passed .
1206 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1207 gender = maybeUser.options.get( 'gender' );
1208 } else {
1209 gender = maybeUser;
1210 }
1211
1212 return this.language.gender( gender, forms );
1213 },
1214
1215 /**
1216 * Transform parsed structure into grammar conversion.
1217 * Invoked by putting `{{grammar:form|word}}` in a message
1218 *
1219 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1220 * @return {string} selected grammatical form according to current language
1221 */
1222 grammar: function ( nodes ) {
1223 var form = nodes[ 0 ],
1224 word = nodes[ 1 ];
1225 return word && form && this.language.convertGrammar( word, form );
1226 },
1227
1228 /**
1229 * Tranform parsed structure into a int: (interface language) message include
1230 * Invoked by putting `{{int:othermessage}}` into a message
1231 *
1232 * @param {Array} nodes List of nodes
1233 * @return {string} Other message
1234 */
1235 'int': function ( nodes ) {
1236 var msg = nodes[ 0 ];
1237 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1238 },
1239
1240 /**
1241 * Get localized namespace name from canonical name or namespace number.
1242 * Invoked by putting `{{ns:foo}}` into a message
1243 *
1244 * @param {Array} nodes List of nodes
1245 * @return {string} Localized namespace name
1246 */
1247 ns: function ( nodes ) {
1248 var ns = $.trim( textify( nodes[ 0 ] ) );
1249 if ( !/^\d+$/.test( ns ) ) {
1250 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
1251 }
1252 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1253 return ns || '';
1254 },
1255
1256 /**
1257 * Takes an unformatted number (arab, no group separators and . as decimal separator)
1258 * and outputs it in the localized digit script and formatted with decimal
1259 * separator, according to the current language.
1260 *
1261 * @param {Array} nodes List of nodes
1262 * @return {number|string} Formatted number
1263 */
1264 formatnum: function ( nodes ) {
1265 var isInteger = ( nodes[ 1 ] && nodes[ 1 ] === 'R' ) ? true : false,
1266 number = nodes[ 0 ];
1267
1268 return this.language.convertNumber( number, isInteger );
1269 },
1270
1271 /**
1272 * Lowercase text
1273 *
1274 * @param {Array} nodes List of nodes
1275 * @return {string} The given text, all in lowercase
1276 */
1277 lc: function ( nodes ) {
1278 return textify( nodes[ 0 ] ).toLowerCase();
1279 },
1280
1281 /**
1282 * Uppercase text
1283 *
1284 * @param {Array} nodes List of nodes
1285 * @return {string} The given text, all in uppercase
1286 */
1287 uc: function ( nodes ) {
1288 return textify( nodes[ 0 ] ).toUpperCase();
1289 },
1290
1291 /**
1292 * Lowercase first letter of input, leaving the rest unchanged
1293 *
1294 * @param {Array} nodes List of nodes
1295 * @return {string} The given text, with the first character in lowercase
1296 */
1297 lcfirst: function ( nodes ) {
1298 var text = textify( nodes[ 0 ] );
1299 return text.charAt( 0 ).toLowerCase() + text.slice( 1 );
1300 },
1301
1302 /**
1303 * Uppercase first letter of input, leaving the rest unchanged
1304 *
1305 * @param {Array} nodes List of nodes
1306 * @return {string} The given text, with the first character in uppercase
1307 */
1308 ucfirst: function ( nodes ) {
1309 var text = textify( nodes[ 0 ] );
1310 return text.charAt( 0 ).toUpperCase() + text.slice( 1 );
1311 }
1312 };
1313
// Legacy global: registers the default message function (mw.jqueryMsg.getMessageFunction()
// called without options) as window.gM, through mw.log.deprecate.
// Deprecated! Don't rely on gM existing; use mw.message( ... ).parse() instead.
// The window.gM ought not to be required - or if required, not required here.
// But moving it to extensions breaks it (?!)
// Need to fix the plugin so it can do attributes as well, then it will be okay to remove this.
// NOTE(review): mw.log.deprecate presumably emits the given notice when window.gM is used - confirm.
// @deprecated since 1.23
mw.log.deprecate( window, 'gM', mw.jqueryMsg.getMessageFunction(), 'Use mw.message( ... ).parse() instead.' );
1320
/**
 * The plugin returned by mw.jqueryMsg#getPlugin (called without options), installed
 * as `$.fn.msg`. Elsewhere in this file it is called as `$el.msg( key, parameters )`.
 *
 * @method
 * @member jQuery
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();
1327
// Replace the default message parser with jqueryMsg.
// The previous parser is kept around for the messages that do not need jqueryMsg.
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	var format = this.format,
		messages = this.map,
		// jqueryMsg is only needed when the format is not 'plain' and the message source
		// contains '{{', '[', '<', '>' or '&'
		needsJqueryMsg = format !== 'plain' && /\{\{|[\[<>&]/.test( messages.get( this.key ) );

	if ( !needsJqueryMsg ) {
		// Fall back to mw.msg's simple parser
		return oldParser.apply( this );
	}

	// The message function for a format is created on first use and then kept
	// as a property of the message map itself
	if ( !messages.hasOwnProperty( format ) ) {
		messages[ format ] = mw.jqueryMsg.getMessageFunction( {
			messages: messages,
			// For format 'escaped', escaping part is handled by mediawiki.js
			format: format
		} );
	}
	return messages[ format ]( this.key, this.parameters );
};
1345
/**
 * Parse the message to DOM nodes, rather than HTML string like #parse.
 *
 * This method is only available when jqueryMsg is loaded.
 *
 * The message is rendered into a shared scratch element by the `msg` jQuery plugin; the
 * resulting child nodes are then detached from that element and returned.
 *
 * @method parseDom
 * @member mw.Message
 * @return {jQuery}
 */
mw.Message.prototype.parseDom = ( function () {
	// One <div> created up front and reused as the parent for every call
	var $scratch = $( '<div>' );

	return function () {
		var $rendered = $scratch.msg( this.key, this.parameters );

		return $rendered.contents().detach();
	};
}() );
1361
1362 }( mediaWiki, jQuery ) );