mediawiki.jqueryMsg: Implement HTML support and improve HTML escaping.
authorMatthew Flaschen <mflaschen@wikimedia.org>
Sun, 10 Mar 2013 08:23:43 +0000 (04:23 -0400)
committerMatthew Flaschen <mflaschen@wikimedia.org>
Mon, 8 Apr 2013 20:22:43 +0000 (16:22 -0400)
* Whitelisted elements.
* Whitelisted attributes allowed on all elements, and additional ones
  per element.
* Currently, only b (bold) and i (italics) and the common attributes are
  implemented, but more can be added.
* Treat error message as text
* Implement appendWithoutParsing as an extra safeguard to ensure
  jQuery never implicitly parses HTML.  Reuse this where a similar
  case was handled one-off.
* Rename some of the link functions and variables to be clearer about
  relationships.

Bug: 44525
Change-Id: Id8902af9568092d0318d0ac9d1ca6c9c195d36fe

RELEASE-NOTES-1.22
resources/mediawiki/mediawiki.jqueryMsg.js
tests/qunit/suites/resources/mediawiki/mediawiki.jqueryMsg.test.js
tests/qunit/suites/resources/mediawiki/mediawiki.test.js

index 5c0098d..95da63b 100644 (file)
@@ -12,6 +12,7 @@ production.
 * $wgRedirectScript was removed. It was unused.
 
 === New features in 1.22 ===
+* (bug 44525) mediawiki.jqueryMsg can now parse (whitelisted) HTML elements and attributes.
 
 === Bug fixes in 1.22 ===
 
index 183b525..5539d4d 100644 (file)
@@ -3,6 +3,7 @@
 * See: http://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
 *
 * @author neilk@wikimedia.org
+* @author mflaschen@wikimedia.org
 */
 ( function ( mw, $ ) {
        var oldParser,
                        magic : {
                                'SITENAME' : mw.config.get( 'wgSiteName' )
                        },
+                       // This is a whitelist based on, but simpler than, Sanitizer.php.
+                       // Self-closing tags are not currently supported.
+                       allowedHtmlElements : [
+                               'b',
+                               'i'
+                       ],
+                       // Attributes allowed on every whitelisted element.
+                       // See Sanitizer::setupAttributeWhitelist
+                       allowedHtmlCommonAttributes : [
+                               // HTML
+                               'id',
+                               'class',
+                               'style',
+                               'lang',
+                               'dir',
+                               'title',
+
+                               // WAI-ARIA
+                               'role'
+                       ],
+
+                       // Attributes allowed for specific elements.
+                       // Key is element name in lower case
+                       // Value is array of allowed attributes for that element
+                       allowedHtmlAttributesByElement : {},
                        messages : mw.messages,
                        language : mw.language,
 
 
                };
 
+       /**
+        * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
+        * convert what it detects as an htmlString to an element.
+        *
+        * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
+        *
+        * @param {jQuery} $parent Parent node wrapped by jQuery
+        * @param {Object|string|Array} children What to append, with the same possible types as jQuery
+        * @return {jQuery} $parent
+        */
+       function appendWithoutParsing( $parent, children ) {
+               var i, len;
+
+               if ( !$.isArray( children ) ) {
+                       children = [children];
+               }
+
+               for ( i = 0, len = children.length; i < len; i++ ) {
+                       if ( typeof children[i] !== 'object' ) {
+                               children[i] = document.createTextNode( children[i] );
+                       }
+               }
+
+               return $parent.append( children );
+       }
+
+       /**
+        * Decodes the main HTML entities, those encoded by mw.html.escape.
+        *
+        * @param {string} encoded Encoded string
+        * @return {string} String with those entities decoded
+        */
+       function decodePrimaryHtmlEntities( encoded ) {
+               return encoded
+                       .replace( /&#039;/g, '\'' )
+                       .replace( /&quot;/g, '"' )
+                       .replace( /&lt;/g, '<' )
+                       .replace( /&gt;/g, '>' )
+                       .replace( /&amp;/g, '&' ); // &amp; is decoded last so that e.g. "&amp;lt;" yields "&lt;" rather than "<"
+       }
+
        /**
         * Given parser options, return a function that parses a key and replacements, returning jQuery object
         * @param {Object} parser options
                        try {
                                return parser.parse( key, argsArray );
                        } catch ( e ) {
-                               return $( '<span>' ).append( key + ': ' + e.message );
+                               return $( '<span>' ).text( key + ': ' + e.message );
                        }
                };
        }
                 */
                return function () {
                        var $target = this.empty();
-                       // TODO: Simply $target.append( failableParserFn( arguments ).contents() )
-                       // or Simply $target.append( failableParserFn( arguments ) )
+                       // TODO: Simply appendWithoutParsing( $target, failableParserFn( arguments ).contents() )
+                       // or Simply appendWithoutParsing( $target, failableParserFn( arguments ) )
                        $.each( failableParserFn( arguments ).contents(), function ( i, node ) {
-                               $target.append( node );
+                               appendWithoutParsing( $target, node );
                        } );
                        return $target;
                };
                 * @return {Mixed} abstract syntax tree
                 */
                wikiTextToAst: function ( input ) {
-                       var pos,
+                       var pos, settings = this.settings, concat = Array.prototype.concat,
                                regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
-                               backslash, anyCharacter, escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
-                               whitespace, dollar, digits,
-                               openExtlink, closeExtlink, wikilinkPage, wikilinkContents, openLink, closeLink, templateName, pipe, colon,
+                               doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
+                               escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
+                               whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
+                               htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
+                               openExtlink, closeExtlink, wikilinkPage, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
                                templateContents, openTemplate, closeTemplate,
                                nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result;
 
                                        return result;
                                };
                        }
+
+                       /**
+                        * Makes a regex parser, given a RegExp object.
+                        * The regex being passed in should start with a ^ to anchor it to the start
+                        * of the string.
+                        *
+                        * @param {RegExp} regex anchored regex
+                        * @return {Function} function to parse input based on the regex
+                        */
                        function makeRegexParser( regex ) {
                                return function () {
                                        var matches = input.substr( pos ).match( regex );
                        // but some debuggers can't tell you exactly where they come from. Also the mutually
                        // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
                        // This may be because, to save code, memoization was removed
-                       regularLiteral = makeRegexParser( /^[^{}\[\]$\\]/ );
+
+                       regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
                        regularLiteralWithoutBar = makeRegexParser(/^[^{}\[\]$\\|]/);
                        regularLiteralWithoutSpace = makeRegexParser(/^[^{}\[\]$\s]/);
                        regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
+
                        backslash = makeStringParser( '\\' );
+                       doubleQuote = makeStringParser( '"' );
+                       singleQuote = makeStringParser( '\'' );
                        anyCharacter = makeRegexParser( /^./ );
+
+                       openHtmlStartTag = makeStringParser( '<' );
+                       optionalForwardSlash = makeRegexParser( /^\/?/ );
+                       openHtmlEndTag = makeStringParser( '</' );
+                       htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
+                       closeHtmlTag = makeRegexParser( /^\s*>/ );
+
                        function escapedLiteral() {
                                var result = sequence( [
                                        backslash,
                                return result === null ? null : result.join('');
                        }
 
+                       asciiAlphabetLiteral = makeRegexParser( /[A-Za-z]+/ );
+                       htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
+                       htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
+
                        whitespace = makeRegexParser( /^\s+/ );
                        dollar = makeStringParser( '$' );
                        digits = makeRegexParser( /^\d+/ );
                        }
                        openExtlink = makeStringParser( '[' );
                        closeExtlink = makeStringParser( ']' );
-                       // this extlink MUST have inner text, e.g. [foo] not allowed; [foo bar] is allowed
+                       // this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar], [foo <i>bar</i>], etc. are allowed
                        function extlink() {
                                var result, parsedResult;
                                result = null;
                                        openExtlink,
                                        nonWhitespaceExpression,
                                        whitespace,
-                                       expression,
+                                       nOrMore( 1, expression ),
                                        closeExtlink
                                ] );
                                if ( parsedResult !== null ) {
-                                        result = [ 'LINK', parsedResult[1], parsedResult[3] ];
+                                       result = [ 'EXTLINK', parsedResult[1] ];
+                                       // TODO (mattflaschen, 2013-03-22): Clean this up if possible.
+                                       // This avoids CONCAT for single nodes, so they at least don't get the htmlEmitter span.
+                                       if ( parsedResult[3].length === 1 ) {
+                                               result.push( parsedResult[3][0] );
+                                       } else {
+                                               result.push( ['CONCAT'].concat( parsedResult[3] ) );
+                                       }
                                }
                                return result;
                        }
                                if ( result === null ) {
                                        return null;
                                }
-                               return [ 'LINKPARAM', parseInt( result[2], 10 ) - 1, result[4] ];
+                               return [ 'EXTLINKPARAM', parseInt( result[2], 10 ) - 1, result[4] ];
                        }
-                       openLink = makeStringParser( '[[' );
-                       closeLink = makeStringParser( ']]' );
+                       openWikilink = makeStringParser( '[[' );
+                       closeWikilink = makeStringParser( ']]' );
                        pipe = makeStringParser( '|' );
 
                        function template() {
                                wikilinkPage // unpiped link
                        ] );
 
-                       function link() {
+                       function wikilink() {
                                var result, parsedResult, parsedLinkContents;
                                result = null;
 
                                parsedResult = sequence( [
-                                       openLink,
+                                       openWikilink,
                                        wikilinkContents,
-                                       closeLink
+                                       closeWikilink
                                ] );
                                if ( parsedResult !== null ) {
                                        parsedLinkContents = parsedResult[1];
-                                       result = [ 'WLINK' ].concat( parsedLinkContents );
+                                       result = [ 'WIKILINK' ].concat( parsedLinkContents );
+                               }
+                               return result;
+                       }
+
+                       // TODO: Support data- if appropriate
+                       function doubleQuotedHtmlAttributeValue() {
+                               var parsedResult = sequence( [
+                                       doubleQuote,
+                                       htmlDoubleQuoteAttributeValue,
+                                       doubleQuote
+                               ] );
+                               return parsedResult === null ? null : parsedResult[1];
+                       }
+
+                       function singleQuotedHtmlAttributeValue() {
+                               var parsedResult = sequence( [
+                                       singleQuote,
+                                       htmlSingleQuoteAttributeValue,
+                                       singleQuote
+                               ] );
+                               return parsedResult === null ? null : parsedResult[1];
+                       }
+
+                       function htmlAttribute() {
+                               var parsedResult = sequence( [
+                                       whitespace,
+                                       asciiAlphabetLiteral,
+                                       htmlAttributeEquals,
+                                       choice( [
+                                               doubleQuotedHtmlAttributeValue,
+                                               singleQuotedHtmlAttributeValue
+                                       ] )
+                               ] );
+                               return parsedResult === null ? null : [parsedResult[1], parsedResult[3]];
+                       }
+
+                       /**
+                        * Checks if HTML is allowed
+                        *
+                        * @param {string} startTagName HTML start tag name
+                        * @param {string} endTagName HTML start tag name
+                        * @param {Object} attributes array of consecutive key value pairs,
+                        *  with index 2 * n being a name and 2 * n + 1 the associated value
+                        * @return {boolean} true if this is HTML is allowed, false otherwise
+                        */
+                       function isAllowedHtml( startTagName, endTagName, attributes ) {
+                               var i, len, attributeName;
+
+                               startTagName = startTagName.toLowerCase();
+                               endTagName = endTagName.toLowerCase();
+                               if ( startTagName !== endTagName || $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
+                                       return false;
+                               }
+
+                               for ( i = 0, len = attributes.length; i < len; i += 2 ) {
+                                       attributeName = attributes[i];
+                                       if ( $.inArray( attributeName, settings.allowedHtmlCommonAttributes ) === -1 &&
+                                            $.inArray( attributeName, settings.allowedHtmlAttributesByElement[startTagName] || [] ) === -1 ) {
+                                               return false;
+                                       }
+                               }
+
+                               return true;
+                       }
+
+                       function htmlAttributes() {
+                               var parsedResult = nOrMore( 0, htmlAttribute )();
+                               // Un-nest attributes array due to structure of jQueryMsg operations (see emit).
+                               return concat.apply( ['HTMLATTRIBUTES'], parsedResult );
+                       }
+
+                       // Parses a subset of allowed HTML markup: a start tag, its contents and a matching end tag.
+                       // Most elements and many attributes allowed on the server are not supported yet.
+                       function html() {
+                               var result = null, parsedOpenTagResult, parsedHtmlContents,
+                                       parsedCloseTagResult, wrappedAttributes, attributes,
+                                       startTagName, endTagName, startOpenTagPos, startCloseTagPos,
+                                       endOpenTagPos, endCloseTagPos;
+
+                               // Parsing is split into three steps so the original markup can be reconstructed accurately and the start and end tag names can be compared exactly.
+                               // 1. openHtmlStartTag through closeHtmlTag
+                               // 2. expression (zero or more)
+                               // 3. openHtmlEndTag through closeHtmlTag
+                               // The positions recorded around each step are used to rebuild the tags as text if the HTML is not allowed.
+
+                               startOpenTagPos = pos;
+                               parsedOpenTagResult = sequence( [
+                                       openHtmlStartTag,
+                                       asciiAlphabetLiteral,
+                                       htmlAttributes,
+                                       optionalForwardSlash, // accepted here, but its result is not inspected below
+                                       closeHtmlTag
+                               ] );
+
+                               if ( parsedOpenTagResult === null ) {
+                                       return null;
                                }
+
+                               endOpenTagPos = pos;
+                               startTagName = parsedOpenTagResult[1];
+
+                               parsedHtmlContents = nOrMore( 0, expression )();
+
+                               startCloseTagPos = pos;
+                               parsedCloseTagResult = sequence( [
+                                       openHtmlEndTag,
+                                       asciiAlphabetLiteral,
+                                       closeHtmlTag
+                               ] );
+
+                               if ( parsedCloseTagResult === null ) {
+                                       // Closing tag failed.  Return the start tag (as text) and the parsed contents.
+                                       return [ 'CONCAT', input.substring( startOpenTagPos, endOpenTagPos ) ].concat( parsedHtmlContents );
+                               }
+
+                               endCloseTagPos = pos;
+                               endTagName = parsedCloseTagResult[1];
+                               wrappedAttributes = parsedOpenTagResult[2];
+                               attributes = wrappedAttributes.slice( 1 ); // drop the leading 'HTMLATTRIBUTES' marker
+                               if ( isAllowedHtml( startTagName, endTagName, attributes) ) {
+                                       result = [ 'HTMLELEMENT', startTagName, wrappedAttributes ].concat( parsedHtmlContents );
+                               } else {
+                                       // HTML is not allowed, so contents will remain how
+                                       // it was, while HTML markup at this level will be
+                                       // treated as text
+                                       // E.g. assuming script tags are not allowed:
+                                       //
+                                       // <script>[[Foo|bar]]</script>
+                                       //
+                                       // results in '&lt;script&gt;' and '&lt;/script&gt;'
+                                       // (not treated as an HTML tag), surrounding a fully
+                                       // parsed HTML link.
+                                       //
+                                       // Concatenate everything from the tag, flattening the contents.
+                                       result = [ 'CONCAT', input.substring( startOpenTagPos, endOpenTagPos ) ].concat( parsedHtmlContents, input.substring( startCloseTagPos, endCloseTagPos ) );
+                               }
+
                                return result;
                        }
+
                        templateName = transform(
                                // see $wgLegalTitleChars
                                // not allowing : due to the need to catch "PLURAL:$1"
                        closeTemplate = makeStringParser('}}');
                        nonWhitespaceExpression = choice( [
                                template,
-                               link,
+                               wikilink,
                                extLinkParam,
                                extlink,
                                replacement,
                        ] );
                        paramExpression = choice( [
                                template,
-                               link,
+                               wikilink,
                                extLinkParam,
                                extlink,
                                replacement,
 
                        expression = choice( [
                                template,
-                               link,
+                               wikilink,
                                extLinkParam,
                                extlink,
                                replacement,
+                               html,
                                literal
                        ] );
 
                        $.each( nodes, function ( i, node ) {
                                if ( node instanceof jQuery && node.hasClass( 'mediaWiki_htmlEmitter' ) ) {
                                        $.each( node.contents(), function ( j, childNode ) {
-                                               $span.append( childNode );
+                                               appendWithoutParsing( $span, childNode );
                                        } );
                                } else {
                                        // Let jQuery append nodes, arrays of nodes and jQuery objects
                                        // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
-                                       $span.append( $.type( node ) === 'object' ? node : document.createTextNode( node ) );
+                                       appendWithoutParsing( $span, node );
                                }
                        } );
                        return $span;
                 *
                 * @param nodes
                 */
-               wlink: function ( nodes ) {
+               wikilink: function ( nodes ) {
                        var page, anchor, url;
 
                        page = nodes[0];
                        } ).text( anchor );
                },
 
+               /**
+                * Converts array of HTML element key value pairs to object
+                *
+                * @param {Array} nodes array of consecutive key value pairs, with index 2 * n being a name and 2 * n + 1 the associated value
+                * @return {Object} object mapping attribute name to attribute value
+                */
+               htmlattributes: function ( nodes ) {
+                       var i, len, mapping = {};
+                       for ( i = 0, len = nodes.length; i < len; i += 2 ) {
+                               mapping[nodes[i]] = decodePrimaryHtmlEntities( nodes[i + 1] );
+                       }
+                       return mapping;
+               },
+
+               /**
+                * Handles an (already-validated) HTML element.
+                *
+                * @param {Array} nodes nodes to process when creating element
+                * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
+                */
+               htmlelement: function ( nodes ) {
+                       var tagName, attributes, contents, $element;
+
+                       tagName = nodes.shift();
+                       attributes = nodes.shift();
+                       contents = nodes;
+                       $element = $( document.createElement( tagName ) ).attr( attributes );
+                       return appendWithoutParsing( $element, contents );
+               },
+
                /**
                 * Transform parsed structure into external link
                 * If the href is a jQuery object, treat it as "enclosing" the link text.
                 * @param {Array} of two elements, {jQuery|Function|String} and {String}
                 * @return {jQuery}
                 */
-               link: function ( nodes ) {
+               extlink: function ( nodes ) {
                        var $el,
                                arg = nodes[0],
                                contents = nodes[1];
                                        $el.attr( 'href', arg.toString() );
                                }
                        }
-                       $el.append( contents );
-                       return $el;
+                       return appendWithoutParsing( $el, contents );
                },
 
                /**
-                * This is basically use a combination of replace + link (link with parameter
+                * This basically uses a combination of replace + external link (link with parameter
                 * as url), but we don't want to run the regular replace here-on: inserting a
                 * url as href-attribute of a link will automatically escape it already, so
                 * we don't want replace to (manually) escape it as well.
                 * @param {Array} of one element, integer, n >= 0
                 * @return {String} replacement
                 */
-               linkparam: function ( nodes, replacements ) {
+               extlinkparam: function ( nodes, replacements ) {
                        var replacement,
                                index = parseInt( nodes[0], 10 );
                        if ( index < replacements.length) {
                        } else {
                                replacement = '$' + ( index + 1 );
                        }
-                       return this.link( [ replacement, nodes[1] ] );
+                       return this.extlink( [ replacement, nodes[1] ] );
                },
 
                /**
                // Caching is somewhat problematic, because we do need different message functions for different maps, so
                // we'd have to cache the parser as a member of this.map, which sounds a bit ugly.
                // Do not use mw.jqueryMsg unless required
-               if ( this.format === 'plain' || !/\{\{|\[/.test(this.map.get( this.key ) ) ) {
+               if ( this.format === 'plain' || !/\{\{|[\[<>]/.test(this.map.get( this.key ) ) ) {
                        // Fall back to mw.msg's simple parser
                        return oldParser.apply( this );
                }
index 697159c..e0e823d 100644 (file)
@@ -1,7 +1,13 @@
 ( function ( mw, $ ) {
-       var mwLanguageCache = {}, formatnumTests, specialCharactersPageName,
+       var mwLanguageCache = {}, formatText, formatParse, formatnumTests, specialCharactersPageName,
                expectedListUsers, expectedEntrypoints;
 
+       /**
+        * Run the same arguments through both the 'text' and the 'parse' formatter
+        * and assert that each one returns the same expected result.
+        *
+        * @param {Object} assert QUnit assert object of the running test
+        * @param {Array} parserArguments Arguments applied to each formatter
+        * @param {Mixed} expectedResult Value both formatters are expected to return
+        * @param {string} assertMessage Assertion message; the format name is appended to it
+        */
+       function assertBothModes( assert, parserArguments, expectedResult, assertMessage ) {
+               var actual;
+
+               // 'text' is checked first, then 'parse', so failures are reported in a stable order.
+               actual = formatText.apply( null, parserArguments );
+               assert.equal( actual, expectedResult, assertMessage + ' when format is \'text\'' );
+
+               actual = formatParse.apply( null, parserArguments );
+               assert.equal( actual, expectedResult, assertMessage + ' when format is \'parse\'' );
+       }
+
        QUnit.module( 'mediawiki.jqueryMsg', QUnit.newMwEnvironment( {
                setup: function () {
                        this.orgMwLangauge = mw.language;
                        expectedListUsers = '注册<a title="Special:ListUsers" href="/wiki/Special:ListUsers">用户</a>';
 
                        expectedEntrypoints = '<a href="https://www.mediawiki.org/wiki/Manual:index.php">index.php</a>';
+
+                       formatText = mw.jqueryMsg.getMessageFunction( {
+                               format: 'text'
+                       } );
+
+                       formatParse = mw.jqueryMsg.getMessageFunction( {
+                               format: 'parse'
+                       } );
                },
                teardown: function () {
                        mw.language = this.orgMwLangauge;
        }
 
        QUnit.test( 'Replace', 9, function ( assert ) {
-               var parser = mw.jqueryMsg.getMessageFunction();
-
                mw.messages.set( 'simple', 'Foo $1 baz $2' );
 
-               assert.equal( parser( 'simple' ), 'Foo $1 baz $2', 'Replacements with no substitutes' );
-               assert.equal( parser( 'simple', 'bar' ), 'Foo bar baz $2', 'Replacements with less substitutes' );
-               assert.equal( parser( 'simple', 'bar', 'quux' ), 'Foo bar baz quux', 'Replacements with all substitutes' );
+               assert.equal( formatParse( 'simple' ), 'Foo $1 baz $2', 'Replacements with no substitutes' );
+               assert.equal( formatParse( 'simple', 'bar' ), 'Foo bar baz $2', 'Replacements with less substitutes' );
+               assert.equal( formatParse( 'simple', 'bar', 'quux' ), 'Foo bar baz quux', 'Replacements with all substitutes' );
 
                mw.messages.set( 'plain-input', '<foo foo="foo">x$1y&lt;</foo>z' );
 
                assert.equal(
-                       parser( 'plain-input', 'bar' ),
+                       formatParse( 'plain-input', 'bar' ),
                        '&lt;foo foo="foo"&gt;xbary&amp;lt;&lt;/foo&gt;z',
                        'Input is not considered html'
                );
                mw.messages.set( 'plain-replace', 'Foo $1' );
 
                assert.equal(
-                       parser( 'plain-replace', '<bar bar="bar">&gt;</bar>' ),
+                       formatParse( 'plain-replace', '<bar bar="bar">&gt;</bar>' ),
                        'Foo &lt;bar bar="bar"&gt;&amp;gt;&lt;/bar&gt;',
                        'Replacement is not considered html'
                );
                mw.messages.set( 'object-replace', 'Foo $1' );
 
                assert.equal(
-                       parser( 'object-replace', $( '<div class="bar">&gt;</div>' ) ),
+                       formatParse( 'object-replace', $( '<div class="bar">&gt;</div>' ) ),
                        'Foo <div class="bar">&gt;</div>',
                        'jQuery objects are preserved as raw html'
                );
 
                assert.equal(
-                       parser( 'object-replace', $( '<div class="bar">&gt;</div>' ).get( 0 ) ),
+                       formatParse( 'object-replace', $( '<div class="bar">&gt;</div>' ).get( 0 ) ),
                        'Foo <div class="bar">&gt;</div>',
                        'HTMLElement objects are preserved as raw html'
                );
 
                assert.equal(
-                       parser( 'object-replace', $( '<div class="bar">&gt;</div>' ).toArray() ),
+                       formatParse( 'object-replace', $( '<div class="bar">&gt;</div>' ).toArray() ),
                        'Foo <div class="bar">&gt;</div>',
                        'HTMLElement[] arrays are preserved as raw html'
                );
 
                assert.equal(
-                       parser( 'external-link-replace', 'http://example.org/?x=y&z' ),
+                       formatParse( 'external-link-replace', 'http://example.org/?x=y&z' ),
                        'Foo <a href="http://example.org/?x=y&amp;z">bar</a>',
                        'Href is not double-escaped in wikilink function'
                );
        } );
 
        QUnit.test( 'Plural', 3, function ( assert ) {
-               var parser = mw.jqueryMsg.getMessageFunction();
-
-               assert.equal( parser( 'plural-msg', 0 ), 'Found 0 items', 'Plural test for english with zero as count' );
-               assert.equal( parser( 'plural-msg', 1 ), 'Found 1 item', 'Singular test for english' );
-               assert.equal( parser( 'plural-msg', 2 ), 'Found 2 items', 'Plural test for english' );
+               assert.equal( formatParse( 'plural-msg', 0 ), 'Found 0 items', 'Plural test for english with zero as count' );
+               assert.equal( formatParse( 'plural-msg', 1 ), 'Found 1 item', 'Singular test for english' );
+               assert.equal( formatParse( 'plural-msg', 2 ), 'Found 2 items', 'Plural test for english' );
        } );
 
        QUnit.test( 'Gender', 11, function ( assert ) {
                // TODO: These tests should be for mw.msg once mw.msg integrated with mw.jqueryMsg
                // TODO: English may not be the best language for these tests. Use a language like Arabic or Russian
-               var user = mw.user,
-                       parser = mw.jqueryMsg.getMessageFunction();
+               var user = mw.user;
 
                user.options.set( 'gender', 'male' );
                assert.equal(
-                       parser( 'gender-msg', 'Bob', 'male' ),
+                       formatParse( 'gender-msg', 'Bob', 'male' ),
                        'Bob: blue',
                        'Masculine from string "male"'
                );
                assert.equal(
-                       parser( 'gender-msg', 'Bob', user ),
+                       formatParse( 'gender-msg', 'Bob', user ),
                        'Bob: blue',
                        'Masculine from mw.user object'
                );
 
                user.options.set( 'gender', 'unknown' );
                assert.equal(
-                       parser( 'gender-msg', 'Foo', user ),
+                       formatParse( 'gender-msg', 'Foo', user ),
                        'Foo: green',
                        'Neutral from mw.user object' );
                assert.equal(
-                       parser( 'gender-msg', 'Alice', 'female' ),
+                       formatParse( 'gender-msg', 'Alice', 'female' ),
                        'Alice: pink',
                        'Feminine from string "female"' );
                assert.equal(
-                       parser( 'gender-msg', 'User' ),
+                       formatParse( 'gender-msg', 'User' ),
                        'User: green',
                        'Neutral when no parameter given' );
                assert.equal(
-                       parser( 'gender-msg', 'User', 'unknown' ),
+                       formatParse( 'gender-msg', 'User', 'unknown' ),
                        'User: green',
                        'Neutral from string "unknown"'
                );
                mw.messages.set( 'gender-msg-one-form', '{{GENDER:$1|User}}: $2 {{PLURAL:$2|edit|edits}}' );
 
                assert.equal(
-                       parser( 'gender-msg-one-form', 'male', 10 ),
+                       formatParse( 'gender-msg-one-form', 'male', 10 ),
                        'User: 10 edits',
                        'Gender neutral and plural form'
                );
                assert.equal(
-                       parser( 'gender-msg-one-form', 'female', 1 ),
+                       formatParse( 'gender-msg-one-form', 'female', 1 ),
                        'User: 1 edit',
                        'Gender neutral and singular form'
                );
 
                mw.messages.set( 'gender-msg-lowercase', '{{gender:$1|he|she}} is awesome' );
                assert.equal(
-                       parser( 'gender-msg-lowercase', 'male' ),
+                       formatParse( 'gender-msg-lowercase', 'male' ),
                        'he is awesome',
                        'Gender masculine'
                );
                assert.equal(
-                       parser( 'gender-msg-lowercase', 'female' ),
+                       formatParse( 'gender-msg-lowercase', 'female' ),
                        'she is awesome',
                        'Gender feminine'
                );
 
                mw.messages.set( 'gender-msg-wrong', '{{gender}} test' );
                assert.equal(
-                       parser( 'gender-msg-wrong', 'female' ),
+                       formatParse( 'gender-msg-wrong', 'female' ),
                        ' test',
                        'Invalid syntax should result in {{gender}} simply being stripped away'
                );
        } );
 
        QUnit.test( 'Grammar', 2, function ( assert ) {
-               var parser = mw.jqueryMsg.getMessageFunction();
-
-               assert.equal( parser( 'grammar-msg' ), 'Przeszukaj ' + mw.config.get( 'wgSiteName' ), 'Grammar Test with sitename' );
+               assert.equal( formatParse( 'grammar-msg' ), 'Przeszukaj ' + mw.config.get( 'wgSiteName' ), 'Grammar Test with sitename' );
 
                mw.messages.set( 'grammar-msg-wrong-syntax', 'Przeszukaj {{GRAMMAR:grammar_case_xyz}}' );
-               assert.equal( parser( 'grammar-msg-wrong-syntax' ), 'Przeszukaj ', 'Grammar Test with wrong grammar template syntax' );
+               assert.equal( formatParse( 'grammar-msg-wrong-syntax' ), 'Przeszukaj ', 'Grammar Test with wrong grammar template syntax' );
        } );
 
        QUnit.test( 'Match PHP parser', mw.libs.phpParserData.tests.length, function ( assert ) {
        } );
 
        QUnit.test( 'Links', 6, function ( assert ) {
-               var parser = mw.jqueryMsg.getMessageFunction(),
-                       expectedDisambiguationsText,
+               var expectedDisambiguationsText,
                        expectedMultipleBars,
                        expectedSpecialCharacters;
 
                 */
 
                assert.htmlEqual(
-                       parser( 'jquerymsg-test-statistics-users' ),
+                       formatParse( 'jquerymsg-test-statistics-users' ),
                        expectedListUsers,
                        'Piped wikilink'
                );
 
                mw.messages.set( 'disambiguations-text', 'The following pages contain at least one link to a disambiguation page.\nThey may have to link to a more appropriate page instead.\nA page is treated as a disambiguation page if it uses a template that is linked from [[MediaWiki:Disambiguationspage]].' );
                assert.htmlEqual(
-                       parser( 'disambiguations-text' ),
+                       formatParse( 'disambiguations-text' ),
                        expectedDisambiguationsText,
                        'Wikilink without pipe'
                );
 
                assert.htmlEqual(
-                       parser( 'jquerymsg-test-version-entrypoints-index-php' ),
+                       formatParse( 'jquerymsg-test-version-entrypoints-index-php' ),
                        expectedEntrypoints,
                        'External link'
                );
                // Pipe trick is not supported currently, but should not parse as text either.
                mw.messages.set( 'pipe-trick', '[[Tampa, Florida|]]' );
                assert.equal(
-                       parser( 'pipe-trick' ),
+                       formatParse( 'pipe-trick' ),
                        'pipe-trick: Parse error at position 0 in input: [[Tampa, Florida|]]',
                        'Pipe trick should return error string.'
                );
                expectedMultipleBars = '<a title="Main Page" href="/wiki/Main_Page">Main|Page</a>';
                mw.messages.set( 'multiple-bars', '[[Main Page|Main|Page]]' );
                assert.htmlEqual(
-                       parser( 'multiple-bars' ),
+                       formatParse( 'multiple-bars' ),
                        expectedMultipleBars,
                        'Bar in anchor'
                );
 
                mw.messages.set( 'special-characters', '[[' + specialCharactersPageName + ']]' );
                assert.htmlEqual(
-                       parser( 'special-characters' ),
+                       formatParse( 'special-characters' ),
                        expectedSpecialCharacters,
                        'Special characters'
                );
 
 // Tests that {{-transformation vs. general parsing are done as requested
        QUnit.test( 'Curly brace transformation', 14, function ( assert ) {
-               var formatText, formatParse, oldUserLang;
-
-               oldUserLang = mw.config.get( 'wgUserLanguage' );
-
-               formatText = mw.jqueryMsg.getMessageFunction( {
-                       format: 'text'
-               } );
-
-               formatParse = mw.jqueryMsg.getMessageFunction( {
-                       format: 'parse'
-               } );
-
-               // When the expected result is the same in both modes
-               function assertBothModes( parserArguments, expectedResult, assertMessage ) {
-                       assert.equal( formatText.apply( null, parserArguments ), expectedResult, assertMessage + ' when format is \'text\'' );
-                       assert.equal( formatParse.apply( null, parserArguments ), expectedResult, assertMessage + ' when format is \'parse\'' );
-               }
+               var oldUserLang = mw.config.get( 'wgUserLanguage' );
 
-               assertBothModes( ['gender-msg', 'Bob', 'male'], 'Bob: blue', 'gender is resolved' );
+               assertBothModes( assert, ['gender-msg', 'Bob', 'male'], 'Bob: blue', 'gender is resolved' );
 
-               assertBothModes( ['plural-msg', 5], 'Found 5 items', 'plural is resolved' );
+               assertBothModes( assert, ['plural-msg', 5], 'Found 5 items', 'plural is resolved' );
 
-               assertBothModes( ['grammar-msg'], 'Przeszukaj ' + mw.config.get( 'wgSiteName' ), 'grammar is resolved' );
+               assertBothModes( assert, ['grammar-msg'], 'Przeszukaj ' + mw.config.get( 'wgSiteName' ), 'grammar is resolved' );
 
                mw.config.set( 'wgUserLanguage', 'en' );
-               assertBothModes( ['formatnum-msg', '987654321.654321'], '987,654,321.654', 'formatnum is resolved' );
+               assertBothModes( assert, ['formatnum-msg', '987654321.654321'], '987,654,321.654', 'formatnum is resolved' );
 
                // Test non-{{ wikitext, where behavior differs
 
        } );
 
        QUnit.test( 'Int', 4, function ( assert ) {
-               var parser = mw.jqueryMsg.getMessageFunction(),
-                       newarticletextSource = 'You have followed a link to a page that does not exist yet. To create the page, start typing in the box below (see the [[{{Int:Helppage}}|help page]] for more info). If you are here by mistake, click your browser\'s back button.',
+               var newarticletextSource = 'You have followed a link to a page that does not exist yet. To create the page, start typing in the box below (see the [[{{Int:Helppage}}|help page]] for more info). If you are here by mistake, click your browser\'s back button.',
                        expectedNewarticletext,
                        helpPageTitle = 'Help:Contents';
 
                mw.messages.set( 'newarticletext', newarticletextSource );
 
                assert.htmlEqual(
-                       parser( 'newarticletext' ),
+                       formatParse( 'newarticletext' ),
                        expectedNewarticletext,
                        'Link with nested message'
                );
 
                assert.equal(
-                       parser( 'see-portal-url' ),
+                       formatParse( 'see-portal-url' ),
                        'Project:Community portal is an important community page.',
                        'Nested message'
                );
                        newarticletextSource.replace( 'Int:Helppage', 'int:helppage' ) );
 
                assert.htmlEqual(
-                       parser( 'newarticletext-lowercase' ),
+                       formatParse( 'newarticletext-lowercase' ),
                        expectedNewarticletext,
                        'Link with nested message, lowercase include'
                );
                mw.messages.set( 'uses-missing-int', '{{int:doesnt-exist}}' );
 
                assert.equal(
-                       parser( 'uses-missing-int' ),
+                       formatParse( 'uses-missing-int' ),
                        '[doesnt-exist]',
                        'int: where nested message does not exist'
                );
@@ -577,4 +566,149 @@ QUnit.test( 'formatnum', formatnumTests.length, function ( assert ) {
        } );
 } );
 
+// HTML in wikitext
+//
+// Covers the whitelist-based HTML support: whitelisted elements/attributes are emitted
+// as HTML in 'parse' mode, everything else is emitted as escaped text, and 'text' mode
+// leaves the markup untouched.
+QUnit.test( 'HTML', 26, function ( assert ) {
+       // Whitelisted elements, alone and nested; result is identical in both modes (8 assertions).
+       mw.messages.set( 'jquerymsg-italics-msg', '<i>Very</i> important' );
+
+       assertBothModes( assert, ['jquerymsg-italics-msg'], mw.messages.get( 'jquerymsg-italics-msg' ), 'Simple italics unchanged' );
+
+       mw.messages.set( 'jquerymsg-bold-msg', '<b>Strong</b> speaker' );
+       assertBothModes( assert, ['jquerymsg-bold-msg'], mw.messages.get( 'jquerymsg-bold-msg' ), 'Simple bold unchanged' );
+
+       mw.messages.set( 'jquerymsg-bold-italics-msg', 'It is <b><i>key</i></b>' );
+       assertBothModes( assert, ['jquerymsg-bold-italics-msg'], mw.messages.get( 'jquerymsg-bold-italics-msg' ), 'Bold and italics nesting order preserved' );
+
+       mw.messages.set( 'jquerymsg-italics-bold-msg', 'It is <i><b>vital</b></i>' );
+       assertBothModes( assert, ['jquerymsg-italics-bold-msg'], mw.messages.get( 'jquerymsg-italics-bold-msg' ), 'Italics and bold nesting order preserved' );
+
+       mw.messages.set( 'jquerymsg-italics-with-link', 'An <i>italicized [[link|wiki-link]]</i>' );
+
+       // The expected markup is well-formed (the anchor is closed before the italics end),
+       // so the comparison does not depend on how unclosed tags get repaired.
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-italics-with-link' ),
+               'An <i>italicized <a title="link" href="' + mw.html.escape( mw.util.wikiGetlink( 'link' ) ) + '">wiki-link</a></i>',
+               'Italics with link inside in parse mode'
+       );
+
+       assert.equal(
+               formatText( 'jquerymsg-italics-with-link' ),
+               mw.messages.get( 'jquerymsg-italics-with-link' ),
+               'Italics with link unchanged in text mode'
+       );
+
+       mw.messages.set( 'jquerymsg-italics-id-class', '<i id="foo" class="bar">Foo</i>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-italics-id-class' ),
+               mw.messages.get( 'jquerymsg-italics-id-class' ),
+               'ID and class are allowed'
+       );
+
+       mw.messages.set( 'jquerymsg-italics-onclick', '<i onclick="alert(\'foo\')">Foo</i>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-italics-onclick' ),
+               '&lt;i onclick=&quot;alert(\'foo\')&quot;&gt;Foo&lt;/i&gt;',
+               'element with onclick is escaped because it is not allowed'
+       );
+
+       mw.messages.set( 'jquerymsg-script-msg', '<script  >alert( "Who put this tag here?" );</script>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-script-msg' ),
+               '&lt;script  &gt;alert( &quot;Who put this tag here?&quot; );&lt;/script&gt;',
+               'Tag outside whitelist escaped in parse mode'
+       );
+
+       assert.equal(
+               formatText( 'jquerymsg-script-msg' ),
+               mw.messages.get( 'jquerymsg-script-msg' ),
+               'Tag outside whitelist unchanged in text mode'
+       );
+
+       mw.messages.set( 'jquerymsg-script-link-msg', '<script>[[Foo|bar]]</script>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-script-link-msg' ),
+               '&lt;script&gt;<a title="Foo" href="' + mw.html.escape( mw.util.wikiGetlink( 'Foo' ) ) + '">bar</a>&lt;/script&gt;',
+               'Script tag text is escaped because that element is not allowed, but link inside is still HTML'
+       );
+
+       mw.messages.set( 'jquerymsg-mismatched-html', '<i class="important">test</b>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-mismatched-html' ),
+               '&lt;i class=&quot;important&quot;&gt;test&lt;/b&gt;',
+               'Mismatched HTML start and end tag treated as text'
+       );
+
+       // TODO (mattflaschen, 2013-03-18): It's not a security issue, but there's no real
+       // reason the htmlEmitter span needs to be here. It's an artifact of how emitting works.
+       mw.messages.set( 'jquerymsg-script-and-external-link', '<script>alert( "jquerymsg-script-and-external-link test" );</script> [http://example.com <i>Foo</i> bar]' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-script-and-external-link' ),
+               '&lt;script&gt;alert( "jquerymsg-script-and-external-link test" );&lt;/script&gt; <a href="http://example.com"><span class="mediaWiki_htmlEmitter"><i>Foo</i> bar</span></a>',
+               'HTML tags in external links not interfering with escaping of other tags'
+       );
+
+       mw.messages.set( 'jquerymsg-link-script', '[http://example.com <script>alert( "jquerymsg-link-script test" );</script>]' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-link-script' ),
+               '<a href="http://example.com"><span class="mediaWiki_htmlEmitter">&lt;script&gt;alert( "jquerymsg-link-script test" );&lt;/script&gt;</span></a>',
+               'Non-whitelisted HTML tag in external link anchor treated as text'
+       );
+
+       // Intentionally not using htmlEqual for the quote tests
+       mw.messages.set( 'jquerymsg-double-quotes-preserved', '<i id="double">Double</i>' );
+       assert.equal(
+               formatParse( 'jquerymsg-double-quotes-preserved' ),
+               mw.messages.get( 'jquerymsg-double-quotes-preserved' ),
+               'Attributes with double quotes are preserved as such'
+       );
+
+       mw.messages.set( 'jquerymsg-single-quotes-normalized-to-double', '<i id=\'single\'>Single</i>' );
+       assert.equal(
+               formatParse( 'jquerymsg-single-quotes-normalized-to-double' ),
+               '<i id="single">Single</i>',
+               'Attributes with single quotes are normalized to double'
+       );
+
+       // The two escaped-quote cases carry distinct messages so a failure identifies which one broke.
+       mw.messages.set( 'jquerymsg-escaped-double-quotes-attribute', '<i style="font-family:&quot;Arial&quot;">Styled</i>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-escaped-double-quotes-attribute' ),
+               mw.messages.get( 'jquerymsg-escaped-double-quotes-attribute' ),
+               'Escaped double quotes in attributes are parsed correctly'
+       );
+
+       mw.messages.set( 'jquerymsg-escaped-single-quotes-attribute', '<i style=\'font-family:&#039;Arial&#039;\'>Styled</i>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-escaped-single-quotes-attribute' ),
+               mw.messages.get( 'jquerymsg-escaped-single-quotes-attribute' ),
+               'Escaped single quotes in attributes are parsed correctly'
+       );
+
+       mw.messages.set( 'jquerymsg-wikitext-contents-parsed', '<i>[http://example.com Example]</i>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-wikitext-contents-parsed' ),
+               '<i><a href="http://example.com">Example</a></i>',
+               'Contents of valid tag are treated as wikitext, so external link is parsed'
+       );
+
+       mw.messages.set( 'jquerymsg-wikitext-contents-script', '<i><script>Script inside</script></i>' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-wikitext-contents-script' ),
+               '<i><span class="mediaWiki_htmlEmitter">&lt;script&gt;Script inside&lt;/script&gt;</span></i>',
+               'Contents of valid tag are treated as wikitext, so invalid HTML element is treated as text'
+       );
+
+       mw.messages.set( 'jquerymsg-unclosed-tag', 'Foo<tag>bar' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-unclosed-tag' ),
+               'Foo&lt;tag&gt;bar',
+               'Nonsupported unclosed tags are escaped'
+       );
+
+       mw.messages.set( 'jquerymsg-self-closing-tag', 'Foo<tag/>bar' );
+       assert.htmlEqual(
+               formatParse( 'jquerymsg-self-closing-tag' ),
+               'Foo&lt;tag/&gt;bar',
+               'Self-closing tags don\'t cause a parse error'
+       );
+} );
+
 }( mediaWiki, jQuery ) );
index 7ae9826..adf54bc 100644 (file)
                assert.ok( mw.config instanceof mw.Map, 'mw.config instance of mw.Map' );
        } );
 
-       QUnit.test( 'mw.message & mw.messages', 68, function ( assert ) {
+       QUnit.test( 'mw.message & mw.messages', 83, function ( assert ) {
                var goodbye, hello;
 
                // Convenience method for asserting the same result for multiple formats
 
                assertMultipleFormats( ['int-msg'], ['text', 'parse', 'escaped'], 'Some Other Message', 'int is resolved' );
                assert.equal( mw.message( 'int-msg' ).plain(), mw.messages.get( 'int-msg' ), 'int is not resolved in plain mode' );
+
+               assert.ok( mw.messages.set( 'mediawiki-italics-msg', '<i>Very</i> important' ), 'mw.messages.set: Register' );
+               assertMultipleFormats( ['mediawiki-italics-msg'], ['plain', 'text', 'parse'], mw.messages.get( 'mediawiki-italics-msg' ), 'Simple italics unchanged' );
+               assert.htmlEqual(
+                       mw.message( 'mediawiki-italics-msg' ).escaped(),
+                       '&lt;i&gt;Very&lt;/i&gt; important',
+                       'Italics are escaped in escaped mode'
+               );
+
+               assert.ok( mw.messages.set( 'mediawiki-italics-with-link', 'An <i>italicized [[link|wiki-link]]</i>' ), 'mw.messages.set: Register' );
+               assertMultipleFormats( ['mediawiki-italics-with-link'], ['plain', 'text'], mw.messages.get( 'mediawiki-italics-with-link' ), 'Italics with link unchanged' );
+               assert.htmlEqual(
+                       mw.message( 'mediawiki-italics-with-link' ).escaped(),
+                       'An &lt;i&gt;italicized [[link|wiki-link]]&lt;/i&gt;',
+                       'Italics and link unchanged except for escaping in escaped mode'
+               );
+               assert.htmlEqual(
+                       mw.message( 'mediawiki-italics-with-link' ).parse(),
+                       'An <i>italicized <a title="link" href="' + mw.util.wikiGetlink( 'link' ) + '">wiki-link</i>',
+                       'Italics with link inside in parse mode'
+               );
+
+               assert.ok( mw.messages.set( 'mediawiki-script-msg', '<script  >alert( "Who put this script here?" );</script>' ), 'mw.messages.set: Register' );
+               assertMultipleFormats( ['mediawiki-script-msg'], ['plain', 'text'], mw.messages.get( 'mediawiki-script-msg' ), 'Script unchanged' );
+               assert.htmlEqual(
+                       mw.message( 'mediawiki-script-msg' ).escaped(),
+                       '&lt;script  &gt;alert( "Who put this script here?" );&lt;/script&gt;',
+                       'Script escaped when using escaped format'
+               );
+               assert.htmlEqual(
+                       mw.message( 'mediawiki-script-msg' ).parse(),
+                       '&lt;script  &gt;alert( "Who put this script here?" );&lt;/script&gt;',
+                       'Script escaped when using parse format'
+               );
+
+
        } );
 
        QUnit.test( 'mw.msg', 14, function ( assert ) {