From: Brion Vibber Date: Sun, 6 Feb 2005 12:46:31 +0000 (+0000) Subject: Rewrote fixTagAttributes() for robustness and correctness: X-Git-Tag: 1.5.0alpha1~772 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/exercices/modifier.php?a=commitdiff_plain;h=5d0480ebec79782768296ac6ac3be01c4aadb236;p=lhc%2Fweb%2Fwiklou.git Rewrote fixTagAttributes() for robustness and correctness: - Attribute whitelist now per-element to appease validation - Always put values in quotes, for well-formed XML - Check that entities in attributes (but not yet in general text) are actually defined in HTML - Remove extra spaces that got put at the end of some tags w/o attributes - Be generally anal about things --- diff --git a/includes/Parser.php b/includes/Parser.php index 855429c05a..7db37be527 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -546,7 +546,7 @@ class Parser $indent_level = strlen( $matches[1] ); $t[$k] = "\n" . str_repeat( '
', $indent_level ) . - '' ; + '' ; array_push ( $td , false ) ; array_push ( $ltd , '' ) ; array_push ( $tr , false ) ; @@ -573,7 +573,7 @@ class Parser array_push ( $tr , false ) ; array_push ( $td , false ) ; array_push ( $ltd , '' ) ; - array_push ( $ltr , Sanitizer::fixTagAttributes ( $x ) ) ; + array_push ( $ltr , Sanitizer::fixTagAttributes ( $x, 'tr' ) ) ; } else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption # $x is a table row @@ -593,7 +593,7 @@ class Parser if ( $fc != '+' ) { $tra = array_pop ( $ltr ) ; - if ( !array_pop ( $tr ) ) $z = '\n" ; + if ( !array_pop ( $tr ) ) $z = '\n" ; array_push ( $tr , true ) ; array_push ( $ltr , '' ) ; } @@ -615,7 +615,7 @@ class Parser } if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ; - else $y = $y = "{$z}<{$l} ".Sanitizer::fixTagAttributes($y[0]).">{$y[1]}" ; + else $y = $y = "{$z}<{$l}".Sanitizer::fixTagAttributes($y[0], $l).">{$y[1]}" ; $t[$k] .= $y ; array_push ( $td , true ) ; } @@ -631,7 +631,7 @@ class Parser } $t = implode ( "\n" , $t ) ; - # $t = $this->removeHTMLtags( $t ); + # $t = Sanitizer::removeHTMLtags( $t ); wfProfileOut( $fname ); return $t ; } diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index dab3c8ce4f..0062791e14 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -63,8 +63,6 @@ class Sanitizer { $htmlsingle = array_merge( $tabletags, $htmlsingle ); $htmlelements = array_merge( $htmlsingle, $htmlpairs ); - $htmlattrs = Sanitizer::getHTMLattrs () ; - # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); @@ -110,12 +108,11 @@ class Sanitizer { array_push( $tagstack, $t ); } # Strip non-approved attributes from the tag - $newparams = Sanitizer::fixTagAttributes($params); - + $newparams = Sanitizer::fixTagAttributes( $params, $t ); } if ( ! $badtag ) { $rest = str_replace( '>', '>', $rest ); - $text .= "<$slash$t $newparams$brace$rest"; + $text .= "<$slash$t$newparams$brace$rest"; continue; } } @@ -133,9 +130,9 @@ class Sanitizer { $x, $regs ); @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { - $newparams = Sanitizer::fixTagAttributes($params); + $newparams = Sanitizer::fixTagAttributes( $params, $t ); $rest = str_replace( '>', '>', $rest ); - $text .= "<$slash$t $newparams$brace$rest"; + $text .= "<$slash$t$newparams$brace$rest"; } else { $text .= '<' . str_replace( '>', '>', $x); } @@ -190,54 +187,610 @@ class Sanitizer { } /** - * Return allowed HTML attributes + * Take a tag soup fragment listing an HTML element's attributes + * and normalize it to well-formed XML, discarding unwanted attributes. + * + * - Normalizes attribute names to lowercase + * - Discards attributes not on a whitelist for the given element + * - Turns broken or invalid entities into plaintext + * - Double-quotes all attribute values + * - Attributes without values are given the name as attribute + * - Double attributes are discarded + * - Unsafe style attributes are discarded + * - Prepends space if there are attributes. + * + * @param string $text + * @param string $element + * @return string + * + * @todo Check for legal values where the DTD limits things. + * @todo Check for unique id attribute :P + */ + function fixTagAttributes( $text, $element ) { + if( trim( $text ) == '' ) { + return ''; + } + + $attrib = '[A-Za-z0-9]'; #FIXME + $space = '[\x09\x0a\x0d\x20]'; + if( !preg_match_all( + "/(?:^|$space)($attrib+) + ($space*=$space* + (?: + # The attribute value: quoted or alone + \"([^<\"]*)\" + | '([^<']*)' + | ([a-zA-Z0-9._:-]+) + | (\#[0-9a-fA-F]+) # Technically wrong, but lots of + # colors are specified like this. + # We'll be normalizing it. + ) + )?(?=$space|\$)/sx", + $text, + $pairs, + PREG_SET_ORDER ) ) { + return ''; + } + + $whitelist = array_flip( Sanitizer::attributeWhitelist( $element ) ); + $attribs = array(); + foreach( $pairs as $set ) { + $attribute = strtolower( $set[1] ); + if( !isset( $whitelist[$attribute] ) ) { + continue; + } + if( $set[2] == '' ) { + # In XHTML, attributes must have a value. + $value = $set[1]; + } elseif( $set[3] != '' ) { + # Double-quoted + $value = Sanitizer::normalizeAttributeValue( $set[3] ); + } elseif( $set[4] != '' ) { + # Single-quoted + $value = str_replace( '"', '"', + Sanitizer::normalizeAttributeValue( $set[4] ) ); + } elseif( $set[5] != '' ) { + # No quotes. + $value = Sanitizer::normalizeAttributeValue( $set[5] ); + } elseif( $set[6] != '' ) { + # Illegal #XXXXXX color with no quotes. + $value = Sanitizer::normalizeAttributeValue( $set[6] ); + } else { + wfDebugDieBacktrace( "Tag conditions not met. Something's very odd." ); + } + + # Strip javascript "expression" from stylesheets. + # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp + if( $attribute == 'style' && preg_match( + '/(expression|tps*:\/\/|url\\s*\().*/is', + wfMungeToUtf8( $value ) ) ) { + # haxx0r + continue; + } + + if( !isset( $attribs[$attribute] ) ) { + $attribs[$attribute] = "$attribute=\"$value\""; + } + } + if( empty( $attribs ) ) { + return ''; + } else { + return ' ' . implode( ' ', $attribs ); + } + } + + /** + * Normalize whitespace and character references in an XML source- + * encoded text for an attribute value. + * + * See http://www.w3.org/TR/REC-xml/#AVNormalize for background, + * but note that we're not returning the value, but are returning + * XML source fragments that will be slapped into output. * + * @param string $text + * @return string * @access private */ - function getHTMLattrs () { - $htmlattrs = array( # Allowed attributes--no scripting, etc. - 'title', 'align', 'lang', 'dir', 'width', 'height', - 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */ - 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color', - /* FONT */ 'type', 'start', 'value', 'compact', - /* For various lists, mostly deprecated but safe */ - 'summary', 'width', 'border', 'frame', 'rules', - 'cellspacing', 'cellpadding', 'valign', 'char', - 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis', - 'headers', 'scope', 'rowspan', 'colspan', /* Tables */ - 'id', 'class', 'name', 'style' /* For CSS */ - ); - return $htmlattrs ; + function normalizeAttributeValue( $text ) { + return preg_replace( + '/\r\n|[\x20\x0d\x0a\x09]/', + ' ', + Sanitizer::normalizeCharReferences( $text ) ); + return $spaced; } - + /** - * Remove non approved attributes and javascript in css + * Ensure that any entities and character references are legal + * for XML and XHTML specifically. Any stray bits will be + * &-escaped to result in a valid text fragment. * + * a. any named char refs must be known in XHTML + * b. any numeric char refs must be legal chars, not invalid or forbidden + * c. use &#x, not &#X + * d. fix or reject non-valid attributes + * + * @param string $text + * @return string * @access private */ - function fixTagAttributes ( $t ) { - if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-) - $htmlattrs = Sanitizer::getHTMLattrs() ; - - # Strip non-approved attributes from the tag - $t = preg_replace( - '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e', - "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')", - $t); - - $t = str_replace ( '<>' , '' , $t ) ; # This should fix bug 980557 - - # Strip javascript "expression" from stylesheets. Brute force approach: - # If anythin offensive is found, all attributes of the HTML tag are dropped - - if( preg_match( - '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is', - wfMungeToUtf8( $t ) ) ) - { - $t=''; + function normalizeCharReferences( $text ) { + return preg_replace_callback( + '/&([A-Za-z0-9]+); + |&\#([0-9]+); + |&\#x([0-9A-Za-z]+); + |&\#X([0-9A-Za-z]+); + |(&)/x', + array( 'Sanitizer', 'normalizeCharReferencesCallback' ), + $text ); + } + + function normalizeCharReferencesCallback( $matches ) { + $ret = null; + if( $matches[1] != '' ) { + $ret = Sanitizer::normalizeEntity( $matches[1] ); + } elseif( $matches[2] != '' ) { + $ret = Sanitizer::decCharReference( $matches[2] ); + } elseif( $matches[3] != '' ) { + $ret = Sanitizer::hexCharReference( $matches[3] ); + } elseif( $matches[4] != '' ) { + $ret = Sanitizer::hexCharReference( $matches[4] ); } + if( is_null( $ret ) ) { + return htmlspecialchars( $matches[0] ); + } else { + return $ret; + } + } + + /** + * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD, + * return the named entity reference as is. Otherwise, returns + * HTML-escaped text of pseudo-entity source (eg &foo;) + * + * @return string + */ + function normalizeEntity( $name ) { + # List of all named character entities defined in HTML 4.01 + # http://www.w3.org/TR/html4/sgml/entities.html + static $htmlEntities = array( + 'aacute' => true, + 'Aacute' => true, + 'acirc' => true, + 'Acirc' => true, + 'acute' => true, + 'aelig' => true, + 'AElig' => true, + 'agrave' => true, + 'Agrave' => true, + 'alefsym' => true, + 'alpha' => true, + 'Alpha' => true, + 'amp' => true, + 'and' => true, + 'ang' => true, + 'apos' => true, + 'aring' => true, + 'Aring' => true, + 'asymp' => true, + 'atilde' => true, + 'Atilde' => true, + 'auml' => true, + 'Auml' => true, + 'bdquo' => true, + 'beta' => true, + 'Beta' => true, + 'brvbar' => true, + 'bull' => true, + 'cap' => true, + 'ccedil' => true, + 'Ccedil' => true, + 'cedil' => true, + 'cent' => true, + 'chi' => true, + 'Chi' => true, + 'circ' => true, + 'clubs' => true, + 'cong' => true, + 'copy' => true, + 'crarr' => true, + 'cup' => true, + 'curren' => true, + 'dagger' => true, + 'Dagger' => true, + 'darr' => true, + 'dArr' => true, + 'deg' => true, + 'delta' => true, + 'Delta' => true, + 'diams' => true, + 'divide' => true, + 'eacute' => true, + 'Eacute' => true, + 'ecirc' => true, + 'Ecirc' => true, + 'egrave' => true, + 'Egrave' => true, + 'empty' => true, + 'emsp' => true, + 'ensp' => true, + 'epsilon' => true, + 'Epsilon' => true, + 'equiv' => true, + 'eta' => true, + 'Eta' => true, + 'eth' => true, + 'ETH' => true, + 'euml' => true, + 'Euml' => true, + 'euro' => true, + 'exist' => true, + 'fnof' => true, + 'forall' => true, + 'frac12' => true, + 'frac14' => true, + 'frac34' => true, + 'frasl' => true, + 'gamma' => true, + 'Gamma' => true, + 'ge' => true, + 'gt' => true, + 'harr' => true, + 'hArr' => true, + 'hearts' => true, + 'hellip' => true, + 'iacute' => true, + 'Iacute' => true, + 'icirc' => true, + 'Icirc' => true, + 'iexcl' => true, + 'igrave' => true, + 'Igrave' => true, + 'image' => true, + 'infin' => true, + 'int' => true, + 'iota' => true, + 'Iota' => true, + 'iquest' => true, + 'isin' => true, + 'iuml' => true, + 'Iuml' => true, + 'kappa' => true, + 'Kappa' => true, + 'lambda' => true, + 'Lambda' => true, + 'lang' => true, + 'laquo' => true, + 'larr' => true, + 'lArr' => true, + 'lceil' => true, + 'ldquo' => true, + 'le' => true, + 'lfloor' => true, + 'lowast' => true, + 'loz' => true, + 'lrm' => true, + 'lsaquo' => true, + 'lsquo' => true, + 'lt' => true, + 'macr' => true, + 'mdash' => true, + 'micro' => true, + 'middot' => true, + 'minus' => true, + 'mu' => true, + 'Mu' => true, + 'nabla' => true, + 'nbsp' => true, + 'ndash' => true, + 'ne' => true, + 'ni' => true, + 'not' => true, + 'notin' => true, + 'nsub' => true, + 'ntilde' => true, + 'Ntilde' => true, + 'nu' => true, + 'Nu' => true, + 'oacute' => true, + 'Oacute' => true, + 'ocirc' => true, + 'Ocirc' => true, + 'oelig' => true, + 'OElig' => true, + 'ograve' => true, + 'Ograve' => true, + 'oline' => true, + 'omega' => true, + 'Omega' => true, + 'omicron' => true, + 'Omicron' => true, + 'oplus' => true, + 'or' => true, + 'ordf' => true, + 'ordm' => true, + 'oslash' => true, + 'Oslash' => true, + 'otilde' => true, + 'Otilde' => true, + 'otimes' => true, + 'ouml' => true, + 'Ouml' => true, + 'para' => true, + 'part' => true, + 'permil' => true, + 'perp' => true, + 'phi' => true, + 'Phi' => true, + 'pi' => true, + 'Pi' => true, + 'piv' => true, + 'plusmn' => true, + 'pound' => true, + 'prime' => true, + 'Prime' => true, + 'prod' => true, + 'prop' => true, + 'psi' => true, + 'Psi' => true, + 'quot' => true, + 'radic' => true, + 'rang' => true, + 'raquo' => true, + 'rarr' => true, + 'rArr' => true, + 'rceil' => true, + 'rdquo' => true, + 'real' => true, + 'reg' => true, + 'rfloor' => true, + 'rho' => true, + 'Rho' => true, + 'rlm' => true, + 'rsaquo' => true, + 'rsquo' => true, + 'sbquo' => true, + 'scaron' => true, + 'Scaron' => true, + 'sdot' => true, + 'sect' => true, + 'shy' => true, + 'sigma' => true, + 'Sigma' => true, + 'sigmaf' => true, + 'sim' => true, + 'spades' => true, + 'sub' => true, + 'sube' => true, + 'sum' => true, + 'sup' => true, + 'sup1' => true, + 'sup2' => true, + 'sup3' => true, + 'supe' => true, + 'szlig' => true, + 'tau' => true, + 'Tau' => true, + 'there4' => true, + 'theta' => true, + 'Theta' => true, + 'thetasym' => true, + 'thinsp' => true, + 'thorn' => true, + 'THORN' => true, + 'tilde' => true, + 'times' => true, + 'trade' => true, + 'uacute' => true, + 'Uacute' => true, + 'uarr' => true, + 'uArr' => true, + 'ucirc' => true, + 'Ucirc' => true, + 'ugrave' => true, + 'Ugrave' => true, + 'uml' => true, + 'upsih' => true, + 'upsilon' => true, + 'Upsilon' => true, + 'uuml' => true, + 'Uuml' => true, + 'weierp' => true, + 'xi' => true, + 'Xi' => true, + 'yacute' => true, + 'Yacute' => true, + 'yen' => true, + 'yuml' => true, + 'Yuml' => true, + 'zeta' => true, + 'Zeta' => true, + 'zwj' => true, + 'zwnj' => true ); + if( isset( $htmlEntities[$name] ) ) { + return "&$name;"; + } else { + return "&$name;"; + } + } + + function decCharReference( $codepoint ) { + $point = IntVal( $codepoint ); + if( Sanitizer::validateCodepoint( $point ) ) { + return sprintf( '&#%d;', $point ); + } else { + return null; + } + } + + function hexCharReference( $codepoint ) { + $point = hexdec( $codepoint ); + if( Sanitizer::validateCodepoint( $point ) ) { + return sprintf( '&#x%x;', $point ); + } else { + return null; + } + } + + /** + * Returns true if a given Unicode codepoint is a valid character in XML. + * @param int $codepoint + * @return bool + */ + function validateCodepoint( $codepoint ) { + return ($codepoint == 0x09) + || ($codepoint == 0x0a) + || ($codepoint == 0x0d) + || ($codepoint >= 0x20 && $codepoint <= 0xd7ff) + || ($codepoint >= 0xe000 && $codepoint <= 0xfffd) + || ($codepoint >= 0x10000 && $codepoint <= 0x10ffff); + } - return trim ( $t ) ; + /** + * Fetch the whitelist of acceptable attributes for a given + * element name. + * + * @param string $element + * @return array + */ + function attributeWhitelist( $element ) { + $list = Sanitizer::setupAttributeWhitelist(); + return isset( $list[$element] ) + ? $list[$element] + : array(); + } + + /** + * @return array + */ + function setupAttributeWhitelist() { + $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' ); + $block = array_merge( $common, array( 'align' ) ); + $tablealign = array( 'align', 'char', 'charoff', 'valign' ); + $tablecell = array( 'abbr', + 'axis', + 'headers', + 'scope', + 'rowspan', + 'colspan', + 'nowrap', # deprecated + 'width', # deprecated + 'height' # deprecated + ); + + # Numbers refer to sections in HTML 4.01 standard describing the element. + # See: http://www.w3.org/TR/html4/ + $whitelist = array ( + # 7.5.4 + 'div' => $block, + 'center' => $common, # deprecated + 'span' => $block, # ?? + + # 7.5.5 + 'h1' => $block, + 'h2' => $block, + 'h3' => $block, + 'h4' => $block, + 'h5' => $block, + 'h6' => $block, + + # 7.5.6 + # address + + # 8.2.4 + # bdo + + # 9.2.1 + 'em' => $common, + 'strong' => $common, + 'cite' => $common, + # dfn + 'code' => $common, + # samp + # kbd + 'var' => $common, + # abbr + # acronym + + # 9.2.2 + 'blockquote' => array_merge( $common, array( 'cite' ) ), + # q + + # 9.2.3 + 'sub' => $common, + 'sup' => $common, + + # 9.3.1 + 'p' => $block, + + # 9.3.2 + 'br' => array( 'id', 'class', 'title', 'style', 'clear' ), + + # 9.3.4 + 'pre' => array_merge( $common, array( 'width' ) ), + + # 9.4 + 'ins' => array_merge( $common, array( 'cite', 'datetime' ) ), + 'del' => array_merge( $common, array( 'cite', 'datetime' ) ), + + # 10.2 + 'ul' => array_merge( $common, array( 'type' ) ), + 'ol' => array_merge( $common, array( 'type', 'start' ) ), + 'li' => array_merge( $common, array( 'type', 'value' ) ), + + # 10.3 + 'dl' => $common, + 'dd' => $common, + 'dt' => $common, + + # 11.2.1 + 'table' => array_merge( $common, + array( 'summary', 'width', 'border', 'frame', + 'rules', 'cellspacing', 'cellpadding', + 'align', 'bgcolor', 'frame', 'rules', + 'border' ) ), + + # 11.2.2 + 'caption' => array_merge( $common, array( 'align' ) ), + + # 11.2.3 + 'thead' => array_merge( $common, $tablealign ), + 'tfoot' => array_merge( $common, $tablealign ), + 'tbody' => array_merge( $common, $tablealign ), + + # 11.2.4 + 'colgroup' => array_merge( $common, array( 'span', 'width' ), $tablealign ), + 'col' => array_merge( $common, array( 'span', 'width' ), $tablealign ), + + # 11.2.5 + 'tr' => array_merge( $common, array( 'bgcolor' ), $tablealign ), + + # 11.2.6 + 'td' => array_merge( $common, $tablecell, $tablealign ), + 'th' => array_merge( $common, $tablecell, $tablealign ), + + # 15.2.1 + 'tt' => $common, + 'b' => $common, + 'i' => $common, + 'big' => $common, + 'small' => $common, + 'strike' => $common, + 's' => $common, + 'u' => $common, + + # 15.2.2 + 'font' => array_merge( $common, array( 'size', 'color', 'face' ) ), + # basefont + + # 15.3 + 'hr' => array_merge( $common, array( 'noshade', 'size', 'width' ) ), + + 'rb' => $common, + 'rp' => $common, + 'rt' => $common, + 'ruby' => $common, + ); + return $whitelist; } } diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index 47656ef6ba..5da1ff151b 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -548,7 +548,7 @@ BUG 289: literal ">"-token in URL-tail !! input http://www.example.com/html !! result -

http://www.example.com/html +

http://www.example.com/html

!!end @@ -566,7 +566,7 @@ BUG 289: literal ">"-token in bracketed URL !! input [http://www.example.com/html stuff] !! result -

html stuff +

html stuff

!!end @@ -662,11 +662,11 @@ Simple table | 3 || 4 |} !! result -
- +
+ - +
1 2
3 4
@@ -699,26 +699,26 @@ Multiplication table - + - + - + - + - + - +
Multiplication table
× 1 2 3
1 1 2 3
2 2 4 6
3 3 6 9
4 4 8 12
5 5 10 15
@@ -737,13 +737,13 @@ Table rowspan | Cell 3, row 2 |} !! result - - +
+ - +
Cell 1, row 1 - Cell 2, row 1 (and 2) + Cell 2, row 1 (and 2) Cell 3, row 1
Cell 1, row 2 Cell 3, row 2
@@ -764,15 +764,15 @@ Nested table |the original table again |} !! result - - +
+
α - - +
+ - +
nested
table
the original table again @@ -1194,8 +1194,8 @@ BUG 553: link with two variables in a piped link |[[{{{1}}}|{{{2}}}]] |} !! result - - +
+
[[{{{1}}}|{{{2}}}]]
@@ -1304,11 +1304,11 @@ foo {{table}} !! result

foo

- - +
+ - +
1 2
3 4
@@ -1322,11 +1322,11 @@ foo !! result

foo

- - +
+ - +
1 2
3 4
@@ -1842,6 +1842,61 @@ Namespaced link must have a title (bad fragment version)

!!end + +!! test +div with no attributes +!! input +
HTML rocks
+!! result +
HTML rocks
+ +!! end + +!! test +div with double-quoted attribute +!! input +
HTML rocks
+!! result +
HTML rocks
+ +!! end + +!! test +div with single-quoted attribute +!! input +
HTML rocks
+!! result +
HTML rocks
+ +!! end + +!! test +div with unquoted attribute +!! input +
HTML rocks
+!! result +
HTML rocks
+ +!! end + +!! test +div with illegal double attributes +!! input +
HTML rocks
+!! result +
HTML rocks
+ +!!end + +!! test +DIV IN UPPERCASE +!! input +
HTML ROCKS
+!! result +
HTML ROCKS
+ +!!end + TODO: more images more tables