'kbd', 'samp', 'data', 'time', 'mark'
];
$htmlsingle = [
- 'br', 'wbr', 'hr', 'li', 'dt', 'dd'
- ];
- $htmlsingleonly = [ # Elements that cannot have close tags
- 'br', 'wbr', 'hr'
+ 'br', 'wbr', 'hr', 'li', 'dt', 'dd', 'meta', 'link'
];
- $htmlsingle[] = $htmlsingleonly[] = 'meta';
- $htmlsingle[] = $htmlsingleonly[] = 'link';
+ # Elements that cannot have close tags. This is (not coincidentally)
+ # also the list of tags for which the HTML 5 parsing algorithm
+ # requires you to "acknowledge the token's self-closing flag", i.e.
+ # a self-closing tag like <br/> is not an HTML 5 parse error only
+ # for this list.
+ $htmlsingleonly = [
+ 'br', 'wbr', 'hr', 'meta', 'link'
+ ];
$htmlnest = [ # Tags that can be nested--??
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
* @param array|bool $args Arguments for the processing callback
* @param array $extratags For any extra tags to include
* @param array $removetags For any tags (default or extra) to exclude
+ * @param callable $warnCallback (Deprecated) Callback allowing the
+ * addition of a tracking category when bad input is encountered.
+ * DO NOT ADD NEW PARAMETERS AFTER $warnCallback, since it will be
+ * removed shortly.
* @return string
*/
public static function removeHTMLtags( $text, $processCallback = null,
- $args = [], $extratags = [], $removetags = []
+ $args = [], $extratags = [], $removetags = [], $warnCallback = null
) {
extract( self::getRecognizedTagData( $extratags, $removetags ) );
$badtag = true;
# Is it a self closed htmlpair ? (bug 5487)
} elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) {
+ // Eventually we'll just remove the self-closing
+ // slash, in order to be consistent with HTML5
+ // semantics.
+ // $brace = '>';
+ // For now, let's just warn authors to clean up.
+ if ( is_callable( $warnCallback ) ) {
+ call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
+ }
$badtag = true;
} elseif ( isset( $htmlsingleonly[$t] ) ) {
# Hack to force empty tag for unclosable elements
call_user_func_array( $processCallback, [ &$params, $args ] );
}
+ if ( $brace == '/>' && !( isset( $htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
+ // Eventually we'll just remove the self-closing
+ // slash, in order to be consistent with HTML5
+ // semantics.
+ // $brace = '>';
+ // For now, let's just warn authors to clean up.
+ if ( is_callable( $warnCallback ) ) {
+ call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
+ }
+ }
if ( !Sanitizer::validateTag( $params, $t ) ) {
$badtag = true;
}
$newparams = Sanitizer::fixTagAttributes( $params, $t );
if ( !$badtag ) {
+ if ( $brace === '/>' && !isset( $htmlsingleonly[$t] ) ) {
+ # Interpret self-closing tags as empty tags even when
+ # HTML 5 would interpret them as start tags. Such input
+ # is commonly seen on Wikimedia wikis with this intention.
+ $brace = "></$t>";
+ }
+
$rest = str_replace( '>', '>', $rest );
$text .= "<$slash$t$newparams$brace$rest";
continue;
"content-model-json": "JSON",
"content-json-empty-object": "Empty object",
"content-json-empty-array": "Empty array",
+ "deprecated-self-close-category": "Pages using invalid self-closed HTML tags",
+ "deprecated-self-close-category-desc": "The page contains invalid self-closed HTML tags, such as <code>&lt;b/&gt;</code> or <code>&lt;span/&gt;</code>. The behavior of these will change soon to be consistent with the HTML5 specification, so their use in wikitext is deprecated.",
"duplicate-args-warning": "<strong>Warning:</strong> [[:$1]] is calling [[:$2]] with more than one value for the \"$3\" parameter. Only the last value provided will be used.",
"duplicate-args-category": "Pages using duplicate arguments in template calls",
"duplicate-args-category-desc": "The page contains template calls that use duplicates of arguments, such as <code><nowiki>{{foo|bar=1|bar=2}}</nowiki></code> or <code><nowiki>{{foo|bar|1=baz}}</nowiki></code>.",
"content-model-json": "Name for the JSON content model, used when describing what type of content a page contains.\n\nThis message is substituted in:\n*{{msg-mw|Bad-target-model}}\n*{{msg-mw|Content-not-allowed-here}}",
"content-json-empty-object": "Used to represent an object with no properties on a JSON content model page.",
"content-json-empty-array": "Used to represent an array with no values on a JSON content model page.",
+ "deprecated-self-close-category": "This message is used as a category name for a [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages are placed automatically if they contain invalid self-closed HTML tags, such as <code>&lt;b/&gt;</code> or <code>&lt;span/&gt;</code>. The behavior of these will change soon to be consistent with the HTML5 specification, so their use in wikitext is deprecated.",
+ "deprecated-self-close-category-desc": "Invalid self-closed HTML tag category description. Shown on [[Special:TrackingCategories]].\n\nSee also:\n* {{msg-mw|deprecated-self-close-category}}",
"duplicate-args-warning": "If a page calls a template and specifies the same argument more than once, such as <code><nowiki>{{foo|bar=1|bar=2}}</nowiki></code> or <code><nowiki>{{foo|bar|1=baz}}</nowiki></code>, this warning is displayed when previewing.\n\nParameters:\n* $1 - The calling page\n* $2 - The called template\n* $3 - The name of the duplicated argument",
"duplicate-args-category": "This message is used as a category name for a [[mw:Special:MyLanguage/Help:Tracking categories|tracking category]] where pages are placed automatically if they contain template calls that use duplicates of arguments, such as <code><nowiki>{{foo|bar=1|bar=2}}</nowiki></code> or <code><nowiki>{{foo|bar|1=baz}}</nowiki></code>.",
"duplicate-args-category-desc": "Duplicate arguments category description. Shown on [[Special:TrackingCategories]].\n\nSee also:\n* {{msg-mw|Duplicate-args-category}}",