|&\#[xX]([0-9A-Fa-f]+);
|(&)/x';
+ /**
+ * Pattern used to split one chunk of markup into the parts of an element.
+ *
+ * Acceptable tag name charset from HTML5 parsing spec
+ * http://www.w3.org/TR/html5/syntax.html#tag-open-state
+ *
+ * The subject has to match from start to end. Capture groups, in order:
+ *  1. an optional leading slash (present for a closing tag)
+ *  2. the tag name: one ASCII letter followed by any run of characters
+ *     other than tab, newline, whatever PCRE's \v matches, space, slash,
+ *     '>' or NUL; the run is possessive, so the name is never shortened
+ *     by backtracking
+ *  3. the string between the tag name and the closing brace (lazy match)
+ *  4. the closing brace, either '>' or a slash followed by '>'
+ *  5. everything after the brace, which may not contain '<'
+ *
+ * The pattern is written in a single-quoted PHP string, so the \t, \n, \v
+ * and \0 escapes reach PCRE unchanged and are interpreted by the regex
+ * engine rather than by PHP.
+ */
+ const ELEMENT_BITS_REGEX = '!^(/?)([A-Za-z][^\t\n\v />\0]*+)([^>]*?)(/?>)([^<]*)$!';
+
/**
* Blacklist for evil URIs like javascript:
* WARNING: DO NOT use this in any place that actually requires blacklisting
/**
* Cleans up HTML, removes dangerous tags and attributes, and
* removes HTML comments
- * @private
* @param string $text
* @param callable $processCallback Callback to do any variable or parameter
* replacements in HTML attribute values
* @param array $removetags For any tags (default or extra) to exclude
* @return string
*/
- static function removeHTMLtags( $text, $processCallback = null,
+ public static function removeHTMLtags( $text, $processCallback = null,
$args = array(), $extratags = array(), $removetags = array()
) {
global $wgUseTidy, $wgAllowMicrodataAttributes, $wgAllowImageTag;
static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
$htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
- wfProfileIn( __METHOD__ );
-
// Base our staticInitialised variable off of the global config state so that if the globals
// are changed (like in the screwed up test system) we will re-initialise the settings.
$globalContext = implode( '-', compact( 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
# $params: String between element name and >
# $brace: Ending '>' or '/>'
# $rest: Everything until the next element of $bits
- if ( preg_match( '!^(/?)([^\\s/>]+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) {
+ if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
} else {
$slash = $t = $params = $brace = $rest = null;
} else {
# this might be possible using tidy itself
foreach ( $bits as $x ) {
- preg_match(
- '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
- $x,
- $regs
- );
+ preg_match( self::ELEMENT_BITS_REGEX, $x, $regs );
wfSuppressWarnings();
list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
$text .= '<' . str_replace( '>', '>', $x );
}
}
- wfProfileOut( __METHOD__ );
return $text;
}
* and followed by a newline (ignoring spaces), trim leading and
* trailing spaces and one of the newlines.
*
- * @private
* @param string $text
* @return string
*/
- static function removeHTMLcomments( $text ) {
- wfProfileIn( __METHOD__ );
+ public static function removeHTMLcomments( $text ) {
while ( ( $start = strpos( $text, '<!--' ) ) !== false ) {
$end = strpos( $text, '-->', $start + 4 );
if ( $end === false ) {
$text = substr_replace( $text, '', $start, $end - $start );
}
}
- wfProfileOut( __METHOD__ );
return $text;
}