public static function removeHTMLtags( $text, $processCallback = null,
$args = [], $extratags = [], $removetags = [], $warnCallback = null
) {
- extract( self::getRecognizedTagData( $extratags, $removetags ) );
+ $tagData = self::getRecognizedTagData( $extratags, $removetags );
+ $htmlpairs = $tagData['htmlpairs'];
+ $htmlsingle = $tagData['htmlsingle'];
+ $htmlsingleonly = $tagData['htmlsingleonly'];
+ $htmlnest = $tagData['htmlnest'];
+ $tabletags = $tagData['tabletags'];
+ $htmllist = $tagData['htmllist'];
+ $listtags = $tagData['listtags'];
+ $htmlsingleallowed = $tagData['htmlsingleallowed'];
+ $htmlelements = $tagData['htmlelements'];
# Remove HTML comments
$text = self::removeHTMLcomments( $text );
'{' => '{',
'}' => '}', // prevent unpaired language conversion syntax
'[' => '[',
+ ']' => ']',
"''" => '''',
'ISBN' => 'ISBN',
'RFC' => 'RFC',
* Warning: this return value must be further escaped for literal
* inclusion in HTML output as of 1.10!
*
- * @param string $text HTML fragment
+ * @param string $html HTML fragment
* @return string
*/
- static function stripAllTags( $text ) {
- # Actual <tags>
- $text = StringUtils::delimiterReplace( '<', '>', '', $text );
+ static function stripAllTags( $html ) {
+ // Tokenize $html with RemexHtml; the handler passed to the tokenizer supplies the extracted text via getResult()
+ $handler = new RemexStripTagHandler; // receives the tokenizer's events
+ $tokenizer = new RemexHtml\Tokenizer\Tokenizer( $handler, $html, [
+ 'ignoreErrors' => true,
+ // Char refs are deliberately NOT ignored: the tokenizer must decode them, since no separate decodeCharReferences() pass follows
+ 'ignoreNulls' => true,
+ 'skipPreprocess' => true,
+ ] );
+ $tokenizer->execute(); // run the tokenizer over the whole of $html before reading the result
+ $text = $handler->getResult(); // NOTE(review): presumably tag-free text with char refs already decoded — confirm against RemexStripTagHandler
- # Normalize &entities and whitespace
- $text = self::decodeCharReferences( $text );
$text = self::normalizeWhitespace( $text );
-
return $text;
}