From 2aa8892d8e2069d724e52f130a5e4e6131f20eb0 Mon Sep 17 00:00:00 2001
From: Gabriel Wicke
Date: Fri, 24 Mar 2006 16:36:29 +0000
Subject: [PATCH] Provide some cleanup if tidy is disabled:

* fix invalid nesting of anchors and i/b
* remove empty i/b tags
* remove divs inside anchors

Fixes several test cases
---
 includes/Parser.php | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/includes/Parser.php b/includes/Parser.php
index b777cd8cd5..3cc60be4e0 100644
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -250,6 +250,32 @@ class Parser
 
 		if (($wgUseTidy and $this->mOptions->mTidy) or $wgAlwaysUseTidy) {
 			$text = Parser::tidy($text);
+		} else {
+			# attempt to sanitize at least some nesting problems
+			# (bug #2702 and quite a few others)
+			$tidyregs = array(
+				# ''Something [http://www.cool.com cool''] -->
+				# <i>Something</i><a href="http://www.cool.com" ...><i>cool</i></a>
+				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
+				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
+				# fix up an anchor inside another anchor, at least
+				# for a single nested link (bug 3695)
+				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
+				'\\1\\2</a>\\3</a>\\1\\4</a>',
+				# fix div inside inline elements - doBlockLevels won't wrap a line which
+				# contains a div, so fix it up here; replace
+				# div with escaped text
+				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
+				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
+				# remove empty italic or bold tag pairs, some
+				# introduced by rules above
+				'/<([bi])><\/\\1>/' => ''
+			);
+
+			$text = preg_replace(
+				array_keys( $tidyregs ),
+				array_values( $tidyregs ),
+				$text );
 		}
 
 		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
-- 
2.20.1