From 3a7a90e61280cd346225a9a0ad3014a0758504f9 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 6 Feb 2005 16:13:06 +0000 Subject: [PATCH] Use the new character entity reference sanitizer on wikitext as well as attribute value normalization. One step closer to well-formed XML all the time... --- includes/Parser.php | 4 +--- includes/Sanitizer.php | 8 ++++++-- maintenance/parserTests.txt | 29 +++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/includes/Parser.php b/includes/Parser.php index 7db37be527..0e3549cb38 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -185,11 +185,9 @@ class Parser '/
<br *>/i' => '<br />
', '/
<center *>/i' => '<div class="center">
', '/<\\/center *>/i' => '</div>
', - # Clean up spare ampersands; note that we probably ought to be - # more careful about named entities. - '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;' ); $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + $text = Sanitizer::normalizeCharReferences( $text ); } else { $fixtags = array( # french spaces, last one Guillemet-left diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 757b46ca96..2729efcb7c 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -784,10 +784,14 @@ class Sanitizer { # 15.3 'hr' => array_merge( $common, array( 'noshade', 'size', 'width' ) ), + # XHTML Ruby annotation text module, simple ruby only. + # http://www.w3c.org/TR/ruby/ + 'ruby' => $common, + # rbc + # rtc 'rb' => $common, + 'rt' => $common, #array_merge( $common, array( 'rbspan' ) ), 'rp' => $common, - 'rt' => $common, - 'ruby' => $common, ); return $whitelist; } diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index 5da1ff151b..3dfb9f0320 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -1897,6 +1897,35 @@ DIV IN UPPERCASE !!end + +!! test +text with amp in the middle of nowhere +!! input +Remember AT&T? +!!result +

<p>Remember AT&amp;T? +</p>

+!! end + +!! test +text with character entity: eacute +!! input +I always thought &eacute; was a cute letter. +!! result +

<p>I always thought &eacute; was a cute letter. +</p>

+!! end + +!! test +text with undefined character entity: xacute +!! input +I always thought &xacute; was a cute letter. +!! result +

<p>I always thought &amp;xacute; was a cute letter. +</p>

+!! end + + TODO: more images more tables -- 2.20.1