Use the new character entity reference sanitizer on wikitext as well as
author: Brion Vibber <brion@users.mediawiki.org>
Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)
committer: Brion Vibber <brion@users.mediawiki.org>
Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)
attribute value normalization. One step closer to well-formed XML all the
time...

includes/Parser.php
includes/Sanitizer.php
maintenance/parserTests.txt

index 7db37be..0e3549c 100644 (file)
@@ -185,11 +185,9 @@ class Parser
                                '/<br *>/i' => '<br />',
                                '/<center *>/i' => '<div class="center">',
                                '/<\\/center *>/i' => '</div>',
-                               # Clean up spare ampersands; note that we probably ought to be
-                               # more careful about named entities.
-                               '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
                        );
                        $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+                       $text = Sanitizer::normalizeCharReferences( $text );
                } else {
                        $fixtags = array(
                                # french spaces, last one Guillemet-left
index 757b46c..2729efc 100644 (file)
@@ -784,10 +784,14 @@ class Sanitizer {
                        # 15.3
                        'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
                        
+                       # XHTML Ruby annotation text module, simple ruby only.
+                       # http://www.w3c.org/TR/ruby/
+                       'ruby'       => $common,
+                       # rbc
+                       # rtc
                        'rb'         => $common,
+                       'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
                        'rp'         => $common,
-                       'rt'         => $common,
-                       'ruby'       => $common,
                        );
                return $whitelist;
        }
index 5da1ff1..3dfb9f0 100644 (file)
@@ -1897,6 +1897,35 @@ DIV IN UPPERCASE
 
 !!end
 
+
+!! test
+text with amp in the middle of nowhere
+!! input
+Remember AT&T?
+!!result
+<p>Remember AT&amp;T?
+</p>
+!! end
+
+!! test
+text with character entity: eacute
+!! input
+I always thought &eacute; was a cute letter.
+!! result
+<p>I always thought &eacute; was a cute letter.
+</p>
+!! end
+
+!! test
+text with undefined character entity: xacute
+!! input
+I always thought &xacute; was a cute letter.
+!! result
+<p>I always thought &amp;xacute; was a cute letter.
+</p>
+!! end
+
+
 TODO:
 more images
 more tables