Use the new character entity reference sanitizer on wikitext as well as

author Brion Vibber <brion@users.mediawiki.org>

Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)

committer Brion Vibber <brion@users.mediawiki.org>

Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)
author Brion Vibber <brion@users.mediawiki.org>
Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)
diff --git a/includes/Parser.php b/includes/Parser.php

index 7db37be..0e3549c 100644 (file)
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -185,11 +185,9 @@ class Parser
                                 '/<br *>/i' => '<br />',
                                 '/<center *>/i' => '<div class="center">',
                                 '/<\\/center *>/i' => '</div>',
-                               # Clean up spare ampersands; note that we probably ought to be
-                               # more careful about named entities.
-                               '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
                         );
                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+                       $text = Sanitizer::normalizeCharReferences( $text );
                 } else {
                         $fixtags = array(
                                 # french spaces, last one Guillemet-left
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php

index 757b46c..2729efc 100644 (file)
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -784,10 +784,14 @@ class Sanitizer {
                         # 15.3
                         'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
                         
+                       # XHTML Ruby annotation text module, simple ruby only.
+                       # http://www.w3.org/TR/ruby/
+                       'ruby'       => $common,
+                       # rbc
+                       # rtc
                         'rb'         => $common,
+                       'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
                         'rp'         => $common,
-                       'rt'         => $common,
-                       'ruby'       => $common,
                         );
                 return $whitelist;
         }
diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt

index 5da1ff1..3dfb9f0 100644 (file)
--- a/maintenance/parserTests.txt
+++ b/maintenance/parserTests.txt
@@ -1897,6 +1897,35 @@ DIV IN UPPERCASE
  
  !!end
  
+
+!! test
+text with amp in the middle of nowhere
+!! input
+Remember AT&T?
+!!result
+<p>Remember AT&amp;T?
+</p>
+!! end
+
+!! test
+text with character entity: eacute
+!! input
+I always thought &eacute; was a cute letter.
+!! result
+<p>I always thought &eacute; was a cute letter.
+</p>
+!! end
+
+!! test
+text with undefined character entity: xacute
+!! input
+I always thought &xacute; was a cute letter.
+!! result
+<p>I always thought &amp;xacute; was a cute letter.
+</p>
+!! end
+
+
  TODO:
  more images
  more tables
author	Brion Vibber <brion@users.mediawiki.org>
	Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)
committer	Brion Vibber <brion@users.mediawiki.org>
	Sun, 6 Feb 2005 16:13:06 +0000 (16:13 +0000)
includes/Parser.php		patch | blob | history
includes/Sanitizer.php		patch | blob | history
maintenance/parserTests.txt		patch | blob | history