'/<br *>/i' => '<br />',
'/<center *>/i' => '<div class="center">',
'/<\\/center *>/i' => '</div>',
- # Clean up spare ampersands; note that we probably ought to be
- # more careful about named entities.
- '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&'
);
$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+ $text = Sanitizer::normalizeCharReferences( $text );
} else {
$fixtags = array(
# french spaces, last one Guillemet-left
# 15.3
'hr' => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
+ # XHTML Ruby annotation text module, simple ruby only.
+ # http://www.w3.org/TR/ruby/
+ 'ruby' => $common,
+ # rbc
+ # rtc
'rb' => $common,
+ 'rt' => $common, #array_merge( $common, array( 'rbspan' ) ),
'rp' => $common,
- 'rt' => $common,
- 'ruby' => $common,
);
return $whitelist;
}
!!end
+
+!! test
+text with amp in the middle of nowhere
+!! input
+Remember AT&T?
+!!result
+<p>Remember AT&amp;T?
+</p>
+!! end
+
+!! test
+text with character entity: eacute
+!! input
+I always thought &eacute; was a cute letter.
+!! result
+<p>I always thought &eacute; was a cute letter.
+</p>
+!! end
+
+!! test
+text with undefined character entity: xacute
+!! input
+I always thought &xacute; was a cute letter.
+!! result
+<p>I always thought &amp;xacute; was a cute letter.
+</p>
+!! end
+
+
TODO:
more images
more tables