Rewrote fixTagAttributes() for robustness and correctness:
[lhc/web/wiklou.git] / includes / Sanitizer.php
1 <?php
2
3 /**
4 * (X)HTML sanitizer for MediaWiki
5 *
6 * Copyright (C) 2002-2005 Brion Vibber <brion@pobox.com> et al
7 * http://www.mediawiki.org/
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 * http://www.gnu.org/copyleft/gpl.html
23 *
24 * @package MediaWiki
25 */
26
27 class Sanitizer {
/**
 * Cleans up HTML, removes dangerous tags and attributes, and
 * removes HTML comments.
 *
 * The text is split on '<'. Every piece that parses as a tag from the
 * allowed element list is re-emitted with its attributes passed
 * through Sanitizer::fixTagAttributes(); every other piece has its
 * angle brackets escaped so that it shows up as literal text.
 *
 * When $wgUseTidy is off, a stack of open elements is maintained so
 * that mismatched closing tags are escaped and elements still open at
 * the end of the text are closed. When $wgUseTidy is on, no nesting
 * checks are done here (presumably tidy balances tags downstream).
 *
 * @param string $text
 * @return string
 * @access private
 */
function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	# NOTE: the profiling section is still named after Parser; the
	# string is kept unchanged so existing profiling data lines up.
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p', 'span'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table cells and rows are treated like single tags: they are
	# never pushed on the stack and need no closing tag.
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# Remove HTML comments
	$text = Sanitizer::removeHTMLcomments( $text );

	# 1: optional slash, 2: tag name, 3: attribute soup,
	# 4: closing brace, 5: text up to the next '<'
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

	$bits = explode( '<', $text );
	$text = array_shift( $bits );
	if( !$wgUseTidy ) {
		$tagstack = array();
		$tablestack = array();
		foreach( $bits as $x ) {
			$badtag = true;
			# Check the match result instead of silencing the notices
			# that destructuring an empty match array would raise.
			if( preg_match( $tagPattern, $x, $regs ) ) {
				$slash = $regs[1];
				$t = strtolower( $regs[2] );
				$params = $regs[3];
				$brace = $regs[4];
				$rest = $regs[5];

				if( in_array( $t, $htmlelements ) ) {
					$badtag = false;
					$newparams = '';
					if( $slash ) {
						# Closing a tag: it must match the innermost
						# open element unless it is a single tag.
						if( !in_array( $t, $htmlsingle ) ) {
							$ot = array_pop( $tagstack );
							if( $ot !== $t ) {
								# Put back what we took, but never push
								# the null that an empty stack yields.
								if( !is_null( $ot ) ) {
									array_push( $tagstack, $ot );
								}
								$badtag = true;
							}
						}
						if( !$badtag && $t == 'table' ) {
							# Leaving a table restores the stack that
							# was active when the table was opened.
							$tagstack = array_pop( $tablestack );
							if( !is_array( $tagstack ) ) {
								$tagstack = array();
							}
						}
					} else {
						# Opening a tag: keep track for later
						if( in_array( $t, $tabletags ) &&
							!in_array( 'table', $tagstack ) ) {
							$badtag = true;
						} elseif( in_array( $t, $tagstack ) &&
							!in_array( $t, $htmlnest ) ) {
							$badtag = true;
						} elseif( !in_array( $t, $htmlsingle ) ) {
							if( $t == 'table' ) {
								# Each table gets a fresh stack
								array_push( $tablestack, $tagstack );
								$tagstack = array();
							}
							array_push( $tagstack, $t );
						}
						if( !$badtag ) {
							# Strip non-approved attributes from the tag
							$newparams = Sanitizer::fixTagAttributes( $params, $t );
						}
					}
				}
			}

			if( $badtag ) {
				$text .= '&lt;' . str_replace( '>', '&gt;', $x );
			} else {
				$rest = str_replace( '>', '&gt;', $rest );
				$text .= "<$slash$t$newparams$brace$rest";
			}
		}
		# Close off any remaining tags
		while( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
			$text .= "</$t>\n";
			if( $t == 'table' ) {
				$tagstack = array_pop( $tablestack );
			}
		}
	} else {
		# this might be possible using tidy itself
		foreach( $bits as $x ) {
			if( preg_match( $tagPattern, $x, $regs ) ) {
				$slash = $regs[1];
				$t = strtolower( $regs[2] );
				if( in_array( $t, $htmlelements ) ) {
					$newparams = Sanitizer::fixTagAttributes( $regs[3], $t );
					$brace = $regs[4];
					$rest = str_replace( '>', '&gt;', $regs[5] );
					$text .= "<$slash$t$newparams$brace$rest";
					continue;
				}
			}
			$text .= '&lt;' . str_replace( '>', '&gt;', $x );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
144
/**
 * Strip every terminated HTML comment ('<!--' through '-->') from
 * the text.
 *
 * If a comment sits alone on its line, i.e. only spaces separate it
 * from a newline on both sides, the surrounding spaces and one of the
 * two newlines are removed with it so no blank line is left behind.
 * An unterminated comment stops processing and is left in place.
 *
 * @param string $text
 * @return string
 * @access private
 */
function removeHTMLcomments( $text ) {
	$fname='Parser::removeHTMLcomments';
	wfProfileIn( $fname );
	for( ;; ) {
		$open = strpos( $text, '<!--' );
		if( $open === false ) {
			# No more comments
			break;
		}
		$close = strpos( $text, '-->', $open + 4 );
		if( $close === false ) {
			# Unterminated comment; bail out
			break;
		}
		# Point just past the closing '-->'
		$close += 3;

		# Grow a window around the comment: start one character to
		# the left and swallow any run of spaces on either side.
		$left = max( $open - 1, 0 );
		$span = $close - $left;
		while( $left > 0 && substr( $text, $left, 1 ) === ' ' ) {
			$left--;
			$span++;
		}
		while( substr( $text, $left + $span, 1 ) === ' ' ) {
			$span++;
		}

		$newlineBefore = ( substr( $text, $left, 1 ) === "\n" );
		$newlineAfter = ( substr( $text, $left + $span, 1 ) === "\n" );
		if( $newlineBefore && $newlineAfter ) {
			# The comment had the line to itself: collapse the whole
			# window, including both newlines, into a single newline.
			$text = substr_replace( $text, "\n", $left, $span + 1 );
		} else {
			# Remove just the comment.
			$text = substr_replace( $text, '', $open, $close - $open );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
188
/**
 * Take a tag soup fragment listing an HTML element's attributes
 * and normalize it to well-formed XML, discarding unwanted attributes.
 *
 * - Normalizes attribute names to lowercase
 * - Discards attributes not on a whitelist for the given element
 * - Turns broken or invalid entities into plaintext
 * - Double-quotes all attribute values
 * - Attributes without values are given the name as attribute
 * - Empty quoted values (foo="" or foo='') are kept as empty values
 * - Double attributes are discarded (the first occurrence wins)
 * - Unsafe style attributes are discarded
 * - Prepends space if there are attributes.
 *
 * @param string $text    Raw attribute text found between the tag name
 *                        and the closing brace
 * @param string $element Lowercase element name, used to look up the
 *                        attribute whitelist
 * @return string Normalized attributes with a leading space, or an
 *                empty string if nothing usable remains
 *
 * @todo Check for legal values where the DTD limits things.
 * @todo Check for unique id attribute :P
 */
function fixTagAttributes( $text, $element ) {
	if( trim( $text ) === '' ) {
		return '';
	}

	$attrib = '[A-Za-z0-9]'; #FIXME
	$space = '[\x09\x0a\x0d\x20]';
	if( !preg_match_all(
		"/(?:^|$space)($attrib+)
		  ($space*=$space*
			(?:
			 # The attribute value: quoted or alone
			  \"([^<\"]*)\"
			 | '([^<']*)'
			 |  ([a-zA-Z0-9._:-]+)
			 | (\#[0-9a-fA-F]+) # Technically wrong, but lots of
			                     # colors are specified like this.
			                     # We'll be normalizing it.
			)
		   )?(?=$space|\$)/sx",
		$text,
		$pairs,
		PREG_SET_ORDER ) ) {
		return '';
	}

	$whitelist = array_flip( Sanitizer::attributeWhitelist( $element ) );
	$attribs = array();
	foreach( $pairs as $set ) {
		$attribute = strtolower( $set[1] );
		if( !isset( $whitelist[$attribute] ) ) {
			continue;
		}

		# With PREG_SET_ORDER, groups after the last one that took part
		# in the match are missing from $set, so test each index with
		# isset() before reading it.
		if( !isset( $set[2] ) || $set[2] === '' ) {
			# In XHTML, attributes must have a value.
			$value = $set[1];
		} elseif( isset( $set[6] ) && $set[6] !== '' ) {
			# Illegal #XXXXXX color with no quotes.
			$value = Sanitizer::normalizeAttributeValue( $set[6] );
		} elseif( isset( $set[5] ) && $set[5] !== '' ) {
			# No quotes.
			$value = Sanitizer::normalizeAttributeValue( $set[5] );
		} elseif( isset( $set[4] ) && $set[4] !== '' ) {
			# Single-quoted; the value may contain double quotes,
			# which must be escaped now that we double-quote it.
			$value = str_replace( '"', '&quot;',
				Sanitizer::normalizeAttributeValue( $set[4] ) );
		} elseif( isset( $set[3] ) && $set[3] !== '' ) {
			# Double-quoted
			$value = Sanitizer::normalizeAttributeValue( $set[3] );
		} else {
			# A value part was present but every alternative is empty:
			# this is an empty quoted value such as title="" or
			# title=''. That is legitimate input, not an internal error.
			$value = '';
		}

		# Strip javascript "expression" from stylesheets.
		# http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
		# The pattern also rejects url(...) and anything that looks
		# like "tp://" or "tps://".
		if( $attribute == 'style' && preg_match(
			'/(expression|tps*:\/\/|url\\s*\().*/is',
			wfMungeToUtf8( $value ) ) ) {
			# haxx0r
			continue;
		}

		# Only the first occurrence of an attribute is kept
		if( !isset( $attribs[$attribute] ) ) {
			$attribs[$attribute] = "$attribute=\"$value\"";
		}
	}
	if( empty( $attribs ) ) {
		return '';
	} else {
		return ' ' . implode( ' ', $attribs );
	}
}
281
/**
 * Normalize whitespace and character references in an XML source-
 * encoded text for an attribute value.
 *
 * Character references are first cleaned up by
 * Sanitizer::normalizeCharReferences(); afterwards every CR LF pair
 * and every single space, CR, LF or tab is replaced by one space.
 *
 * See http://www.w3.org/TR/REC-xml/#AVNormalize for background,
 * but note that we're not returning the value, but are returning
 * XML source fragments that will be slapped into output.
 *
 * @param string $text
 * @return string
 * @access private
 */
function normalizeAttributeValue( $text ) {
	# "\r\n" is listed first so a CR LF pair becomes one space, not two.
	return preg_replace(
		'/\r\n|[\x20\x0d\x0a\x09]/',
		' ',
		Sanitizer::normalizeCharReferences( $text ) );
}
301
/**
 * Ensure that any entities and character references are legal
 * for XML and XHTML specifically. Any stray bits will be
 * &amp;-escaped to result in a valid text fragment.
 *
 * a. any named char refs must be known in XHTML
 * b. any numeric char refs must be legal chars, not invalid or forbidden
 * c. use &#x, not &#X
 * d. anything else starting with '&' has the ampersand escaped
 *
 * The pattern has five capture groups, consumed by
 * Sanitizer::normalizeCharReferencesCallback():
 * 1 = entity name, 2 = decimal digits, 3 = hex digits after "x",
 * 4 = hex digits after "X", 5 = a bare ampersand.
 *
 * @param string $text
 * @return string
 * @access private
 */
function normalizeCharReferences( $text ) {
	# The hex alternatives accept hexadecimal digits only. hexdec()
	# ignores other characters, so a looser class would let a bogus
	# reference such as &#xg41; be rewritten as the valid &#x41;.
	return preg_replace_callback(
		'/&([A-Za-z0-9]+);
		 |&\#([0-9]+);
		 |&\#x([0-9A-Fa-f]+);
		 |&\#X([0-9A-Fa-f]+);
		 |(&)/x',
		array( 'Sanitizer', 'normalizeCharReferencesCallback' ),
		$text );
}
326
/**
 * preg_replace_callback() handler for normalizeCharReferences().
 *
 * Dispatches on the first non-empty capture group: 1 is a named
 * entity, 2 a decimal reference, 3 and 4 hexadecimal references.
 * If none of them matched, or the helper rejected the reference by
 * returning null, the matched source text is HTML-escaped instead.
 *
 * @param array $matches Match array handed over by preg_replace_callback()
 * @return string
 * @access private
 */
function normalizeCharReferencesCallback( $matches ) {
	$replacement = null;
	if( $matches[1] != '' ) {
		# Named entity
		$replacement = Sanitizer::normalizeEntity( $matches[1] );
	} elseif( $matches[2] != '' ) {
		# Decimal reference
		$replacement = Sanitizer::decCharReference( $matches[2] );
	} elseif( $matches[3] != '' ) {
		# Hex reference written with a lowercase x
		$replacement = Sanitizer::hexCharReference( $matches[3] );
	} elseif( $matches[4] != '' ) {
		# Hex reference written with an uppercase X
		$replacement = Sanitizer::hexCharReference( $matches[4] );
	}

	return is_null( $replacement )
		? htmlspecialchars( $matches[0] )
		: $replacement;
}
344
/**
 * Pass a named entity through if it is one of the known
 * HTML 4.0/XHTML 1.0 entity names; otherwise return the source of
 * the pseudo-entity with its ampersand escaped (eg &amp;foo;).
 * Names are case-sensitive.
 *
 * @param string $name Entity name without the leading '&' and trailing ';'
 * @return string
 */
function normalizeEntity( $name ) {
	# Lookup table of known entity names, built once per request.
	# List of all named character entities defined in HTML 4.01
	# http://www.w3.org/TR/html4/sgml/entities.html
	static $known = null;
	if( is_null( $known ) ) {
		$names =
			'aacute Aacute acirc Acirc acute aelig AElig agrave Agrave ' .
			'alefsym alpha Alpha amp and ang apos aring Aring asymp ' .
			'atilde Atilde auml Auml ' .
			'bdquo beta Beta brvbar bull ' .
			'cap ccedil Ccedil cedil cent chi Chi circ clubs cong copy ' .
			'crarr cup curren ' .
			'dagger Dagger darr dArr deg delta Delta diams divide ' .
			'eacute Eacute ecirc Ecirc egrave Egrave empty emsp ensp ' .
			'epsilon Epsilon equiv eta Eta eth ETH euml Euml euro exist ' .
			'fnof forall frac12 frac14 frac34 frasl ' .
			'gamma Gamma ge gt ' .
			'harr hArr hearts hellip ' .
			'iacute Iacute icirc Icirc iexcl igrave Igrave image infin ' .
			'int iota Iota iquest isin iuml Iuml ' .
			'kappa Kappa ' .
			'lambda Lambda lang laquo larr lArr lceil ldquo le lfloor ' .
			'lowast loz lrm lsaquo lsquo lt ' .
			'macr mdash micro middot minus mu Mu ' .
			'nabla nbsp ndash ne ni not notin nsub ntilde Ntilde nu Nu ' .
			'oacute Oacute ocirc Ocirc oelig OElig ograve Ograve oline ' .
			'omega Omega omicron Omicron oplus or ordf ordm oslash ' .
			'Oslash otilde Otilde otimes ouml Ouml ' .
			'para part permil perp phi Phi pi Pi piv plusmn pound prime ' .
			'Prime prod prop psi Psi ' .
			'quot ' .
			'radic rang raquo rarr rArr rceil rdquo real reg rfloor rho ' .
			'Rho rlm rsaquo rsquo ' .
			'sbquo scaron Scaron sdot sect shy sigma Sigma sigmaf sim ' .
			'spades sub sube sum sup sup1 sup2 sup3 supe szlig ' .
			'tau Tau there4 theta Theta thetasym thinsp thorn THORN ' .
			'tilde times trade ' .
			'uacute Uacute uarr uArr ucirc Ucirc ugrave Ugrave uml ' .
			'upsih upsilon Upsilon uuml Uuml ' .
			'weierp ' .
			'xi Xi ' .
			'yacute Yacute yen yuml Yuml ' .
			'zeta Zeta zwj zwnj';
		# Flip so that each name becomes a key for isset() lookups
		$known = array_flip( explode( ' ', $names ) );
	}

	if( !isset( $known[$name] ) ) {
		return "&amp;$name;";
	}
	return "&$name;";
}
615
/**
 * Rebuild a decimal character reference from its digit string.
 *
 * @param string $codepoint Decimal digits taken from the reference
 * @return string|null Reference in '&#N;' form, or null when
 *                     Sanitizer::validateCodepoint() rejects the value
 */
function decCharReference( $codepoint ) {
	$point = intval( $codepoint );
	return Sanitizer::validateCodepoint( $point )
		? sprintf( '&#%d;', $point )
		: null;
}
624
/**
 * Rebuild a hexadecimal character reference from its digit string,
 * using a lowercase 'x' and lowercase hex digits.
 *
 * @param string $codepoint Hex digits taken from the reference
 * @return string|null Reference in '&#xN;' form, or null when
 *                     Sanitizer::validateCodepoint() rejects the value
 */
function hexCharReference( $codepoint ) {
	$point = hexdec( $codepoint );
	return Sanitizer::validateCodepoint( $point )
		? sprintf( '&#x%x;', $point )
		: null;
}
633
/**
 * Returns true if a given Unicode codepoint is a valid character in XML.
 *
 * Accepted are tab, line feed and carriage return, plus the ranges
 * 0x20-0xd7ff, 0xe000-0xfffd and 0x10000-0x10ffff.
 *
 * @param int $codepoint
 * @return bool
 */
function validateCodepoint( $codepoint ) {
	# The three permitted control characters
	if( $codepoint == 0x09 || $codepoint == 0x0a || $codepoint == 0x0d ) {
		return true;
	}
	# Everything below the surrogate block
	if( $codepoint >= 0x20 && $codepoint <= 0xd7ff ) {
		return true;
	}
	# Above the surrogates, stopping short of 0xfffe and 0xffff
	if( $codepoint >= 0xe000 && $codepoint <= 0xfffd ) {
		return true;
	}
	# Supplementary planes
	return ( $codepoint >= 0x10000 && $codepoint <= 0x10ffff );
}
647
/**
 * Look up which attributes are acceptable on the given element.
 *
 * @param string $element Element name as used for the keys of
 *                        Sanitizer::setupAttributeWhitelist()
 * @return array List of attribute names; empty if the element has
 *               no whitelist entry
 */
function attributeWhitelist( $element ) {
	$whitelists = Sanitizer::setupAttributeWhitelist();
	if( isset( $whitelists[$element] ) ) {
		return $whitelists[$element];
	}
	return array();
}
661
/**
 * Build the map from element name to the list of attribute names
 * permitted on that element.
 *
 * The map is built on the first call and cached in a static
 * variable, since it is looked up once for every sanitized tag.
 *
 * @return array Element name => array of allowed attribute names
 */
function setupAttributeWhitelist() {
	static $whitelist = null;
	if( !is_null( $whitelist ) ) {
		return $whitelist;
	}

	$common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
	$block = array_merge( $common, array( 'align' ) );
	$tablealign = array( 'align', 'char', 'charoff', 'valign' );
	$tablecell = array( 'abbr',
		'axis',
		'headers',
		'scope',
		'rowspan',
		'colspan',
		'nowrap', # deprecated
		'width', # deprecated
		'height' # deprecated
	);

	# Numbers refer to sections in HTML 4.01 standard describing the element.
	# See: http://www.w3.org/TR/html4/
	$whitelist = array (
		# 7.5.4
		'div' => $block,
		'center' => $common, # deprecated
		'span' => $block, # ??

		# 7.5.5
		'h1' => $block,
		'h2' => $block,
		'h3' => $block,
		'h4' => $block,
		'h5' => $block,
		'h6' => $block,

		# 7.5.6
		# address

		# 8.2.4
		# bdo

		# 9.2.1
		'em' => $common,
		'strong' => $common,
		'cite' => $common,
		# dfn
		'code' => $common,
		# samp
		# kbd
		'var' => $common,
		# abbr
		# acronym

		# 9.2.2
		'blockquote' => array_merge( $common, array( 'cite' ) ),
		# q

		# 9.2.3
		'sub' => $common,
		'sup' => $common,

		# 9.3.1
		'p' => $block,

		# 9.3.2
		'br' => array( 'id', 'class', 'title', 'style', 'clear' ),

		# 9.3.4
		'pre' => array_merge( $common, array( 'width' ) ),

		# 9.4
		'ins' => array_merge( $common, array( 'cite', 'datetime' ) ),
		'del' => array_merge( $common, array( 'cite', 'datetime' ) ),

		# 10.2
		'ul' => array_merge( $common, array( 'type' ) ),
		'ol' => array_merge( $common, array( 'type', 'start' ) ),
		'li' => array_merge( $common, array( 'type', 'value' ) ),

		# 10.3
		'dl' => $common,
		'dd' => $common,
		'dt' => $common,

		# 11.2.1
		# (each attribute listed once; 'frame', 'rules' and 'border'
		# used to be repeated here)
		'table' => array_merge( $common,
			array( 'summary', 'width', 'border', 'frame',
				'rules', 'cellspacing', 'cellpadding',
				'align', 'bgcolor' ) ),

		# 11.2.2
		'caption' => array_merge( $common, array( 'align' ) ),

		# 11.2.3
		'thead' => array_merge( $common, $tablealign ),
		'tfoot' => array_merge( $common, $tablealign ),
		'tbody' => array_merge( $common, $tablealign ),

		# 11.2.4
		'colgroup' => array_merge( $common, array( 'span', 'width' ), $tablealign ),
		'col' => array_merge( $common, array( 'span', 'width' ), $tablealign ),

		# 11.2.5
		'tr' => array_merge( $common, array( 'bgcolor' ), $tablealign ),

		# 11.2.6
		'td' => array_merge( $common, $tablecell, $tablealign ),
		'th' => array_merge( $common, $tablecell, $tablealign ),

		# 15.2.1
		'tt' => $common,
		'b' => $common,
		'i' => $common,
		'big' => $common,
		'small' => $common,
		'strike' => $common,
		's' => $common,
		'u' => $common,

		# 15.2.2
		'font' => array_merge( $common, array( 'size', 'color', 'face' ) ),
		# basefont

		# 15.3
		'hr' => array_merge( $common, array( 'noshade', 'size', 'width' ) ),

		'rb' => $common,
		'rp' => $common,
		'rt' => $common,
		'ruby' => $common,
	);
	return $whitelist;
}
795
796 }
797
798 ?>