From a5d6a7db891968c7b94d056d0829bdb6e72809db Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 3 Apr 2006 08:36:17 +0000 Subject: [PATCH] * Additional protections against HTML breakage in table parsing Avoid splitting table cells on "||" appearing in HTML tag attributes. Bad tag nesting is still possible, but this should keep things from being split unexpectedly in the middle of an element, and might avoid some potential injection points. See http://mail.wikipedia.org/pipermail/wikitech-l/2006-April/034637.html --- RELEASE-NOTES | 1 + includes/GlobalFunctions.php | 36 ++++++++++++++++++++++++++++++++++++ includes/Parser.php | 8 +++++++- maintenance/parserTests.txt | 17 +++++++++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 709a957ac8..1d8432a8c0 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -733,6 +733,7 @@ fully support the editing toolbar, but was found to be too confusing. * (bug 5277) Use audio/midi rather that audio/mid * (bug 5410) Use namespace name when a custom namespace's nstab-NS message is nonexistent * (bug 5432) Fix inconsistencies in cookie names when using table prefixes +* Additional protections against HTML breakage in table parsing === Caveats === diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 745fa38c13..aeb615c39d 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -1749,4 +1749,40 @@ function wfDoUpdates() $wgPostCommitUpdateList = array(); } +/** + * More or less "markup-safe" explode() + * Ignores any instances of the separator inside <...> + * @param string $separator + * @param string $text + * @return array + */ +function wfExplodeMarkup( $separator, $text ) { + $placeholder = "\x00"; + + // Just in case... + $text = str_replace( $placeholder, '', $text ); + + // Trim stuff + $replacer = new ReplacerCallback( $separator, $placeholder ); + $cleaned = preg_replace_callback( '/(<.*?>)/', array( $replacer, 'go' ), $text ); + + $items = explode( $separator, $cleaned ); + foreach( $items as $i => $str ) { + $items[$i] = str_replace( $placeholder, $separator, $str ); + } + + return $items; +} + +class ReplacerCallback { + function ReplacerCallback( $from, $to ) { + $this->from = $from; + $this->to = $to; + } + + function go( $matches ) { + return str_replace( $this->from, $this->to, $matches[1] ); + } +} + ?> diff --git a/includes/Parser.php b/includes/Parser.php index 1b8a4622a4..ce337f8298 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -783,7 +783,13 @@ class Parser } $after = substr ( $x , 1 ) ; if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ; - $after = explode ( '||' , $after ) ; + + // Split up multiple cells on the same line. + // FIXME: This can result in improper nesting of tags processed + // by earlier parser steps, but should avoid splitting up eg + // attribute values containing literal "||". + $after = wfExplodeMarkup( '||', $after ); + $t[$k] = '' ; # Loop through each table cell diff --git a/maintenance/parserTests.txt b/maintenance/parserTests.txt index b2786404f2..da08388e84 100644 --- a/maintenance/parserTests.txt +++ b/maintenance/parserTests.txt @@ -1035,6 +1035,23 @@ Invalid attributes in table cell (bug 1830) !! end +# FIXME: this one has incorrect tag nesting still. +!! test +Table security: embedded pipes (http://mail.wikipedia.org/pipermail/wikitech-l/2006-April/034637.html) +!! input +{| +| |[ftp://|x||]" onmouseover="alert(document.cookie)">test +!! result + + + + +
ftp://|x" onmouseover="alert(document.cookie)">test +
+ +!! end + + ### ### Internal links ### -- 2.20.1