From a2f759891164da4d25e60167bb9ad0408bbc6d62 Mon Sep 17 00:00:00 2001
From: Brad Jorsch
Date: Tue, 3 Sep 2013 16:00:56 -0400
Subject: [PATCH] Improve wfEscapeWikiText

The following are now correctly escaped:
* Space at the start of a line
* Start-of-line characters after \r
* Magic links such as "RFC 123" with non-space whitespace
* URIs that don't use "://", such as "urn:foo"
* Double-underscore magic words

Bug: 53658
Change-Id: Ie8803a03b20e3fd7e5cc7e55829f24bb86f60b17
---
 includes/GlobalFunctions.php | 45 ++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index 0060d15fb8..9a7f62b70c 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -2010,15 +2010,42 @@ function wfCheckLimits( $deflimit = 50, $optionname = 'rclimit' ) {
  * @return String
  */
 function wfEscapeWikiText( $text ) {
-	$text = strtr( "\n$text", array(
-		'"' => '&#34;', '&' => '&#38;', "'" => '&#39;', '<' => '&#60;',
-		'=' => '&#61;', '>' => '&#62;', '[' => '&#91;', ']' => '&#93;',
-		'{' => '&#123;', '|' => '&#124;', '}' => '&#125;',
-		"\n#" => "\n&#35;", "\n*" => "\n&#42;",
-		"\n:" => "\n&#58;", "\n;" => "\n&#59;",
-		'://' => '&#58;//', 'ISBN ' => 'ISBN&#32;', 'RFC ' => 'RFC&#32;',
-	) );
-	return substr( $text, 1 );
+	static $repl = null, $repl2 = null;
+	if ( $repl === null ) {
+		$repl = array(
+			'"' => '&#34;', '&' => '&#38;', "'" => '&#39;', '<' => '&#60;',
+			'=' => '&#61;', '>' => '&#62;', '[' => '&#91;', ']' => '&#93;',
+			'{' => '&#123;', '|' => '&#124;', '}' => '&#125;',
+			"\n#" => "\n&#35;", "\r#" => "\r&#35;",
+			"\n*" => "\n&#42;", "\r*" => "\r&#42;",
+			"\n:" => "\n&#58;", "\r:" => "\r&#58;",
+			"\n;" => "\n&#59;", "\r;" => "\r&#59;",
+			"\n " => "\n&#32;", "\r " => "\r&#32;",
+			'__' => '_&#95;', '://' => '&#58;//',
+		);
+
+		// We have to catch everything "\s" matches in PCRE
+		foreach ( array( 'ISBN', 'RFC', 'PMID' ) as $magic ) {
+			$repl["$magic "] = "$magic&#32;";
+			$repl["$magic\t"] = "$magic&#9;";
+			$repl["$magic\r"] = "$magic&#13;";
+			$repl["$magic\n"] = "$magic&#10;";
+			$repl["$magic\f"] = "$magic&#12;";
+		}
+
+		// And handle protocols that don't use "://"
+		global $wgUrlProtocols;
+		$repl2 = array();
+		foreach ( $wgUrlProtocols as $prot ) {
+			if ( substr( $prot, -1 ) === ':' ) {
+				$repl2[] = preg_quote( substr( $prot, 0, -1 ), '/' );
+			}
+		}
+		$repl2 = $repl2 ? '/\b(' . join( '|', $repl2 ) . '):/i' : '/^(?!)/';
+	}
+	$text = substr( strtr( "\n$text", $repl ), 1 );
+	$text = preg_replace( $repl2, '$1&#58;', $text );
+	return $text;
 }
 
 /**
-- 
2.20.1