X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=84bb224300dfe9a362fb08886ab7bef49a9c9ad6;hb=9fa902583cc8e1c2a04b76a58c3fa517df06a013;hp=61fffc5564851dede1e8e7c8f04c6bde6d17182f;hpb=eea755d3babc7148de76efc0a74f700020ceaae2;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 61fffc5564..84bb224300 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -1402,7 +1402,7 @@ class Parser { $this->getExternalLinkAttribs( $url ) ); # Register it in the output object... # Replace unnecessary URL escape codes with their equivalent characters - $pasteurized = self::replaceUnusualEscapes( $url ); + $pasteurized = self::normalizeLinkUrl( $url ); $this->mOutput->addExternalLink( $pasteurized ); } wfProfileOut( __METHOD__ ); @@ -1710,7 +1710,7 @@ class Parser { # Register link in the output object. # Replace unnecessary URL escape codes with the referenced character # This prevents spammers from hiding links from the filters - $pasteurized = self::replaceUnusualEscapes( $url ); + $pasteurized = self::normalizeLinkUrl( $url ); $this->mOutput->addExternalLink( $pasteurized ); } @@ -1759,40 +1759,75 @@ class Parser { } /** - * Replace unusual URL escape codes with their equivalent characters + * Replace unusual escape codes in a URL with their equivalent characters * + * @deprecated since 1.24, use normalizeLinkUrl * @param string $url * @return string - * - * @todo This can merge genuinely required bits in the path or query string, - * breaking legit URLs. A proper fix would treat the various parts of - * the URL differently; as a workaround, just use the output for - * statistical records, not for actual linking/output. */ public static function replaceUnusualEscapes( $url ) { - return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', - array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); + wfDeprecated( __METHOD__, '1.24' ); + return self::normalizeLinkUrl( $url ); } /** - * Callback function used in replaceUnusualEscapes(). - * Replaces unusual URL escape codes with their equivalent character + * Replace unusual escape codes in a URL with their equivalent characters * - * @param array $matches + * This generally follows the syntax defined in RFC 3986, with special + * consideration for HTTP query strings. * + * @param string $url * @return string */ - private static function replaceUnusualEscapesCallback( $matches ) { - $char = urldecode( $matches[0] ); - $ord = ord( $char ); - # Is it an unsafe or HTTP reserved character according to RFC 1738? - if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) { - # No, shouldn't be escaped - return $char; - } else { - # Yes, leave it escaped - return $matches[0]; + public static function normalizeLinkUrl( $url ) { + # First, make sure unsafe characters are encoded + $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/', + function ( $m ) { + return rawurlencode( $m[0] ); + }, + $url + ); + + $ret = ''; + $end = strlen( $url ); + + # Fragment part - 'fragment' + $start = strpos( $url, '#' ); + if ( $start !== false && $start < $end ) { + $ret = self::normalizeUrlComponent( + substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret; + $end = $start; + } + + # Query part - 'query' minus &=+; + $start = strpos( $url, '?' ); + if ( $start !== false && $start < $end ) { + $ret = self::normalizeUrlComponent( + substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret; + $end = $start; } + + # Scheme and path part - 'pchar' + # (we assume no userinfo or encoded colons in the host) + $ret = self::normalizeUrlComponent( + substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret; + + return $ret; + } + + private static function normalizeUrlComponent( $component, $unsafe ) { + $callback = function ( $matches ) use ( $unsafe ) { + $char = urldecode( $matches[0] ); + $ord = ord( $char ); + if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) { + # Unescape it + return $char; + } else { + # Leave it escaped, but use uppercase for a-f + return strtoupper( $matches[0] ); + } + }; + return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component ); } /** @@ -4597,13 +4632,13 @@ class Parser { if ( $isTemplate ) { # Put a T flag in the section identifier, to indicate to extractSections() # that sections inside should be counted. - $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ ); + $editsectionPage = $titleText; + $editsectionSection = "T-$sectionIndex"; + $editsectionContent = null; } else { - $editlinkArgs = array( - $this->mTitle->getPrefixedText(), - $sectionIndex, - $headlineHint - ); + $editsectionPage = $this->mTitle->getPrefixedText(); + $editsectionSection = $sectionIndex; + $editsectionContent = $headlineHint; } // We use a bit of pesudo-xml for editsection markers. The // language converter is run later on. Using a UNIQ style marker @@ -4616,10 +4651,11 @@ class Parser { // important bits of data, but put the headline hint inside a // content block because the language converter is supposed to // be able to convert that piece of data. - $editlink = ''; + // Gets replaced with html in ParserOutput::getText + $editlink = ''; } else { $editlink .= '/>'; }