$this->getExternalLinkAttribs( $url ) );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
- $pasteurized = self::replaceUnusualEscapes( $url );
+ $pasteurized = self::normalizeLinkUrl( $url );
$this->mOutput->addExternalLink( $pasteurized );
}
wfProfileOut( __METHOD__ );
# Register link in the output object.
# Replace unnecessary URL escape codes with the referenced character
# This prevents spammers from hiding links from the filters
- $pasteurized = self::replaceUnusualEscapes( $url );
+ $pasteurized = self::normalizeLinkUrl( $url );
$this->mOutput->addExternalLink( $pasteurized );
}
}
/**
- * Replace unusual URL escape codes with their equivalent characters
+ * Replace unusual escape codes in a URL with their equivalent characters
+ *
+ * Kept for backward compatibility only: it calls wfDeprecated() and then
+ * returns whatever normalizeLinkUrl() returns for the same URL.
*
+ * @deprecated since 1.24, use normalizeLinkUrl() instead
* @param string $url
* @return string
- *
- * @todo This can merge genuinely required bits in the path or query string,
- * breaking legit URLs. A proper fix would treat the various parts of
- * the URL differently; as a workaround, just use the output for
- * statistical records, not for actual linking/output.
*/
public static function replaceUnusualEscapes( $url ) {
- return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
- array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+ wfDeprecated( __METHOD__, '1.24' );
+ return self::normalizeLinkUrl( $url );
}
/**
- * Callback function used in replaceUnusualEscapes().
- * Replaces unusual URL escape codes with their equivalent character
+ * Replace unusual escape codes in a URL with their equivalent characters
*
- * @param array $matches
+ * This generally follows the syntax defined in RFC 3986, with special
+ * consideration for HTTP query strings.
+ *
+ * Processing steps:
+ * - Control characters, space, "<>[\]^`{|} and bytes 0x7F-0xFF are
+ *   percent-encoded first.
+ * - The URL is split at the first '#' (fragment) and at the first '?'
+ *   if it comes before the fragment (query); the rest is treated as
+ *   scheme and path.
+ * - Each part goes through normalizeUrlComponent() with its own list of
+ *   characters that must stay escaped. Parts are handled from the end of
+ *   the URL backwards, each result being prepended to the return value.
*
+ * @param string $url
* @return string
*/
- private static function replaceUnusualEscapesCallback( $matches ) {
- $char = urldecode( $matches[0] );
- $ord = ord( $char );
- # Is it an unsafe or HTTP reserved character according to RFC 1738?
- if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
- # No, shouldn't be escaped
- return $char;
- } else {
- # Yes, leave it escaped
- return $matches[0];
+ public static function normalizeLinkUrl( $url ) {
+ # First, make sure unsafe characters are encoded
+ $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+ function ( $m ) {
+ return rawurlencode( $m[0] );
+ },
+ $url
+ );
+
+ $ret = '';
+ $end = strlen( $url );
+
+ # Fragment part - 'fragment' (from the first '#' to the end of the URL)
+ $start = strpos( $url, '#' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+ $end = $start;
+ }
+
+ # Query part - 'query' minus &=+; (escaped &=+; are left escaped here)
+ $start = strpos( $url, '?' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+ $end = $start;
}
+
+ # Scheme and path part - 'pchar'
+ # (we assume no userinfo or encoded colons in the host)
+ $ret = self::normalizeUrlComponent(
+ substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+
+ return $ret;
+ }
+
+ private static function normalizeUrlComponent( $component, $unsafe ) {
+ $callback = function ( $matches ) use ( $unsafe ) {
+ $char = urldecode( $matches[0] );
+ $ord = ord( $char );
+ if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
+ # Printable ASCII (33-126) that is not listed in $unsafe: unescape it
+ return $char;
+ } else {
+ # Anything else: leave it escaped, but use uppercase for a-f
+ return strtoupper( $matches[0] );
+ }
+ };
+ return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
}
/**
if ( $isTemplate ) {
# Put a T flag in the section identifier, to indicate to extractSections()
# that sections inside <includeonly> should be counted.
- $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
+ $editsectionPage = $titleText;
+ $editsectionSection = "T-$sectionIndex";
+ $editsectionContent = null;
} else {
- $editlinkArgs = array(
- $this->mTitle->getPrefixedText(),
- $sectionIndex,
- $headlineHint
- );
+ $editsectionPage = $this->mTitle->getPrefixedText();
+ $editsectionSection = $sectionIndex;
+ $editsectionContent = $headlineHint;
}
// We use a bit of pseudo-xml for editsection markers. The
// language converter is run later on. Using a UNIQ style marker
// important bits of data, but put the headline hint inside a
// content block because the language converter is supposed to
// be able to convert that piece of data.
- $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] );
- $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"';
- if ( isset( $editlinkArgs[2] ) ) {
- $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
+ // Gets replaced with html in ParserOutput::getText
+ $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
+ $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
+ if ( $editsectionContent !== null ) {
+ $editlink .= '>' . $editsectionContent . '</mw:editsection>';
} else {
$editlink .= '/>';
}
$paramName = 'no-link';
$value = true;
$validated = true;
- } elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
+ } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
$paramName = 'link-url';
$this->mOutput->addExternalLink( $value );