- private static function replaceUnusualEscapesCallback( $matches ) {
- $char = urldecode( $matches[0] );
- $ord = ord( $char );
- # Is it an unsafe or HTTP reserved character according to RFC 1738?
- if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
- # No, shouldn't be escaped
- return $char;
- } else {
- # Yes, leave it escaped
- return $matches[0];
+ public static function normalizeLinkUrl( $url ) {
+ # First, make sure unsafe characters are encoded
+ $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+ function ( $m ) {
+ return rawurlencode( $m[0] );
+ },
+ $url
+ );
+
+ $ret = '';
+ $end = strlen( $url );
+
+ # Fragment part - 'fragment'
+ $start = strpos( $url, '#' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+ $end = $start;
+ }
+
+ # Query part - 'query' minus &=+;
+ $start = strpos( $url, '?' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+ $end = $start;