From: gicode Date: Wed, 9 Nov 2011 22:44:12 +0000 (+0000) Subject: Add wfRemoveDotSegments and unit tests. This is a sane step towards fixing X-Git-Tag: 1.31.0-rc.0~26610 X-Git-Url: http://git.cyclocoop.org/?a=commitdiff_plain;h=179f3fdf023eb7c901f7df7fa30af798611274ed;p=lhc%2Fweb%2Fwiklou.git Add wfRemoveDotSegments and unit tests. This is a sane step towards fixing bug 32168. This implements RFC3986 Section 5.2.4. http://tools.ietf.org/html/rfc3986#section-5.2.4 This is important because you need to remove dot segments in order to safely compare URLs when limiting URLs to a particular path. --- diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index a6cbe07be4..ddeefd06fa 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -125,6 +125,7 @@ production. from recent changes feeds * (bug 30232) add current time to message wlnote on Special:Watchlist * (bug 29110) $wgFeedDiffCutoff did not affect new pages +* (bug 32168) Add wfRemoveDotSegments for use in wfExpandUrl === API changes in 1.19 === * (bug 19838) siprop=interwikimap can now use the interwiki cache. diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 0017953246..339cd090c7 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -477,9 +477,9 @@ function wfExpandUrl( $url, $defaultProto = PROTO_CURRENT ) { $defaultProtoWithoutSlashes = substr( $defaultProto, 0, -2 ); - if( substr( $url, 0, 2 ) == '//' ) { + if ( substr( $url, 0, 2 ) == '//' ) { return $defaultProtoWithoutSlashes . $url; - } elseif( substr( $url, 0, 1 ) == '/' ) { + } elseif ( substr( $url, 0, 1 ) == '/' ) { // If $serverUrl is protocol-relative, prepend $defaultProtoWithoutSlashes, otherwise leave it alone return ( $serverHasProto ? '' : $defaultProtoWithoutSlashes ) . $serverUrl . $url; } else { @@ -487,6 +487,47 @@ function wfExpandUrl( $url, $defaultProto = PROTO_CURRENT ) { } } +/** + * Remove all dot-segments in the provided URL path. 
/**
 * Remove all dot-segments ('.' and '..') from the provided URL path.
 * For example, '/a/./b/../c/' becomes '/a/c/'. The path is consumed from
 * the front, one step per loop iteration, following the steps labelled
 * A-E in RFC3986 section 5.2.4.
 *
 * @todo Need to integrate this into wfExpandUrl (bug 32168)
 *
 * @param $urlPath String URL path, potentially containing dot-segments
 * @return string URL path with all dot-segments removed
 */
function wfRemoveDotSegments( $urlPath ) {
	$urlPath = (string)$urlPath;
	$output = '';

	# Compare against '' explicitly rather than testing truthiness: PHP
	# treats the string '0' as false, so a bare "while ( $urlPath )" would
	# stop early and silently drop a trailing '0' segment (e.g. './0').
	while ( $urlPath !== '' ) {
		$matches = null;
		if ( preg_match( '%^\.\.?/%', $urlPath, $matches ) ) {
			# Step A: strip a leading './' or '../'
			# substr() returns false at end-of-string before PHP 8.0,
			# hence the cast so the loop condition still sees ''.
			$urlPath = (string)substr( $urlPath, strlen( $matches[0] ) );
		} elseif ( preg_match( '%^/\.(/|$)%', $urlPath, $matches ) ) {
			# Step B: replace a leading '/./' or a final '/.' with '/'
			$start = strlen( $matches[0] );
			$urlPath = '/' . substr( $urlPath, $start );
		} elseif ( preg_match( '%^/\.\.(/|$)%', $urlPath, $matches ) ) {
			# Step C: replace a leading '/../' or a final '/..' with '/'
			# and drop the last segment already written to the output
			$start = strlen( $matches[0] );
			$urlPath = '/' . substr( $urlPath, $start );
			$output = preg_replace( '%(^|/)[^/]*$%', '', $output );
		} elseif ( preg_match( '%^\.\.?$%', $urlPath, $matches ) ) {
			# Step D: the remaining input is only '.' or '..'; discard it
			$urlPath = (string)substr( $urlPath, strlen( $matches[0] ) );
		} else {
			# Step E: move the first path segment (including its leading
			# '/', if any) from the input to the output. The pattern
			# always consumes at least one character of a non-empty
			# path, so the loop is guaranteed to make progress.
			preg_match( '%^/?[^/]*%', $urlPath, $matches );
			$urlPath = (string)substr( $urlPath, strlen( $matches[0] ) );
			$output .= $matches[0];
		}
	}

	return $output;
}
array( '..', '' ), + array( '/.', '/' ), + array( '/..', '/' ), + array( './', '' ), + array( '../', '' ), + array( './a', 'a' ), + array( '../a', 'a' ), + array( '../../a', 'a' ), + array( '.././a', 'a' ), + array( './../a', 'a' ), + array( '././a', 'a' ), + array( '../../', '' ), + array( '.././', '' ), + array( './../', '' ), + array( '././', '' ), + array( '../..', '' ), + array( '../.', '' ), + array( './..', '' ), + array( './.', '' ), + array( '/../../a', '/a' ), + array( '/.././a', '/a' ), + array( '/./../a', '/a' ), + array( '/././a', '/a' ), + array( '/../../', '/' ), + array( '/.././', '/' ), + array( '/./../', '/' ), + array( '/././', '/' ), + array( '/../..', '/' ), + array( '/../.', '/' ), + array( '/./..', '/' ), + array( '/./.', '/' ), + array( 'b/../../a', '/a' ), + array( 'b/.././a', '/a' ), + array( 'b/./../a', '/a' ), + array( 'b/././a', 'b/a' ), + array( 'b/../../', '/' ), + array( 'b/.././', '/' ), + array( 'b/./../', '/' ), + array( 'b/././', 'b/' ), + array( 'b/../..', '/' ), + array( 'b/../.', '/' ), + array( 'b/./..', '/' ), + array( 'b/./.', 'b/' ), + array( '/b/../../a', '/a' ), + array( '/b/.././a', '/a' ), + array( '/b/./../a', '/a' ), + array( '/b/././a', '/b/a' ), + array( '/b/../../', '/' ), + array( '/b/.././', '/' ), + array( '/b/./../', '/' ), + array( '/b/././', '/b/' ), + array( '/b/../..', '/' ), + array( '/b/../.', '/' ), + array( '/b/./..', '/' ), + array( '/b/./.', '/b/' ), + ); + } +}