From: Daniel Friesen Date: Mon, 2 Apr 2012 01:54:25 +0000 (-0700) Subject: Add a Uri class. X-Git-Tag: 1.31.0-rc.0~22822^2 X-Git-Url: http://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/operations/?a=commitdiff_plain;h=94f623363beb00a7a211e8f8f1f39bc12b4713ed;p=lhc%2Fweb%2Fwiklou.git Add a Uri class. Add a Uri class matching our mw.Uri JS class for handling uris. This class should be helpful in a bunch of places where we end up doing manual concatenation of things like the path + '?' + query of a url parsed with wfParseUrl. [tylerromeo@gmail.com: Removed cat() function, fixed wfWarn() usage for aliases and added visibility to all functions. Also added test for aliases.] Signed-off-by: Tyler Romeo Change-Id: Iefdedb7c80cf1d4aab58050edab3ab44ba868a58 --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 7e11f3ef64..a59b9d1915 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -252,6 +252,7 @@ $wgAutoloadLocalClasses = array( 'UnlistedSpecialPage' => 'includes/SpecialPage.php', 'UploadSourceAdapter' => 'includes/Import.php', 'UppercaseCollation' => 'includes/Collation.php', + 'Uri' => 'includes/Uri.php', 'User' => 'includes/User.php', 'UserArray' => 'includes/UserArray.php', 'UserArrayFromResult' => 'includes/UserArray.php', diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 35887a4b47..983bb7299c 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -391,7 +391,7 @@ function wfArrayToCgi( $array1, $array2 = null, $prefix = '' ) { $cgi = ''; foreach ( $array1 as $key => $value ) { - if ( !is_null($value) && $value !== false ) { + if ( $value !== false ) { if ( $cgi != '' ) { $cgi .= '&'; } @@ -412,8 +412,11 @@ function wfArrayToCgi( $array1, $array2 = null, $prefix = '' ) { } else { if ( is_object( $value ) ) { $value = $value->__toString(); + } elseif( !is_null( $value ) ) { + $cgi .= urlencode( $key ) . '=' . 
urlencode( $value ); + } else { + $cgi .= urlencode( $key ); } - $cgi .= urlencode( $key ) . '=' . urlencode( $value ); } } } @@ -440,14 +443,15 @@ function wfCgiToArray( $query ) { continue; } if ( strpos( $bit, '=' ) === false ) { - // Pieces like &qwerty become 'qwerty' => '' (at least this is what php does) - $key = $bit; - $value = ''; + // Pieces like &qwerty become 'qwerty' => null + $key = urldecode( $bit ); + $value = null; } else { list( $key, $value ) = explode( '=', $bit ); + $key = urldecode( $key ); + $value = urldecode( $value ); } - $key = urldecode( $key ); - $value = urldecode( $value ); + if ( strpos( $key, '[' ) !== false ) { $keys = array_reverse( explode( '[', $key ) ); $key = array_pop( $keys ); @@ -472,23 +476,15 @@ function wfCgiToArray( $query ) { * Append a query string to an existing URL, which may or may not already * have query string parameters already. If so, they will be combined. * + * @deprecated * @param $url String * @param $query Mixed: string or associative array * @return string */ function wfAppendQuery( $url, $query ) { - if ( is_array( $query ) ) { - $query = wfArrayToCgi( $query ); - } - if( $query != '' ) { - if( false === strpos( $url, '?' 
) ) { - $url .= '?'; - } else { - $url .= '&'; - } - $url .= $query; - } - return $url; + $obj = new Uri( $url ); + $obj->extendQuery( $query ); + return $obj->toString(); } /** @@ -576,49 +572,13 @@ function wfExpandUrl( $url, $defaultProto = PROTO_CURRENT ) { * @todo Need to integrate this into wfExpandUrl (bug 32168) * * @since 1.19 + * @deprecated * @param $urlParts Array URL parts, as output from wfParseUrl * @return string URL assembled from its component parts */ function wfAssembleUrl( $urlParts ) { - $result = ''; - - if ( isset( $urlParts['delimiter'] ) ) { - if ( isset( $urlParts['scheme'] ) ) { - $result .= $urlParts['scheme']; - } - - $result .= $urlParts['delimiter']; - } - - if ( isset( $urlParts['host'] ) ) { - if ( isset( $urlParts['user'] ) ) { - $result .= $urlParts['user']; - if ( isset( $urlParts['pass'] ) ) { - $result .= ':' . $urlParts['pass']; - } - $result .= '@'; - } - - $result .= $urlParts['host']; - - if ( isset( $urlParts['port'] ) ) { - $result .= ':' . $urlParts['port']; - } - } - - if ( isset( $urlParts['path'] ) ) { - $result .= $urlParts['path']; - } - - if ( isset( $urlParts['query'] ) ) { - $result .= '?' . $urlParts['query']; - } - - if ( isset( $urlParts['fragment'] ) ) { - $result .= '#' . $urlParts['fragment']; - } - - return $result; + $obj = new Uri( $urlParts ); + return $obj->toString(); } /** @@ -765,58 +725,13 @@ function wfUrlProtocolsWithoutProtRel() { * 2) Handles protocols that don't use :// (e.g., mailto: and news: , as well as protocol-relative URLs) correctly * 3) Adds a "delimiter" element to the array, either '://', ':' or '//' (see (2)) * + * @deprecated * @param $url String: a URL to parse * @return Array: bits of the URL in an associative array, per PHP docs */ function wfParseUrl( $url ) { - global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php - - // Protocol-relative URLs are handled really badly by parse_url(). 
It's so bad that the easiest - // way to handle them is to just prepend 'http:' and strip the protocol out later - $wasRelative = substr( $url, 0, 2 ) == '//'; - if ( $wasRelative ) { - $url = "http:$url"; - } - wfSuppressWarnings(); - $bits = parse_url( $url ); - wfRestoreWarnings(); - // parse_url() returns an array without scheme for some invalid URLs, e.g. - // parse_url("%0Ahttp://example.com") == array( 'host' => '%0Ahttp', 'path' => 'example.com' ) - if ( !$bits || !isset( $bits['scheme'] ) ) { - return false; - } - - // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it - if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) { - $bits['delimiter'] = '://'; - } elseif ( in_array( $bits['scheme'] . ':', $wgUrlProtocols ) ) { - $bits['delimiter'] = ':'; - // parse_url detects for news: and mailto: the host part of an url as path - // We have to correct this wrong detection - if ( isset( $bits['path'] ) ) { - $bits['host'] = $bits['path']; - $bits['path'] = ''; - } - } else { - return false; - } - - /* Provide an empty host for eg. file:/// urls (see bug 28627) */ - if ( !isset( $bits['host'] ) ) { - $bits['host'] = ''; - - /* parse_url loses the third / for file:///c:/ urls (but not on variants) */ - if ( substr( $bits['path'], 0, 1 ) !== '/' ) { - $bits['path'] = '/' . $bits['path']; - } - } - - // If the URL was protocol-relative, fix scheme and delimiter - if ( $wasRelative ) { - $bits['scheme'] = ''; - $bits['delimiter'] = '//'; - } - return $bits; + $obj = new Uri( $url ); + return $obj->getComponents(); } /** diff --git a/includes/Uri.php b/includes/Uri.php new file mode 100644 index 0000000000..ba051aec8f --- /dev/null +++ b/includes/Uri.php @@ -0,0 +1,336 @@ + 'scheme', 'password' => 'pass' ); + + /** + * parse_url() work-alike, but non-broken. 
Differences: + * + * 1) Does not raise warnings on bad URLs (just returns false) + * 2) Handles protocols that don't use :// (e.g., mailto: and news: , as well as protocol-relative URLs) correctly + * 3) Adds a "delimiter" element to the array, either '://', ':' or '//' (see (2)) + * + * @param $url String: a URL to parse + * @return Array: bits of the URL in an associative array, per PHP docs + */ + protected static function parseUri( $url ) { + global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php + + // Protocol-relative URLs are handled really badly by parse_url(). It's so bad that the easiest + // way to handle them is to just prepend 'http:' and strip the protocol out later + $wasRelative = substr( $url, 0, 2 ) == '//'; + if ( $wasRelative ) { + $url = "http:$url"; + } + wfSuppressWarnings(); + $bits = parse_url( $url ); + wfRestoreWarnings(); + // parse_url() returns an array without scheme for some invalid URLs, e.g. + // parse_url("%0Ahttp://example.com") == array( 'host' => '%0Ahttp', 'path' => 'example.com' ) + if ( !$bits || + !isset( $bits['scheme'] ) && strpos( $url, "://" ) !== false ) { + wfWarn( __METHOD__ . ": Invalid URL: $url" ); + return false; + } else { + $scheme = isset( $bits['scheme'] ) ? $bits['scheme'] : null; + } + + // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it + if ( in_array( $scheme . '://', $wgUrlProtocols ) ) { + $bits['delimiter'] = '://'; + } elseif ( !is_null( $scheme ) && !in_array( $scheme . ':', $wgUrlProtocols ) ) { + wfWarn( __METHOD__ . ": Invalid scheme in URL: $scheme" ); + return false; + } elseif( !is_null( $scheme ) ) { + if( !in_array( $scheme . ':', $wgUrlProtocols ) ) { + // For URLs that don't have a scheme, but do have a user:password, parse_url + // detects the user as the scheme. 
+ unset( $bits['scheme'] ); + $bits['user'] = $scheme; + } else { + $bits['delimiter'] = ':'; + // parse_url detects for news: and mailto: the host part of an url as path + // We have to correct this wrong detection + if ( isset( $bits['path'] ) ) { + $bits['host'] = $bits['path']; + $bits['path'] = ''; + } + } + } + + /* Provide an empty host for eg. file:/// urls (see bug 28627) */ + if ( !isset( $bits['host'] ) && $scheme == "file" ) { + $bits['host'] = ''; + + /* parse_url loses the third / for file:///c:/ urls (but not on variants) */ + if ( isset( $bits['path'] ) && substr( $bits['path'], 0, 1 ) !== '/' ) { + $bits['path'] = '/' . $bits['path']; + } + } + + // If the URL was protocol-relative, fix scheme and delimiter + if ( $wasRelative ) { + $bits['scheme'] = ''; + $bits['delimiter'] = '//'; + } + return $bits; + } + + /** + * + * @param $uri mixed URI string, array of components, or Uri object + */ + public function __construct( $uri ) { + $this->components = array(); + $this->setUri( $uri ); + } + + /** + * Set the Uri to the value of some other URI. + * + * @param $uri mixed URI string, array of components, or Uri object + */ + public function setUri( $uri ) { + if ( is_string( $uri ) ) { + $parsed = self::parseUri( $uri ); + if( $parsed === false ) { + return false; + } + $this->setComponents( $parsed ); + } elseif ( is_array( $uri ) ) { + $this->setComponents( $uri ); + } elseif ( $uri instanceof Uri ) { + $this->setComponents( $uri->getComponents() ); + } else { + throw new MWException( __METHOD__ . ': $uri is not of a valid type.' ); + } + } + + /** + * Set the components of this Uri. + * Will output warnings when invalid components or aliases are found. + * + * @param $components Array The components to set on this Uri. + */ + public function setComponents( array $components ) { + foreach ( $components as $name => $value ) { + if ( isset( self::$componentAliases[$name] ) ) { + $canonical = self::$componentAliases[$name]; + wfWarn( __METHOD__ . ": Converting alias $name to canonical $canonical." 
); + $components[$canonical] = $value; + unset( $components[$name] ); + } elseif ( !in_array( $name, self::$validComponents ) ) { + wfWarn( __METHOD__ . ": $name is not a valid component." ); + unset( $components[$name] ); + } + } + + $this->components = $components; + } + + /** + * Return the components for this Uri + * @return Array + */ + public function getComponents() { + return $this->components; + } + + /** + * Return the value of a specific component + * + * @param $name string The name of the component to return + * @return string|null The component's value, or null when it is unset or empty + */ + public function getComponent( $name ) { + if ( isset( self::$componentAliases[$name] ) ) { + // Component is an alias. Get the actual name. + $alias = $name; + $name = self::$componentAliases[$name]; + wfWarn( __METHOD__ . ": Converting alias $alias to canonical $name." ); + } + + if( !in_array( $name, self::$validComponents ) ) { + // Component is invalid + throw new MWException( __METHOD__ . ": $name is not a valid component." ); + } elseif( !empty( $this->components[$name] ) ) { + // Component is valid and has a value. + return $this->components[$name]; + } else { + // Component is empty + return null; + } + } + + /** + * Set a component for this Uri + * @param $name string The name of the component to set + * @param $value string|null The value to set + */ + public function setComponent( $name, $value ) { + if ( isset( self::$componentAliases[$name] ) ) { + $alias = $name; + $name = self::$componentAliases[$name]; + wfWarn( __METHOD__ . ": Converting alias $alias to canonical $name." ); + } elseif ( !in_array( $name, self::$validComponents ) ) { + throw new MWException( __METHOD__ . ": $name is not a valid component." 
); + } + $this->components[$name] = $value; + } + + public function getProtocol() { return $this->getComponent( 'scheme' ); } + public function getUser() { return $this->getComponent( 'user' ); } + public function getPassword() { return $this->getComponent( 'pass' ); } + public function getHost() { return $this->getComponent( 'host' ); } + public function getPort() { return $this->getComponent( 'port' ); } + public function getPath() { return $this->getComponent( 'path' ); } + public function getQueryString() { return $this->getComponent( 'query' ); } + public function getFragment() { return $this->getComponent( 'fragment' ); } + + public function setProtocol( $scheme ) { $this->setComponent( 'scheme', $scheme ); } + public function setUser( $user ) { $this->setComponent( 'user', $user ); } + public function setPassword( $pass ) { $this->setComponent( 'pass', $pass ); } + public function setHost( $host ) { $this->setComponent( 'host', $host ); } + public function setPort( $port ) { $this->setComponent( 'port', $port ); } + public function setPath( $path ) { $this->setComponent( 'path', $path ); } + public function setFragment( $fragment ) { $this->setComponent( 'fragment', $fragment ); } + + /** + * Gets the protocol-authority delimiter of a URI (://, : or //). + * @return string|null + */ + public function getDelimiter() { + $delimiter = $this->getComponent( 'delimiter' ); + if ( $delimiter ) { + // A specific delimiter is set, so return it. + return $delimiter; + } + if ( $this->getAuthority() && $this->getProtocol() ) { + // If the URI has a protocol and a body (i.e., some sort of host, etc.) + // the default delimiter is "://", e.g., "http://test.com". + return '://'; + } + return null; + } + + /** + * Gets query portion of a URI in array format. + * @return array + */ + public function getQuery() { + return wfCgiToArray( $this->getQueryString() ); + } + + /** + * Sets the query portion of a URI. 
+ * @param string|array $query + */ + public function setQuery( $query ) { + if ( is_array( $query ) ) { + $query = wfArrayToCGI( $query ); + } + $this->setComponent( 'query', $query ); + } + + /** + * Extend the query -- supply query parameters to override or add to ours + * @param Array|string $parameters query parameters to override or add + * @return Uri this URI object + */ + public function extendQuery( $parameters ) { + if ( is_string( $parameters ) ) { + $parameters = wfCgiToArray( $parameters ); + } + + $query = $this->getQuery(); + foreach( $parameters as $key => $value ) { + $query[$key] = $value; + } + + $this->setQuery( $query ); + return $this; + } + + /** + * Returns user and password portion of a URI. + * @return string + */ + public function getUserInfo() { + $user = $this->getComponent( 'user' ); + $pass = $this->getComponent( 'pass' ); + return $pass ? "$user:$pass" : $user; + } + + /** + * Gets host and port portion of a URI. + * @return string + */ + public function getHostPort() { + $host = $this->getComponent( 'host' ); + $port = $this->getComponent( 'port' ); + return $port ? "$host:$port" : $host; + } + + /** + * Returns the userInfo and host and port portion of the URI. + * In most real-world URLs, this is simply the hostname, but it is more general. + * @return string + */ + public function getAuthority() { + $userinfo = $this->getUserInfo(); + $hostinfo = $this->getHostPort(); + return $userinfo ? "$userinfo@$hostinfo" : $hostinfo; + } + + /** + * Returns everything after the authority section of the URI + * @return String + */ + public function getRelativePath() { + $path = $this->getComponent( 'path' ); + $query = $this->getComponent( 'query' ); + $fragment = $this->getComponent( 'fragment' ); + + $retval = $path; + if( $query ) { + $retval .= "?$query"; + } + if( $fragment ) { + $retval .= "#$fragment"; + } + return $retval; + } + + /** + * Gets the entire URI string. 
May not be precisely the same as input due to order of query arguments. + * @return String the URI string + */ + public function toString() { + return $this->getComponent( 'scheme' ) . $this->getDelimiter() . $this->getAuthority() . $this->getRelativePath(); + } + + /** + * Gets the entire URI string. May not be precisely the same as input due to order of query arguments. + * @return String the URI string + */ + public function __toString() { + return $this->toString(); + } + +} diff --git a/tests/phpunit/includes/GlobalFunctions/GlobalTest.php b/tests/phpunit/includes/GlobalFunctions/GlobalTest.php index 94158bfd2a..746add5b52 100644 --- a/tests/phpunit/includes/GlobalFunctions/GlobalTest.php +++ b/tests/phpunit/includes/GlobalFunctions/GlobalTest.php @@ -108,7 +108,7 @@ class GlobalTest extends MediaWikiTestCase { array( array( 'foo' => 1 ), 'foo=1' ), // number test array( array( 'foo' => true ), 'foo=1' ), // true test array( array( 'foo' => false ), '' ), // false test - array( array( 'foo' => null ), '' ), // null test + array( array( 'foo' => null ), 'foo' ), // null test array( array( 'foo' => 'A&B=5+6@!"\'' ), 'foo=A%26B%3D5%2B6%40%21%22%27' ), // urlencoding test array( array( 'foo' => 'bar', 'baz' => 'is', 'asdf' => 'qwerty' ), 'foo=bar&baz=is&asdf=qwerty' ), // multi-item test array( array( 'foo' => array( 'bar' => 'baz' ) ), 'foo%5Bbar%5D=baz' ), diff --git a/tests/phpunit/includes/UriTest.php b/tests/phpunit/includes/UriTest.php new file mode 100644 index 0000000000..3b78f47019 --- /dev/null +++ b/tests/phpunit/includes/UriTest.php @@ -0,0 +1,164 @@ + 'http', + 'delimiter' => '://', + 'user' => null, + 'pass' => null, + 'host' => 'example.com', + 'port' => null, + 'path' => '/', + 'query' => null, + 'fragment' => null, + ), + ), + array( + '//mediawiki.org/wiki/Main_Page', + array( + 'scheme' => null, + 'delimiter' => '//', + 'user' => null, + 'pass' => null, + 'host' => 'mediawiki.org', + 'port' => null, + 'path' => '/wiki/Main_Page', + 'query' => 
null, + 'fragment' => null, + ), + ), + array( + 'http://user:pass@example.com/', + array( + 'scheme' => 'http', + 'delimiter' => '://', + 'user' => 'user', + 'pass' => 'pass', + 'host' => 'example.com', + 'port' => null, + 'path' => '/', + 'query' => null, + 'fragment' => null, + ), + ), + array( + '/?asdf=asdf', + array( + 'scheme' => null, + 'delimiter' => null, + 'user' => null, + 'pass' => null, + 'host' => null, + 'port' => null, + 'path' => '/', + 'query' => 'asdf=asdf', + 'fragment' => null, + ), + ), + array( + '?asdf=asdf#asdf', + array( + 'scheme' => null, + 'delimiter' => null, + 'user' => null, + 'pass' => null, + 'host' => null, + 'port' => null, + 'path' => null, + 'query' => 'asdf=asdf', + 'fragment' => 'asdf', + ), + ) + ); + } + + /** + * Ensure that get* methods properly match the appropriate getComponent( key ) value + * @dataProvider dataUris + */ + function testGetters( $uri ) { + $uri = new Uri( $uri ); + $getterMap = array( + 'getProtocol' => 'scheme', + 'getUser' => 'user', + 'getPassword' => 'pass', + 'getHost' => 'host', + 'getPort' => 'port', + 'getPath' => 'path', + 'getQueryString' => 'query', + 'getFragment' => 'fragment', + ); + foreach ( $getterMap as $fn => $c ) { + $this->assertSame( $uri->{$fn}(), $uri->getComponent( $c ), "\$uri->{$fn}(); matches \$uri->getComponent( '$c' );" ); + } + } + + /** + * Ensure that Uri has the proper components for our example uris + * @dataProvider dataUris + */ + function testComponents( $uri, $components ) { + $uri = new Uri( $uri ); + + $this->assertSame( $components['scheme'], $uri->getProtocol(), 'Correct scheme' ); + $this->assertSame( $components['delimiter'], $uri->getDelimiter(), 'Correct delimiter' ); + $this->assertSame( $components['user'], $uri->getUser(), 'Correct user' ); + $this->assertSame( $components['pass'], $uri->getPassword(), 'Correct pass' ); + $this->assertSame( $components['host'], $uri->getHost(), 'Correct host' ); + $this->assertSame( $components['port'], $uri->getPort(), 
'Correct port' ); + $this->assertSame( $components['path'], $uri->getPath(), 'Correct path' ); + $this->assertSame( $components['query'], $uri->getQueryString(), 'Correct query' ); + $this->assertSame( $components['fragment'], $uri->getFragment(), 'Correct fragment' ); + } + + /** + * Ensure that the aliases work for various components. + */ + function testAliases() { + $url = "//myuser@test.com"; + $uri = new Uri( $url ); + + // Set the aliases. + $uri->setComponent( 'protocol', 'https' ); + $uri->setComponent( 'password', 'mypass' ); + + // Now try getting them. + $this->assertSame( 'https', $uri->getComponent( 'protocol' ), 'Correct protocol (alias for scheme)' ); + $this->assertSame( 'mypass', $uri->getComponent( 'password' ), 'Correct password (alias for pass)' ); + + // Finally check their actual names. + $this->assertSame( 'https', $uri->getProtocol(), 'Alias for scheme works' ); + $this->assertSame( 'mypass', $uri->getPassword(), 'Alias for pass works' ); + } + + /** + * Ensure that Uri's helper methods return the correct data + */ + function testHelpers() { + $uri = new Uri( 'http://a:b@example.com:8080/path?query=value' ); + + $this->assertSame( 'a:b', $uri->getUserInfo(), 'Correct getUserInfo' ); + $this->assertSame( 'example.com:8080', $uri->getHostPort(), 'Correct getHostPort' ); + $this->assertSame( 'a:b@example.com:8080', $uri->getAuthority(), 'Correct getAuthority' ); + $this->assertSame( '/path?query=value', $uri->getRelativePath(), 'Correct getRelativePath' ); + $this->assertSame( 'http://a:b@example.com:8080/path?query=value', $uri->toString(), 'Correct toString' ); + } + + /** + * Ensure that Uri's extend method properly overrides keys + */ + function testExtend() { + $uri = new Uri( 'http://example.org/?a=b&hello=world' ); + $uri->extendQuery( 'a=c&foo=bar' ); + $this->assertSame( 'a=c&hello=world&foo=bar', $uri->getQueryString() ); + } +}