3 * Class for simple URI parsing and manipulation.
4 * Intended to simplify things that were using wfParseUrl and
5 * had to do manual concatenation for various needs.
6 * Built to match our JS mw.Uri in naming patterns.
8 * @author Daniel Friesen
/**
 * The parsed components of the URI, keyed by canonical component name.
 * @var array
 */
protected $components;

/**
 * Canonical component names accepted by this class.
 * @var array
 */
protected static $validComponents = array(
	'scheme',
	'delimiter',
	'host',
	'port',
	'user',
	'pass',
	'path',
	'query',
	'fragment',
);

/**
 * Alternative component names, mapped to the canonical name each one stands for.
 * @var array
 */
protected static $componentAliases = array(
	'protocol' => 'scheme',
	'password' => 'pass',
);
/**
 * parse_url() work-alike, but non-broken. Differences:
 *
 * 1) Does not raise warnings on bad URLs (just returns false)
 * 2) Handles protocols that don't use :// (e.g., mailto: and news:, as well as
 *    protocol-relative URLs) correctly
 * 3) Adds a "delimiter" element to the array, either '://', ':' or '//' (see (2))
 *
 * A scheme is only accepted when "<scheme>://" or "<scheme>:" is listed in
 * $wgUrlProtocols; anything else is logged through wfDebug() and rejected.
 *
 * @param $url String: a URL to parse
 * @return Array|bool: bits of the URL in an associative array, per PHP docs,
 *   or false when the URL or its scheme is invalid
 */
protected static function parseUri( $url ) {
	global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php

	// Protocol-relative URLs are handled really badly by parse_url(). It's so bad that the easiest
	// way to handle them is to just prepend 'http:' and strip the protocol out later
	$wasRelative = substr( $url, 0, 2 ) === '//';
	if ( $wasRelative ) {
		$url = "http:$url";
	}

	// Old PHP releases emit a warning for unparsable URLs; this method promises to stay silent.
	$bits = @parse_url( $url );

	// parse_url() returns an array without scheme for some invalid URLs, e.g.
	// parse_url("%0Ahttp://example.com") == array( 'host' => '%0Ahttp', 'path' => 'example.com' )
	if ( !is_array( $bits )
		|| ( !isset( $bits['scheme'] ) && strpos( $url, "://" ) !== false )
	) {
		wfDebug( __METHOD__ . ": Invalid URL: $url" );
		return false;
	}

	$scheme = isset( $bits['scheme'] ) ? $bits['scheme'] : null;

	// most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
	if ( in_array( $scheme . '://', $wgUrlProtocols, true ) ) {
		$bits['delimiter'] = '://';
	} elseif ( $scheme !== null ) {
		if ( !in_array( $scheme . ':', $wgUrlProtocols, true ) ) {
			// Unknown scheme. This also covers "user:password@host" input, where parse_url()
			// reports the user as the scheme: such input has always been rejected here.
			wfDebug( __METHOD__ . ": Invalid scheme in URL: $scheme" );
			return false;
		}

		$bits['delimiter'] = ':';
		// parse_url detects for news: and mailto: the host part of an url as path
		// We have to correct this wrong detection
		if ( isset( $bits['path'] ) ) {
			$bits['host'] = $bits['path'];
			$bits['path'] = '';
		}
	}

	/* Provide an empty host for eg. file:/// urls (see bug 28627) */
	if ( !isset( $bits['host'] ) && $scheme === 'file' ) {
		$bits['host'] = '';

		/* parse_url loses the third / for file:///c:/ urls (but not on variants) */
		if ( isset( $bits['path'] ) && substr( $bits['path'], 0, 1 ) !== '/' ) {
			$bits['path'] = '/' . $bits['path'];
		}
	}

	// If the URL was protocol-relative, fix scheme and delimiter
	if ( $wasRelative ) {
		// Drop the 'http' that was only prepended to keep parse_url() happy.
		$bits['scheme'] = '';
		$bits['delimiter'] = '//';
	}

	return $bits;
}
/**
 * Build a Uri from a string, a component array or another Uri.
 *
 * @param $uri mixed URI string or array
 */
public function __construct( $uri ) {
	// Begin with an empty component list, then let setUri() load $uri into it.
	$this->components = array();
	$this->setUri( $uri );
}
/**
 * Set the Uri to the value of some other URI.
 *
 * Strings are parsed with parseUri(), arrays are taken as a component list,
 * and another Uri object has its components copied.
 *
 * @param $uri mixed URI string or array
 * @return bool|null false when a string could not be parsed (the current
 *   components are left untouched in that case); nothing otherwise
 * @throws MWException when $uri is neither a string, an array nor a Uri
 */
public function setUri( $uri ) {
	if ( is_string( $uri ) ) {
		$parsed = self::parseUri( $uri );
		if ( $parsed === false ) {
			// Unparsable input: report failure instead of storing garbage.
			return false;
		}
		$this->setComponents( $parsed );
	} elseif ( is_array( $uri ) ) {
		$this->setComponents( $uri );
	} elseif ( $uri instanceof Uri ) {
		$this->setComponents( $uri->getComponents() );
	} else {
		throw new MWException( __METHOD__ . ': $uri is not of a valid type.' );
	}
}
/**
 * Set the components of this Uri, replacing any that were set before.
 *
 * Alias names (see $componentAliases) are converted to their canonical name
 * and the conversion is logged through wfDebug().
 *
 * @param $components Array The components to set on this Uri.
 * @throws MWException when a key is neither a valid component nor an alias
 */
public function setComponents( array $components ) {
	foreach ( $components as $name => $value ) {
		if ( isset( self::$componentAliases[$name] ) ) {
			$canonical = self::$componentAliases[$name];
			wfDebug( __METHOD__ . ": Converting alias $name to canonical $canonical." );
			// Re-key the value under its canonical name.
			$components[$canonical] = $value;
			unset( $components[$name] );
		} elseif ( !in_array( $name, self::$validComponents, true ) ) {
			// Strict comparison: a loose one lets integer keys such as 0 match
			// any component name on older PHP versions.
			throw new MWException( __METHOD__ . ": $name is not a valid component." );
		}
	}

	$this->components = $components;
}
/**
 * Return the components for this Uri
 *
 * @return Array the component list exactly as it is currently stored
 */
public function getComponents() {
	return $this->components;
}
/**
 * Return the value of a specific component
 *
 * Alias names are converted to their canonical name first (and the
 * conversion is logged through wfDebug()).
 *
 * @param $name string The name of the component to return
 * @return mixed the stored value, or null when the component is unset or empty
 * @throws MWException when $name is neither a valid component nor an alias
 */
public function getComponent( $name ) {
	if ( isset( self::$componentAliases[$name] ) ) {
		// Component is an alias. Get the actual name.
		$alias = $name;
		$name = self::$componentAliases[$name];
		wfDebug( __METHOD__ . ": Converting alias $alias to canonical $name." );
	}

	if ( !in_array( $name, self::$validComponents, true ) ) {
		// Component is invalid
		throw new MWException( __METHOD__ . ": $name is not a valid component." );
	}

	$value = isset( $this->components[$name] ) ? $this->components[$name] : null;

	// empty() alone would also discard the string "0", which is a perfectly
	// legitimate query, fragment, user or password.
	if ( $value === '0' || !empty( $value ) ) {
		// Component is valid and has a value.
		return $value;
	}

	// Component is empty
	return null;
}
/**
 * Set a component for this Uri
 *
 * Alias names are converted to their canonical name first (and the
 * conversion is logged through wfDebug()).
 *
 * @param $name string The name of the component to set
 * @param $value string|null The value to set
 * @throws MWException when $name is neither a valid component nor an alias
 */
public function setComponent( $name, $value ) {
	if ( isset( self::$componentAliases[$name] ) ) {
		// Remember the alias so the debug message can show both names.
		$alias = $name;
		$name = self::$componentAliases[$name];
		wfDebug( __METHOD__ . ": Converting alias $alias to canonical $name." );
	} elseif ( !in_array( $name, self::$validComponents, true ) ) {
		throw new MWException( __METHOD__ . ": $name is not a valid component." );
	}

	$this->components[$name] = $value;
}
/** @return mixed the 'scheme' component, via getComponent() */
public function getProtocol() {
	return $this->getComponent( 'scheme' );
}

/** @return mixed the 'user' component, via getComponent() */
public function getUser() {
	return $this->getComponent( 'user' );
}

/** @return mixed the 'pass' component, via getComponent() */
public function getPassword() {
	return $this->getComponent( 'pass' );
}

/** @return mixed the 'host' component, via getComponent() */
public function getHost() {
	return $this->getComponent( 'host' );
}

/** @return mixed the 'port' component, via getComponent() */
public function getPort() {
	return $this->getComponent( 'port' );
}

/** @return mixed the 'path' component, via getComponent() */
public function getPath() {
	return $this->getComponent( 'path' );
}

/** @return mixed the 'query' component (the raw query string), via getComponent() */
public function getQueryString() {
	return $this->getComponent( 'query' );
}

/** @return mixed the 'fragment' component, via getComponent() */
public function getFragment() {
	return $this->getComponent( 'fragment' );
}
/** @param $scheme mixed value stored as the 'scheme' component */
public function setProtocol( $scheme ) {
	$this->setComponent( 'scheme', $scheme );
}

/** @param $user mixed value stored as the 'user' component */
public function setUser( $user ) {
	$this->setComponent( 'user', $user );
}

/** @param $pass mixed value stored as the 'pass' component */
public function setPassword( $pass ) {
	$this->setComponent( 'pass', $pass );
}

/** @param $host mixed value stored as the 'host' component */
public function setHost( $host ) {
	$this->setComponent( 'host', $host );
}

/** @param $port mixed value stored as the 'port' component */
public function setPort( $port ) {
	$this->setComponent( 'port', $port );
}

/** @param $path mixed value stored as the 'path' component */
public function setPath( $path ) {
	$this->setComponent( 'path', $path );
}

/** @param $fragment mixed value stored as the 'fragment' component */
public function setFragment( $fragment ) {
	$this->setComponent( 'fragment', $fragment );
}
/**
 * Gets the protocol-authority delimiter of a URI (:// or //).
 *
 * An explicitly stored 'delimiter' component wins. Otherwise "://" is used
 * when the URI has both a protocol and an authority.
 *
 * @return string|null
 */
public function getDelimiter() {
	$delimiter = $this->getComponent( 'delimiter' );
	if ( $delimiter !== null && $delimiter !== '' ) {
		// A specific delimiter is set, so return it.
		return $delimiter;
	}

	if ( $this->getAuthority() && $this->getProtocol() ) {
		// If the URI has a protocol and a body (i.e., some sort of host, etc.)
		// the default delimiter is "://", e.g., "http://test.com".
		return '://';
	}

	// Nothing to separate.
	return null;
}
/**
 * Gets query portion of a URI in array format.
 *
 * @return mixed whatever wfCgiToArray() produces for the raw query string
 */
public function getQuery() {
	$queryString = $this->getQueryString();
	return wfCgiToArray( $queryString );
}
/**
 * Sets query portion of a URI.
 *
 * An array is first turned into a query string with wfArrayToCGI();
 * any other value is stored as given.
 *
 * @param string|array $query
 */
public function setQuery( $query ) {
	$queryString = is_array( $query ) ? wfArrayToCGI( $query ) : $query;
	$this->setComponent( 'query', $queryString );
}
/**
 * Extend the query -- supply query parameters to override or add to ours
 *
 * @param Array|string $parameters query parameters to override or add;
 *   a string is converted with wfCgiToArray() first
 * @return Uri this URI object
 */
public function extendQuery( $parameters ) {
	if ( !is_array( $parameters ) ) {
		$parameters = wfCgiToArray( $parameters );
	}

	$query = $this->getQuery();
	foreach ( $parameters as $key => $value ) {
		// Existing keys are overwritten in place, new keys are appended.
		$query[$key] = $value;
	}

	$this->setQuery( $query );

	// Returned so that calls can be chained, as the documentation promises.
	return $this;
}
/**
 * Returns user and password portion of a URI.
 *
 * @return string|null "user:pass" when a password is set, otherwise just the user
 *   (which is null when no user is set either)
 */
public function getUserInfo() {
	$user = $this->getComponent( 'user' );
	$pass = $this->getComponent( 'pass' );

	// Explicit emptiness check: a plain truthiness test would drop a password of "0".
	if ( $pass === null || $pass === '' ) {
		return $user;
	}
	return "$user:$pass";
}
/**
 * Gets host and port portion of a URI.
 *
 * @return string|null "host:port" when a port is set, otherwise just the host
 *   (which is null when no host is set either)
 */
public function getHostPort() {
	$host = $this->getComponent( 'host' );
	$port = $this->getComponent( 'port' );

	// Explicit emptiness check rather than relying on the port's truthiness.
	if ( $port === null || $port === '' ) {
		return $host;
	}
	return "$host:$port";
}
/**
 * Returns the userInfo and host and port portion of the URI.
 * In most real-world URLs, this is simply the hostname, but it is more general.
 *
 * @return string|null "userinfo@hostport" when user info is present,
 *   otherwise just the host/port part
 */
public function getAuthority() {
	$userinfo = $this->getUserInfo();
	$hostinfo = $this->getHostPort();

	// Explicit emptiness check: a plain truthiness test would drop a user of "0".
	if ( $userinfo === null || $userinfo === '' ) {
		return $hostinfo;
	}
	return "$userinfo@$hostinfo";
}
/**
 * Returns everything after the authority section of the URI
 *
 * @return string|null the path, followed by "?query" and "#fragment" for
 *   whichever of those are set; null when none of the three is set
 */
public function getRelativePath() {
	$path = $this->getComponent( 'path' );
	$query = $this->getComponent( 'query' );
	$fragment = $this->getComponent( 'fragment' );

	$retval = $path;
	// Explicit emptiness checks so that a query or fragment of "0" is kept.
	if ( $query !== null && $query !== '' ) {
		$retval .= "?$query";
	}
	if ( $fragment !== null && $fragment !== '' ) {
		$retval .= "#$fragment";
	}

	return $retval;
}
/**
 * Gets the entire URI string. May not be precisely the same as input due to order of query arguments.
 *
 * @return String the URI string
 */
public function toString() {
	// Scheme, delimiter, authority and the rest, in the order they appear in a URI.
	$pieces = array(
		$this->getComponent( 'scheme' ),
		$this->getDelimiter(),
		$this->getAuthority(),
		$this->getRelativePath(),
	);

	return implode( '', $pieces );
}
/**
 * Magic string conversion; yields the same URI string as toString().
 *
 * @return String the URI string
 */
public function __toString() {
	$asString = $this->toString();
	return $asString;
}