+/**
+ * Build a CGI-style query string (e.g. "days=7&limit=100") from one or two
+ * arrays of options. Keys present in $array1 take precedence over the same
+ * keys in $array2. Options whose value is exactly "" are not output.
+ *
+ * Array values are expanded with bracket notation ("key[sub]=value"); arrays
+ * nested deeper than that are handled by a recursive call that passes the
+ * bracketed key as $prefix. Scalar entries directly inside an array value are
+ * always emitted, even when empty. Empty arrays contribute nothing to the
+ * result, and no stray "&" separators are produced for them.
+ *
+ * @param $array1 Array ( String|Array )
+ * @param $array2 Array ( String|Array ) Optional; merged in behind $array1
+ * @param $prefix String Key prefix wrapped around each key as "prefix[key]"
+ * @return String
+ */
+function wfArrayToCGI( $array1, $array2 = null, $prefix = '' ) {
+    if ( !is_null( $array2 ) ) {
+        // The + operator keeps the left-hand value for duplicate keys
+        $array1 = $array1 + $array2;
+    }
+
+    // Collect finished "key=value" fragments and join them once at the end,
+    // so that entries producing no output cannot leave dangling separators.
+    $pairs = array();
+    foreach ( $array1 as $key => $value ) {
+        if ( $value === '' ) {
+            continue;
+        }
+        if ( $prefix !== '' ) {
+            $key = $prefix . "[$key]";
+        }
+        if ( is_array( $value ) ) {
+            foreach ( $value as $k => $v ) {
+                if ( is_array( $v ) ) {
+                    $nested = wfArrayToCGI( $v, null, $key . "[$k]" );
+                    // An empty nested array yields '', which must not be joined
+                    if ( $nested !== '' ) {
+                        $pairs[] = $nested;
+                    }
+                } else {
+                    $pairs[] = urlencode( $key . "[$k]" ) . '=' . urlencode( $v );
+                }
+            }
+        } else {
+            if ( is_object( $value ) ) {
+                $value = $value->__toString();
+            }
+            $pairs[] = urlencode( $key ) . '=' . urlencode( $value );
+        }
+    }
+    return implode( '&', $pairs );
+}
+
+/**
+ * This is the logical opposite of wfArrayToCGI(): it accepts a query string
+ * and returns its contents in array form. This allows compatibility with
+ * legacy functions that accept raw query strings instead of arrays.
+ *
+ * A single leading "?" is stripped. Keys and values are urldecode()d. A piece
+ * without "=" becomes a key with the value "". Only the first "=" in a piece
+ * separates key from value, so values may themselves contain "=".
+ *
+ * Bracketed keys ("a[x][y]=1") are expanded into nested arrays. Sub-keys are
+ * preserved as given (numeric sub-keys are not renumbered) and several
+ * pieces sharing a prefix are merged at every nesting level. When the same
+ * key occurs more than once, the last occurrence wins.
+ *
+ * @param $query String: query string
+ * @return array Array version of input
+ */
+function wfCgiToArray( $query ) {
+    if ( isset( $query[0] ) && $query[0] == '?' ) {
+        $query = substr( $query, 1 );
+    }
+    $ret = array();
+    foreach ( explode( '&', $query ) as $bit ) {
+        if ( $bit === '' ) {
+            continue;
+        }
+        // Limit of 2: everything after the first "=" belongs to the value
+        $pair = explode( '=', $bit, 2 );
+        $key = urldecode( $pair[0] );
+        $value = isset( $pair[1] ) ? urldecode( $pair[1] ) : '';
+
+        if ( strpos( $key, '[' ) === false ) {
+            $ret[$key] = $value;
+            continue;
+        }
+
+        // "a[x][y]" splits into "a", "x]", "y]"
+        $subKeys = explode( '[', $key );
+        $base = array_shift( $subKeys );
+        if ( !isset( $ret[$base] ) || !is_array( $ret[$base] ) ) {
+            $ret[$base] = array();
+        }
+        // Walk down the nested structure by reference, creating levels as needed
+        $node =& $ret[$base];
+        foreach ( $subKeys as $sub ) {
+            // Drop the trailing "]"
+            $sub = substr( $sub, 0, -1 );
+            if ( !isset( $node[$sub] ) || !is_array( $node[$sub] ) ) {
+                $node[$sub] = array();
+            }
+            $node =& $node[$sub];
+        }
+        $node = $value;
+        // Break the reference so the next iteration cannot write through it
+        unset( $node );
+    }
+    return $ret;
+}
+
+/**
+ * Append a query string to an existing URL, which may or may not already
+ * have query string parameters. If it does, the new parameters are added
+ * after the existing ones with "&"; otherwise they are introduced with "?".
+ *
+ * If the URL carries a fragment ("#..."), the query is inserted before the
+ * fragment rather than after it, and a "?" that occurs only inside the
+ * fragment is not treated as an existing query string.
+ *
+ * @param $url String
+ * @param $query Mixed: string or associative array; arrays are converted
+ *   with wfArrayToCGI()
+ * @return string The URL, unchanged if $query is empty
+ */
+function wfAppendQuery( $url, $query ) {
+    if ( is_array( $query ) ) {
+        $query = wfArrayToCGI( $query );
+    }
+    if ( $query != '' ) {
+        // Set the fragment aside so the query ends up in front of it
+        $fragment = '';
+        $hashPos = strpos( $url, '#' );
+        if ( $hashPos !== false ) {
+            $fragment = substr( $url, $hashPos );
+            $url = substr( $url, 0, $hashPos );
+        }
+
+        if ( false === strpos( $url, '?' ) ) {
+            $url .= '?';
+        } else {
+            $url .= '&';
+        }
+        $url .= $query . $fragment;
+    }
+    return $url;
+}
+
+/**
+ * Turn a possibly local URL into a fully-qualified one, trusting $wgServer
+ * (or $wgCanonicalServer for PROTO_CANONICAL) to be correct.
+ *
+ * The PROTO_* constants select the protocol as follows:
+ * PROTO_HTTP: Output a URL starting with http://
+ * PROTO_HTTPS: Output a URL starting with https://
+ * PROTO_RELATIVE: Output a URL starting with // (protocol-relative URL)
+ * PROTO_CURRENT: Output a URL starting with either http:// or https:// , depending on which protocol was used for the current incoming request
+ * PROTO_CANONICAL: For URLs without a domain, like /w/index.php , use $wgCanonicalServer. For protocol-relative URLs, use the protocol of $wgCanonicalServer
+ *
+ * URLs that start with neither "//" nor "/" are returned untouched.
+ *
+ * @todo this won't work with current-path-relative URLs
+ * like "subdir/foo.html", etc.
+ *
+ * @param $url String: either fully-qualified or a local path + query
+ * @param $defaultProto Mixed: one of the PROTO_* constants. Determines the protocol to use if $url or $wgServer is protocol-relative
+ * @return string Fully-qualified URL
+ */
+function wfExpandUrl( $url, $defaultProto = PROTO_CURRENT ) {
+    global $wgServer, $wgCanonicalServer;
+
+    // Pick the server before $defaultProto is rewritten below
+    if ( $defaultProto === PROTO_CANONICAL ) {
+        $serverUrl = $wgCanonicalServer;
+    } else {
+        $serverUrl = $wgServer;
+    }
+
+    if ( $defaultProto === PROTO_CURRENT ) {
+        $defaultProto = WebRequest::detectProtocol() . '://';
+    }
+
+    // Find out whether the chosen server URL names a protocol itself
+    $parsedServer = wfParseUrl( $serverUrl );
+    $serverHasProto = $parsedServer && $parsedServer['scheme'] != '';
+
+    if ( $defaultProto === PROTO_CANONICAL ) {
+        // A $wgCanonicalServer without a protocol isn't supposed to happen;
+        // fall back to HTTP if it does
+        $defaultProto = $serverHasProto
+            ? $parsedServer['scheme'] . '://'
+            : PROTO_HTTP;
+    }
+
+    // Strip the trailing "//" from the protocol string
+    $protoPrefix = substr( $defaultProto, 0, -2 );
+
+    if ( substr( $url, 0, 2 ) == '//' ) {
+        return $protoPrefix . $url;
+    }
+    if ( substr( $url, 0, 1 ) == '/' ) {
+        // Only a protocol-relative server URL needs the prefix added
+        $lead = $serverHasProto ? '' : $protoPrefix;
+        return $lead . $serverUrl . $url;
+    }
+    return $url;
+}
+
+/**
+ * Returns a regular expression fragment matching the configured URL
+ * protocols: every entry of $wgUrlProtocols is quoted for use inside a
+ * "/"-delimited pattern and the entries are joined with "|".
+ *
+ * The result is cached per value of $includeProtocolRelative for the rest
+ * of the request.
+ *
+ * @param $includeProtocolRelative bool If false, remove '//' from the returned protocol list.
+ * DO NOT USE this directly, use wfUrlProtocolsWithoutProtRel() instead
+ * @return String
+ */
+function wfUrlProtocols( $includeProtocolRelative = true ) {
+    global $wgUrlProtocols;
+
+    // One cache slot for each flavour of the result
+    static $cache = array();
+    $slot = $includeProtocolRelative ? 'withProtRel' : 'withoutProtRel';
+    if ( isset( $cache[$slot] ) ) {
+        return $cache[$slot];
+    }
+
+    if ( !is_array( $wgUrlProtocols ) ) {
+        // Old-style $wgUrlProtocols string, kept for backwards compatibility
+        // with pre-1.6 LocalSettings files. $includeProtocolRelative is
+        // ignored here: '//' cannot appear in such a config because
+        // protocol-relative URLs weren't supported until 1.18
+        $regex = $wgUrlProtocols;
+    } else {
+        $quoted = array();
+        foreach ( $wgUrlProtocols as $protocol ) {
+            // Leave out '//' when protocol-relative URLs are not wanted
+            if ( !$includeProtocolRelative && $protocol === '//' ) {
+                continue;
+            }
+            $quoted[] = preg_quote( $protocol, '/' );
+        }
+        $regex = implode( '|', $quoted );
+    }
+
+    $cache[$slot] = $regex;
+    return $regex;
+}
+
+/**
+ * Variant of wfUrlProtocols() that leaves '//' out of the protocol list.
+ * Use this when you need a regex that matches all URL protocols but must
+ * not match protocol-relative URLs.
+ *
+ * @return String
+ */
+function wfUrlProtocolsWithoutProtRel() {
+    $includeProtocolRelative = false;
+    return wfUrlProtocols( $includeProtocolRelative );
+}
+
+/**
+ * parse_url() work-alike, but non-broken. Differences:
+ *
+ * 1) Does not raise warnings on bad URLs (just returns false)
+ * 2) Handles protocols that don't use :// (e.g., mailto: and news: , as well as protocol-relative URLs) correctly
+ * 3) Adds a "delimiter" element to the array, either '://', ':' or '//' (see (2))
+ *
+ * False is returned when parse_url() fails, when no scheme is found, or when
+ * the scheme (followed by '://' or ':') is not listed in $wgUrlProtocols.
+ * The returned array always has a 'host' element; when the URL has no host
+ * it is '' and 'path' is guaranteed to start with '/'. For protocol-relative
+ * URLs 'scheme' is '' and 'delimiter' is '//'.
+ *
+ * @param $url String: a URL to parse
+ * @return Array|false: bits of the URL in an associative array, per PHP docs, or false
+ */
+function wfParseUrl( $url ) {
+    global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php
+
+    // Protocol-relative URLs are handled really badly by parse_url(). It's so bad that the easiest
+    // way to handle them is to just prepend 'http:' and strip the protocol out later
+    $wasRelative = substr( $url, 0, 2 ) == '//';
+    if ( $wasRelative ) {
+        $url = "http:$url";
+    }
+    wfSuppressWarnings();
+    $bits = parse_url( $url );
+    wfRestoreWarnings();
+    // parse_url() returns an array without scheme for some invalid URLs, e.g.
+    // parse_url("%0Ahttp://example.com") == array( 'host' => '%0Ahttp', 'path' => 'example.com' )
+    if ( !$bits || !isset( $bits['scheme'] ) ) {
+        return false;
+    }
+
+    // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
+    if ( in_array( $bits['scheme'] . '://', $wgUrlProtocols ) ) {
+        $bits['delimiter'] = '://';
+    } elseif ( in_array( $bits['scheme'] . ':', $wgUrlProtocols ) ) {
+        $bits['delimiter'] = ':';
+        // parse_url detects for news: and mailto: the host part of an url as path
+        // We have to correct this wrong detection
+        if ( isset( $bits['path'] ) ) {
+            $bits['host'] = $bits['path'];
+            $bits['path'] = '';
+        }
+    } else {
+        return false;
+    }
+
+    /* Provide an empty host for eg. file:/// urls (see bug 28627) */
+    if ( !isset( $bits['host'] ) ) {
+        $bits['host'] = '';
+
+        // parse_url() returns only a scheme for inputs such as "http:", so
+        // 'path' may be missing here; treat that as an empty path instead of
+        // reading an undefined index.
+        $path = isset( $bits['path'] ) ? $bits['path'] : '';
+
+        /* parse_url loses the third / for file:///c:/ urls (but not on variants) */
+        if ( substr( $path, 0, 1 ) !== '/' ) {
+            $path = '/' . $path;
+        }
+        $bits['path'] = $path;
+    }
+
+    // If the URL was protocol-relative, fix scheme and delimiter
+    if ( $wasRelative ) {
+        $bits['scheme'] = '';
+        $bits['delimiter'] = '//';
+    }
+    return $bits;
+}
+
+/**
+ * Build the index form of a URL, suitable for the el_index field of
+ * externallinks: the host labels are reversed and lower-cased, a trailing
+ * dot is added, and the rest of the URL (port, path, query, fragment) is
+ * appended. User name and password are left out.
+ *
+ * For mailto: links only the domain part after the "@" is reversed, and it
+ * is placed in front of the "@".
+ *
+ * @param $url String
+ * @return String
+ */
+function wfMakeUrlIndex( $url ) {
+    $parts = wfParseUrl( $url );
+
+    if ( $parts['scheme'] == 'mailto' ) {
+        // Split "local@domain" at the first "@" only
+        $mailPieces = explode( '@', $parts['host'], 2 );
+        // Without a domain there is nothing to reverse
+        $reversedDomain = '';
+        if ( count( $mailPieces ) === 2 ) {
+            $domainLabels = array_reverse( explode( '.', $mailPieces[1] ) );
+            $reversedDomain = strtolower( implode( '.', $domainLabels ) );
+        }
+        $indexHost = $reversedDomain . '@' . $mailPieces[0];
+    } else {
+        $hostLabels = array_reverse( explode( '.', $parts['host'] ) );
+        $indexHost = strtolower( implode( '.', $hostLabels ) );
+    }
+
+    // Make sure the reversed host ends in a dot
+    // Why? Is it in wrong place in mailto links?
+    if ( substr( $indexHost, -1, 1 ) !== '.' ) {
+        $indexHost .= '.';
+    }
+
+    // Reassemble the pseudo-URL piece by piece
+    $index = $parts['scheme'] . $parts['delimiter'] . $indexHost;
+    if ( isset( $parts['port'] ) ) {
+        $index .= ':' . $parts['port'];
+    }
+    $index .= isset( $parts['path'] ) ? $parts['path'] : '/';
+    if ( isset( $parts['query'] ) ) {
+        $index .= '?' . $parts['query'];
+    }
+    if ( isset( $parts['fragment'] ) ) {
+        $index .= '#' . $parts['fragment'];
+    }
+    return $index;
+}
+
+/**
+ * Check whether a given URL has a domain that occurs in a given set of domains.
+ *
+ * A domain matches when the host of $url is exactly that domain or is a
+ * subdomain of it, i.e. ends in "." followed by the domain. Matching is done
+ * at label boundaries, so "nds-nl.wikipedia.org" does not match
+ * "nl.wikipedia.org". A domain that already starts with "." is used as a
+ * plain suffix.
+ *
+ * @param $url string URL
+ * @param $domains array Array of domains (strings)
+ * @return bool True if the host part of $url matches one of the strings in
+ *   $domains; false otherwise, including when $url cannot be parsed
+ */
+function wfMatchesDomainList( $url, $domains ) {
+    $bits = wfParseUrl( $url );
+    if ( !is_array( $bits ) || !isset( $bits['host'] ) ) {
+        return false;
+    }
+    $host = $bits['host'];
+    foreach ( (array)$domains as $domain ) {
+        if ( $host === $domain ) {
+            return true;
+        }
+        if ( $domain === '' ) {
+            // An empty domain can only match an empty host, handled above
+            continue;
+        }
+        // Anchor the comparison on a dot so that only whole labels match
+        $suffix = substr( $domain, 0, 1 ) === '.' ? $domain : '.' . $domain;
+        if ( substr( $host, -strlen( $suffix ) ) === $suffix ) {
+            return true;
+        }
+    }
+    return false;
+}
+