* (bug 28627) External link normalization now handles file: URL cases without throwin...
authorBrion Vibber <brion@users.mediawiki.org>
Mon, 25 Apr 2011 21:00:49 +0000 (21:00 +0000)
committerBrion Vibber <brion@users.mediawiki.org>
Mon, 25 Apr 2011 21:00:49 +0000 (21:00 +0000)
Added some test cases for wfMakeUrlIndex() to GlobalTests (tweaks $wgUrlProtocols to toss in file:// support so it can test them).
Needs more cases for other URL styles probably; some of the more pathological file: URL cases still won't normalize really cleanly but will go through the function without exploding. The most-needed variants will be the Windows/IE-compatible ones I think -- so file:///c:/foo or file://server/foo.

RELEASE-NOTES
includes/GlobalFunctions.php
tests/phpunit/includes/GlobalTest.php

index ef1be2b..4b5ceab 100644 (file)
@@ -243,6 +243,8 @@ PHP if you have not done so prior to upgrading MediaWiki.
 * (bug 28532) wfMsgExt() and wfMsgWikiHtml() use $wgOut->parse()
 * (bug 16129) Transcluded special pages expose strip markers when they output parsed messages
 * (bug 27249) "Installed software" table in Special:Version should always be left-to-right
+* (bug 28627) External link normalization now handles file: URL cases without
+  throwing notice warnings.
 
 === API changes in 1.18 ===
 * (bug 26339) Throw warning when truncating an overlarge API result
index ac04b19..f35cb5c 100644 (file)
@@ -2750,8 +2750,12 @@ function wfMakeUrlIndex( $url ) {
                        $domainpart = '';
                }
                $reversedHost = $domainpart . '@' . $mailparts[0];
-       } else {
+       } else if ( isset( $bits['host'] ) ) {
                $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
+       } else {
+               // In file: URIs for instance it's common to have an empty host,
+               // which turns up as not getting a 'host' member from parse_url.
+               $reversedHost = '.';
        }
        // Add an extra dot to the end
        // Why? Is it in wrong place in mailto links?
@@ -2766,6 +2770,13 @@ function wfMakeUrlIndex( $url ) {
                $index .= ':' . $bits['port'];
        }
        if ( isset( $bits['path'] ) ) {
+               // parse_url() removes the initial '/' from the path
+               // for file: URLs with Windows-style paths, such as
+               // file:///c:/windows/stuff. We need to add it back
+               // to keep our division between host and path properly.
+               if ( strlen( $bits['path'] ) > 0 && substr( $bits['path'], 0, 1 ) !== '/' ) {
+                       $index .= '/';
+               }
                $index .= $bits['path'];
        } else {
                $index .= '/';
index 8d466c4..b76ea07 100644 (file)
@@ -2,19 +2,22 @@
 
 class GlobalTest extends MediaWikiTestCase {
        function setUp() {
-               global $wgReadOnlyFile, $wgContLang, $wgLang;
+               global $wgReadOnlyFile, $wgContLang, $wgLang, $wgUrlProtocols;
                $this->originals['wgReadOnlyFile'] = $wgReadOnlyFile;
+               $this->originals['wgUrlProtocols'] = $wgUrlProtocols;
                $wgReadOnlyFile = tempnam( wfTempDir(), "mwtest_readonly" );
+               $wgUrlProtocols[] = 'file://';
                unlink( $wgReadOnlyFile );
                $wgContLang = $wgLang = Language::factory( 'en' );
        }
 
        function tearDown() {
-               global $wgReadOnlyFile;
+               global $wgReadOnlyFile, $wgUrlProtocols;
                if ( file_exists( $wgReadOnlyFile ) ) {
                        unlink( $wgReadOnlyFile );
                }
                $wgReadOnlyFile = $this->originals['wgReadOnlyFile'];
+               $wgUrlProtocols = $this->originals['wgUrlProtocols'];
        }
 
        /** @dataProvider provideForWfArrayDiff2 */
@@ -815,6 +818,54 @@ class GlobalTest extends MediaWikiTestCase {
                 */);
        }
 
+       /**
+        * @dataProvider provideMakeUrlIndex()
+        */
+       function testMakeUrlIndex( $url, $expected ) {
+               $index = wfMakeUrlIndex( $url );
+               $this->assertEquals( $expected, $index, "wfMakeUrlIndex(\"$url\")" );
+       }
+
+       function provideMakeUrlIndex() {
+               return array(
+                       array(
+                               // just a regular :)
+                               'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627',
+                               'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627'
+                       ),
+                       array(
+                               // mailtos are handled special
+                               // is this really right though? that final . probably belongs earlier?
+                               'mailto:wiki@wikimedia.org',
+                               'mailto:org.wikimedia@wiki.',
+                       ),
+
+                       // file URL cases per bug 28627...
+                       array(
+                               // three slashes: local filesystem path Unix-style
+                               'file:///whatever/you/like.txt',
+                               'file://./whatever/you/like.txt'
+                       ),
+                       array(
+                               // three slashes: local filesystem path Windows-style
+                               'file:///c:/whatever/you/like.txt',
+                               'file://./c:/whatever/you/like.txt'
+                       ),
+                       array(
+                               // two slashes: UNC filesystem path Windows-style
+                               'file://intranet/whatever/you/like.txt',
+                               'file://intranet./whatever/you/like.txt'
+                       ),
+                       // Multiple-slash cases that can sorta work on Mozilla
+                       // if you hack it just right are kinda pathological,
+                       // and unreliable cross-platform or on IE which means they're
+                       // unlikely to appear on intranets.
+                       //
+                       // Those will survive the algorithm but with results that
+                       // are less consistent.
+               );
+       }
+
        /* TODO: many more! */
 }