From 6fff535d08d243879e74186162f755887616b51d Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 25 Apr 2011 21:00:49 +0000 Subject: [PATCH] * (bug 28627) External link normalization now handles file: URL cases without throwing notice warnings. Added some test cases for wfMakeUrlIndex() to GlobalTests (tweaks $wgUrlProtocols to toss in file:// support so it can test them). Needs more cases for other URL styles probably; some of the more pathological file: URL cases still won't normalize really cleanly but will go through the function without exploding. The most-needed variants will be the Windows/IE-compatible ones I think -- so file:///c:/foo or file://server/foo. --- RELEASE-NOTES | 2 + includes/GlobalFunctions.php | 13 ++++++- tests/phpunit/includes/GlobalTest.php | 55 ++++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index ef1be2b456..4b5ceabeee 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -243,6 +243,8 @@ PHP if you have not done so prior to upgrading MediaWiki. * (bug 28532) wfMsgExt() and wfMsgWikiHtml() use $wgOut->parse() * (bug 16129) Transcluded special pages expose strip markers when they output parsed messages * (bug 27249) "Installed software" table in Special:Version should always be left-to-right +* (bug 28627) External link normalization now handles file: URL cases without + throwing notice warnings. === API changes in 1.18 === * (bug 26339) Throw warning when truncating an overlarge API result diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index ac04b19480..f35cb5cb89 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -2750,8 +2750,12 @@ function wfMakeUrlIndex( $url ) { $domainpart = ''; } $reversedHost = $domainpart . '@' . $mailparts[0]; - } else { + } else if ( isset( $bits['host'] ) ) { $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) ); + } else { + // In file: URIs for instance it's common to have an empty host, + // which turns up as not getting a 'host' member from parse_url. + $reversedHost = '.'; } // Add an extra dot to the end // Why? Is it in wrong place in mailto links? @@ -2766,6 +2770,13 @@ function wfMakeUrlIndex( $url ) { $index .= ':' . $bits['port']; } if ( isset( $bits['path'] ) ) { + // parse_url() removes the initial '/' from the path + // for file: URLs with Windows-style paths, such as + // file:///c:/windows/stuff. We need to add it back + // to keep our division between host and path properly. + if ( strlen( $bits['path'] ) > 0 && substr( $bits['path'], 0, 1 ) !== '/' ) { + $index .= '/'; + } $index .= $bits['path']; } else { $index .= '/'; diff --git a/tests/phpunit/includes/GlobalTest.php b/tests/phpunit/includes/GlobalTest.php index 8d466c4c4e..b76ea07cbb 100644 --- a/tests/phpunit/includes/GlobalTest.php +++ b/tests/phpunit/includes/GlobalTest.php @@ -2,19 +2,22 @@ class GlobalTest extends MediaWikiTestCase { function setUp() { - global $wgReadOnlyFile, $wgContLang, $wgLang; + global $wgReadOnlyFile, $wgContLang, $wgLang, $wgUrlProtocols; $this->originals['wgReadOnlyFile'] = $wgReadOnlyFile; + $this->originals['wgUrlProtocols'] = $wgUrlProtocols; $wgReadOnlyFile = tempnam( wfTempDir(), "mwtest_readonly" ); + $wgUrlProtocols[] = 'file://'; unlink( $wgReadOnlyFile ); $wgContLang = $wgLang = Language::factory( 'en' ); } function tearDown() { - global $wgReadOnlyFile; + global $wgReadOnlyFile, $wgUrlProtocols; if ( file_exists( $wgReadOnlyFile ) ) { unlink( $wgReadOnlyFile ); } $wgReadOnlyFile = $this->originals['wgReadOnlyFile']; + $wgUrlProtocols = $this->originals['wgUrlProtocols']; } /** @dataProvider provideForWfArrayDiff2 */ @@ -815,6 +818,54 @@ class GlobalTest extends MediaWikiTestCase { */); } + /** + * @dataProvider provideMakeUrlIndex() + */ + function testMakeUrlIndex( $url, $expected ) { + $index = wfMakeUrlIndex( $url ); + $this->assertEquals( $expected, $index, "wfMakeUrlIndex(\"$url\")" ); + } + + function provideMakeUrlIndex() { + return array( + array( + // just a regular :) + 'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627', + 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' + ), + array( + // mailtos are handled special + // is this really right though? that final . probably belongs earlier? + 'mailto:wiki@wikimedia.org', + 'mailto:org.wikimedia@wiki.', + ), + + // file URL cases per bug 28627... + array( + // three slashes: local filesystem path Unix-style + 'file:///whatever/you/like.txt', + 'file://./whatever/you/like.txt' + ), + array( + // three slashes: local filesystem path Windows-style + 'file:///c:/whatever/you/like.txt', + 'file://./c:/whatever/you/like.txt' + ), + array( + // two slashes: UNC filesystem path Windows-style + 'file://intranet/whatever/you/like.txt', + 'file://intranet./whatever/you/like.txt' + ), + // Multiple-slash cases that can sorta work on Mozilla + // if you hack it just right are kinda pathological, + // and unreliable cross-platform or on IE which means they're + // unlikely to appear on intranets. + // + // Those will survive the algorithm but with results that + // are less consistent. + ); + } + /* TODO: many more! */ } -- 2.20.1