(bug 29854) Store protocol-relative links twice in the externallinks table, one with...
authorRoan Kattouw <catrope@users.mediawiki.org>
Mon, 14 Nov 2011 09:13:58 +0000 (09:13 +0000)
committerRoan Kattouw <catrope@users.mediawiki.org>
Mon, 14 Nov 2011 09:13:58 +0000 (09:13 +0000)
includes/AutoLoader.php
includes/GlobalFunctions.php
includes/LinksUpdate.php
includes/api/ApiQueryExternalLinks.php
includes/installer/DatabaseUpdater.php
maintenance/fixExtLinksProtocolRelative.php [new file with mode: 0644]
tests/phpunit/includes/GlobalFunctions/GlobalTest.php

index c7d11e1..e0de7ec 100644 (file)
@@ -861,6 +861,7 @@ $wgAutoloadLocalClasses = array(
        'FakeMaintenance' => 'maintenance/Maintenance.php',
        'LoggedUpdateMaintenance' => 'maintenance/Maintenance.php',
        'Maintenance' => 'maintenance/Maintenance.php',
+       'FixExtLinksProtocolRelative' => 'maintenance/fixExtLinksProtocolRelative.php',
        'PopulateCategory' => 'maintenance/populateCategory.php',
        'PopulateImageSha1' => 'maintenance/populateImageSha1.php',
        'PopulateLogSearch' => 'maintenance/populateLogSearch.php',
index 17e13ec..42b683a 100644 (file)
@@ -647,12 +647,12 @@ function wfParseUrl( $url ) {
 }
 
 /**
- * Make a URL index, appropriate for the el_index field of externallinks.
+ * Make URL indexes, appropriate for the el_index field of externallinks.
  *
  * @param $url String
- * @return String
+ * @return array
  */
-function wfMakeUrlIndex( $url ) {
+function wfMakeUrlIndexes( $url ) {
        $bits = wfParseUrl( $url );
 
        // Reverse the labels in the hostname, convert to lower case
@@ -692,7 +692,12 @@ function wfMakeUrlIndex( $url ) {
        if ( isset( $bits['fragment'] ) ) {
                $index .= '#' . $bits['fragment'];
        }
-       return $index;
+
+       if ( $prot == '' ) {
+               return array( "http:$index", "https:$index" );
+       } else {
+               return array( $index );
+       }
 }
 
 /**
index 0002096..38ed4cb 100644 (file)
@@ -456,11 +456,13 @@ class LinksUpdate {
                $arr = array();
                $diffs = array_diff_key( $this->mExternals, $existing );
                foreach( $diffs as $url => $dummy ) {
-                       $arr[] = array(
-                               'el_from'   => $this->mId,
-                               'el_to'     => $url,
-                               'el_index'  => wfMakeUrlIndex( $url ),
-                       );
+                       foreach( wfMakeUrlIndexes( $url ) as $index ) {
+                               $arr[] = array(
+                                       'el_from'   => $this->mId,
+                                       'el_to'     => $url,
+                                       'el_index'  => $index,
+                               );
+                       }
                }
                return $arr;
        }
index c2fed6a..c971165 100644 (file)
@@ -69,6 +69,11 @@ class ApiQueryExternalLinks extends ApiQueryBase {
                        $this->addOption( 'ORDER BY', 'el_from' );
                }
 
+               // If we're querying all protocols, use DISTINCT to avoid repeating protocol-relative links twice
+               if ( $protocol === null ) {
+                       $this->addOption( 'DISTINCT' );
+               }
+
                $this->addOption( 'LIMIT', $params['limit'] + 1 );
                $offset = isset( $params['offset'] ) ? $params['offset'] : 0;
                if ( $offset ) {
index a394f19..94c790c 100644 (file)
@@ -43,7 +43,8 @@ abstract class DatabaseUpdater {
                'DeleteDefaultMessages',
                'PopulateRevisionLength',
                'PopulateRevisionSha1',
-               'PopulateImageSha1'
+               'PopulateImageSha1',
+               'FixExtLinksProtocolRelative',
        );
 
        /**
diff --git a/maintenance/fixExtLinksProtocolRelative.php b/maintenance/fixExtLinksProtocolRelative.php
new file mode 100644 (file)
index 0000000..e10b194
--- /dev/null
@@ -0,0 +1,81 @@
+<?php
+/**
+ * Fixes any entries for protocol-relative URLs in the externallinks table,
+ * replacing each protocol-relative entry with two entries, one for http
+ * and one for https.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+class FixExtLinksProtocolRelative extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Fixes any entries in the externallinks table containing protocol-relative URLs";
+       }
+
+       protected function getUpdateKey() {
+               return 'fix protocol-relative URLs in externallinks';
+       }
+
+       protected function updateSkippedMessage() {
+               return 'protocol-relative URLs in externallinks table already fixed.';
+       }
+       
+       protected function doDBUpdates() {
+               $db = wfGetDB( DB_MASTER );
+               if ( !$db->tableExists( 'externallinks' ) ) {
+                       $this->error( "externallinks table does not exist" );
+                       return false;
+               }
+               $this->output( "Fixing protocol-relative entries in the externallinks table...\n" );
+               $res = $db->select( 'externallinks', array( 'el_from', 'el_to', 'el_index' ),
+                       array( 'el_index' . $db->buildLike( '//', $db->anyString() ) ),
+                       __METHOD__
+               );
+               $count = 0;
+               foreach ( $res as $row ) {
+                       $count++;
+                       if ( $count % 100 == 0 ) {
+                               $this->output( $count );
+                               wfWaitForSlaves();
+                       }
+                       $db->insert( 'externallinks',
+                               array(
+                                       array(
+                                               'el_from' => $row->el_from,
+                                               'el_to' => $row->el_to,
+                                               'el_index' => "http:{$row->el_index}",
+                                       ),
+                                       array(
+                                               'el_from' => $row->el_from,
+                                               'el_to' => $row->el_to,
+                                               'el_index' => "https:{$row->el_index}",
+                                       )
+                               ), __METHOD__, array( 'IGNORE' )
+                       );
+                       $db->delete( 'externallinks', array( 'el_index' => $row->el_index ), __METHOD__ );
+               }
+               $this->output( "Done, $count rows updated.\n" );
+               return true;
+       }
+}
+
+$maintClass = "FixExtLinksProtocolRelative";
+require_once( RUN_MAINTENANCE_IF_MAIN );
index 01da0a5..5b5ee64 100644 (file)
@@ -831,42 +831,42 @@ class GlobalTest extends MediaWikiTestCase {
        }
 
        /**
-        * @dataProvider provideMakeUrlIndex()
+        * @dataProvider provideMakeUrlIndexes()
         */
-       function testMakeUrlIndex( $url, $expected ) {
-               $index = wfMakeUrlIndex( $url );
-               $this->assertEquals( $expected, $index, "wfMakeUrlIndex(\"$url\")" );
+       function testMakeUrlIndexes( $url, $expected ) {
+               $index = wfMakeUrlIndexes( $url );
+               $this->assertEquals( $expected, $index, "wfMakeUrlIndexes(\"$url\")" );
        }
 
-       function provideMakeUrlIndex() {
+       function provideMakeUrlIndexes() {
                return array(
                        array(
                                // just a regular :)
                                'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627',
-                               'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627'
+                               array( 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' )
                        ),
                        array(
                                // mailtos are handled special
                                // is this really right though? that final . probably belongs earlier?
                                'mailto:wiki@wikimedia.org',
-                               'mailto:org.wikimedia@wiki.',
+                               array( 'mailto:org.wikimedia@wiki.' )
                        ),
 
                        // file URL cases per bug 28627...
                        array(
                                // three slashes: local filesystem path Unix-style
                                'file:///whatever/you/like.txt',
-                               'file://./whatever/you/like.txt'
+                               array( 'file://./whatever/you/like.txt' )
                        ),
                        array(
                                // three slashes: local filesystem path Windows-style
                                'file:///c:/whatever/you/like.txt',
-                               'file://./c:/whatever/you/like.txt'
+                               array( 'file://./c:/whatever/you/like.txt' )
                        ),
                        array(
                                // two slashes: UNC filesystem path Windows-style
                                'file://intranet/whatever/you/like.txt',
-                               'file://intranet./whatever/you/like.txt'
+                               array( 'file://intranet./whatever/you/like.txt' )
                        ),
                        // Multiple-slash cases that can sorta work on Mozilla
                        // if you hack it just right are kinda pathological,
@@ -875,6 +875,15 @@ class GlobalTest extends MediaWikiTestCase {
                        //
                        // Those will survive the algorithm but with results that
                        // are less consistent.
+
+                       // protocol-relative URL cases per bug 29854...
+                       array(
+                               '//bugzilla.wikimedia.org/show_bug.cgi?id=28627',
+                               array(
+                                       'http://org.wikimedia.bugzilla./show_bug.cgi?id=28627',
+                                       'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627'
+                               )
+                       ),
                );
        }