Merge "Add script to fix content model of JSON pages"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 24 Nov 2015 18:53:43 +0000 (18:53 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Tue, 24 Nov 2015 18:53:43 +0000 (18:53 +0000)
autoload.php
includes/installer/DatabaseUpdater.php
maintenance/fixDefaultJsonContentPages.php [new file with mode: 0644]

index 3596ac3..03748bf 100644 (file)
@@ -445,6 +445,7 @@ $wgAutoloadLocalClasses = array(
        'FindMissingFiles' => __DIR__ . '/maintenance/findMissingFiles.php',
        'FindOrphanedFiles' => __DIR__ . '/maintenance/findOrphanedFiles.php',
        'FixBug20757' => __DIR__ . '/maintenance/storage/fixBug20757.php',
+       'FixDefaultJsonContentPages' => __DIR__ . '/maintenance/fixDefaultJsonContentPages.php',
        'FixDoubleRedirects' => __DIR__ . '/maintenance/fixDoubleRedirects.php',
        'FixExtLinksProtocolRelative' => __DIR__ . '/maintenance/fixExtLinksProtocolRelative.php',
        'FixTimestamps' => __DIR__ . '/maintenance/fixTimestamps.php',
index 57084cb..904fde8 100644 (file)
@@ -73,7 +73,8 @@ abstract class DatabaseUpdater {
                'PopulateImageSha1',
                'FixExtLinksProtocolRelative',
                'PopulateFilearchiveSha1',
-               'PopulateBacklinkNamespace'
+               'PopulateBacklinkNamespace',
+               'FixDefaultJsonContentPages'
        );
 
        /**
diff --git a/maintenance/fixDefaultJsonContentPages.php b/maintenance/fixDefaultJsonContentPages.php
new file mode 100644 (file)
index 0000000..1265891
--- /dev/null
@@ -0,0 +1,156 @@
+<?php
+/**
+ * Fix instances of pre-existing JSON pages
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that fixes the stored content model of pages whose titles
+ * end in ".json": pages in the MediaWiki namespace and subpages in the User
+ * namespace.
+ *
+ * Usage:
+ *  fixDefaultJsonContentPages.php
+ *
+ * It is automatically run by update.php
+ */
+class FixDefaultJsonContentPages extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription =
+                               'Fix instances of JSON pages prior to them being the ContentHandler default';
+               $this->setBatchSize( 100 );
+       }
+
+       /**
+        * @return string Update key for this script (the class name)
+        */
+       protected function getUpdateKey() {
+               return __CLASS__;
+       }
+
+       /**
+        * Walk the matching pages of each namespace in batches ordered by
+        * page_id and pass every row to handleRow().
+        *
+        * @return bool Always true
+        */
+       protected function doDBUpdates() {
+               if ( !$this->getConfig()->get( 'ContentHandlerUseDB' ) ) {
+                       $this->output( "\$wgContentHandlerUseDB is not enabled, nothing to do.\n" );
+                       return true;
+               }
+
+               $dbr = wfGetDB( DB_SLAVE );
+               $namespaces = array(
+                       NS_MEDIAWIKI => $dbr->buildLike( $dbr->anyString(), '.json' ),
+                       NS_USER => $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString(), '.json' ),
+               );
+               foreach ( $namespaces as $ns => $like ) {
+                       $lastPage = 0;
+                       do {
+                               $rows = $dbr->select(
+                                               'page',
+                                               array( 'page_id', 'page_title', 'page_namespace', 'page_content_model' ),
+                                               array(
+                                                               'page_namespace' => $ns,
+                                                               'page_title ' . $like,
+                                                               'page_id > ' . $dbr->addQuotes( $lastPage )
+                                               ),
+                                               __METHOD__,
+                                               array( 'ORDER BY' => 'page_id', 'LIMIT' => $this->mBatchSize )
+                               );
+                               foreach ( $rows as $row ) {
+                                       $this->handleRow( $row );
+                                       // Advance the cursor so the next query starts after this
+                                       // page; otherwise a full batch would be re-selected forever.
+                                       $lastPage = $row->page_id;
+                               }
+                       } while ( $rows->numRows() >= $this->mBatchSize );
+               }
+
+               return true;
+       }
+
+       /**
+        * Fix the content model of a single page based on its current revision.
+        *
+        * @param stdClass $row Row with page_id, page_namespace and page_title
+        */
+       protected function handleRow( stdClass $row ) {
+               $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+               $this->output( "Processing {$title} ({$row->page_id})...\n" );
+               $rev = Revision::newFromTitle( $title );
+               if ( !$rev ) {
+                       // No revision could be loaded for this title, so there is
+                       // no content to inspect.
+                       $this->output( "no revision found? Skipping\n" );
+                       return;
+               }
+               $content = $rev->getContent( Revision::RAW );
+               $dbw = wfGetDB( DB_MASTER );
+               if ( $content instanceof JsonContent ) {
+                       if ( $content->isValid() ) {
+                               // Yay, actually JSON. We need to just change the
+                               // page_content_model because revision will automatically
+                               // use the default, which is *now* JSON.
+                               $this->output( "Setting page_content_model to json..." );
+                               $dbw->update(
+                                       'page',
+                                       array( 'page_content_model' => CONTENT_MODEL_JSON ),
+                                       array( 'page_id' => $row->page_id ),
+                                       __METHOD__
+                               );
+                               $this->output( "done.\n" );
+                               wfWaitForSlaves();
+                       } else {
+                               // Not JSON...force it to wikitext. We need to update the
+                               // revision table so that these revisions are always processed
+                               // as wikitext in the future. page_content_model is already
+                               // set to "wikitext".
+                               $this->output( "Setting rev_content_model to wikitext..." );
+                               // Grab all the ids for batching
+                               $ids = $dbw->selectFieldValues(
+                                       'revision',
+                                       'rev_id',
+                                       array( 'rev_page' => $row->page_id ),
+                                       __METHOD__
+                               );
+                               foreach ( array_chunk( $ids, 50 ) as $chunk ) {
+                                       $dbw->update(
+                                               'revision',
+                                               array( 'rev_content_model' => CONTENT_MODEL_WIKITEXT ),
+                                               array( 'rev_page' => $row->page_id, 'rev_id' => $chunk ),
+                                               __METHOD__
+                                       );
+                                       wfWaitForSlaves();
+                               }
+                               $this->output( "done.\n" );
+                       }
+               } else {
+                       $this->output( "not a JSON page? Skipping\n" );
+               }
+       }
+}
+
+$maintClass = 'FixDefaultJsonContentPages';
+require_once RUN_MAINTENANCE_IF_MAIN;