From 2986d47c9052af08e37b363ad614e32f5ccff508 Mon Sep 17 00:00:00 2001
From: Kunal Mehta
Date: Tue, 20 Oct 2015 22:44:33 -0700
Subject: [PATCH] Add script to fix content model of JSON pages

MediaWiki:Foo.json and User:Foo/bar.json pages now have a default
content model of JSON, but existing pages using those names will be set
to defaults of wikitext.

The content models of those pages are now set to "json", unless it has
invalid syntax, in which case it will be set to "wikitext".

For convenience, the script is automatically run as part of update.php.

Bug: T108663
Change-Id: I1412937ccea8e65dba58580beec79cbf2286ae01
---
 autoload.php                               |   1 +
 includes/installer/DatabaseUpdater.php     |   3 +-
 maintenance/fixDefaultJsonContentPages.php | 163 +++++++++++++++++++++
 3 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 maintenance/fixDefaultJsonContentPages.php

diff --git a/autoload.php b/autoload.php
index d7bccb363a..80d1eebfd8 100644
--- a/autoload.php
+++ b/autoload.php
@@ -447,6 +447,7 @@ $wgAutoloadLocalClasses = array(
 	'FindHooks' => __DIR__ . '/maintenance/findHooks.php',
 	'FindMissingFiles' => __DIR__ . '/maintenance/findMissingFiles.php',
 	'FixBug20757' => __DIR__ . '/maintenance/storage/fixBug20757.php',
+	'FixDefaultJsonContentPages' => __DIR__ . '/maintenance/fixDefaultJsonContentPages.php',
 	'FixDoubleRedirects' => __DIR__ . '/maintenance/fixDoubleRedirects.php',
 	'FixExtLinksProtocolRelative' => __DIR__ . '/maintenance/fixExtLinksProtocolRelative.php',
 	'FixTimestamps' => __DIR__ . '/maintenance/fixTimestamps.php',
diff --git a/includes/installer/DatabaseUpdater.php b/includes/installer/DatabaseUpdater.php
index 57084cbe44..904fde83dc 100644
--- a/includes/installer/DatabaseUpdater.php
+++ b/includes/installer/DatabaseUpdater.php
@@ -73,7 +73,8 @@ abstract class DatabaseUpdater {
 		'PopulateImageSha1',
 		'FixExtLinksProtocolRelative',
 		'PopulateFilearchiveSha1',
-		'PopulateBacklinkNamespace'
+		'PopulateBacklinkNamespace',
+		'FixDefaultJsonContentPages'
 	);
 
 	/**
diff --git a/maintenance/fixDefaultJsonContentPages.php b/maintenance/fixDefaultJsonContentPages.php
new file mode 100644
index 0000000000..12658910ec
--- /dev/null
+++ b/maintenance/fixDefaultJsonContentPages.php
@@ -0,0 +1,163 @@
+mDescription =
+			'Fix instances of JSON pages prior to them being the ContentHandler default';
+		$this->setBatchSize( 100 );
+	}
+
+	/**
+	 * Returns the class name as this script's update key.
+	 *
+	 * @return string
+	 */
+	protected function getUpdateKey() {
+		return __CLASS__;
+	}
+
+	/**
+	 * Select pages titled like MediaWiki:Foo.json and User:Foo/bar.json in
+	 * batches, ordered by page_id, and pass each row to handleRow().
+	 *
+	 * Does nothing when $wgContentHandlerUseDB is disabled.
+	 *
+	 * @return bool Always true
+	 */
+	protected function doDBUpdates() {
+		if ( !$this->getConfig()->get( 'ContentHandlerUseDB' ) ) {
+			$this->output( "\$wgContentHandlerUseDB is not enabled, nothing to do.\n" );
+			return true;
+		}
+
+		$dbr = wfGetDB( DB_SLAVE );
+		$namespaces = array(
+			NS_MEDIAWIKI => $dbr->buildLike( $dbr->anyString(), '.json' ),
+			NS_USER => $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString(), '.json' ),
+		);
+		foreach ( $namespaces as $ns => $like ) {
+			// Keyset pagination cursor: highest page_id handled so far.
+			$lastPage = 0;
+			do {
+				$rows = $dbr->select(
+					'page',
+					array( 'page_id', 'page_title', 'page_namespace', 'page_content_model' ),
+					array(
+						'page_namespace' => $ns,
+						'page_title ' . $like,
+						'page_id > ' . $dbr->addQuotes( $lastPage )
+					),
+					__METHOD__,
+					array( 'ORDER BY' => 'page_id', 'LIMIT' => $this->mBatchSize )
+				);
+				foreach ( $rows as $row ) {
+					$this->handleRow( $row );
+					// Advance the cursor; otherwise the same first batch is
+					// selected again on every pass and the loop never ends.
+					$lastPage = $row->page_id;
+				}
+			} while ( $rows->numRows() >= $this->mBatchSize );
+		}
+
+		return true;
+	}
+
+	/**
+	 * Fix the stored content model of a single page.
+	 *
+	 * If the revision loaded for the title holds valid JsonContent,
+	 * page_content_model is set to CONTENT_MODEL_JSON. If it is JsonContent
+	 * but not valid, rev_content_model of every revision of the page is set
+	 * to CONTENT_MODEL_WIKITEXT, 50 revisions per UPDATE. Other pages are
+	 * skipped.
+	 *
+	 * @param stdClass $row Row with page_id, page_title and page_namespace
+	 */
+	protected function handleRow( stdClass $row ) {
+		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
+		$this->output( "Processing {$title} ({$row->page_id})...\n" );
+		$rev = Revision::newFromTitle( $title );
+		if ( !$rev ) {
+			// No revision could be loaded (e.g. the page went away after
+			// the batch was selected); there is nothing to inspect.
+			$this->output( "no revision found? Skipping\n" );
+			return;
+		}
+		$content = $rev->getContent( Revision::RAW );
+		$dbw = wfGetDB( DB_MASTER );
+		if ( $content instanceof JsonContent ) {
+			if ( $content->isValid() ) {
+				// Yay, actually JSON. We need to just change the
+				// page_content_model because revision will automatically
+				// use the default, which is *now* JSON.
+				$this->output( "Setting page_content_model to json..." );
+				$dbw->update(
+					'page',
+					array( 'page_content_model' => CONTENT_MODEL_JSON ),
+					array( 'page_id' => $row->page_id ),
+					__METHOD__
+				);
+				$this->output( "done.\n" );
+				wfWaitForSlaves();
+			} else {
+				// Not JSON...force it to wikitext. We need to update the
+				// revision table so that these revisions are always processed
+				// as wikitext in the future. page_content_model is already
+				// set to "wikitext".
+				$this->output( "Setting rev_content_model to wikitext..." );
+				// Grab all the ids for batching
+				$ids = $dbw->selectFieldValues(
+					'revision',
+					'rev_id',
+					array( 'rev_page' => $row->page_id ),
+					__METHOD__
+				);
+				foreach ( array_chunk( $ids, 50 ) as $chunk ) {
+					$dbw->update(
+						'revision',
+						array( 'rev_content_model' => CONTENT_MODEL_WIKITEXT ),
+						array( 'rev_page' => $row->page_id, 'rev_id' => $chunk ),
+						__METHOD__
+					);
+					wfWaitForSlaves();
+				}
+				$this->output( "done.\n" );
+			}
+		} else {
+			$this->output( "not a JSON page? Skipping\n" );
+		}
+	}
+}
+
+$maintClass = 'FixDefaultJsonContentPages';
+require_once RUN_MAINTENANCE_IF_MAIN;
-- 
2.20.1