* Maintenance script to delete unused text records
author Rob Church <robchurch@users.mediawiki.org>
Wed, 25 Jan 2006 21:08:29 +0000 (21:08 +0000)
committer Rob Church <robchurch@users.mediawiki.org>
Wed, 25 Jan 2006 21:08:29 +0000 (21:08 +0000)
* Maintenance script to delete non-current revisions
* Maintenance script to wipe a page and all revisions from the database

RELEASE-NOTES
maintenance/deleteOldRevisions.inc [new file with mode: 0644]
maintenance/deleteOldRevisions.php [new file with mode: 0644]
maintenance/nukePage.inc [new file with mode: 0644]
maintenance/nukePage.php [new file with mode: 0644]
maintenance/purgeOldText.inc [new file with mode: 0644]
maintenance/purgeOldText.php [new file with mode: 0644]

index d4759ae..ea87dc8 100644 (file)
@@ -131,6 +131,9 @@ Maintenance:
   duplicate rows with the same id field)
 * Added --conf option to command line scripts, allowing the user to specify a 
   different LocalSettings.php.
+* Maintenance script to delete unused text records
+* Maintenance script to delete non-current revisions
+* Maintenance script to wipe a page and all revisions from the database
 
 i18n / Languages:
 * Partial support for Basque language (from wikipedia and meta)
diff --git a/maintenance/deleteOldRevisions.inc b/maintenance/deleteOldRevisions.inc
new file mode 100644 (file)
index 0000000..dd48028
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+
+/**
+ * Support functions for the deleteOldRevisions script
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+require_once( 'purgeOldText.inc' );
+
+/**
+ * Find, and optionally delete, every revision that is not the current
+ * revision of a page.
+ *
+ * A revision counts as "current" when its ID appears in page.page_latest;
+ * every other row of the revision table is treated as old. If the page
+ * table yields no IDs at all, every revision is therefore reported as old.
+ * Progress messages are echoed to standard output.
+ *
+ * @param bool $delete When true, delete the old revisions and then purge
+ *                     text records via PurgeRedundantText(); when false,
+ *                     only report how many old revisions were found
+ */
+function DeleteOldRevisions( $delete = false ) {
+
+       # Data should come off the master, wrapped in a transaction
+       $dbw =& wfGetDB( DB_MASTER );
+       $dbw->begin();
+
+       $tbl_pag = $dbw->tableName( 'page' );
+       $tbl_rev = $dbw->tableName( 'revision' );
+
+       # Get "active" revisions from the page table
+       # (start from an empty list so an empty result set is still an array)
+       echo( "Searching for active revisions..." );
+       $cur = array();
+       $res = $dbw->query( "SELECT page_latest FROM $tbl_pag" );
+       while( $row = $dbw->fetchObject( $res ) ) {
+               $cur[] = $row->page_latest;
+       }
+       echo( "done.\n" );
+
+       # Get all revisions that aren't in this set. An empty IN list is not
+       # valid SQL, so the filter is only appended when the set has members.
+       echo( "Searching for inactive revisions..." );
+       $old = array();
+       $sql = "SELECT rev_id FROM $tbl_rev";
+       if( count( $cur ) ) {
+               $set = implode( ', ', $cur );
+               $sql .= " WHERE rev_id NOT IN ( $set )";
+       }
+       $res = $dbw->query( $sql );
+       while( $row = $dbw->fetchObject( $res ) ) {
+               $old[] = $row->rev_id;
+       }
+       echo( "done.\n" );
+
+       # Inform the user of what we're going to do
+       $count = count( $old );
+       echo( "$count old revisions found.\n" );
+
+       # Delete as appropriate
+       if( $delete && $count ) {
+               echo( "Deleting..." );
+               $set = implode( ', ', $old );
+               $dbw->query( "DELETE FROM $tbl_rev WHERE rev_id IN ( $set )" );
+               echo( "done.\n" );
+       }
+
+       # This bit's done
+       $dbw->commit();
+
+       # Purge redundant text records (only when we were asked to delete)
+       if( $delete ) {
+               PurgeRedundantText( true );
+       }
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/deleteOldRevisions.php b/maintenance/deleteOldRevisions.php
new file mode 100644 (file)
index 0000000..9695a8c
--- /dev/null
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Delete old (non-current) revisions from the database
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+# Switches this script understands.
+# NOTE(review): $options is read again after commandLine.inc is included;
+# presumably that file fills it from the command line - confirm.
+$options = array( 'delete', 'help' );
+require_once( 'commandLine.inc' );
+require_once( 'deleteOldRevisions.inc' );
+
+echo( "Delete Old Revisions\n\n" );
+
+# Run the job unless help was requested; the 'delete' switch (if any) is
+# handed through unchanged and decides whether rows are actually removed.
+if( !@$options['help'] ) {
+       DeleteOldRevisions( @$options['delete'] );
+} else {
+       ShowUsage();
+}
+
+/** Print the command-line usage summary for deleteOldRevisions.php */
+function ShowUsage() {
+       # Assemble the whole message first, then emit it in one go
+       $usage  = "Deletes non-current revisions from the database.\n\n";
+       $usage .= "Usage: php deleteOldRevisions.php [--delete|--help]\n\n";
+       $usage .= "delete : Performs the deletion\n";
+       $usage .= "  help : Show this usage information\n";
+       echo( $usage );
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/nukePage.inc b/maintenance/nukePage.inc
new file mode 100644 (file)
index 0000000..64017e8
--- /dev/null
@@ -0,0 +1,76 @@
+<?php
+
+/**
+ * Support functions for the nukePage script
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'purgeOldText.inc' );
+
+/**
+ * Look up a page by title and, optionally, erase its page row together
+ * with all of its revisions, then purge redundant text records.
+ *
+ * Progress messages are echoed to standard output. A title that cannot be
+ * parsed, or that parses but has no article ID (ID 0), is reported as
+ * "not found in database" and nothing is deleted.
+ *
+ * Note that the page row and the revision rows are removed in two separate
+ * transactions: the first is committed here, the second is opened by
+ * DeleteRevisions().
+ *
+ * @param string $name   Title text of the page to remove
+ * @param bool   $delete When true, perform the deletion; when false, only
+ *                       report the page and revision count that were found
+ */
+function NukePage( $name, $delete = false ) {
+
+       $dbw =& wfGetDB( DB_MASTER );
+       $dbw->begin();
+
+       $tbl_pag = $dbw->tableName( 'page' );
+       $tbl_rev = $dbw->tableName( 'revision' );
+
+       # Get page ID; an article ID of 0 means there is no such page, so it
+       # must not be used to select or delete rows
+       echo( "Searching for \"$name\"..." );
+       $title = Title::newFromText( $name );
+       $id    = $title ? $title->getArticleID() : 0;
+       if( $id ) {
+               $real = $title->getPrefixedText();
+               echo( "found \"$real\" with ID $id.\n" );
+
+               # Get corresponding revisions
+               # (start from an empty list so a page without revisions counts as 0)
+               echo( "Searching for revisions..." );
+               $revs = array();
+               $res = $dbw->query( "SELECT rev_id FROM $tbl_rev WHERE rev_page = $id" );
+               while( $row = $dbw->fetchObject( $res ) ) {
+                       $revs[] = $row->rev_id;
+               }
+               $count = count( $revs );
+               echo( "found $count.\n" );
+
+               # Delete the page itself
+               if( $delete ) {
+                       echo( "Deleting page record..." );
+                       $dbw->query( "DELETE FROM $tbl_pag WHERE page_id = $id" );
+                       echo( "done.\n" );
+               }
+
+               $dbw->commit();
+
+               # Delete revisions as appropriate
+               if( $delete && $count ) {
+                       echo( "Deleting revisions..." );
+                       DeleteRevisions( $revs );
+                       echo( "done.\n" );
+                       PurgeRedundantText( true );
+               }
+
+       } else {
+               echo( "not found in database.\n" );
+               $dbw->commit();
+       }
+
+}
+
+/**
+ * Delete the given revisions from the revision table in one transaction.
+ *
+ * @param array $ids Revision IDs to delete; an empty list is a no-op
+ */
+function DeleteRevisions( $ids ) {
+
+       # Nothing to do - and "IN (  )" with no members is not valid SQL
+       if( !is_array( $ids ) || !count( $ids ) ) {
+               return;
+       }
+
+       $dbw =& wfGetDB( DB_MASTER );
+       $dbw->begin();
+
+       $tbl_rev = $dbw->tableName( 'revision' );
+
+       $set = implode( ', ', $ids );
+       $dbw->query( "DELETE FROM $tbl_rev WHERE rev_id IN ( $set )" );
+
+       $dbw->commit();
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/nukePage.php b/maintenance/nukePage.php
new file mode 100644 (file)
index 0000000..b5c3f28
--- /dev/null
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Erase a page record from the database
+ * Irreversible (can't use standard undelete) and does not update link tables
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'commandLine.inc' );
+require_once( 'nukePage.inc' );
+
+echo( "Erase Page Record\n\n" );
+
+# The first positional argument is the page title; without one there is
+# nothing to erase, so show the usage text instead. The deletion flag is
+# always passed as true - this script offers no report-only switch.
+# NOTE(review): $args is not defined in this file; presumably it is set up
+# by commandLine.inc - confirm.
+if( !isset( $args[0] ) ) {
+       ShowUsage();
+} else {
+       NukePage( $args[0], true );
+}
+
+/** Print the command-line usage summary for nukePage.php */
+function ShowUsage() {
+       # Collect the message pieces, then emit them as a single string
+       $lines = array(
+               "Remove a page record from the database.\n\n",
+               "Usage: php nukePage.php <title>\n\n",
+               " <title> : Page title; spaces escaped with underscores\n\n"
+       );
+       echo( implode( '', $lines ) );
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/purgeOldText.inc b/maintenance/purgeOldText.inc
new file mode 100644 (file)
index 0000000..6a27550
--- /dev/null
@@ -0,0 +1,63 @@
+<?php
+
+/**
+ * Support functions for cleaning up redundant text records
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+/**
+ * Find, and optionally delete, text records that are referenced by neither
+ * the revision table (rev_text_id) nor the archive table (ar_text_id).
+ *
+ * If neither table references any text record, every row of the text table
+ * is reported as inactive. Progress messages are echoed to standard output.
+ *
+ * @param bool $delete When true, delete the inactive text records; when
+ *                     false, only report how many were found
+ */
+function PurgeRedundantText( $delete = false ) {
+
+       # Data should come off the master, wrapped in a transaction
+       $dbw =& wfGetDB( DB_MASTER );
+       $dbw->begin();
+
+       $tbl_arc = $dbw->tableName( 'archive' );
+       $tbl_rev = $dbw->tableName( 'revision' );
+       $tbl_txt = $dbw->tableName( 'text' );
+
+       # Get "active" text records from the revisions table
+       # (NULL IDs are skipped: they would leave an empty element in the
+       # IN list built below and break the query)
+       echo( "Searching for active text records in revisions table..." );
+       $cur = array();
+       $res = $dbw->query( "SELECT DISTINCTROW rev_text_id FROM $tbl_rev" );
+       while( $row = $dbw->fetchObject( $res ) ) {
+               if( !is_null( $row->rev_text_id ) ) {
+                       $cur[] = $row->rev_text_id;
+               }
+       }
+       echo( "done.\n" );
+
+       # Get "active" text records from the archive table
+       # NOTE(review): ar_text_id is presumably NULL for some archive rows
+       # (e.g. rows that carry their text inline) - confirm against the schema
+       echo( "Searching for active text records in archive table..." );
+       $res = $dbw->query( "SELECT DISTINCTROW ar_text_id FROM $tbl_arc" );
+       while( $row = $dbw->fetchObject( $res ) ) {
+               if( !is_null( $row->ar_text_id ) ) {
+                       $cur[] = $row->ar_text_id;
+               }
+       }
+       echo( "done.\n" );
+
+       # Get the IDs of all text records not in these sets. Use the resolved
+       # table name (not a bare "text") so the query matches the table the
+       # DELETE below operates on, and only add the filter when the set has
+       # members, because an empty IN list is not valid SQL.
+       echo( "Searching for inactive text records..." );
+       $old = array();
+       $sql = "SELECT old_id FROM $tbl_txt";
+       if( count( $cur ) ) {
+               $set = implode( ', ', $cur );
+               $sql .= " WHERE old_id NOT IN ( $set )";
+       }
+       $res = $dbw->query( $sql );
+       while( $row = $dbw->fetchObject( $res ) ) {
+               $old[] = $row->old_id;
+       }
+       echo( "done.\n" );
+
+       # Inform the user of what we're going to do
+       $count = count( $old );
+       echo( "$count inactive items found.\n" );
+
+       # Delete as appropriate
+       if( $delete && $count ) {
+               echo( "Deleting..." );
+               $set = implode( ', ', $old );
+               $dbw->query( "DELETE FROM $tbl_txt WHERE old_id IN ( $set )" );
+               echo( "done.\n" );
+       }
+
+       # Done
+       $dbw->commit();
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/purgeOldText.php b/maintenance/purgeOldText.php
new file mode 100644 (file)
index 0000000..e8a738a
--- /dev/null
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Purge old text records from the database
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+# Switches this script understands.
+# NOTE(review): $options is read again after commandLine.inc is included;
+# presumably that file fills it from the command line - confirm.
+$options = array( 'purge', 'help' );
+require_once( 'commandLine.inc' );
+require_once( 'purgeOldText.inc' );
+
+echo( "Purge Old Text\n\n" );
+
+# Run the job unless help was requested; the 'purge' switch (if any) is
+# handed through unchanged and decides whether rows are actually removed.
+if( !@$options['help'] ) {
+       PurgeRedundantText( @$options['purge'] );
+} else {
+       ShowUsage();
+}
+
+/**
+ * Show script usage information.
+ *
+ * The synopsis lists both switches the script handles (--purge and --help).
+ */
+function ShowUsage() {
+       echo( "Prunes unused text records from the database.\n\n" );
+       echo( "Usage: php purgeOldText.php [--purge|--help]\n\n" );
+       echo( "purge : Performs the deletion\n" );
+       echo( " help : Show this usage information\n" );
+}
+
+?>
\ No newline at end of file