From 9d84a9819c7147a969b39cfcf9aba01fb7921487 Mon Sep 17 00:00:00 2001
From: Rob Church <robchur@gmail.com>
Date: Wed, 25 Jan 2006 21:08:29 +0000
Subject: [PATCH] * Maintenance script to delete unused text records *
 Maintenance script to delete non-current revisions * Maintenance script to
 wipe a page and all revisions from the database

---
Review notes (v2):
 * Result arrays are initialised before use, and empty ID lists no longer
   produce "IN ( )" / "NOT IN ( )", which is not valid SQL.
 * purgeOldText.inc selects from the prefixed text table ($tbl_txt), as the
   DELETE in the same function already did.
 * nukePage.inc treats a page ID of 0 as "not found" instead of deleting
   page_id = 0.
 * nukePage.php usage shows the <title> argument; purgeOldText.php usage
   mentions --help.
 * The "<?php" openers and header comments of deleteOldRevisions.inc,
   deleteOldRevisions.php, nukePage.inc and nukePage.php were missing from
   the copy under review and have been rewritten.
 * Hunks were edited by hand, so the blob ids on the "index" lines are still
   those of v1.

 RELEASE-NOTES                      |  3 +
 maintenance/deleteOldRevisions.inc | 71 +++++++++++++++++++++++
 maintenance/deleteOldRevisions.php | 30 ++++++++++
 maintenance/nukePage.inc           | 93 ++++++++++++++++++++++++++++++
 maintenance/nukePage.php           | 30 ++++++++++
 maintenance/purgeOldText.inc       | 74 ++++++++++++++++++++++++
 maintenance/purgeOldText.php       | 30 ++++++++++
 7 files changed, 331 insertions(+)
 create mode 100644 maintenance/deleteOldRevisions.inc
 create mode 100644 maintenance/deleteOldRevisions.php
 create mode 100644 maintenance/nukePage.inc
 create mode 100644 maintenance/nukePage.php
 create mode 100644 maintenance/purgeOldText.inc
 create mode 100644 maintenance/purgeOldText.php

diff --git a/RELEASE-NOTES b/RELEASE-NOTES
index d4759ae072..ea87dc8437 100644
--- a/RELEASE-NOTES
+++ b/RELEASE-NOTES
@@ -131,6 +131,9 @@ Maintenance:
   duplicate rows with the same id field)
 * Added --conf option to command line scripts, allowing the user to specify
   a different LocalSettings.php.
+* Maintenance script to delete unused text records
+* Maintenance script to delete non-current revisions
+* Maintenance script to wipe a page and all revisions from the database
 
 i18n / Languages:
 * Partial support for Basque language (from wikipedia and meta)
diff --git a/maintenance/deleteOldRevisions.inc b/maintenance/deleteOldRevisions.inc
new file mode 100644
index 0000000000..dd48028a91
--- /dev/null
+++ b/maintenance/deleteOldRevisions.inc
@@ -0,0 +1,71 @@
+<?php
+
+/**
+ * Support functions for the deleteOldRevisions script
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'purgeOldText.inc' );
+
+/**
+ * Report, and optionally delete, every revision that is not a page's latest one.
+ *
+ * @param bool $delete Perform the deletion; when false, only the count is printed
+ */
+function DeleteOldRevisions( $delete = false ) {
+
+	# Data should come off the master, wrapped in a transaction
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->begin();
+
+	$tbl_pag = $dbw->tableName( 'page' );
+	$tbl_rev = $dbw->tableName( 'revision' );
+
+	# Get "active" revisions from the page table
+	echo( "Searching for active revisions..." );
+	$cur = array();
+	$res = $dbw->query( "SELECT page_latest FROM $tbl_pag" );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$cur[] = intval( $row->page_latest );
+	}
+	echo( "done.\n" );
+
+	# Get all revisions that aren't in this set. With no pages at all every
+	# revision is old, and "NOT IN ( )" would not be valid SQL.
+	echo( "Searching for inactive revisions..." );
+	$old = array();
+	$sql = "SELECT rev_id FROM $tbl_rev";
+	if( count( $cur ) ) {
+		$sql .= ' WHERE rev_id NOT IN ( ' . implode( ', ', $cur ) . ' )';
+	}
+	$res = $dbw->query( $sql );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$old[] = intval( $row->rev_id );
+	}
+	echo( "done.\n" );
+
+	# Inform the user of what we're going to do
+	$count = count( $old );
+	echo( "$count old revisions found.\n" );
+
+	# Delete as appropriate
+	if( $delete && $count ) {
+		echo( "Deleting..." );
+		$set = implode( ', ', $old );
+		$dbw->query( "DELETE FROM $tbl_rev WHERE rev_id IN ( $set )" );
+		echo( "done.\n" );
+	}
+
+	# This bit's done
+	# Purge redundant text records
+	$dbw->commit();
+	if( $delete ) {
+		PurgeRedundantText( true );
+	}
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/deleteOldRevisions.php b/maintenance/deleteOldRevisions.php
new file mode 100644
index 0000000000..9695a8c5b7
--- /dev/null
+++ b/maintenance/deleteOldRevisions.php
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Delete old (non-current) revisions from the database
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+$options = array( 'delete', 'help' );
+require_once( 'commandLine.inc' );
+require_once( 'deleteOldRevisions.inc' );
+
+echo( "Delete Old Revisions\n\n" );
+
+if( @$options['help'] ) {
+	ShowUsage();
+} else {
+	DeleteOldRevisions( @$options['delete'] );
+}
+
+function ShowUsage() {
+	echo( "Deletes non-current revisions from the database.\n\n" );
+	echo( "Usage: php deleteOldRevisions.php [--delete|--help]\n\n" );
+	echo( "delete : Performs the deletion\n" );
+	echo( " help : Show this usage information\n" );
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/nukePage.inc b/maintenance/nukePage.inc
new file mode 100644
index 0000000000..64017e8083
--- /dev/null
+++ b/maintenance/nukePage.inc
@@ -0,0 +1,93 @@
+<?php
+
+/**
+ * Support functions for the nukePage script
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'purgeOldText.inc' );
+
+/**
+ * Look up a page by title and, optionally, delete its page row and revisions.
+ *
+ * @param string $name Page title, passed to Title::newFromText()
+ * @param bool $delete Perform the deletion; when false, only report what was found
+ */
+function NukePage( $name, $delete = false ) {
+
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->begin();
+
+	$tbl_pag = $dbw->tableName( 'page' );
+	$tbl_rev = $dbw->tableName( 'revision' );
+
+	# Get page ID; an unusable title or an ID of 0 both mean "nothing to delete"
+	echo( "Searching for \"$name\"..." );
+	$title = Title::newFromText( $name );
+	$id = $title ? intval( $title->getArticleID() ) : 0;
+	if( $id ) {
+		$real = $title->getPrefixedText();
+		echo( "found \"$real\" with ID $id.\n" );
+
+		# Get corresponding revisions
+		echo( "Searching for revisions..." );
+		$revs = array();
+		$res = $dbw->query( "SELECT rev_id FROM $tbl_rev WHERE rev_page = $id" );
+		while( $row = $dbw->fetchObject( $res ) ) {
+			$revs[] = intval( $row->rev_id );
+		}
+		$count = count( $revs );
+		echo( "found $count.\n" );
+
+		# Delete the page itself
+		if( $delete ) {
+			echo( "Deleting page record..." );
+			$dbw->query( "DELETE FROM $tbl_pag WHERE page_id = $id" );
+			echo( "done.\n" );
+		}
+
+		$dbw->commit();
+
+		# Delete revisions as appropriate
+		if( $delete && $count ) {
+			echo( "Deleting revisions..." );
+			DeleteRevisions( $revs );
+			echo( "done.\n" );
+			PurgeRedundantText( true );
+		}
+
+	} else {
+		echo( "not found in database.\n" );
+		$dbw->commit();
+	}
+
+}
+
+/**
+ * Delete the given revision rows inside a transaction of their own.
+ *
+ * @param array $ids Revision IDs to delete; an empty list is a no-op
+ */
+function DeleteRevisions( $ids ) {
+
+	# "IN ( )" is not valid SQL, so an empty list means there is nothing to do
+	if( !count( $ids ) ) {
+		return;
+	}
+
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->begin();
+
+	$tbl_rev = $dbw->tableName( 'revision' );
+
+	$set = implode( ', ', array_map( 'intval', $ids ) );
+	$dbw->query( "DELETE FROM $tbl_rev WHERE rev_id IN ( $set )" );
+
+	$dbw->commit();
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/nukePage.php b/maintenance/nukePage.php
new file mode 100644
index 0000000000..b5c3f2831c
--- /dev/null
+++ b/maintenance/nukePage.php
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Erase a page record from the database
+ * Irreversible (can't use standard undelete) and does not update link tables
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+require_once( 'commandLine.inc' );
+require_once( 'nukePage.inc' );
+
+echo( "Erase Page Record\n\n" );
+
+if( isset( $args[0] ) ) {
+	NukePage( $args[0], true );
+} else {
+	ShowUsage();
+}
+
+/** Show script usage information */
+function ShowUsage() {
+	echo( "Remove a page record from the database.\n\n" );
+	echo( "Usage: php nukePage.php <title>\n\n" );
+	echo( " <title> : Page title; spaces escaped with underscores\n\n" );
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/purgeOldText.inc b/maintenance/purgeOldText.inc
new file mode 100644
index 0000000000..6a27550f27
--- /dev/null
+++ b/maintenance/purgeOldText.inc
@@ -0,0 +1,74 @@
+<?php
+
+/**
+ * Support functions for cleaning up redundant text records
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+/**
+ * Report, and optionally delete, text rows that no revision or archive row refers to.
+ *
+ * @param bool $delete Perform the deletion; when false, only the count is printed
+ */
+function PurgeRedundantText( $delete = false ) {
+
+	# Data should come off the master, wrapped in a transaction
+	$dbw =& wfGetDB( DB_MASTER );
+	$dbw->begin();
+
+	$tbl_arc = $dbw->tableName( 'archive' );
+	$tbl_rev = $dbw->tableName( 'revision' );
+	$tbl_txt = $dbw->tableName( 'text' );
+
+	# Get "active" text records from the revisions table
+	echo( "Searching for active text records in revisions table..." );
+	$cur = array();
+	$res = $dbw->query( "SELECT DISTINCTROW rev_text_id FROM $tbl_rev" );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$cur[] = intval( $row->rev_text_id );
+	}
+	echo( "done.\n" );
+
+	# Get "active" text records from the archive table
+	echo( "Searching for active text records in archive table..." );
+	$res = $dbw->query( "SELECT DISTINCTROW ar_text_id FROM $tbl_arc" );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$cur[] = intval( $row->ar_text_id );
+	}
+	echo( "done.\n" );
+
+	# Get the IDs of all text records not in these sets. With nothing in use
+	# every record is inactive, and "NOT IN ( )" would not be valid SQL.
+	echo( "Searching for inactive text records..." );
+	$old = array();
+	$sql = "SELECT old_id FROM $tbl_txt";
+	if( count( $cur ) ) {
+		$sql .= ' WHERE old_id NOT IN ( ' . implode( ', ', $cur ) . ' )';
+	}
+	$res = $dbw->query( $sql );
+	while( $row = $dbw->fetchObject( $res ) ) {
+		$old[] = intval( $row->old_id );
+	}
+	echo( "done.\n" );
+
+	# Inform the user of what we're going to do
+	$count = count( $old );
+	echo( "$count inactive items found.\n" );
+
+	# Delete as appropriate
+	if( $delete && $count ) {
+		echo( "Deleting..." );
+		$set = implode( ', ', $old );
+		$dbw->query( "DELETE FROM $tbl_txt WHERE old_id IN ( $set )" );
+		echo( "done.\n" );
+	}
+
+	# Done
+	$dbw->commit();
+
+}
+
+?>
\ No newline at end of file
diff --git a/maintenance/purgeOldText.php b/maintenance/purgeOldText.php
new file mode 100644
index 0000000000..e8a738ad28
--- /dev/null
+++ b/maintenance/purgeOldText.php
@@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Purge old text records from the database
+ *
+ * @package MediaWiki
+ * @subpackage Maintenance
+ * @author Rob Church <robchur@gmail.com>
+ */
+
+$options = array( 'purge', 'help' );
+require_once( 'commandLine.inc' );
+require_once( 'purgeOldText.inc' );
+
+echo( "Purge Old Text\n\n" );
+
+if( @$options['help'] ) {
+	ShowUsage();
+} else {
+	PurgeRedundantText( @$options['purge'] );
+}
+
+function ShowUsage() {
+	echo( "Prunes unused text records from the database.\n\n" );
+	echo( "Usage: php purgeOldText.php [--purge|--help]\n\n" );
+	echo( "purge : Performs the deletion\n" );
+	echo( " help : Show this usage information\n" );
+}
+
+?>
\ No newline at end of file
-- 
2.20.1