Start support for compressing entries in the old table with gzdeflate.
author Brion Vibber <brion@users.mediawiki.org>
Sat, 3 Jan 2004 12:32:32 +0000 (12:32 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Sat, 3 Jan 2004 12:32:32 +0000 (12:32 +0000)
Adds compressOld.php to batch-compress existing entries.
Article::getRevisionText will do decompression on a given row if
necessary (marked by old_flags). Requires zlib.

includes/Article.php
includes/DifferenceEngine.php
includes/SpecialExport.php
includes/SpecialUndelete.php
maintenance/compressOld.inc [new file with mode: 0644]
maintenance/compressOld.php [new file with mode: 0644]

index 4d1bca9..0c7ba47 100644 (file)
@@ -36,6 +36,20 @@ class Article {
                $this->mTouched = "19700101000000";
        }
 
+       /* static */ function getRevisionText( $row, $prefix = "old_" ) {
+               # Fetch the text of a revision row, inflating it first when the
+               # row's flags field contains "gzip". Rows get flagged that way
+               # by maintenance/compressOld.php.
+               #   $row    - result object holding "{$prefix}text" and,
+               #             optionally, "{$prefix}flags"
+               #   $prefix - column name prefix, e.g. "old_" or "ar_"
+               # Returns the (inflated) text, or false if the row carries
+               # no text field.
+               $textField = "{$prefix}text";
+               $flagsField = "{$prefix}flags";
+
+               $compressed = isset( $row->$flagsField )
+                       && strpos( $row->$flagsField, "gzip" ) !== false;
+               if( $compressed ) {
+                       return gzinflate( $row->$textField );
+               }
+               return isset( $row->$textField ) ? $row->$textField : false;
+       }
+       
        # Note that getContent/loadContent may follow redirects if
        # not told otherwise, and so may cause a change to mTitle.
 
@@ -172,13 +186,13 @@ class Article {
                        $this->mTitle->mRestrictionsLoaded = true;
                        wfFreeResult( $res );
                } else { # oldid set, retrieve historical version
-                       $sql = "SELECT old_text,old_timestamp,old_user FROM old " .
+                       $sql = "SELECT old_text,old_timestamp,old_user,old_flags FROM old " .
                          "WHERE old_id={$oldid}";
                        $res = wfQuery( $sql, DB_READ, $fname );
                        if ( 0 == wfNumRows( $res ) ) { return; }
 
                        $s = wfFetchObject( $res );
-                       $this->mContent = $s->old_text;
+                       $this->mContent = Article::getRevisionText( $s );
                        $this->mUser = $s->old_user;
                        $this->mCounter = 0;
                        $this->mTimestamp = $s->old_timestamp;
@@ -671,7 +685,7 @@ class Article {
                $ns = $this->mTitle->getNamespace();
                $title = $this->mTitle->getDBkey();
                $etitle = wfStrencode( $title );
-               $sql = "SELECT old_text FROM old WHERE old_namespace=$ns and old_title='$etitle' ORDER BY inverse_timestamp LIMIT 1";
+               $sql = "SELECT old_text,old_flags FROM old WHERE old_namespace=$ns and old_title='$etitle' ORDER BY inverse_timestamp LIMIT 1";
                $res = wfQuery( $sql, DB_READ, $fname );
                if( ($old=wfFetchObject($res)) && !$wpConfirm ) {
                        $skin=$wgUser->getSkin();
@@ -690,7 +704,7 @@ class Article {
                                $text=$s->cur_text;
                        } else {
                                if($old) {
-                                       $text=$old->old_text;
+                                       $text = Article::getRevisionText( $old );
                                        $blanked=1;
                                }
                                
@@ -943,7 +957,7 @@ class Article {
                }
                
                # Get the last edit not by this guy
-               $sql = "SELECT old_text,old_user,old_user_text,old_timestamp
+               $sql = "SELECT old_text,old_user,old_user_text,old_timestamp,old_flags
                FROM old USE INDEX (name_title_timestamp)
                WHERE old_namespace={$n} AND old_title='{$tt}'
                AND (old_user <> {$uid} OR old_user_text <> '{$ut}')
@@ -969,7 +983,7 @@ class Article {
                $wgOut->setPagetitle( wfMsg( "actioncomplete" ) );
                $wgOut->setRobotpolicy( "noindex,nofollow" );
                $wgOut->addHTML( "<h2>" . $newcomment . "</h2>\n<hr>\n" );
-               $this->updateArticle( $s->old_text, $newcomment, 1, $this->mTitle->userIsWatching(), "", $bot );
+               $this->updateArticle( Article::getRevisionText( $s ), $newcomment, 1, $this->mTitle->userIsWatching(), "", $bot );
 
                global $wgEnablePersistentLC;
                if ( $wgEnablePersistentLC ) {
index 712690c..3d26279 100644 (file)
@@ -81,33 +81,33 @@ cellpadding=0 cellspacing='4px'><tr>
                        $s = wfFetchObject( $res );
                        $this->mNewtext = $s->cur_text;
                } else {
-                       $sql = "SELECT old_timestamp,old_text FROM old WHERE " .
+                       $sql = "SELECT old_timestamp,old_text,old_flags FROM old WHERE " .
                          "old_id={$this->mNewid}";
 
                        $res = wfQuery( $sql, DB_READ, $fname );
                        if ( 0 == wfNumRows( $res ) ) { return false; }
 
                        $s = wfFetchObject( $res );
-                       $this->mNewtext = $s->old_text;
+                       $this->mNewtext = Article::getRevisionText( $s );
 
                        $t = $wgLang->timeanddate( $s->old_timestamp, true );
                        $this->mNewtitle = wfMsg( "revisionasof", $t );
                }
                if ( 0 == $this->mOldid ) {
-                       $sql = "SELECT old_timestamp,old_text FROM old USE INDEX (name_title_timestamp) WHERE " .
+                       $sql = "SELECT old_timestamp,old_text,old_flags FROM old USE INDEX (name_title_timestamp) WHERE " .
                          "old_namespace=" . $wgTitle->getNamespace() . " AND " .
                          "old_title='" . wfStrencode( $wgTitle->getDBkey() ) .
                          "' ORDER BY inverse_timestamp LIMIT 1";
                        $res = wfQuery( $sql, DB_READ, $fname );
                } else {
-                       $sql = "SELECT old_timestamp,old_text FROM old WHERE " .
+                       $sql = "SELECT old_timestamp,old_text,old_flags FROM old WHERE " .
                          "old_id={$this->mOldid}";
                        $res = wfQuery( $sql, DB_READ, $fname );
                }
                if ( 0 == wfNumRows( $res ) ) { return false; }
 
                $s = wfFetchObject( $res );
-               $this->mOldtext = $s->old_text;
+               $this->mOldtext = Article::getRevisionText( $s );
 
                $t = $wgLang->timeanddate( $s->old_timestamp, true );
                $this->mOldtitle = wfMsg( "revisionasof", $t );
index 7985b4b..4ce8d3c 100644 (file)
@@ -66,7 +66,7 @@ function page2xml( $page, $curonly, $full = false ) {
                }
                if( !$curonly ) {
                        $sql = "SELECT old_id as id,old_timestamp as timestamp, old_user as user, old_user_text as user_text," .
-                               "old_comment as comment, old_text as text FROM old " .
+                               "old_comment as comment, old_text as text, old_flags as flags FROM old " .
                                "WHERE old_namespace=$ns AND old_title='$t' ORDER BY old_timestamp";
                        $res = wfQuery( $sql, DB_READ );
 
@@ -103,7 +103,7 @@ function revision2xml( $s, $full, $cur ) {
                $c = htmlspecialchars( $s->comment );
                $xml .= "      <comment>$c</comment>\n";
        }
-       $t = htmlspecialchars( $s->text );
+       $t = htmlspecialchars( Article::getRevisionText( $s, "" ) );
        $xml .= "      <text>$t</text>\n";
        $xml .= "    </revision>\n";
        return $xml;
index 1654bfe..549e30f 100644 (file)
@@ -48,14 +48,14 @@ function wfSpecialUndelete( $par )
     
     if(!preg_match("/[0-9]{14}/",$timestamp)) return 0;
     
-    $sql = "SELECT ar_text FROM archive WHERE ar_namespace={$namespace} AND ar_title=\"{$title}\" AND ar_timestamp={$timestamp}";
+    $sql = "SELECT ar_text,ar_flags FROM archive WHERE ar_namespace={$namespace} AND ar_title=\"{$title}\" AND ar_timestamp={$timestamp}";
     $ret = wfQuery( $sql, DB_READ );
     $row = wfFetchObject( $ret );
     
     $wgOut->setPagetitle( wfMsg( "undeletepage" ) );
     $wgOut->addWikiText( "(" . wfMsg( "undeleterevision", $wgLang->date($timestamp, true) )
-      . ")\n<hr>\n" . $row->ar_text );
-    
+      . ")\n<hr>\n" . Article::getRevisionText( $row, "ar_" ) );
+
        return 0;
 }
 
@@ -127,7 +127,7 @@ function wfSpecialUndelete( $par )
 
                if( $row->count == 0) {
                        # Have to create new article...
-                       $sql = "SELECT ar_text,ar_timestamp FROM archive WHERE ar_namespace={$namespace} AND ar_title='{$t}' ORDER BY ar_timestamp DESC LIMIT 1";
+                       $sql = "SELECT ar_text,ar_timestamp,ar_flags FROM archive WHERE ar_namespace={$namespace} AND ar_title='{$t}' ORDER BY ar_timestamp DESC LIMIT 1";
                        $res = wfQuery( $sql, DB_READ, $fname );
                        $s = wfFetchObject( $res );
                        $max = $s->ar_timestamp;
diff --git a/maintenance/compressOld.inc b/maintenance/compressOld.inc
new file mode 100644 (file)
index 0000000..22317c6
--- /dev/null
@@ -0,0 +1,37 @@
+<?php
+
+include_once( "Article.php" );
+
+function compressOldPages( $start = 0 ) {
+       # Walk the old table in old_id order, $chunksize rows per query,
+       # handing every row to compressPage().
+       #   $start - lowest old_id to process; pass the last number printed
+       #            by an aborted run to pick up where it stopped.
+       $chunksize = 50;
+       $start = intval( $start ); # interpolated into the SQL below
+       print "Starting from old_id $start...\n";
+       do {
+               $sql = "SELECT old_id,old_flags,old_namespace,old_title,old_text FROM old WHERE old_id>=$start ORDER BY old_id LIMIT $chunksize";
+               $res = wfQuery( $sql, DB_READ, "compressOldPages" );
+               if( wfNumRows( $res ) == 0 ) {
+                       wfFreeResult( $res );
+                       break;
+               }
+               while( $row = wfFetchObject( $res ) ) {
+                       # print "  {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n";
+                       compressPage( $row );
+                       # Continue after the last row actually fetched. old_id
+                       # values may have gaps, so stepping by $chunksize would
+                       # fetch rows from this chunk again on the next pass.
+                       $start = $row->old_id + 1;
+               }
+               wfFreeResult( $res );
+               print "$start...\n";
+       } while( true );
+}
+
+function compressPage( $row ) {
+       # Deflate the text of one row of the old table in place and append
+       # "gzip" to its old_flags so readers know to inflate it.
+       #   $row - result object with old_id, old_flags and old_text
+       # Returns false if the row was skipped (already flagged as gzip, or
+       # compression failed); otherwise whatever wfQuery() returns for the
+       # UPDATE.
+       if( false !== strpos( $row->old_flags, "gzip" ) ) {
+               print "Already compressed row {$row->old_id}?\n";
+               return false;
+       }
+       $deflated = gzdeflate( $row->old_text );
+       if( false === $deflated ) {
+               # Never write a failed result back: it would replace the
+               # revision text with an empty string flagged as gzip.
+               print "Failed to compress row {$row->old_id}, leaving it untouched.\n";
+               return false;
+       }
+       $flags = wfStrencode( $row->old_flags ? "{$row->old_flags},gzip" : "gzip" );
+       $compress = wfStrencode( $deflated );
+       $id = intval( $row->old_id );
+
+       $sql = "UPDATE old SET old_flags='$flags', old_text='$compress' WHERE old_id=$id LIMIT 1";
+       return wfQuery( $sql, DB_WRITE, 'compressPage' );
+}
+
+?>
diff --git a/maintenance/compressOld.php b/maintenance/compressOld.php
new file mode 100644 (file)
index 0000000..0081fb5
--- /dev/null
@@ -0,0 +1,49 @@
+<?php
+
+# Batch-compress the text of existing entries in the old table with
+# gzdeflate. Requires zlib support in PHP. All paths below are relative,
+# so run this from the maintenance directory. An optional first argument
+# gives the old_id to start from.
+
+if ( ! is_readable( "../LocalSettings.php" ) ) {
+       print "A copy of your installation's LocalSettings.php\n" .
+         "must exist in the source directory.\n";
+       exit( 1 );
+}
+
+$wgCommandLineMode = true;
+$DP = "../includes";
+include_once( "../LocalSettings.php" );
+include_once( "../AdminSettings.php" );
+
+# NOTE(review): $IP is not set in this script; presumably LocalSettings.php
+# defines it - confirm.
+$sep = strchr( $include_path = ini_get( "include_path" ), ";" ) ? ";" : ":";
+ini_set( "include_path", "$IP$sep$include_path" );
+
+include_once( "Setup.php" );
+include_once( "./compressOld.inc" );
+$wgTitle = Title::newFromText( "Compress old pages script" );
+set_time_limit(0);
+
+# Switch to the admin credentials before compressOldPages() runs its
+# UPDATE queries.
+$wgDBuser                      = $wgDBadminuser;
+$wgDBpassword          = $wgDBadminpassword;
+
+if( !function_exists( "gzdeflate" ) ) {
+       print "You must enable zlib support in PHP to compress old revisions!\n";
+       print "Please see http://www.php.net/manual/en/ref.zlib.php\n\n";
+       exit( 1 );
+}
+
+print "Depending on the size of your database this may take a while!\n";
+print "If you abort the script while it's running it shouldn't harm anything,\n";
+print "but if you haven't backed up your data, you SHOULD abort now!\n\n";
+print "Press control-c to abort first (will proceed automatically in 5 seconds)\n";
+sleep(5);
+
+# Optional first argument: old_id to start from.
+$n = 0;
+if( !empty( $argv[1] ) ) {
+       $n = intval( $argv[1] );
+}
+compressOldPages( $n );
+
+print "Done.\n";
+exit();
+
+?>