From 57d8f4b73798dc078d3b913d39e3bf8af8ea1ac8 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 3 Jan 2004 12:32:32 +0000 Subject: [PATCH] Start support for compressing entries in the old table with gzdeflate. Adds compressOld.php to batch-compress existing entries. Article::getRevisionText will do decompression on a given row if necessary (marked by old_flags). Requires zlib. --- includes/Article.php | 26 ++++++++++++++----- includes/DifferenceEngine.php | 10 +++---- includes/SpecialExport.php | 4 +-- includes/SpecialUndelete.php | 8 +++--- maintenance/compressOld.inc | 37 ++++++++++++++++++++++++++ maintenance/compressOld.php | 49 +++++++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 17 deletions(-) create mode 100644 maintenance/compressOld.inc create mode 100644 maintenance/compressOld.php diff --git a/includes/Article.php b/includes/Article.php index 4d1bca9198..0c7ba47f06 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -36,6 +36,20 @@ class Article { $this->mTouched = "19700101000000"; } + /* static */ function getRevisionText( $row, $prefix = "old_" ) { + # Deal with optional compression of archived pages. + # This can be done periodically via maintenance/compressOld.php + $text = $prefix . "text"; + $flags = $prefix . "flags"; + if( isset( $row->$flags ) && (false !== strpos( $row->$flags, "gzip" ) ) ) { + return gzinflate( $row->$text ); + } + if( isset( $row->$text ) ) { + return $row->$text; + } + return false; + } + # Note that getContent/loadContent may follow redirects if # not told otherwise, and so may cause a change to mTitle. @@ -172,13 +186,13 @@ class Article { $this->mTitle->mRestrictionsLoaded = true; wfFreeResult( $res ); } else { # oldid set, retrieve historical version - $sql = "SELECT old_text,old_timestamp,old_user FROM old " . + $sql = "SELECT old_text,old_timestamp,old_user,old_flags FROM old " . "WHERE old_id={$oldid}"; $res = wfQuery( $sql, DB_READ, $fname ); if ( 0 == wfNumRows( $res ) ) { return; } $s = wfFetchObject( $res ); - $this->mContent = $s->old_text; + $this->mContent = Article::getRevisionText( $s ); $this->mUser = $s->old_user; $this->mCounter = 0; $this->mTimestamp = $s->old_timestamp; @@ -671,7 +685,7 @@ class Article { $ns = $this->mTitle->getNamespace(); $title = $this->mTitle->getDBkey(); $etitle = wfStrencode( $title ); - $sql = "SELECT old_text FROM old WHERE old_namespace=$ns and old_title='$etitle' ORDER BY inverse_timestamp LIMIT 1"; + $sql = "SELECT old_text,old_flags FROM old WHERE old_namespace=$ns and old_title='$etitle' ORDER BY inverse_timestamp LIMIT 1"; $res = wfQuery( $sql, DB_READ, $fname ); if( ($old=wfFetchObject($res)) && !$wpConfirm ) { $skin=$wgUser->getSkin(); @@ -690,7 +704,7 @@ class Article { $text=$s->cur_text; } else { if($old) { - $text=$old->old_text; + $text = Article::getRevisionText( $old ); $blanked=1; } @@ -943,7 +957,7 @@ class Article { } # Get the last edit not by this guy - $sql = "SELECT old_text,old_user,old_user_text,old_timestamp + $sql = "SELECT old_text,old_user,old_user_text,old_timestamp,old_flags FROM old USE INDEX (name_title_timestamp) WHERE old_namespace={$n} AND old_title='{$tt}' AND (old_user <> {$uid} OR old_user_text <> '{$ut}') @@ -969,7 +983,7 @@ class Article { $wgOut->setPagetitle( wfMsg( "actioncomplete" ) ); $wgOut->setRobotpolicy( "noindex,nofollow" ); $wgOut->addHTML( "

" . $newcomment . "

\n
\n" ); - $this->updateArticle( $s->old_text, $newcomment, 1, $this->mTitle->userIsWatching(), "", $bot ); + $this->updateArticle( Article::getRevisionText( $s ), $newcomment, 1, $this->mTitle->userIsWatching(), "", $bot ); global $wgEnablePersistentLC; if ( $wgEnablePersistentLC ) { diff --git a/includes/DifferenceEngine.php b/includes/DifferenceEngine.php index 712690c5f2..3d26279767 100644 --- a/includes/DifferenceEngine.php +++ b/includes/DifferenceEngine.php @@ -81,33 +81,33 @@ cellpadding=0 cellspacing='4px'> $s = wfFetchObject( $res ); $this->mNewtext = $s->cur_text; } else { - $sql = "SELECT old_timestamp,old_text FROM old WHERE " . + $sql = "SELECT old_timestamp,old_text,old_flags FROM old WHERE " . "old_id={$this->mNewid}"; $res = wfQuery( $sql, DB_READ, $fname ); if ( 0 == wfNumRows( $res ) ) { return false; } $s = wfFetchObject( $res ); - $this->mNewtext = $s->old_text; + $this->mNewtext = Article::getRevisionText( $s ); $t = $wgLang->timeanddate( $s->old_timestamp, true ); $this->mNewtitle = wfMsg( "revisionasof", $t ); } if ( 0 == $this->mOldid ) { - $sql = "SELECT old_timestamp,old_text FROM old USE INDEX (name_title_timestamp) WHERE " . + $sql = "SELECT old_timestamp,old_text,old_flags FROM old USE INDEX (name_title_timestamp) WHERE " . "old_namespace=" . $wgTitle->getNamespace() . " AND " . "old_title='" . wfStrencode( $wgTitle->getDBkey() ) . "' ORDER BY inverse_timestamp LIMIT 1"; $res = wfQuery( $sql, DB_READ, $fname ); } else { - $sql = "SELECT old_timestamp,old_text FROM old WHERE " . + $sql = "SELECT old_timestamp,old_text,old_flags FROM old WHERE " . "old_id={$this->mOldid}"; $res = wfQuery( $sql, DB_READ, $fname ); } if ( 0 == wfNumRows( $res ) ) { return false; } $s = wfFetchObject( $res ); - $this->mOldtext = $s->old_text; + $this->mOldtext = Article::getRevisionText( $s ); $t = $wgLang->timeanddate( $s->old_timestamp, true ); $this->mOldtitle = wfMsg( "revisionasof", $t ); diff --git a/includes/SpecialExport.php b/includes/SpecialExport.php index 7985b4bcfc..4ce8d3c263 100644 --- a/includes/SpecialExport.php +++ b/includes/SpecialExport.php @@ -66,7 +66,7 @@ function page2xml( $page, $curonly, $full = false ) { } if( !$curonly ) { $sql = "SELECT old_id as id,old_timestamp as timestamp, old_user as user, old_user_text as user_text," . - "old_comment as comment, old_text as text FROM old " . + "old_comment as comment, old_text as text, old_flags as flags FROM old " . "WHERE old_namespace=$ns AND old_title='$t' ORDER BY old_timestamp"; $res = wfQuery( $sql, DB_READ ); @@ -103,7 +103,7 @@ function revision2xml( $s, $full, $cur ) { $c = htmlspecialchars( $s->comment ); $xml .= " $c\n"; } - $t = htmlspecialchars( $s->text ); + $t = htmlspecialchars( Article::getRevisionText( $s, "" ) ); $xml .= " $t\n"; $xml .= " \n"; return $xml; diff --git a/includes/SpecialUndelete.php b/includes/SpecialUndelete.php index 1654bfe3a4..549e30f1ff 100644 --- a/includes/SpecialUndelete.php +++ b/includes/SpecialUndelete.php @@ -48,14 +48,14 @@ function wfSpecialUndelete( $par ) if(!preg_match("/[0-9]{14}/",$timestamp)) return 0; - $sql = "SELECT ar_text FROM archive WHERE ar_namespace={$namespace} AND ar_title=\"{$title}\" AND ar_timestamp={$timestamp}"; + $sql = "SELECT ar_text,ar_flags FROM archive WHERE ar_namespace={$namespace} AND ar_title=\"{$title}\" AND ar_timestamp={$timestamp}"; $ret = wfQuery( $sql, DB_READ ); $row = wfFetchObject( $ret ); $wgOut->setPagetitle( wfMsg( "undeletepage" ) ); $wgOut->addWikiText( "(" . wfMsg( "undeleterevision", $wgLang->date($timestamp, true) ) - . ")\n
\n" . $row->ar_text ); - + . ")\n
\n" . Article::getRevisionText( $row, "ar_" ) ); + return 0; } @@ -127,7 +127,7 @@ function wfSpecialUndelete( $par ) if( $row->count == 0) { # Have to create new article... - $sql = "SELECT ar_text,ar_timestamp FROM archive WHERE ar_namespace={$namespace} AND ar_title='{$t}' ORDER BY ar_timestamp DESC LIMIT 1"; + $sql = "SELECT ar_text,ar_timestamp,ar_flags FROM archive WHERE ar_namespace={$namespace} AND ar_title='{$t}' ORDER BY ar_timestamp DESC LIMIT 1"; $res = wfQuery( $sql, DB_READ, $fname ); $s = wfFetchObject( $res ); $max = $s->ar_timestamp; diff --git a/maintenance/compressOld.inc b/maintenance/compressOld.inc new file mode 100644 index 0000000000..22317c6497 --- /dev/null +++ b/maintenance/compressOld.inc @@ -0,0 +1,37 @@ +=$start ORDER BY old_id LIMIT $chunksize"; + $res = wfQuery( $sql, DB_READ, "compressOldPages" ); + if( wfNumRows( $res ) == 0 ) { + break; + } + while( $row = wfFetchObject( $res ) ) { + # print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n"; + compressPage( $row ); + } + wfFreeResult( $res ); + $start += $chunksize; + print "$start...\n"; + } while( true ); +} + +function compressPage( $row ) { + if( false !== strpos( $row->old_flags, "gzip" ) ) { + print "Already compressed row {$row->old_id}?\n"; + return false; + } + $flags = $row->old_flags ? "{$row->old_flags},gzip" : "gzip"; + $compress = wfStrencode( gzdeflate( $row->old_text ) ); + + $sql = "UPDATE old SET old_flags='$flags', old_text='$compress' WHERE old_id={$row->old_id} LIMIT 1"; + $res = wfQuery( $sql, DB_WRITE, 'compressPage' ); + return $res; +} + +?> diff --git a/maintenance/compressOld.php b/maintenance/compressOld.php new file mode 100644 index 0000000000..0081fb5afd --- /dev/null +++ b/maintenance/compressOld.php @@ -0,0 +1,49 @@ + -- 2.20.1