Add page_len field with byte length of current revision text, since
author Brion Vibber <brion@users.mediawiki.org>
Sat, 12 Mar 2005 11:51:02 +0000 (11:51 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Sat, 12 Mar 2005 11:51:02 +0000 (11:51 +0000)
revision text is now stored separately and may be compressed.
This field is indexed which speeds up Special:Shortpages & Longpages.

Removed no longer used wfArticleIsStub().

includes/Article.php
includes/Linker.php
includes/Parser.php
includes/SpecialShortpages.php
includes/SpecialUndelete.php
maintenance/archives/patch-page_len.sql [new file with mode: 0644]
maintenance/tables.sql
maintenance/updaters.inc

index cf3592b..c9be37a 100644 (file)
@@ -741,12 +741,15 @@ class Article {
         *
         * @param Database $dbw
         * @param int $revId
-        * @param int $lastRevision
-        * @param bool $isRedirect
+        * @param string $text -- used to set length and redirect status if given
+        * @param int $lastRevision -- if given, will not overwrite the page field
+        *                             when different from the currently set value.
+        *                             Giving 0 indicates the new page flag should
+        *                             be set on.
         * @return bool true on success, false on failure
         * @access private
         */
-       function updateRevisionOn( &$dbw, $revId, $lastRevision = null, $isRedirect = false ) {
+       function updateRevisionOn( &$dbw, $revId, $text = '', $lastRevision = null ) {
                $fname = 'Article::updateToRevision';
                wfProfileIn( $fname );
                
@@ -760,7 +763,8 @@ class Article {
                                'page_latest'      => $revId,
                                'page_touched'     => $dbw->timestamp(),
                                'page_is_new'      => ($lastRevision === 0) ? 0 : 1,
-                               'page_is_redirect' => $isRedirect ? 1 : 0,
+                               'page_is_redirect' => Article::isRedirect( $text ),
+                               'page_len'         => strlen( $text ),
                        ),
                        $conditions,
                        $fname );
@@ -806,7 +810,7 @@ class Article {
                $this->mTitle->resetArticleID( $newid );
                
                # Update the page record with revision data
-               $this->updateRevisionOn( $dbw, $revisionId, 0, $this->isRedirect( $text ) );
+               $this->updateRevisionOn( $dbw, $revisionId, $text, 0 );
 
                Article::onArticleCreate( $this->mTitle );
                RecentChange::notifyNew( $now, $this->mTitle, $isminor, $wgUser, $summary );
@@ -1004,7 +1008,7 @@ class Article {
                        $revisionId = $revision->insertOn( $dbw );
                        
                        # Update page
-                       $ok = $this->updateRevisionOn( $dbw, $revisionId, $lastRevision, $redir );
+                       $ok = $this->updateRevisionOn( $dbw, $revisionId, $text, $lastRevision );
 
                        if( !$ok ) {
                                /* Belated edit conflict! Run away!! */
@@ -1945,8 +1949,7 @@ class Article {
                        'minor_edit' => $minor ? 1 : 0,
                        ) );
                $revisionId = $revision->insertOn( $dbw );
-               $this->updateRevisionOn( $dbw, $revisionId, null,
-                       Article::isRedirect( $text ) );
+               $this->updateRevisionOn( $dbw, $revisionId, $text );
                $dbw->commit();
                
                wfProfileOut( $fname );
@@ -2136,32 +2139,5 @@ class Article {
        }
 }
 
-/**
- * Check whether an article is a stub
- *
- * @public
- * @param integer $articleID   ID of the article that is to be checked
- */
-function wfArticleIsStub( $articleID ) {
-       global $wgUser;
-       $fname = 'wfArticleIsStub';
-
-       wfDebugDieBacktrace( 'This function seems to be unused. Pending removal.' );
-
-       $threshold = $wgUser->getOption('stubthreshold') ;
-       if ( $threshold > 0 ) {
-               $dbr =& wfGetDB( DB_SLAVE );
-               $s = $dbr->selectRow( array('page', 'text'),
-                       array( 'LENGTH(old_text) AS len', 'page_namespace', 'page_is_redirect' ),
-                       array( 'page_id' => $articleID, "page.page_latest=text.old_id" ),
-                       $fname ) ;
-               if ( $s == false OR $s->page_is_redirect OR $s->page_namespace != NS_MAIN ) {
-                       return false;
-               }
-               $size = $s->len;
-               return ( $size < $threshold );
-       }
-       return false;
-}
 
 ?>
index c8819e1..9594721 100644 (file)
@@ -227,14 +227,14 @@ class Linker {
                                if ( $threshold > 0 ) {
                                        $dbr =& wfGetDB( DB_SLAVE );
                                        $s = $dbr->selectRow(
-                                               array( 'page', 'text' ),
-                                               array( 'LENGTH(old_text) AS x',
+                                               array( 'page' ),
+                                               array( 'page_len',
                                                        'page_namespace',
                                                        'page_is_redirect' ),
                                                array( 'page_id' => $aid,
                                                        'old_id = page_latest' ), $fname ) ;
                                        if ( $s !== false ) {
-                                               $size = $s->x;
+                                               $size = $s->page_len;
                                                if ( $s->page_is_redirect OR $s->page_namespace != NS_MAIN ) {
                                                        $size = $threshold*2 ; # Really big
                                                }
index 4f40e09..4a0adea 100644 (file)
@@ -2770,16 +2770,12 @@ class Parser
                                        # Not in the link cache, add it to the query
                                        if ( !isset( $current ) ) {
                                                $current = $val;
-                                               $tables = $page;
-                                               $join = '';
                                                $query =  "SELECT page_id, page_namespace, page_title";
                                                if ( $threshold > 0 ) {
-                                                       $textTable = $dbr->tableName( 'text' );
-                                                       $query .= ', LENGTH(old_text) AS page_len, page_is_redirect';
-                                                       $tables .= ", $textTable";
+                                                       $query .= ', page_len, page_is_redirect';
                                                        $join = 'page_latest=old_id AND';
                                                }
-                                               $query .= " FROM $tables WHERE $join (page_namespace=$val AND page_title IN(";
+                                               $query .= " FROM $page WHERE (page_namespace=$val AND page_title IN(";
                                        } elseif ( $current != $val ) {
                                                $current = $val;
                                                $query .= ")) OR (page_namespace=$val AND page_title IN(";
index f996428..70ca503 100644 (file)
@@ -22,28 +22,29 @@ class ShortPagesPage extends QueryPage {
                return "Shortpages";
        }
 
+       /**
+        * This query is indexed as of 1.5
+        */
        function isExpensive() {
-               return true;
+               return false;
+       }
+       
+       function isSyndicated() {
+               return false;
        }
-       function isSyndicated() { return false; }
 
        function getSQL() {
                $dbr =& wfGetDB( DB_SLAVE );
                $page = $dbr->tableName( 'page' );
-               $text = $dbr->tableName( 'text' );
                $name = $dbr->addQuotes( $this->getName() );
                
-               # FIXME: Not only is this teh suck, it will fail
-               # if we compress revisions on save as it will return
-               # the compressed size.
                return
                        "SELECT $name as type,
                                        page_namespace as namespace,
                                page_title as title,
-                               LENGTH(old_text) AS value
-                       FROM $page, $text
-                       WHERE page_namespace=".NS_MAIN." AND page_is_redirect=0
-                         AND page_latest=old_id";
+                               page_len AS value
+                       FROM $page
+                       WHERE page_namespace=".NS_MAIN." AND page_is_redirect=0";
        }
        
        function sortDescending() {
@@ -52,8 +53,9 @@ class ShortPagesPage extends QueryPage {
 
        function formatResult( $skin, $result ) {
                global $wgLang, $wgContLang;
-               $nb = wfMsg( "nbytes", $wgLang->formatNum( $result->value ) );
-               $link = $skin->makeKnownLink( $result->title, $wgContLang->convert( $result->title ) );
+               $nb = htmlspecialchars( wfMsg( "nbytes", $wgLang->formatNum( $result->value ) ) );
+               $title = Title::makeTitle( $result->namespace, $result->title );
+               $link = $skin->makeKnownLinkObj( $title, $wgContLang->convert( $title->getPrefixedText() ) );
                return "{$link} ({$nb})";
        }
 }
index fc0aa33..6b4833d 100644 (file)
@@ -217,8 +217,8 @@ class PageArchive {
                        # FIXME: Update latest if newer as well...
                        if( $newid ) {
                                # FIXME: update article count if changed...
-                               $article->updateRevisionOn( $dbw, $revision->getId(), $previousRevId,
-                                       Article::isRedirect( $revision->getText() ) );
+                               $article->updateRevisionOn( $dbw, $revision->getId(),
+                                       $revision->getText(), $previousRevId );
                                
                                # Finally, clean up the link tables
                                $wgLinkCache = new LinkCache();
diff --git a/maintenance/archives/patch-page_len.sql b/maintenance/archives/patch-page_len.sql
new file mode 100644 (file)
index 0000000..c32dc8d
--- /dev/null
@@ -0,0 +1,16 @@
+-- Page length field (in bytes) for current revision of page.
+-- Since page text is now stored separately, it may be compressed
+-- or otherwise difficult to calculate. Additionally, the field
+-- can be indexed for handy 'long' and 'short' page lists.
+--
+-- Added 2005-03-12
+
+ALTER TABLE /*$wgDBprefix*/page
+  ADD page_len int(8) unsigned NOT NULL,
+  ADD INDEX (page_len);
+
+-- Not accurate when upgrading from an intermediate 1.5 alpha
+-- with revision compression on (LENGTH() sees the compressed size).
+UPDATE /*$wgDBprefix*/page, /*$wgDBprefix*/text
+  SET page_len=LENGTH(old_text)
+  WHERE page_latest=old_id;
index ddc5ea1..802069f 100644 (file)
@@ -36,20 +36,29 @@ CREATE TABLE /*$wgDBprefix*/user_rights (
 -- );
 
 CREATE TABLE /*$wgDBprefix*/page (
+  -- Identifiers:
   page_id int(8) unsigned NOT NULL auto_increment,
   page_namespace tinyint NOT NULL,
   page_title varchar(255) binary NOT NULL,
+  
+  -- Mutable information
   page_restrictions tinyblob NOT NULL default '',
   page_counter bigint(20) unsigned NOT NULL default '0',
   page_is_redirect tinyint(1) unsigned NOT NULL default '0',
   page_is_new tinyint(1) unsigned NOT NULL default '0',
   page_random real unsigned NOT NULL,
   page_touched char(14) binary NOT NULL default '',
+  
+  -- Handy key to revision.rev_id of the current revision
   page_latest int(8) unsigned NOT NULL,
+  page_len int(8) unsigned NOT NULL,
 
   PRIMARY KEY page_id (page_id),
   UNIQUE INDEX name_title (page_namespace,page_title),
-  INDEX (page_random)
+  
+  -- Special-purpose indexes
+  INDEX (page_random),
+  INDEX (page_len)
 );
 
 CREATE TABLE /*$wgDBprefix*/revision (
index 9a973c0..0cab338 100644 (file)
@@ -36,6 +36,7 @@ $wgNewFields = array(
        array( 'group',         'group_rights',     'patch-userlevels-rights.sql' ),
        array( 'logging',       'log_params',       'patch-log_params.sql' ),
        array( 'archive',       'ar_rev_id',        'patch-archive-rev_id.sql' ),
+       array( 'page',          'page_len',         'patch-page_len.sql' ),
 );
 
 function add_table( $name, $patch ) {
@@ -347,10 +348,12 @@ function do_schema_restructuring() {
                        page_random real unsigned NOT NULL,
                        page_touched char(14) binary NOT NULL default '',
                        page_latest int(8) unsigned NOT NULL,
+                       page_len int(8) unsigned NOT NULL,
 
                        PRIMARY KEY page_id (page_id),
                        UNIQUE INDEX name_title (page_namespace,page_title),
-                       INDEX (page_random)
+                       INDEX (page_random),
+                       INDEX (page_len)
                        )", $fname );
                $wgDatabase->query("CREATE TABLE $revision (
                        rev_id int(8) unsigned NOT NULL auto_increment,
@@ -390,9 +393,9 @@ function do_schema_restructuring() {
 
                echo "......Setting up page table.\n";
                $wgDatabase->query( "INSERT INTO $page (page_id, page_namespace, page_title, page_restrictions, page_counter,
-                               page_is_redirect, page_is_new, page_random, page_touched, page_latest)
+                               page_is_redirect, page_is_new, page_random, page_touched, page_latest, page_len)
                        SELECT cur_id, cur_namespace, cur_title, cur_restrictions, cur_counter, cur_is_redirect, cur_is_new,
-                               cur_random, cur_touched, rev_id
+                               cur_random, cur_touched, rev_id, LENGTH(cur_text)
                        FROM $cur,$revision
                        WHERE cur_id=rev_page AND rev_timestamp=cur_timestamp AND rev_id > {$maxold}", $fname );