Incremental link table updates
author Tim Starling <tstarling@users.mediawiki.org>
Sun, 6 Jul 2003 11:42:42 +0000 (11:42 +0000)
committer Tim Starling <tstarling@users.mediawiki.org>
Sun, 6 Jul 2003 11:42:42 +0000 (11:42 +0000)
includes/Article.php
includes/LinkCache.php
includes/LinksUpdate.php

index 5edfe69..614c715 100644 (file)
@@ -754,7 +754,13 @@ name=\"wpSummary\" maxlength=200 size=60><br>
                global $wgOut, $wgTitle, $wgUser, $wgLinkCache;
 
                $wgLinkCache = new LinkCache();
-               $wgOut->addWikiText( $text ); # Just to update links
+
+               # Get old version of link table to allow incremental link updates
+               $wgLinkCache->preFill( $wgTitle );
+               $wgLinkCache->clear();
+
+               # Now update the link cache by parsing the text
+               $wgOut->addWikiText( $text );
 
                $this->editUpdates( $text );
                if( preg_match( "/^#redirect/i", $text ) )
index 8664ef8..d442920 100644 (file)
@@ -1,17 +1,26 @@
 <?
 # Cache for article titles and ids linked from one source
 
+# Selectors for the $which argument of LinkCache::incrementalSetup()
+define ('LINKCACHE_GOOD', 0); # good links: compares mGoodLinks with mOldGoodLinks
+define ('LINKCACHE_BAD', 1); # bad links: compares mBadLinks with mOldBadLinks
+define ('LINKCACHE_IMAGE', 2); # image links: incrementalSetup() currently always returns false for this
+
 class LinkCache {
 
        /* private */ var $mGoodLinks, $mBadLinks, $mActive;
-       /* private */ var $mImageLinks;
-
+       /* private */ var $mImageLinks; 
+       /* private */ var $mPreFilled, $mOldGoodLinks, $mOldBadLinks;
+       
        function LinkCache()
        {
                $this->mActive = true;
+               $this->mPreFilled = false;
                $this->mGoodLinks = array();
                $this->mBadLinks = array();
                $this->mImageLinks = array();
+               $this->mOldGoodLinks = array();
+               $this->mOldBadLinks = array();
        }
 
        function getGoodLinkID( $title )
@@ -104,18 +113,102 @@ class LinkCache {
                                Title::makeName( $s->cur_namespace, $s->cur_title )
                                );
                }
-
+               
+               $this->suspend();
+               $id = $fromtitle->getArticleID();
+               $this->resume();
+               
                $sql = "SELECT HIGH_PRIORITY bl_to
                        FROM brokenlinks
-                       WHERE bl_from='{$dbkeyfrom}'";
+                       WHERE bl_from='{$id}'";
                $res = wfQuery( $sql, "LinkCache::preFill" );
                while( $s = wfFetchObject( $res ) ) {
                        $this->addBadLink( $s->bl_to );
                }
-
+               
+               wfDebug("preFill dbkeyfrom=$dbkeyfrom\n");
+               $this->mOldBadLinks = $this->mBadLinks;
+               $this->mOldGoodLinks = $this->mGoodLinks;
+               $this->mPreFilled = true;
+               
                wfProfileOut();
        }
 
-}
+       # Good links whose value is in the current cache but not in the snapshot
+       # taken by preFill(). array_diff() compares values and keeps the keys of
+       # the first array.
+       function getGoodAdditions()
+       {
+               $current = $this->mGoodLinks;
+               $previous = $this->mOldGoodLinks;
+               $added = array_diff( $current, $previous );
+               return $added;
+       }
+
+       # Bad links that are in the current cache but not in the preFill() snapshot,
+       # returned as a list re-indexed from zero.
+       function getBadAdditions()
+       {
+               $current = $this->mBadLinks;
+               $previous = $this->mOldBadLinks;
+               $added = array_diff( $current, $previous );
+               return array_values( $added );
+       }
+
+       # Image links (key and value both compared) that are in the current cache
+       # but not in the old image-link snapshot. If no snapshot has been stored,
+       # it is treated as empty, so every current image link counts as an addition.
+       function getImageAdditions()
+       {
+               $old = isset( $this->mOldImageLinks ) ? $this->mOldImageLinks : array();
+               return array_diff_assoc( $this->mImageLinks, $old );
+       }
+
+       # Good links whose value is in the preFill() snapshot but no longer in the
+       # current cache. Keys of the snapshot are preserved.
+       function getGoodDeletions()
+       {
+               $previous = $this->mOldGoodLinks;
+               $current = $this->mGoodLinks;
+               $removed = array_diff( $previous, $current );
+               return $removed;
+       }
+
+       # Bad links that are in the preFill() snapshot but no longer in the current
+       # cache, returned as a list re-indexed from zero.
+       function getBadDeletions()
+       {
+               $previous = $this->mOldBadLinks;
+               $current = $this->mBadLinks;
+               $removed = array_diff( $previous, $current );
+               return array_values( $removed );
+       }
+
+       # Image links (key and value both compared) that are in the old image-link
+       # snapshot but no longer in the current cache. If no snapshot has been
+       # stored, it is treated as empty, so nothing is reported as deleted.
+       function getImageDeletions()
+       {
+               $old = isset( $this->mOldImageLinks ) ? $this->mOldImageLinks : array();
+               return array_diff_assoc( $old, $this->mImageLinks );
+       }
 
+       # Prepare an incremental update for one link table.
+       #
+       # Parameters: $which is one of the LINKCACHE_xxx constants; $del and $add are
+       # passed by reference and receive the links to delete and the links to insert.
+       # Returns true when the incremental update is estimated to be cheaper than
+       # deleting and re-inserting everything, false otherwise. For example, if
+       # [[List of mathematical topics]] was blanked, it would take a long, long time
+       # to do incrementally.
+       # Always returns false, leaving $del and $add untouched, when preFill() has
+       # not been run or when $which is not LINKCACHE_GOOD or LINKCACHE_BAD.
+       function incrementalSetup( $which, &$del, &$add )
+       {
+               if ( ! $this->mPreFilled ) {
+                       return false;
+               }
+
+               switch ( $which ) {
+                       case LINKCACHE_GOOD:
+                               $old =& $this->mOldGoodLinks;
+                               $cur =& $this->mGoodLinks;
+                               $del = $this->getGoodDeletions();
+                               $add = $this->getGoodAdditions();
+                               break;
+                       case LINKCACHE_BAD:
+                               $old =& $this->mOldBadLinks;
+                               $cur =& $this->mBadLinks;
+                               $del = $this->getBadDeletions();
+                               $add = $this->getBadAdditions();
+                               break;
+                       default: # LINKCACHE_IMAGE
+                               return false;
+               }
+               wfDebug( "which = $which\n" );
+               wfDebug( '$old = ' . implode(", ", $old) . "\n" );
+               wfDebug( '$cur = ' . implode(", ", $cur) . "\n" );
+               wfDebug( '$del = ' . implode(", ", $del) . "\n" );
+               wfDebug( '$add = ' . implode(", ", $add) . "\n" );
+
+               # Coefficients here (1,1,3,1) could probably be put in a global object
+               $timeDumb = count( $old ) + count( $cur );
+               # The additions live in $add; the previous count( $new ) referred to an
+               # undefined variable, so insertions never contributed to the estimate.
+               $timeIncr = count( $del ) * 3 + count( $add );
+
+               return $timeIncr < $timeDumb;
+       }
+
+       # Empties the good, bad and image link arrays. The mOld* copies made by
+       # preFill() and the mPreFilled flag are not touched.
+       function clear()
+       {
+               foreach ( array( 'mGoodLinks', 'mBadLinks', 'mImageLinks' ) as $field ) {
+                       $this->$field = array();
+               }
+       }
+}
 ?>
index 4f1ebbb..3c7ae87 100644 (file)
@@ -14,14 +14,150 @@ class LinksUpdate {
 
        function doUpdate()
        {
-               /* Update link tables with outgoing links from an updated article */
-               /* Currently this is 'dumb', removing all links and putting them back. */
+               global $wgUseBetterLinksUpdate, $wgLinkCache, $wgDBtransactions;
                
+               wfDebug("Hello\n");
+
+               /* Update link tables with outgoing links from an updated article */
                /* Relies on the 'link cache' to be filled out */
-               global $wgLinkCache, $wgDBtransactions;
+
+               if ( !$wgUseBetterLinksUpdate ) {
+                       $this->doDumbUpdate();
+                       return;
+               }
+
                $fname = "LinksUpdate::doUpdate";
                wfProfileIn( $fname );
 
+               $del = array();
+               $add = array();
+
+               if( $wgDBtransactions ) {
+                       $sql = "BEGIN";
+                       wfQuery( $sql, $fname );
+               }
+               
+               #------------------------------------------------------------------------------
+               # Good links
+
+               if ( $wgLinkCache->incrementalSetup( LINKCACHE_GOOD, $del, $add ) ) {
+                       # Delete where necessary
+                       $baseSql = "DELETE FROM links WHERE l_from='{$this->mTitleEnc}'";
+                       foreach ($del as $title => $id ) {
+                               wfDebug( "Incremental deletion  from {$this->mTitleEnc} to $title\n" );
+                               $sql = $baseSql . " AND l_to={$id}";
+                               wfQuery( $sql, $fname );
+                       }
+               } else {
+                       # Delete everything
+                       wfDebug( "Complete deletion from {$this->mTitleEnc}\n" );
+                       $sql = "DELETE FROM links WHERE l_from='{$this->mTitleEnc}'";
+                       wfQuery( $sql, $fname );
+                       
+                       # Get the addition list
+                       $add = $wgLinkCache->getGoodLinks();
+               }
+
+               # Do the insertion
+               $sql = "";
+               if ( 0 != count( $add ) ) {
+                       $sql = "INSERT INTO links (l_from,l_to) VALUES ";
+                       $first = true;
+                       foreach( $add as $lt => $lid ) {
+                               wfDebug( "Inserting from {$this->mTitleEnc} to $lt\n" );
+                               
+                               if ( ! $first ) { $sql .= ","; }
+                               $first = false;
+
+                               $sql .= "('{$this->mTitleEnc}',{$lid})";
+                       }
+               }
+               if ( "" != $sql ) { wfQuery( $sql, $fname ); }
+
+               #------------------------------------------------------------------------------
+               # Bad links
+
+               if ( $wgLinkCache->incrementalSetup( LINKCACHE_BAD, $del, $add ) ) {
+                       # Delete where necessary
+                       $baseSql = "DELETE FROM brokenlinks WHERE bl_from={$this->mId}";
+                       foreach ( $del as $title ) {
+                               $sql = $baseSql . " AND bl_to={$title}";
+                               wfQuery( $sql, $fname );
+                       }
+               } else {
+                       # Delete all
+                       $sql = "DELETE FROM brokenlinks WHERE bl_from={$this->mId}";
+                       wfQuery( $sql, $fname );
+                       
+                       # Get addition list
+                       $add = $wgLinkCache->getBadLinks();
+               }
+
+               # Do additions
+               $sql = "";
+               if ( 0 != count ( $add ) ) {
+                       $sql = "INSERT INTO brokenlinks (bl_from,bl_to) VALUES ";
+                       $first = true;
+                       foreach( $add as $blt ) {
+                               $blt = wfStrencode( $blt );
+                               if ( ! $first ) { $sql .= ","; }
+                               $first = false;
+
+                               $sql .= "({$this->mId},'{$blt}')";
+                       }
+               }
+               if ( "" != $sql ) { wfQuery( $sql, $fname ); }
+
+               #------------------------------------------------------------------------------
+               # Image links
+               if ( $wgLinkCache->incrementalSetup( LINKCACHE_IMAGE, $del, $add ) ) {
+                       # Delete where necessary
+                       $sql = "DELETE FROM imagelinks WHERE il_from='{$this->mTitleEnc}'";
+                       foreach ($del as $title ) {
+                               $sql = $baseSql . " AND il_to={$title}";
+                               wfQuery( $sql, $fname );
+                       }
+               } else {
+                       # Delete all
+                       $sql = "DELETE FROM imagelinks WHERE il_from='{$this->mTitleEnc}'";
+                       wfQuery( $sql, $fname );
+                       
+                       # Get addition list
+                       $add = $wgLinkCache->getImageLinks();
+               }
+               
+               # Do the insertion
+               $sql = "";
+               if ( 0 != count ( $add ) ) {
+                       $sql = "INSERT INTO imagelinks (il_from,il_to) VALUES ";
+                       $first = true;
+                       foreach( $add as $iname => $val ) {
+                               $iname = wfStrencode( $iname );
+                               if ( ! $first ) { $sql .= ","; }
+                               $first = false;
+
+                               $sql .= "('{$this->mTitleEnc}','{$iname}')";
+                       }
+               }
+               if ( "" != $sql ) { wfQuery( $sql, $fname ); }
+
+               $this->fixBrokenLinks();
+
+               if( $wgDBtransactions ) {
+                       $sql = "COMMIT";
+                       wfQuery( $sql, $fname );
+               }
+               wfProfileOut();
+       }
+
+       function doDumbUpdate()
+       {
+               # Old update function. This can probably be removed eventually, if the new one
+               # proves to be stable
+               global $wgLinkCache, $wgDBtransactions;
+               $fname = "LinksUpdate::doDumbUpdate";
+               wfProfileIn( $fname );
+
                if( $wgDBtransactions ) {
                        $sql = "BEGIN";
                        wfQuery( $sql, $fname );
@@ -61,8 +197,8 @@ class LinksUpdate {
                        }
                }
                if ( "" != $sql ) { wfQuery( $sql, $fname ); }
-
-               $sql = "DELETE FROM imagelinks WHERE il_from='{$t}'";
+               
+               $sql = "DELETE FROM imagelinks WHERE il_from='{$this->mTitleEnc}'";
                wfQuery( $sql, $fname );
 
                $a = $wgLinkCache->getImageLinks();
@@ -75,7 +211,7 @@ class LinksUpdate {
                                if ( ! $first ) { $sql .= ","; }
                                $first = false;
 
-                               $sql .= "('{$t}','{$iname}')";
+                               $sql .= "('{$this->mTitleEnc}','{$iname}')";
                        }
                }
                if ( "" != $sql ) { wfQuery( $sql, $fname ); }