From a81b621b5f7a344f5ec520eaa69c805907802eb9 Mon Sep 17 00:00:00 2001
From: Tim Starling
Date: Sun, 6 Jul 2003 11:42:42 +0000
Subject: [PATCH] Incremental link table updates

---
Review notes (free-form text in this position is ignored when the patch is applied):

* This patch was re-split from a copy whose newlines and indentation had been
  collapsed. Tab indentation and the position of blank context lines are a best
  guess, and the beginning of the first LinkCache.php hunk is missing from that
  copy. Hunk headers are kept verbatim, so the reconstructed hunks may not match
  their line counts exactly -- verify against the repository before applying.
* What the patch does: Article.php snapshots the stored link tables with
  LinkCache::preFill() and then clear()s the live cache before re-parsing.
  LinkCache gains add/delete diff helpers and incrementalSetup(), which decides
  whether an incremental update is cheaper than delete-all/re-insert.
  LinksUpdate::doUpdate() uses that decision per link table when
  $wgUseBetterLinksUpdate is set, and the previous behaviour moves to
  doDumbUpdate().
* Fixed in place (no added or removed lines, so counts are unchanged):
  - LinkCache::incrementalSetup() estimated the incremental cost with
    count( $new ); $new is never defined, the additions are in $add.
  - LinksUpdate::doUpdate() deleted broken links with an unquoted, unescaped
    bl_to value although the matching INSERT quotes and wfStrencode()s it.
  - The image-link deletion branch stored its base statement in $sql but then
    concatenated onto $baseSql (still the brokenlinks DELETE), iterated values
    although image names are the array keys, and left il_to unquoted.
* Not fixed here because it needs extra lines: $this->mOldImageLinks is read by
  getImageAdditions()/getImageDeletions() but is not initialised in any visible
  hunk, and wfDebug("Hello\n") in doUpdate() looks like leftover debugging.

 includes/Article.php     |   8 ++-
 includes/LinkCache.php   | 105 +++++++++++++++++++++++++--
 includes/LinksUpdate.php | 148 +++++++++++++++++++++++++++++++++++++--
 3 files changed, 248 insertions(+), 13 deletions(-)

diff --git a/includes/Article.php b/includes/Article.php
index 5edfe690a1..614c715c28 100644
--- a/includes/Article.php
+++ b/includes/Article.php
@@ -754,7 +754,13 @@ name=\"wpSummary\" maxlength=200 size=60>
 		global $wgOut, $wgTitle, $wgUser, $wgLinkCache;
 
 		$wgLinkCache = new LinkCache();
-		$wgOut->addWikiText( $text ); # Just to update links
+
+		# Get old version of link table to allow incremental link updates
+		$wgLinkCache->preFill( $wgTitle );
+		$wgLinkCache->clear();
+
+		# Now update the link cache by parsing the text
+		$wgOut->addWikiText( $text );
 
 		$this->editUpdates( $text );
 		if( preg_match( "/^#redirect/i", $text ) )
diff --git a/includes/LinkCache.php b/includes/LinkCache.php
index 8664ef8f80..d442920352 100644
--- a/includes/LinkCache.php
+++ b/includes/LinkCache.php
@@ -1,17 +1,26 @@ mActive = true;
+		$this->mPreFilled = false;
 		$this->mGoodLinks = array();
 		$this->mBadLinks = array();
 		$this->mImageLinks = array();
+		$this->mOldGoodLinks = array();
+		$this->mOldBadLinks = array();
 	}
 
 	function getGoodLinkID( $title )
@@ -104,18 +113,102 @@ class LinkCache {
 				Title::makeName( $s->cur_namespace, $s->cur_title )
 			);
 		}
-		
+
+		$this->suspend();
+		$id = $fromtitle->getArticleID();
+		$this->resume();
+
 		$sql = "SELECT HIGH_PRIORITY bl_to
 			FROM brokenlinks
-			WHERE bl_from='{$dbkeyfrom}'";
+			WHERE bl_from='{$id}'";
 		$res = wfQuery( $sql, "LinkCache::preFill" );
 		while( $s = wfFetchObject( $res ) ) {
 			$this->addBadLink( $s->bl_to );
 		}
-		
+
+		wfDebug("preFill dbkeyfrom=$dbkeyfrom\n");
+		$this->mOldBadLinks = $this->mBadLinks;
+		$this->mOldGoodLinks = $this->mGoodLinks;
+		$this->mPreFilled = true;
+
 		wfProfileOut();
 	}
 
-}
+	function getGoodAdditions()
+	{
+		return array_diff( $this->mGoodLinks, $this->mOldGoodLinks );
+	}
+
+	function getBadAdditions()
+	{
+		return array_values( array_diff( $this->mBadLinks, $this->mOldBadLinks ) );
+	}
+
+	function getImageAdditions()
+	{
+		return array_diff_assoc( $this->mImageLinks, $this->mOldImageLinks );
+	}
+
+	function getGoodDeletions()
+	{
+		return array_diff( $this->mOldGoodLinks, $this->mGoodLinks );
+	}
+
+	function getBadDeletions()
+	{
+		return array_values( array_diff( $this->mOldBadLinks, $this->mBadLinks ) );
+	}
+
+	function getImageDeletions()
+	{
+		return array_diff_assoc( $this->mOldImageLinks, $this->mImageLinks );
+	}
+	# Parameters: $which is one of the LINKCACHE_xxx constants, $del and $add are
+	# the incremental update arrays which will be filled. Returns whether or not it's
+	# worth doing the incremental version. For example, if [[List of mathematical topics]]
+	# was blanked, it would take a long, long time to do incrementally.
+	function incrementalSetup( $which, &$del, &$add )
+	{
+		if ( ! $this->mPreFilled ) {
+			return false;
+		}
+
+		switch ( $which ) {
+			case LINKCACHE_GOOD:
+				$old =& $this->mOldGoodLinks;
+				$cur =& $this->mGoodLinks;
+				$del = $this->getGoodDeletions();
+				$add = $this->getGoodAdditions();
+				break;
+			case LINKCACHE_BAD:
+				$old =& $this->mOldBadLinks;
+				$cur =& $this->mBadLinks;
+				$del = $this->getBadDeletions();
+				$add = $this->getBadAdditions();
+				break;
+			default: # LINKCACHE_IMAGE
+				return false;
+		}
+		wfDebug( "which = $which\n" );
+		wfDebug( '$old = ' . implode(", ", $old) . "\n" );
+		wfDebug( '$cur = ' . implode(", ", $cur) . "\n" );
+		wfDebug( '$del = ' . implode(", ", $del) . "\n" );
+		wfDebug( '$add = ' . implode(", ", $add) . "\n" );
+
+		# Coefficients here (1,1,3,1) could probably be put in a global object
+		$timeDumb = count( $old ) + count( $cur );
+		$timeIncr = count( $del ) * 3 + count( $add ); # one DELETE per removal, one batched INSERT for $add
+
+		return $timeIncr < $timeDumb;
+	}
+
+	# Clears cache but leaves old preFill copies alone
+	function clear()
+	{
+		$this->mGoodLinks = array();
+		$this->mBadLinks = array();
+		$this->mImageLinks = array();
+	}
+}
 ?>
diff --git a/includes/LinksUpdate.php b/includes/LinksUpdate.php
index 4f1ebbb3b4..3c7ae87930 100644
--- a/includes/LinksUpdate.php
+++ b/includes/LinksUpdate.php
@@ -14,14 +14,150 @@ class LinksUpdate {
 
 	function doUpdate()
 	{
-		/* Update link tables with outgoing links from an updated article */
-		/* Currently this is 'dumb', removing all links and putting them back. */
+		global $wgUseBetterLinksUpdate, $wgLinkCache, $wgDBtransactions;
+		wfDebug("Hello\n");
+
+		/* Update link tables with outgoing links from an updated article */
 		/* Relies on the 'link cache' to be filled out */
-		global $wgLinkCache, $wgDBtransactions;
+
+		if ( !$wgUseBetterLinksUpdate ) {
+			$this->doDumbUpdate();
+			return;
+		}
+
 		$fname = "LinksUpdate::doUpdate";
 		wfProfileIn( $fname );
 
+		$del = array();
+		$add = array();
+
+		if( $wgDBtransactions ) {
+			$sql = "BEGIN";
+			wfQuery( $sql, $fname );
+		}
+
+		#------------------------------------------------------------------------------
+		# Good links
+
+		if ( $wgLinkCache->incrementalSetup( LINKCACHE_GOOD, $del, $add ) ) {
+			# Delete where necessary
+			$baseSql = "DELETE FROM links WHERE l_from='{$this->mTitleEnc}'";
+			foreach ($del as $title => $id ) {
+				wfDebug( "Incremental deletion from {$this->mTitleEnc} to $title\n" );
+				$sql = $baseSql . " AND l_to={$id}";
+				wfQuery( $sql, $fname );
+			}
+		} else {
+			# Delete everything
+			wfDebug( "Complete deletion from {$this->mTitleEnc}\n" );
+			$sql = "DELETE FROM links WHERE l_from='{$this->mTitleEnc}'";
+			wfQuery( $sql, $fname );
+
+			# Get the addition list
+			$add = $wgLinkCache->getGoodLinks();
+		}
+
+		# Do the insertion
+		$sql = "";
+		if ( 0 != count( $add ) ) {
+			$sql = "INSERT INTO links (l_from,l_to) VALUES ";
+			$first = true;
+			foreach( $add as $lt => $lid ) {
+				wfDebug( "Inserting from {$this->mTitleEnc} to $lt\n" );
+
+				if ( ! $first ) { $sql .= ","; }
+				$first = false;
+
+				$sql .= "('{$this->mTitleEnc}',{$lid})";
+			}
+		}
+		if ( "" != $sql ) { wfQuery( $sql, $fname ); }
+
+		#------------------------------------------------------------------------------
+		# Bad links
+
+		if ( $wgLinkCache->incrementalSetup( LINKCACHE_BAD, $del, $add ) ) {
+			# Delete where necessary
+			$baseSql = "DELETE FROM brokenlinks WHERE bl_from={$this->mId}";
+			foreach ( $del as $title ) {
+				$sql = $baseSql . " AND bl_to='" . wfStrencode( $title ) . "'"; # bl_to is a title string: quote and escape like the INSERT below
+				wfQuery( $sql, $fname );
+			}
+		} else {
+			# Delete all
+			$sql = "DELETE FROM brokenlinks WHERE bl_from={$this->mId}";
+			wfQuery( $sql, $fname );
+
+			# Get addition list
+			$add = $wgLinkCache->getBadLinks();
+		}
+
+		# Do additions
+		$sql = "";
+		if ( 0 != count ( $add ) ) {
+			$sql = "INSERT INTO brokenlinks (bl_from,bl_to) VALUES ";
+			$first = true;
+			foreach( $add as $blt ) {
+				$blt = wfStrencode( $blt );
+				if ( ! $first ) { $sql .= ","; }
+				$first = false;
+
+				$sql .= "({$this->mId},'{$blt}')";
+			}
+		}
+		if ( "" != $sql ) { wfQuery( $sql, $fname ); }
+
+		#------------------------------------------------------------------------------
+		# Image links
+		if ( $wgLinkCache->incrementalSetup( LINKCACHE_IMAGE, $del, $add ) ) {
+			# Delete where necessary
+			$baseSql = "DELETE FROM imagelinks WHERE il_from='{$this->mTitleEnc}'";
+			foreach ( $del as $iname => $val ) { # image names are the array keys, as in the insertion loop below
+				$sql = $baseSql . " AND il_to='" . wfStrencode( $iname ) . "'";
+				wfQuery( $sql, $fname );
+			}
+		} else {
+			# Delete all
+			$sql = "DELETE FROM imagelinks WHERE il_from='{$this->mTitleEnc}'";
+			wfQuery( $sql, $fname );
+
+			# Get addition list
+			$add = $wgLinkCache->getImageLinks();
+		}
+
+		# Do the insertion
+		$sql = "";
+		if ( 0 != count ( $add ) ) {
+			$sql = "INSERT INTO imagelinks (il_from,il_to) VALUES ";
+			$first = true;
+			foreach( $add as $iname => $val ) {
+				$iname = wfStrencode( $iname );
+				if ( ! $first ) { $sql .= ","; }
+				$first = false;
+
+				$sql .= "('{$this->mTitleEnc}','{$iname}')";
+			}
+		}
+		if ( "" != $sql ) { wfQuery( $sql, $fname ); }
+
+		$this->fixBrokenLinks();
+
+		if( $wgDBtransactions ) {
+			$sql = "COMMIT";
+			wfQuery( $sql, $fname );
+		}
+		wfProfileOut();
+	}
+
+	function doDumbUpdate()
+	{
+		# Old update function. This can probably be removed eventually, if the new one
+		# proves to be stable
+		global $wgLinkCache, $wgDBtransactions;
+		$fname = "LinksUpdate::doDumbUpdate";
+		wfProfileIn( $fname );
+
 		if( $wgDBtransactions ) {
 			$sql = "BEGIN";
 			wfQuery( $sql, $fname );
@@ -61,8 +197,8 @@ class LinksUpdate {
 			}
 		}
 		if ( "" != $sql ) { wfQuery( $sql, $fname ); }
-		
-		$sql = "DELETE FROM imagelinks WHERE il_from='{$t}'";
+
+		$sql = "DELETE FROM imagelinks WHERE il_from='{$this->mTitleEnc}'";
 		wfQuery( $sql, $fname );
 
 		$a = $wgLinkCache->getImageLinks();
@@ -75,7 +211,7 @@ class LinksUpdate {
 				if ( ! $first ) { $sql .= ","; }
 				$first = false;
 
-				$sql .= "('{$t}','{$iname}')";
+				$sql .= "('{$this->mTitleEnc}','{$iname}')";
 			}
 		}
 		if ( "" != $sql ) { wfQuery( $sql, $fname ); }
-- 
2.20.1