From 81217f55a08341a1b3fc401d836b53b8d97e9d27 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Fri, 30 Dec 2005 09:33:11 +0000 Subject: [PATCH] * Added templatelinks table. The table currently represents a literal list of templates included from each article. That is, the table contains pages which were actually loaded during parsing, not the markup which went into resolving their names. * Ended the role of $wgLinkCache in link updates. Instead, links (and related entities) are registered in the ParserOutput object during a parse. The LinksUpdate constructor now takes a ParserOutput object as a parameter. $wgLinkCache is still used, but only as a cache of article IDs. * Because the link list is now saved and restored in the parser cache, meta tag keywords now work on parser cache hits. Some refactoring took place in this area. * Rendering of the HTML for category links has moved from Parser to OutputPage. * Did some general pottering around in Article.php, such as allowing an Article object to be created with a specified revision ID, thereby optionally removing the dependence on $wgRequest. Not used at the current time. * A few documentation tweaks. 
--- RELEASE-NOTES | 8 + docs/linkcache.txt | 31 +- includes/Article.php | 255 ++++++---- includes/Database.php | 4 +- includes/DefaultSettings.php | 6 - includes/EditPage.php | 24 +- includes/GlobalFunctions.php | 18 + includes/ImagePage.php | 1 - includes/LinkCache.php | 180 +++---- includes/LinksUpdate.php | 505 ++++++++++++------- includes/OutputPage.php | 74 ++- includes/Parser.php | 136 +++-- includes/Skin.php | 3 +- includes/SpecialMovepage.php | 5 - includes/SpecialUndelete.php | 23 +- includes/SpecialWhatlinkshere.php | 322 ++++++++---- includes/Title.php | 22 +- index.php | 2 +- languages/Language.php | 6 +- maintenance/archives/patch-templatelinks.sql | 19 + maintenance/dumpHTML.inc | 4 +- maintenance/mysql5/tables.sql | 20 + maintenance/refreshLinks.inc | 32 +- maintenance/tables.sql | 19 + maintenance/updaters.inc | 53 +- 25 files changed, 1119 insertions(+), 653 deletions(-) create mode 100644 maintenance/archives/patch-templatelinks.sql diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 1638b5e28e..978eba51d8 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -374,6 +374,14 @@ fully support the editing toolbar, but was found to be too confusing. * (bug 4424) Update for Spanish language (es) 100% messages translated * (bug 4425) Typos in Polish translation * (bug 4426) Add link to user_talk page on image pages +* Added templatelinks table, to track template inclusions. 
User-visible effects + will be: + * (inclusion) tag for inclusions in Special:Whatlinkshere + * More accurate list of used templates on the edit page + * More reliable cache invalidation when templates outside the template + namespace are changed +* Removed $wgUseCategoryMagic option, categories are now enabled unconditionally + === Caveats === diff --git a/docs/linkcache.txt b/docs/linkcache.txt index ab36587d82..3e9799c31e 100644 --- a/docs/linkcache.txt +++ b/docs/linkcache.txt @@ -4,28 +4,15 @@ The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database. This is used to mark up links when displaying a page. If the same link appears more than once on any page, -then it only has to be looked up once. +then it only has to be looked up once. In most cases, link +lookups are done in batches with the LinkBatch class, or the +equivalent in Parser::replaceLinkHolders(), so the link +cache is mostly useful for short snippets of parsed text +(such as the site notice), and for links in the navigation +areas of the skin. -In practice, what happens is that the global cache object -$wgLinkCache is consulted and updated every time the function -getArticleID() from Title is called. +The link cache was formerly used to track links used in a +document for the purposes of updating the link tables. This +application is now deprecated. -This has a side benefit that we take advantage of. We have -tables "links" and "brokenlinks" which we use to do things -like the Orphans page and Whatlinkshere page. It just so -happens that after we update a page, we display it--and as -we're displaying it, we look up all the links on that page, -causing them to be put into the cache. That information is -exactly what we need to update those two tables. So, we do -something tricky when we update pages: just after the update -and before we display, we clear the cache. Then we display -the updated page. 
Finally, we put a LinksUpdate object onto -the deferred updates list, which fetches its information from -the cache. - -There's a minor complication: displaying a page also looks up -a few things like the talk page link in the quick bar and the -date links. Since we don't want those in the link tables, we -must take care to suspend the cache while we look those up. -Skin.php does exactly that--see dateLink(), for example. diff --git a/includes/Article.php b/includes/Article.php index 2e798f3ebc..5a667b635d 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -36,14 +36,17 @@ class Article { var $mOldId; var $mRevIdFetched; var $mRevision; + var $mRedirectUrl; /**#@-*/ /** * Constructor and clear the article - * @param mixed &$title + * @param Title &$title + * @param integer $oldId Revision ID, null to fetch from request, zero for current */ - function Article( &$title ) { + function Article( &$title, $oldId = null ) { $this->mTitle =& $title; + $this->mOldId = $oldId; $this->clear(); } @@ -71,6 +74,7 @@ class Article { $this->mForUpdate = false; $this->mIsRedirect = false; $this->mRevIdFetched = 0; + $this->mRedirectUrl = false; } /** @@ -236,18 +240,22 @@ class Article { } /** - * Return the oldid of the article that is to be shown. - * For requests with a "direction", this is not the oldid of the - * query + * Return the oldid of the article that is to be shown, 0 for the current revision */ function getOldID() { - global $wgRequest, $wgOut; - static $lastid; - - if ( isset( $lastid ) ) { - return $lastid; + if ( is_null( $this->mOldId ) ) { + $this->mOldId = $this->getOldIDFromRequest(); } - # Query variables :P + return $this->mOldId; + } + + /** + * Get the old ID from the request, return it. 
+ * Sets $this->mRedirectUrl to a correct URL if the query parameters are incorrect + */ + function getOldIDFromRequest() { + global $wgRequest; + $this->mRedirectUrl = false; $oldid = $wgRequest->getVal( 'oldid' ); if ( isset( $oldid ) ) { $oldid = intval( $oldid ); @@ -256,7 +264,7 @@ class Article { if ( $nextid ) { $oldid = $nextid; } else { - $wgOut->redirect( $this->mTitle->getFullURL( 'redirect=no' ) ); + $this->mRedirectUrl = $this->mTitle->getFullURL( 'redirect=no' ); } } elseif ( $wgRequest->getVal( 'direction' ) == 'prev' ) { $previd = $this->mTitle->getPreviousRevisionID( $oldid ); @@ -268,12 +276,14 @@ class Article { } $lastid = $oldid; } - return @$oldid; # "@" to be able to return "unset" without PHP complaining + if ( !$oldid ) { + $oldid = 0; + } + return $oldid; } - /** - * Load the revision (including cur_text) into this object + * Load the revision (including text) into this object */ function loadContent( $noredir = false ) { global $wgOut, $wgRequest; @@ -348,12 +358,12 @@ class Article { $this->mTitle->loadRestrictions( $data->page_restrictions ); $this->mTitle->mRestrictionsLoaded = true; - $this->mCounter = $data->page_counter; - $this->mTouched = wfTimestamp( TS_MW, $data->page_touched ); - $this->mIsRedirect = $data->page_is_redirect; - $this->mLatest = $data->page_latest; + $this->mCounter = $data->page_counter; + $this->mTouched = wfTimestamp( TS_MW, $data->page_touched ); + $this->mIsRedirect = $data->page_is_redirect; + $this->mLatest = $data->page_latest; - $this->mDataLoaded = true; + $this->mDataLoaded = true; } /** @@ -615,6 +625,7 @@ class Article { $this->mTimestamp = $this->mLastRevision->getTimestamp(); $this->mComment = $this->mLastRevision->getComment(); $this->mMinorEdit = $this->mLastRevision->isMinor(); + $this->mRevIdFetched = $this->mLastRevision->getID(); } } @@ -697,6 +708,14 @@ class Article { wfProfileIn( $fname ); # Get variables from query string $oldid = $this->getOldID(); + + # getOldID may want us to redirect 
somewhere else + if ( $this->mRedirectUrl ) { + $wgOut->redirect( $this->mRedirectUrl ); + wfProfileOut( $fname ); + return; + } + $diff = $wgRequest->getVal( 'diff' ); $rcid = $wgRequest->getVal( 'rcid' ); $rdfrom = $wgRequest->getVal( 'rdfrom' ); @@ -755,6 +774,18 @@ class Article { } if ( !$outputDone ) { $text = $this->getContent( false ); # May change mTitle by following a redirect + if ( $text === false ) { + # Failed to load, replace text with error message + $t = $this->mTitle->getPrefixedText(); + if( $oldid ) { + $t .= ',oldid='.$oldid; + } + if( isset( $redirect ) ) { + $redirect = ($redirect == 'no') ? 'no' : 'yes'; + $t .= ',redirect='.$redirect; + } + $text = wfMsg( 'missingarticle', $t ); + } # Another whitelist check in case oldid or redirects are altering the title if ( !$this->mTitle->userCanRead() ) { @@ -875,9 +906,6 @@ class Article { if ($wgUseTrackbacks) $this->addTrackbacks(); - # Add link titles as META keywords - $wgOut->addMetaTags() ; - $this->viewUpdates(); wfProfileOut( $fname ); } @@ -1163,10 +1191,10 @@ class Article { $fname ); # standard deferred updates - $this->editUpdates( $text, $summary, $isminor, $now ); + $this->editUpdates( $text, $summary, $isminor, $now, $revisionId ); $oldid = 0; # new article - $this->showArticle( $text, wfMsg( 'newarticle' ), false, $isminor, $now, $summary, $oldid, $revisionId ); + $this->showArticle( $text, wfMsg( 'newarticle' ), false, $isminor, $now, $summary, $oldid ); wfRunHooks( 'ArticleInsertComplete', array( &$this, &$wgUser, $text, $summary, $isminor, @@ -1368,6 +1396,9 @@ class Article { // Update caches outside the main transaction Article::onArticleEdit( $this->mTitle ); } + } else { + // Keep the same revision ID, but do some updates on it + $revisionId = $this->getRevIdFetched(); } if( !$wgDBtransactions ) { @@ -1391,10 +1422,36 @@ class Article { } } # standard deferred updates - $this->editUpdates( $text, $summary, $minor, $now ); + $this->editUpdates( $text, $summary, $minor, $now, 
$revisionId ); + + $urls = array(); + # Invalidate caches of all articles using this article as a template + + # Template namespace + # Purge all articles linking here + $titles = $this->mTitle->getTemplateLinksTo(); + Title::touchArray( $titles ); + if ( $wgUseSquid ) { + foreach ( $titles as $title ) { + $urls[] = $title->getInternalURL(); + } + } + + # Squid updates + if ( $wgUseSquid ) { + $urls = array_merge( $urls, $this->mTitle->getSquidURLs() ); + $u = new SquidUpdate( $urls ); + array_push( $wgPostCommitUpdateList, $u ); + } + + # File cache + if ( $wgUseFileCache ) { + $cm = new CacheManager($this->mTitle); + @unlink($cm->fileCacheName()); + } - $this->showArticle( $text, wfMsg( 'updated' ), $sectionanchor, $isminor, $now, $summary, $lastRevision, $revisionId ); + $this->showArticle( $text, wfMsg( 'updated' ), $sectionanchor, $isminor, $now, $summary, $lastRevision ); } wfRunHooks( 'ArticleSaveComplete', array( &$this, &$wgUser, $text, @@ -1408,42 +1465,14 @@ class Article { * After we've either updated or inserted the article, update * the link tables and redirect to the new page. 
*/ - function showArticle( $text, $subtitle , $sectionanchor = '', $me2, $now, $summary, $oldid, $newid ) { - global $wgUseDumbLinkUpdate, $wgAntiLockFlags, $wgOut, $wgUser, $wgLinkCache; + function showArticle( $text, $subtitle , $sectionanchor = '', $me2, $now, $summary, $oldid ) { + global $wgOut, $wgUser; global $wgUseEnotif; $fname = 'Article::showArticle'; wfProfileIn( $fname ); - - $wgLinkCache = new LinkCache(); - - if ( !$wgUseDumbLinkUpdate ) { - # Preload links to reduce lock time - if ( $wgAntiLockFlags & ALF_PRELOAD_LINKS ) { - $wgLinkCache->preFill( $this->mTitle ); - $wgLinkCache->clear(); - } - } - - # Parse the text and save it to the parser cache - $wgOut = new OutputPage(); - $wgOut->setParserOptions( ParserOptions::newFromUser( $wgUser ) ); - $wgOut->setRevisionId( $newid ); - $wgOut->addPrimaryWikiText( $text, $this ); - - if ( !$wgUseDumbLinkUpdate ) { - # Move the current links back to the second register - $wgLinkCache->swapRegisters(); - - # Get old version of link table to allow incremental link updates - # Lock this data now since it is needed for an update - $wgLinkCache->forUpdate( true ); - $wgLinkCache->preFill( $this->mTitle ); - - # Swap this old version back into its rightful place - $wgLinkCache->swapRegisters(); - } - + + # Output the redirect if( $this->isRedirect( $text ) ) $r = 'redirect=no'; else @@ -1704,7 +1733,7 @@ class Article { $wgOut->addHTML( $skin->historyLink() .''); } - # Fetch cur_text + # Fetch article text $rev = Revision::newFromTitle( $this->mTitle ); # Fetch name(s) of contributors @@ -2076,8 +2105,22 @@ class Article { * @private * @param string $text */ - function editUpdates( $text, $summary, $minoredit, $timestamp_of_pagechange) { - global $wgDeferredUpdateList, $wgMessageCache, $wgUser; + function editUpdates( $text, $summary, $minoredit, $timestamp_of_pagechange, $newid) { + global $wgDeferredUpdateList, $wgMessageCache, $wgUser, $wgParser, $wgParserCache; + + $fname = 'Article::editUpdates'; + 
wfProfileIn( $fname ); + + # Parse the text + $options = new ParserOptions; + $poutput = $wgParser->parse( $text, $this->mTitle, $options, true, true, $newid ); + + # Save it to the parser cache + $wgParserCache->save( $poutput, $this, $wgUser ); + + # Update the links tables + $u = new LinksUpdate( $this->mTitle, $poutput ); + $u->doUpdate(); if ( wfRunHooks( 'ArticleEditUpdatesDeleteFromRecentchanges', array( &$this ) ) ) { wfSeedRandom(); @@ -2097,32 +2140,35 @@ class Article { $title = $this->mTitle->getPrefixedDBkey(); $shortTitle = $this->mTitle->getDBkey(); - if ( 0 != $id ) { - $u = new LinksUpdate( $id, $title ); - array_push( $wgDeferredUpdateList, $u ); - $u = new SiteStatsUpdate( 0, 1, $this->mGoodAdjustment, $this->mTotalAdjustment ); - array_push( $wgDeferredUpdateList, $u ); - $u = new SearchUpdate( $id, $title, $text ); - array_push( $wgDeferredUpdateList, $u ); - - # If this is another user's talk page, update newtalk - - if ($this->mTitle->getNamespace() == NS_USER_TALK && $shortTitle != $wgUser->getName()) { - $other = User::newFromName( $shortTitle ); - if( is_null( $other ) && User::isIP( $shortTitle ) ) { - // An anonymous user - $other = new User(); - $other->setName( $shortTitle ); - } - if( $other ) { - $other->setNewtalk( true ); - } - } + if ( 0 == $id ) { + wfProfileOut( $fname ); + return; + } - if ( $this->mTitle->getNamespace() == NS_MEDIAWIKI ) { - $wgMessageCache->replace( $shortTitle, $text ); + $u = new SiteStatsUpdate( 0, 1, $this->mGoodAdjustment, $this->mTotalAdjustment ); + array_push( $wgDeferredUpdateList, $u ); + $u = new SearchUpdate( $id, $title, $text ); + array_push( $wgDeferredUpdateList, $u ); + + # If this is another user's talk page, update newtalk + + if ($this->mTitle->getNamespace() == NS_USER_TALK && $shortTitle != $wgUser->getName()) { + $other = User::newFromName( $shortTitle ); + if( is_null( $other ) && User::isIP( $shortTitle ) ) { + // An anonymous user + $other = new User(); + $other->setName( $shortTitle 
); + } + if( $other ) { + $other->setNewtalk( true ); } } + + if ( $this->mTitle->getNamespace() == NS_MEDIAWIKI ) { + $wgMessageCache->replace( $shortTitle, $text ); + } + + wfProfileOut( $fname ); } /** @@ -2212,7 +2258,7 @@ class Article { } /** - * Loads cur_touched and returns a value indicating if it should be used + * Loads page_touched and returns a value indicating if it should be used * */ function checkTouched() { @@ -2227,6 +2273,21 @@ class Article { return !$this->mIsRedirect; } + /** + * Get the page_touched field + */ + function getTouched() { + # Ensure that page data has been loaded + if( !$this->mDataLoaded ) { + $dbr =& $this->getDB(); + $data = $this->pageDataFromId( $dbr, $this->getId() ); + if( $data ) { + $this->loadPageData( $data ); + } + } + return $this->mTouched; + } + /** * Edit an article without doing all that other stuff * The article must already exist; link tables etc @@ -2480,29 +2541,27 @@ class Article { /** * Return a list of templates used by this article. 
- * Uses the links table to find the templates + * Uses the templatelinks table * - * @return array + * @return array Array of Title objects */ function getUsedTemplates() { $result = array(); $id = $this->mTitle->getArticleID(); - $db =& wfGetDB( DB_SLAVE ); - $res = $db->select( array( 'pagelinks' ), - array( 'pl_title' ), - array( - 'pl_from' => $id, - 'pl_namespace' => NS_TEMPLATE ), + $dbr =& wfGetDB( DB_SLAVE ); + $res = $dbr->select( array( 'templatelinks' ), + array( 'tl_namespace', 'tl_title' ), + array( 'tl_from' => $id ), 'Article:getUsedTemplates' ); if ( false !== $res ) { - if ( $db->numRows( $res ) ) { - while ( $row = $db->fetchObject( $res ) ) { - $result[] = $row->pl_title; + if ( $dbr->numRows( $res ) ) { + while ( $row = $dbr->fetchObject( $res ) ) { + $result[] = Title::makeTitle( $row->tl_namespace, $row->tl_title ); } } } - $db->freeResult( $res ); + $dbr->freeResult( $res ); return $result; } } diff --git a/includes/Database.php b/includes/Database.php index 9dc7bd3873..6f48ce2b03 100644 --- a/includes/Database.php +++ b/includes/Database.php @@ -1069,7 +1069,7 @@ class Database { * $mode: LIST_COMMA - comma separated, no field names * LIST_AND - ANDed WHERE clause (without the WHERE) * LIST_SET - comma separated with field names, like a SET clause - * LIST_NAMES - comma separated field names + * LIST_NAMES - comma separated field names */ function makeList( $a, $mode = LIST_COMMA ) { if ( !is_array( $a ) ) { @@ -1693,7 +1693,7 @@ class ResultWrapper { /** * @todo document */ - function ResultWrapper( $database, $result ) { + function ResultWrapper( &$database, $result ) { $this->db =& $database; $this->result =& $result; } diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index f96250da9a..2973937333 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -690,12 +690,6 @@ $wgDebugLogGroups = array(); */ $wgShowSQLErrors = false; -# Should [[Category:Dog]] on a page associate it with the -# 
category "Dog"? (a link to that category page will be -# added to the article, clicking it reveals a list of -# all articles in the category) -$wgUseCategoryMagic = true; - /** * disable experimental dmoz-like category browsing. Output things like: * Encyclopedia > Music > Style of Music > Jazz diff --git a/includes/EditPage.php b/includes/EditPage.php index 7554022a5b..a83b20a5d5 100644 --- a/includes/EditPage.php +++ b/includes/EditPage.php @@ -828,7 +828,7 @@ class EditPage { if( !$this->preview && !$this->diff ) { $wgOut->setOnloadHandler( 'document.editform.wpTextbox1.focus()' ); } - $templates = $this->getTemplatesUsed(); + $templates = $this->formatTemplates(); global $wgLivePreview; if ( $wgLivePreview ) { @@ -986,27 +986,25 @@ END /** * Prepare a list of templates used by this page. Returns HTML. */ - function getTemplatesUsed() { + function formatTemplates() { global $wgUser; - $fname = 'EditPage::getTemplatesUsed'; + $fname = 'EditPage::formatTemplates'; wfProfileIn( $fname ); $sk =& $wgUser->getSkin(); - $templates = ''; - $articleTemplates = $this->mArticle->getUsedTemplates(); - if ( count( $articleTemplates ) > 0 ) { - $templates = '
'. wfMsg( 'templatesused' ) . ''; } wfProfileOut( $fname ); - return $templates; + return $outText; } /** diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php index 60f56eae62..bae7751977 100644 --- a/includes/GlobalFunctions.php +++ b/includes/GlobalFunctions.php @@ -90,6 +90,24 @@ if( !function_exists( 'floatval' ) ) { } } +if ( !function_exists( 'array_diff_key' ) ) { + /** + * Exists in PHP 5.1.0+ + * Not quite compatible, two-argument version only + * Null values will cause problems due to this use of isset() + */ + function array_diff_key( $left, $right ) { + $result = $left; + foreach ( $left as $key => $value ) { + if ( isset( $right[$key] ) ) { + unset( $result[$key] ); + } + } + return $result; + } +} + + /** * Where as we got a random seed * @var bool $wgTotalViews diff --git a/includes/ImagePage.php b/includes/ImagePage.php index 31fe0837cc..660e83d8bd 100644 --- a/includes/ImagePage.php +++ b/includes/ImagePage.php @@ -53,7 +53,6 @@ class ImagePage extends Article { $wgOut->setArticleFlag( true ); $wgOut->setRobotpolicy( 'index,follow' ); $wgOut->setPageTitle( $this->mTitle->getPrefixedText() ); - $wgOut->addMetaTags(); $this->viewUpdates(); } diff --git a/includes/LinkCache.php b/includes/LinkCache.php index f96fdf426e..d0371e4efa 100644 --- a/includes/LinkCache.php +++ b/includes/LinkCache.php @@ -110,14 +110,22 @@ class LinkCache { $wgMemc->delete( $this->getKey( $title ) ); } + /** @deprecated */ function suspend() { $this->mActive = false; } + /** @deprecated */ function resume() { $this->mActive = true; } + function getPageLinks() { return $this->mPageLinks; } function getGoodLinks() { return $this->mGoodLinks; } function getBadLinks() { return array_keys( $this->mBadLinks ); } function getImageLinks() { return $this->mImageLinks; } function getCategoryLinks() { return $this->mCategoryLinks; } + /** + * Add a title to the link cache, return the page_id or zero if non-existent + * @param string $title Title to add + * @return 
integer + */ function addLink( $title ) { $nt = Title::newFromDBkey( $title ); if( $nt ) { @@ -126,7 +134,12 @@ class LinkCache { return 0; } } - + + /** + * Add a title to the link cache, return the page_id or zero if non-existent + * @param Title $nt Title to add + * @return integer + */ function addLinkObj( &$nt ) { global $wgMemc, $wgLinkCacheMemcached, $wgAntiLockFlags; $title = $nt->getPrefixedDBkey(); @@ -188,6 +201,7 @@ class LinkCache { /** * Bulk-check the pagelinks and page arrays for existence info. * @param Title $fromtitle + * @deprecated */ function preFill( &$fromtitle ) { global $wgAntiLockFlags; @@ -238,91 +252,6 @@ class LinkCache { wfProfileOut( $fname ); } - function getGoodAdditions() { - return array_diff( $this->mGoodLinks, $this->mOldGoodLinks ); - } - - function getBadAdditions() { - #wfDebug( "mOldBadLinks: " . implode( ', ', array_keys( $this->mOldBadLinks ) ) . "\n" ); - #wfDebug( "mBadLinks: " . implode( ', ', array_keys( $this->mBadLinks ) ) . "\n" ); - return array_values( array_diff( array_keys( $this->mBadLinks ), array_keys( $this->mOldBadLinks ) ) ); - } - - function getImageAdditions() { - return array_diff_assoc( $this->mImageLinks, $this->mOldImageLinks ); - } - - function getGoodDeletions() { - return array_diff( $this->mOldGoodLinks, $this->mGoodLinks ); - } - - function getBadDeletions() { - return array_values( array_diff( array_keys( $this->mOldBadLinks ), array_keys( $this->mBadLinks ) )); - } - - function getImageDeletions() { - return array_diff_assoc( $this->mOldImageLinks, $this->mImageLinks ); - } - - function getPageAdditions() { - $set = array_diff( array_keys( $this->mPageLinks ), array_keys( $this->mOldPageLinks ) ); - $out = array(); - foreach( $set as $key ) { - $out[$key] = $this->mPageLinks[$key]; - } - return $out; - } - - function getPageDeletions() { - $set = array_diff( array_keys( $this->mOldPageLinks ), array_keys( $this->mPageLinks ) ); - $out = array(); - foreach( $set as $key ) { - $out[$key] = 
$this->mOldPageLinks[$key]; - } - return $out; - } - - /** - * Parameters: - * @param $which is one of the LINKCACHE_xxx constants - * @param $del,$add are the incremental update arrays which will be filled. - * - * @return Returns whether or not it's worth doing the incremental version. - * - * For example, if [[List of mathematical topics]] was blanked, - * it would take a long, long time to do incrementally. - */ - function incrementalSetup( $which, &$del, &$add ) { - if ( ! $this->mPreFilled ) { - return false; - } - - switch ( $which ) { - case LINKCACHE_GOOD: - $old =& $this->mOldGoodLinks; - $cur =& $this->mGoodLinks; - $del = $this->getGoodDeletions(); - $add = $this->getGoodAdditions(); - break; - case LINKCACHE_BAD: - $old =& $this->mOldBadLinks; - $cur =& $this->mBadLinks; - $del = $this->getBadDeletions(); - $add = $this->getBadAdditions(); - break; - case LINKCACHE_PAGE: - $old =& $this->mOldPageLinks; - $cur =& $this->mPageLinks; - $del = $this->getPageDeletions(); - $add = $this->getPageAdditions(); - break; - default: # LINKCACHE_IMAGE - return false; - } - - return true; - } - /** * Clears cache */ @@ -339,6 +268,7 @@ class LinkCache { /** * Swaps old and current link registers + * @deprecated */ function swapRegisters() { swap( $this->mGoodLinks, $this->mOldGoodLinks ); @@ -386,46 +316,78 @@ class LinkBatch { $this->data[$ns][$dbkey] = 1; } + /** + * Set the link list to a given 2-d array + * First key is the namespace, second is the DB key, value arbitrary + */ + function setArray( $array ) { + $this->data = $array; + } + + /** + * Do the query and add the results to a LinkCache object + * Return an array mapping PDBK to ID + */ function execute( &$cache ) { $fname = 'LinkBatch::execute'; - $namespaces = array(); - - if ( !count( $this->data ) ) { - return; - } - wfProfileIn( $fname ); - - // Construct query - // This is very similar to Parser::replaceLinkHolders - $dbr =& wfGetDB( DB_SLAVE ); - $page = $dbr->tableName( 'page' ); - $sql = "SELECT 
page_id, page_namespace, page_title FROM $page WHERE " - . $this->constructSet( 'page', $dbr ); - // Do query - $res = $dbr->query( $sql, $fname ); + $res = $this->doQuery(); + if ( !$res ) { + wfProfileOut( $fname ); + return array(); + } - // Process results // For each returned entry, add it to the list of good links, and remove it from $remaining + $ids = array(); $remaining = $this->data; - while ( $row = $dbr->fetchObject( $res ) ) { + while ( $row = $res->fetchObject() ) { $title = Title::makeTitle( $row->page_namespace, $row->page_title ); $cache->addGoodLinkObj( $row->page_id, $title ); + $ids[$title->getPrefixedDBkey()] = $row->page_id; unset( $remaining[$row->page_namespace][$row->page_title] ); } - $dbr->freeResult( $res ); + $res->free(); // The remaining links in $data are bad links, register them as such foreach ( $remaining as $ns => $dbkeys ) { foreach ( $dbkeys as $dbkey => $nothing ) { $title = Title::makeTitle( $ns, $dbkey ); $cache->addBadLinkObj( $title ); + $ids[$title->getPrefixedDBkey()] = 0; } } + wfProfileOut( $fname ); + return $ids; + } + /** + * Perform the existence test query, return a ResultWrapper with page_id fields + */ + function doQuery() { + $fname = 'LinkBatch::execute'; + $namespaces = array(); + + if ( !count( $this->data ) ) { + return false; + } + wfProfileIn( $fname ); + + // Construct query + // This is very similar to Parser::replaceLinkHolders + $dbr =& wfGetDB( DB_SLAVE ); + $page = $dbr->tableName( 'page' ); + $set = $this->constructSet( 'page', $dbr ); + if ( $set === false ) { + return false; + } + $sql = "SELECT page_id, page_namespace, page_title FROM $page WHERE $set"; + + // Do query + $res = new ResultWrapper( $dbr, $dbr->query( $sql, $fname ) ); wfProfileOut( $fname ); + return $res; } /** @@ -436,8 +398,9 @@ class LinkBatch { * @return string * @access public */ - function constructSet( $prefix, $db ) { + function constructSet( $prefix, &$db ) { $first = true; + $firstTitle = true; $sql = ''; foreach ( 
$this->data as $ns => $dbkeys ) { if ( !count( $dbkeys ) ) { @@ -463,7 +426,12 @@ class LinkBatch { $sql .= '))'; } - return $sql; + if ( $first && $firstTitle ) { + # No titles added + return false; + } else { + return $sql; + } } } diff --git a/includes/LinksUpdate.php b/includes/LinksUpdate.php index 2e02aec46a..b15f684281 100644 --- a/includes/LinksUpdate.php +++ b/includes/LinksUpdate.php @@ -13,7 +13,15 @@ class LinksUpdate { /**#@+ * @access private */ - var $mId, $mTitle; + var $mId, # Page ID of the article linked from + $mTitle, # Title object of the article linked from + $mParserOutput, # Parser output containing the links to be inserted into the database + $mLinks, # Map of title strings to IDs for the links in the document + $mImages, # DB keys of the images used, in the array key only + $mTemplates, # Map of title strings to IDs for the template references, including broken ones + $mCategories, # Map of category names to sort keys + $mDb, # Database connection reference + $mOptions; # SELECT options to be used (array) /**#@-*/ /** @@ -22,162 +30,73 @@ class LinksUpdate { * @param integer $id * @param string $title */ - function LinksUpdate( $id, $title ) { - $this->mId = $id; + function LinksUpdate( $title, $parserOutput ) { + global $wgAntiLockFlags; + + if ( $wgAntiLockFlags & ALF_NO_LINK_LOCK ) { + $this->mOptions = array(); + } else { + $this->mOptions = array( 'FOR UPDATE' ); + } + $this->mDb =& wfGetDB( DB_MASTER ); + + if ( !is_object( $title ) ) { + wfDebugDieBacktrace( "The calling convention to LinksUpdate::LinksUpdate() has changed. " . 
+ "Please see Article::editUpdates() for an invocation example.\n" ); + } $this->mTitle = $title; + $this->mId = $title->getArticleID(); + $this->mParserOutput = $parserOutput; + + // Shortcut aliases + $this->mLinks =& $this->mParserOutput->getLinks(); + $this->mImages =& $this->mParserOutput->getImages(); + $this->mTemplates =& $this->mParserOutput->getTemplates(); + $this->mCategories =& $this->mParserOutput->getCategories(); + } /** * Update link tables with outgoing links from an updated article - * Relies on the 'link cache' to be filled out. */ - function doUpdate() { - global $wgUseDumbLinkUpdate, $wgLinkCache, $wgUseCategoryMagic; - + global $wgUseDumbLinkUpdate; if ( $wgUseDumbLinkUpdate ) { $this->doDumbUpdate(); - return; + } else { + $this->doIncrementalUpdate(); } + } - $fname = 'LinksUpdate::doUpdate'; + function doIncrementalUpdate() { + $fname = 'LinksUpdate::doIncrementalUpdate'; wfProfileIn( $fname ); - $del = array(); - $add = array(); + # Page links + $existing = $this->getExistingLinks(); + $this->incrTableUpdate( 'pagelinks', 'pl', $this->getLinkDeletions( $existing ), + $this->getLinkInsertions( $existing ) ); - $dbw =& wfGetDB( DB_MASTER ); - $pagelinks = $dbw->tableName( 'pagelinks' ); - $imagelinks = $dbw->tableName( 'imagelinks' ); - $categorylinks = $dbw->tableName( 'categorylinks' ); - - #------------------------------------------------------------------------------ - # Good links - - if ( $wgLinkCache->incrementalSetup( LINKCACHE_PAGE, $del, $add ) ) { - # Delete where necessary - if ( count( $del ) ) { - $batch = new LinkBatch( $del ); - $set = $batch->constructSet( 'pl', $dbw ); - if ( $set ) { - $sql = "DELETE FROM $pagelinks WHERE pl_from={$this->mId} AND ($set)"; - $dbw->query( $sql, $fname ); - } - } - } else { - # Delete everything - $dbw->delete( 'pagelinks', array( 'pl_from' => $this->mId ), $fname ); - - # Get the addition list - $add = $wgLinkCache->getPageLinks(); - } - - # Do the insertion - if( 0 != count( $add ) ) { - 
$arr = array(); - foreach( $add as $lt => $target ) { - array_push( $arr, array( - 'pl_from' => $this->mId, - 'pl_namespace' => $target->getNamespace(), - 'pl_title' => $target->getDbKey() ) ); - } - - # The link cache was constructed without FOR UPDATE, so there may be collisions - # Ignoring for now, I'm not sure if that causes problems or not, but I'm fairly - # sure it's better than without IGNORE - $dbw->insert( 'pagelinks', $arr, $fname, array( 'IGNORE' ) ); - } + # Template links + $existing = $this->getExistingTemplates(); + $this->incrTableUpdate( 'templatelinks', 'tl', $this->getTemplateDeletions( $existing ), + $this->getTemplateInsertions( $existing ) ); - #------------------------------------------------------------------------------ # Image links - $dbw->delete('imagelinks',array('il_from'=>$this->mId),$fname); - - # Get addition list - $add = $wgLinkCache->getImageLinks(); - - # Do the insertion - $sql = ''; - $image = NS_IMAGE; - if ( 0 != count ( $add ) ) { - $arr = array(); - foreach ($add as $iname => $val ) { - $nt = Title::makeTitle( $image, $iname ); - if( !$nt ) continue; - $nt->invalidateCache(); - array_push( $arr, array( - 'il_from' => $this->mId, - 'il_to' => $iname ) ); - } - $dbw->insert('imagelinks', $arr, $fname, array('IGNORE')); - } + $existing = $this->getExistingImages(); + $this->incrTableUpdate( 'imagelinks', 'il', $this->getImageDeletions( $existing ), + $this->getImageInsertions( $existing ) ); - #------------------------------------------------------------------------------ # Category links - if( $wgUseCategoryMagic ) { - global $messageMemc, $wgDBname; - - # Get addition list - $add = $wgLinkCache->getCategoryLinks(); - - # select existing catlinks for this page - $res = $dbw->select( 'categorylinks', - array( 'cl_to', 'cl_sortkey' ), - array( 'cl_from' => $this->mId ), - $fname, - 'FOR UPDATE' ); - - $del = array(); - if( 0 != $dbw->numRows( $res ) ) { - while( $row = $dbw->fetchObject( $res ) ) { - if( !isset( 
$add[$row->cl_to] ) || $add[$row->cl_to] != $row->cl_sortkey ) { - // in the db, but no longer in the page - // or sortkey has changed -> delete - $del[] = $row->cl_to; - } else { - // remove already existing category memberships - // from the add array - unset( $add[$row->cl_to] ); - } - } - } - - // delete any removed categorylinks - if( count( $del ) > 0) { - // delete old ones - $dbw->delete( 'categorylinks', - array( - 'cl_from' => $this->mId, - 'cl_to' => $del ), - $fname ); - foreach( $del as $cname ){ - $nt = Title::makeTitle( NS_CATEGORY, $cname ); - $nt->invalidateCache(); - // update the timestamp which indicates when the last article - // was added or removed to/from this article - $key = $wgDBname . ':Category:' . md5( $nt->getDBkey() ) . ':adddeltimestamp'; - $messageMemc->set( $key , wfTimestamp( TS_MW ), 24*3600 ); - } - } - - // add any new category memberships - if( count( $add ) > 0 ) { - $arr = array(); - foreach( $add as $cname => $sortkey ) { - $nt = Title::makeTitle( NS_CATEGORY, $cname ); - $nt->invalidateCache(); - // update the timestamp which indicates when the last article - // was added or removed to/from this article - $key = $wgDBname . ':Category:' . md5( $nt->getDBkey() ) . ':adddeltimestamp'; - $messageMemc->set( $key , wfTimestamp( TS_MW ), 24*3600 ); - array_push( $arr, array( - 'cl_from' => $this->mId, - 'cl_to' => $cname, - 'cl_sortkey' => $sortkey ) ); - } - // do the actual sql insertion - $dbw->insert( 'categorylinks', $arr, $fname, array( 'IGNORE' ) ); - } - } + $existing = $this->getExistingCategories(); + $this->incrTableUpdate( 'categorylinks', 'cl', $this->getCategoryDeletions( $existing ), + $this->getCategoryInsertions( $existing ) ); + + # I think this works out to a set XOR operation, the idea is to invalidate all + # categories which were added, deleted or changed + # FIXME: surely there's a more appropriate place to put this update? 
+ $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing ); + $this->invalidateCategories( $categoryUpdates ); wfProfileOut( $fname ); } @@ -188,67 +107,281 @@ class LinksUpdate { * Also useful where link table corruption needs to be repaired, e.g. in refreshLinks.php */ function doDumbUpdate() { - global $wgLinkCache, $wgUseCategoryMagic; $fname = 'LinksUpdate::doDumbUpdate'; wfProfileIn( $fname ); + + $existing = $this->getExistingCategories(); + $categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing ); + $this->dumbTableUpdate( 'pagelinks', $this->getLinkInsertions(), 'pl_from' ); + $this->dumbTableUpdate( 'imagelinks', $this->getImageInsertions(), 'il_from' ); + $this->dumbTableUpdate( 'categorylinks', $this->getCategoryInsertions(), 'cl_from' ); + $this->dumbTableUpdate( 'templatelinks', $this->getTemplateInsertions(), 'tl_from' ); + + # Update the cache of all the category pages + $this->invalidateCategories( $categoryUpdates ); - $dbw =& wfGetDB( DB_MASTER ); - $pagelinks = $dbw->tableName( 'pagelinks' ); - $imagelinks = $dbw->tableName( 'imagelinks' ); - $categorylinks = $dbw->tableName( 'categorylinks' ); - - $dbw->delete('pagelinks', array('pl_from'=>$this->mId),$fname); + wfProfileOut( $fname ); + } + + function invalidateCategories( $cats ) { + $fname = 'LinksUpdate::invalidateCategories'; + if ( count( $cats ) ) { + $this->mDb->update( 'page', array( 'page_touched' => $this->mDb->timestamp() ), + array( + 'page_namespace' => NS_CATEGORY, + 'page_title IN (' . $this->mDb->makeList( array_keys( $cats ) ) . 
')' + ), $fname + ); + } + } + + function dumbTableUpdate( $table, $insertions, $fromField ) { + $fname = 'LinksUpdate::dumbTableUpdate'; + $this->mDb->delete( $table, array( $fromField => $this->mId ), $fname ); + if ( count( $insertions ) ) { + # The link array was constructed without FOR UPDATE, so there may be collisions + # Ignoring for now, I'm not sure if that causes problems or not, but I'm fairly + # sure it's better than without IGNORE + $this->mDb->insert( $table, $insertions, $fname, array( 'IGNORE' ) ); + } + } + + /** + * Make a WHERE clause from a 2-d NS/dbkey array + * + * @param array $arr 2-d array indexed by namespace and DB key + * @param string $prefix Field name prefix, without the underscore + */ + function makeWhereFrom2d( &$arr, $prefix ) { + $lb = new LinkBatch; + $lb->setArray( $arr ); + return $lb->constructSet( $prefix, $this->mDb ); + } + + /** + * Update a table by doing a delete query then an insert query + * @private + */ + function incrTableUpdate( $table, $prefix, $deletions, $insertions ) { + $fname = 'LinksUpdate::incrTableUpdate'; + $where = array( "{$prefix}_from" => $this->mId ); + if ( $table == 'pagelinks' || $table == 'templatelinks' ) { + $clause = $this->makeWhereFrom2d( $deletions, $prefix ); + if ( $clause ) { + $where[] = $clause; + } else { + $where = false; + } + } else { + if ( count( $deletions ) ) { + $where[] = "{$prefix}_to IN (" . $this->mDb->makeList( array_keys( $deletions ) ) . 
')'; + } else { + $where = false; + } + } + if ( $where ) { + $this->mDb->delete( $table, $where, $fname ); + } + if ( count( $insertions ) ) { + $this->mDb->insert( $table, $insertions, $fname, 'IGNORE' ); + } + } + - $a = $wgLinkCache->getPageLinks(); - if ( 0 != count( $a ) ) { - $arr = array(); - foreach( $a as $lt => $target ) { - array_push( $arr, array( + /** + * Get an array of pagelinks insertions for passing to the DB + * Skips the titles specified by the 2-D array $existing + * @private + */ + function getLinkInsertions( $existing = array() ) { + $arr = array(); + foreach( $this->mLinks as $ns => $dbkeys ) { + # array_diff_key() was introduced in PHP 5.1, there is a compatibility function + # in GlobalFunctions.php + $diffs = isset( $existing[$ns] ) ? array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys; + foreach ( $diffs as $dbk => $id ) { + $arr[] = array( 'pl_from' => $this->mId, - 'pl_namespace' => $target->getNamespace(), - 'pl_title' => $target->getDBkey() ) ); + 'pl_namespace' => $ns, + 'pl_title' => $dbk + ); } - $dbw->insert( 'pagelinks', $arr, $fname, array( 'IGNORE' ) ); } + return $arr; + } - $dbw->delete('imagelinks', array('il_from'=>$this->mId),$fname); + /** + * Get an array of template insertions. Like getLinkInsertions() + * @private + */ + function getTemplateInsertions( $existing = array() ) { + $arr = array(); + foreach( $this->mTemplates as $ns => $dbkeys ) { + $diffs = isset( $existing[$ns] ) ? 
array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys; + foreach ( $diffs as $dbk => $id ) { + $arr[] = array( + 'tl_from' => $this->mId, + 'tl_namespace' => $ns, + 'tl_title' => $dbk + ); + } + } + return $arr; + } - $a = $wgLinkCache->getImageLinks(); - $sql = ''; - if ( 0 != count ( $a ) ) { - $arr = array(); - foreach( $a as $iname => $val ) - array_push( $arr, array( - 'il_from' => $this->mId, - 'il_to' => $iname ) ); - $dbw->insert( 'imagelinks', $arr, $fname, array( 'IGNORE' ) ); + /** + * Get an array of image insertions + * Skips the names specified in $existing + * @private + */ + function getImageInsertions( $existing = array() ) { + $arr = array(); + $diffs = array_diff_key( $this->mImages, $existing ); + foreach( $diffs as $iname => $val ) { + $arr[] = array( + 'il_from' => $this->mId, + 'il_to' => $iname + ); } + return $arr; + } - if( $wgUseCategoryMagic ) { - $dbw->delete('categorylinks', array('cl_from'=>$this->mId),$fname); - - # Get addition list - $add = $wgLinkCache->getCategoryLinks(); - - # Do the insertion - $sql = ''; - if ( 0 != count ( $add ) ) { - $arr = array(); - foreach( $add as $cname => $sortkey ) { - # FIXME: Change all this to avoid unnecessary duplication - $nt = Title::makeTitle( NS_CATEGORY, $cname ); - if( !$nt ) continue; - $nt->invalidateCache(); - array_push( $arr, array( - 'cl_from' => $this->mId, - 'cl_to' => $cname, - 'cl_sortkey' => $sortkey ) ); - } - $dbw->insert( 'categorylinks', $arr, $fname, array( 'IGNORE' ) ); + /** + * Get an array of category insertions + * @param array $existing Array mapping existing category names to sort keys. 
If both + * match a link in $this, the link will be omitted from the output + * @private + */ + function getCategoryInsertions( $existing = array() ) { + $diffs = array_diff_assoc( $this->mCategories, $existing ); + $arr = array(); + foreach ( $diffs as $name => $sortkey ) { + $arr[] = array( + 'cl_from' => $this->mId, + 'cl_to' => $name, + 'cl_sortkey' => $sortkey + ); + } + return $arr; + } + + /** + * Given an array of existing links, returns those links which are not in $this + * and thus should be deleted. + * @private + */ + function getLinkDeletions( $existing ) { + $del = array(); + foreach ( $existing as $ns => $dbkeys ) { + if ( isset( $this->mLinks[$ns] ) ) { + $del[$ns] = array_diff_key( $existing[$ns], $this->mLinks[$ns] ); + } else { + $del[$ns] = $existing[$ns]; } } - wfProfileOut( $fname ); + return $del; + } + + /** + * Given an array of existing templates, returns those templates which are not in $this + * and thus should be deleted. + * @private + */ + function getTemplateDeletions( $existing ) { + $del = array(); + foreach ( $existing as $ns => $dbkeys ) { + if ( isset( $this->mTemplates[$ns] ) ) { + $del[$ns] = array_diff_key( $existing[$ns], $this->mTemplates[$ns] ); + } else { + $del[$ns] = $existing[$ns]; + } + } + return $del; + } + + /** + * Given an array of existing images, returns those images which are not in $this + * and thus should be deleted. + * @private + */ + function getImageDeletions( $existing ) { + return array_diff_key( $existing, $this->mImages ); + } + + /** + * Given an array of existing categories, returns those categories which are not in $this + * and thus should be deleted. 
+ * @private + */ + function getCategoryDeletions( $existing ) { + return array_diff_assoc( $existing, $this->mCategories ); + } + + /** + * Get an array of existing links, as a 2-D array + * @private + */ + function getExistingLinks() { + $fname = 'LinksUpdate::getExistingLinks'; + $res = $this->mDb->select( 'pagelinks', array( 'pl_namespace', 'pl_title' ), + array( 'pl_from' => $this->mId ), $fname, $this->mOptions ); + $arr = array(); + while ( $row = $this->mDb->fetchObject( $res ) ) { + if ( !isset( $arr[$row->pl_namespace] ) ) { + $arr[$row->pl_namespace] = array(); + } + $arr[$row->pl_namespace][$row->pl_title] = 1; + } + return $arr; + } + + /** + * Get an array of existing templates, as a 2-D array + * @private + */ + function getExistingTemplates() { + $fname = 'LinksUpdate::getExistingTemplates'; + $res = $this->mDb->select( 'templatelinks', array( 'tl_namespace', 'tl_title' ), + array( 'tl_from' => $this->mId ), $fname, $this->mOptions ); + $arr = array(); + while ( $row = $this->mDb->fetchObject( $res ) ) { + if ( !isset( $arr[$row->tl_namespace] ) ) { + $arr[$row->tl_namespace] = array(); + } + $arr[$row->tl_namespace][$row->tl_title] = 1; + } + return $arr; + } + + /** + * Get an array of existing images, image names in the keys + * @private + */ + function getExistingImages() { + $fname = 'LinksUpdate::getExistingImages'; + $res = $this->mDb->select( 'imagelinks', array( 'il_to' ), + array( 'il_from' => $this->mId ), $fname, $this->mOptions ); + $arr = array(); + while ( $row = $this->mDb->fetchObject( $res ) ) { + $arr[$row->il_to] = 1; + } + return $arr; + } + + /** + * Get an array of existing categories, with the name in the key and sort key in the value. 
+ * @private + */ + function getExistingCategories() { + $fname = 'LinksUpdate::getExistingCategories'; + $res = $this->mDb->select( 'categorylinks', array( 'cl_to', 'cl_sortkey' ), + array( 'cl_from' => $this->mId ), $fname, $this->mOptions ); + $arr = array(); + while ( $row = $this->mDb->fetchObject( $res ) ) { + $arr[$row->cl_to] = $row->cl_sortkey; + } + return $arr; } } ?> diff --git a/includes/OutputPage.php b/includes/OutputPage.php index 1e03a5c32a..3d464f4132 100644 --- a/includes/OutputPage.php +++ b/includes/OutputPage.php @@ -214,11 +214,30 @@ class OutputPage { function getCategoryLinks() { return $this->mCategoryLinks; } - function addCategoryLinks($newLinkArray) { - $this->mCategoryLinks += $newLinkArray; + + /** + * Add an array of categories, with names in the keys + */ + function addCategoryLinks($categories) { + global $wgUser, $wgLinkCache, $wgContLang; + + # Add the links to the link cache in a batch + $arr = array( NS_CATEGORY => $categories ); + $lb = new LinkBatch; + $lb->setArray( $arr ); + $lb->execute( $wgLinkCache ); + + $sk =& $wgUser->getSkin(); + foreach ( $categories as $category => $arbitrary ) { + $title = Title::makeTitleSafe( NS_CATEGORY, $category ); + $text = $wgContLang->convertHtml( $title->getText() ); + $this->mCategoryLinks[] = $sk->makeLinkObj( $title, $text ); + } } - function setCategoryLinks($newLinkArray) { - $this->mCategoryLinks += $newLinkArray; + + function setCategoryLinks($categories) { + $this->mCategoryLinks = array(); + $this->addCategoryLinks($categories); } function suppressQuickbar() { $this->mSuppressQuickbar = true; } @@ -268,7 +287,8 @@ class OutputPage { $parserOutput = $wgParser->parse( $text, $title, $this->mParserOptions, $linestart, true, $this->mRevisionId ); $this->mLanguageLinks += $parserOutput->getLanguageLinks(); - $this->mCategoryLinks += $parserOutput->getCategoryLinks(); + $this->addCategoryLinks( $parserOutput->getCategories() ); + $this->addKeywords( $parserOutput ); if ( 
$parserOutput->getCacheTime() == -1 ) { $this->enableClientCache( false ); } @@ -279,20 +299,21 @@ class OutputPage { * Add wikitext to the buffer, assuming that this is the primary text for a page view * Saves the text into the parser cache if possible */ - function addPrimaryWikiText( $text, $cacheArticle ) { + function addPrimaryWikiText( $text, $article, $cache = true ) { global $wgParser, $wgParserCache, $wgUser; - $parserOutput = $wgParser->parse( $text, $cacheArticle->mTitle, + $parserOutput = $wgParser->parse( $text, $article->mTitle, $this->mParserOptions, true, true, $this->mRevisionId ); $text = $parserOutput->getText(); - if ( $cacheArticle && $parserOutput->getCacheTime() != -1 ) { - $wgParserCache->save( $parserOutput, $cacheArticle, $wgUser ); + if ( $article && $parserOutput->getCacheTime() != -1 ) { + $wgParserCache->save( $parserOutput, $article, $wgUser ); } $this->mLanguageLinks += $parserOutput->getLanguageLinks(); - $this->mCategoryLinks += $parserOutput->getCategoryLinks(); + $this->addCategoryLinks( $parserOutput->getCategories() ); + $this->addKeywords( $parserOutput ); if ( $parserOutput->getCacheTime() == -1 ) { $this->enableClientCache( false ); } @@ -306,7 +327,7 @@ class OutputPage { function addTemplate( &$template ) { ob_start(); $template->execute(); - $this->addHtml( ob_get_contents() ); + $this->addHTML( ob_get_contents() ); ob_end_clean(); } @@ -331,7 +352,8 @@ class OutputPage { $parserOutput = $wgParserCache->get( $article, $user ); if ( $parserOutput !== false ) { $this->mLanguageLinks += $parserOutput->getLanguageLinks(); - $this->mCategoryLinks += $parserOutput->getCategoryLinks(); + $this->addCategoryLinks( $parserOutput->getCategories() ); + $this->addKeywords( $parserOutput ); $this->addHTML( $parserOutput->getText() ); $t = $parserOutput->getTitleText(); if( !empty( $t ) ) { @@ -828,21 +850,19 @@ class OutputPage { * This function takes the title (first item of mGoodLinks), categories, existing and broken links for the 
page * and uses the first 10 of them for META keywords */ - function addMetaTags () { - global $wgLinkCache , $wgOut ; - $categories = array_keys ( $wgLinkCache->mCategoryLinks ) ; - $good = array_keys ( $wgLinkCache->mGoodLinks ) ; - $bad = array_keys ( $wgLinkCache->mBadLinks ) ; - $a = array_merge ( array_slice ( $good , 0 , 1 ), $categories, array_slice ( $good , 1 , 9 ) , $bad ) ; - $a = array_slice ( $a , 0 , 10 ) ; # 10 keywords max - $a = implode ( ',' , $a ) ; - $strip = array( - "/<.*?>/" => '', - "/_/" => ' ' - ); - $a = htmlspecialchars(preg_replace(array_keys($strip), array_values($strip),$a )); - - $wgOut->addMeta( 'keywords' , $a ) ; + function addKeywords( &$parserOutput ) { + global $wgTitle; + $this->addKeyword( $wgTitle->getPrefixedText() ); + $count = 1; + $links2d =& $parserOutput->getLinks(); + foreach ( $links2d as $ns => $dbkeys ) { + foreach( $dbkeys as $dbkey => $id ) { + $this->addKeyword( $dbkey ); + if ( ++$count > 10 ) { + break 2; + } + } + } } /** diff --git a/includes/Parser.php b/includes/Parser.php index b921d90c9a..5c6f9af7a2 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -101,13 +101,15 @@ class Parser var $mInterwikiLinkHolders, $mLinkHolders, $mUniqPrefix; # Temporary: - var $mOptions, $mTitle, $mOutputType, + var $mOptions, // ParserOptions object + $mTitle, // Title context, used for self-link rendering and similar things + $mOutputType, // Output type, one of the OT_xxx constants $mTemplates, // cache of already loaded templates, avoids // multiple SQL queries for the same string - $mTemplatePath; // stores an unsorted hash of all the templates already loaded + $mTemplatePath, // stores an unsorted hash of all the templates already loaded // in this path. Used for loop detection. 
- - var $mIWTransData = array(); + $mIWTransData = array(), + $mRevisionId; // ID to display in {{REVISIONID}} tags /**#@-*/ @@ -155,9 +157,9 @@ class Parser wfRunHooks( 'ParserClearState', array( &$this ) ); } - /** - * First pass--just handle sections, pass the rest off - * to internalParse() which does all the real work. + /** + * Convert wikitext to HTML + * Do not call this function recursively. * * @access private * @param string $text Text we want to parse @@ -169,6 +171,11 @@ class Parser * @return ParserOutput a ParserOutput */ function parse( $text, &$title, $options, $linestart = true, $clearState = true, $revid = null ) { + /** + * First pass--just handle sections, pass the rest off + * to internalParse() which does all the real work. + */ + global $wgUseTidy, $wgContLang; $fname = 'Parser::parse'; wfProfileIn( $fname ); @@ -193,8 +200,10 @@ class Parser wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$x ) ); # Hook to suspend the parser in this state - if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$x ) ) ) + if ( !wfRunHooks( 'ParserBeforeInternalParse', array( &$this, &$text, &$x ) ) ) { + wfProfileOut( $fname ); return $text ; + } $text = $this->internalParse( $text ); @@ -236,6 +245,7 @@ class Parser $this->mOutput->setText( $text ); wfProfileOut( $fname ); + return $this->mOutput; } @@ -1394,7 +1404,7 @@ class Parser # Interwikis if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) { - array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() ); + $this->mOutput->addLanguageLink( $nt->getFullText() ); $s = rtrim($s . "\n"); $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail; continue; @@ -1411,7 +1421,7 @@ class Parser # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them $s .= $prefix . $this->armorLinks( $this->makeImage( $nt, $text ) ) . 
$trail; - $wgLinkCache->addImageLinkObj( $nt ); + $this->mOutput->addImage( $nt->getDBkey() ); wfProfileOut( "$fname-image" ); continue; @@ -1422,13 +1432,8 @@ class Parser if ( $ns == NS_CATEGORY ) { wfProfileIn( "$fname-category" ); - $t = $wgContLang->convertHtml( $nt->getText() ); $s = rtrim($s . "\n"); # bug 87 - $wgLinkCache->suspend(); # Don't save in links/brokenlinks - $t = $sk->makeLinkObj( $nt, $t, '', '' , $prefix ); - $wgLinkCache->resume(); - if ( $wasblank ) { if ( $this->mTitle->getNamespace() == NS_CATEGORY ) { $sortkey = $this->mTitle->getText(); @@ -1440,8 +1445,7 @@ class Parser } $sortkey = Sanitizer::decodeCharReferences( $sortkey ); $sortkey = $wgContLang->convertCategoryKey( $sortkey ); - $wgLinkCache->addCategoryLinkObj( $nt, $sortkey ); - $this->mOutput->addCategoryLink( $t ); + $this->mOutput->addCategory( $nt->getDBkey(), $sortkey ); /** * Strip the whitespace Category links produce, see bug 87 @@ -1466,7 +1470,7 @@ class Parser $link = $sk->makeMediaLinkObj( $nt, $text ); # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( $link ) . 
$trail; - $wgLinkCache->addImageLinkObj( $nt ); + $this->mOutput->addImage( $nt->getDBkey() ); continue; } elseif( $ns == NS_SPECIAL ) { $s .= $this->makeKnownLinkHolder( $nt, $text, '', $trail, $prefix ); @@ -2297,7 +2301,7 @@ class Parser * @access private */ function braceSubstitution( $piece ) { - global $wgLinkCache, $wgContLang; + global $wgContLang; $fname = 'Parser::braceSubstitution'; wfProfileIn( $fname ); @@ -2515,6 +2519,9 @@ class Parser $text = $articleContent; $replaceHeadings = true; } + # Register a template reference whether or not the template exists + $this->mOutput->addTemplate( $title->getNamespace(), $title->getDBkey(), + $article->getID() ); } } @@ -2577,11 +2584,6 @@ class Parser } $text = $this->replaceVariables( $text, $assocArgs ); - # Resume the link cache and register the inclusion as a link - if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) { - $wgLinkCache->addLinkObj( $title ); - } - # If the template begins with a table or block-level # element, it should be treated as beginning a new line. if (!$piece['lineStart'] && preg_match('/^({\\||:|;|#|\*)/', $text)) { @@ -3348,7 +3350,7 @@ class Parser # Generate query $query = false; - foreach ( $this->mLinkHolders['namespaces'] as $key => $val ) { + foreach ( $this->mLinkHolders['namespaces'] as $key => $ns ) { # Make title object $title = $this->mLinkHolders['titles'][$key]; @@ -3359,23 +3361,26 @@ class Parser } $pdbk = $pdbks[$key] = $title->getPrefixedDBkey(); - # Check if it's in the link cache already - if ( $title->isAlwaysKnown() || $wgLinkCache->getGoodLinkID( $pdbk ) ) { + # Check if it's a static known link, e.g. 
interwiki + if ( $title->isAlwaysKnown() ) { + $colours[$pdbk] = 1; + } elseif ( ( $id = $wgLinkCache->getGoodLinkID( $pdbk ) ) != 0 ) { $colours[$pdbk] = 1; + $this->mOutput->addLink( $ns, $this->mLinkHolders['dbkeys'][$key], $id ); } elseif ( $wgLinkCache->isBadLink( $pdbk ) ) { $colours[$pdbk] = 0; } else { # Not in the link cache, add it to the query if ( !isset( $current ) ) { - $current = $val; + $current = $ns; $query = "SELECT page_id, page_namespace, page_title"; if ( $threshold > 0 ) { $query .= ', page_len, page_is_redirect'; } - $query .= " FROM $page WHERE (page_namespace=$val AND page_title IN("; - } elseif ( $current != $val ) { - $current = $val; - $query .= ")) OR (page_namespace=$val AND page_title IN("; + $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; + } elseif ( $current != $ns ) { + $current = $ns; + $query .= ")) OR (page_namespace=$ns AND page_title IN("; } else { $query .= ', '; } @@ -3399,6 +3404,7 @@ class Parser $title = Title::makeTitle( $s->page_namespace, $s->page_title ); $pdbk = $title->getPrefixedDBkey(); $wgLinkCache->addGoodLinkObj( $s->page_id, $title ); + $this->mOutput->addLink( $s->page_namespace, $s->page_title, $s->page_id ); if ( $threshold > 0 ) { $size = $s->page_len; @@ -3424,6 +3430,7 @@ class Parser if ( empty( $colours[$pdbk] ) ) { $wgLinkCache->addBadLinkObj( $title ); $colours[$pdbk] = 0; + $this->mOutput->addLink( $ns, $this->mLinkHolders['dbkeys'][$key], 0 ); $wgOutputReplace[$searchkey] = $sk->makeBrokenLinkObj( $title, $this->mLinkHolders['texts'][$key], $this->mLinkHolders['queries'][$key] ); @@ -3479,7 +3486,7 @@ class Parser * @return string */ function replaceLinkHoldersText( $text ) { - global $wgUser, $wgLinkCache; + global $wgUser; global $wgOutputReplace; $fname = 'Parser::replaceLinkHoldersText'; @@ -3527,11 +3534,10 @@ class Parser */ function renderImageGallery( $text ) { # Setup the parser - global $wgUser, $wgTitle; - $parserOptions = ParserOptions::newFromUser( $wgUser ); + 
global $wgTitle; + $parserOptions = new ParserOptions; $localParser = new Parser(); - global $wgLinkCache; $ig = new ImageGallery(); $ig->setShowBytes( false ); $ig->setShowFilename( false ); @@ -3560,7 +3566,7 @@ class Parser $html = $html->mText; $ig->add( new Image( $nt ), $html ); - $wgLinkCache->addImageLinkObj( $nt ); + $this->mOutput->addImage( $nt->getDBkey() ); } return $ig->toHTML(); } @@ -3680,43 +3686,77 @@ class Parser */ class ParserOutput { - var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic; - var $mCacheTime; # Timestamp on this article, or -1 for uncacheable. Used in ParserCache. - var $mVersion; # Compatibility check - var $mTitleText; # title text of the chosen language variant + var $mText, # The output text + $mLanguageLinks, # List of the full text of language links, in the order they appear + $mCategories, # Map of category names to sort keys + $mContainsOldMagic, # Boolean variable indicating if the input contained variables like {{CURRENTDAY}} + $mCacheTime, # Timestamp on this article, or -1 for uncacheable. Used in ParserCache. + $mVersion, # Compatibility check + $mTitleText, # title text of the chosen language variant + $mLinks, # 2-D map of NS/DBK to ID for the links in the document. ID=zero for broken. + $mTemplates, # 2-D map of NS/DBK to ID for the template references. ID=zero for broken. 
+ $mImages; # DB keys of the images used, in the array key only function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(), $containsOldMagic = false, $titletext = '' ) { $this->mText = $text; $this->mLanguageLinks = $languageLinks; - $this->mCategoryLinks = $categoryLinks; + $this->mCategories = $categoryLinks; $this->mContainsOldMagic = $containsOldMagic; $this->mCacheTime = ''; $this->mVersion = MW_PARSER_VERSION; $this->mTitleText = $titletext; + $this->mLinks = array(); + $this->mTemplates = array(); + $this->mImages = array(); } function getText() { return $this->mText; } function getLanguageLinks() { return $this->mLanguageLinks; } - function getCategoryLinks() { return array_keys( $this->mCategoryLinks ); } + function getCategoryLinks() { return array_keys( $this->mCategories ); } + function &getCategories() { return $this->mCategories; } function getCacheTime() { return $this->mCacheTime; } function getTitleText() { return $this->mTitleText; } + function &getLinks() { return $this->mLinks; } + function &getTemplates() { return $this->mTemplates; } + function &getImages() { return $this->mImages; } + function containsOldMagic() { return $this->mContainsOldMagic; } function setText( $text ) { return wfSetVar( $this->mText, $text ); } function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); } - function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); } + function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategories, $cl ); } function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); } function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); } function setTitleText( $t ) { return wfSetVar ($this->mTitleText, $t); } - function addCategoryLink( $c ) { $this->mCategoryLinks[$c] = 1; } + function addCategory( $c, $sort ) { $this->mCategories[$c] = $sort; } + function addImage( $name ) { $this->mImages[$name] = 1; } + function 
addLanguageLink( $t ) { $this->mLanguageLinks[] = $t; } + + function addLink( $ns, $t, $id ) { + if ( !isset( $this->mLinks[$ns] ) ) { + $this->mLinks[$ns] = array(); + } + $this->mLinks[$ns][$t] = $id; + } + function addTemplate( $ns, $t, $id ) { + if ( !isset( $this->mTemplates[$ns] ) ) { + $this->mTemplates[$ns] = array(); + } + $this->mTemplates[$ns][$t] = $id; + } + + /** + * @deprecated + */ + /* function merge( $other ) { $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks ); - $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $this->mLanguageLinks ); + $this->mCategories = array_merge( $this->mCategories, $this->mLanguageLinks ); $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic; - } + }*/ /** * Return true if this cached output object predates the global or @@ -3883,7 +3923,7 @@ function wfLoadSiteStats() { /** * Escape html tags - * Basicly replacing " > and < with HTML entities ( ", >, <) + * Basically replacing " > and < with HTML entities ( ", >, <) * * @param string $in Text that might contain HTML tags * @return string Escaped string diff --git a/includes/Skin.php b/includes/Skin.php index 42ffde29e4..fbfd17a92c 100644 --- a/includes/Skin.php +++ b/includes/Skin.php @@ -495,10 +495,9 @@ END; function getCategoryLinks () { - global $wgOut, $wgTitle, $wgUseCategoryMagic, $wgUseCategoryBrowser; + global $wgOut, $wgTitle, $wgUseCategoryBrowser; global $wgContLang; - if( !$wgUseCategoryMagic ) return '' ; if( count( $wgOut->mCategoryLinks ) == 0 ) return ''; // Use Unicode bidi embedding override characters, diff --git a/includes/SpecialMovepage.php b/includes/SpecialMovepage.php index 3f65b840d7..3c9d326892 100644 --- a/includes/SpecialMovepage.php +++ b/includes/SpecialMovepage.php @@ -5,11 +5,6 @@ * @subpackage SpecialPage */ -/** - * - */ -require_once( "LinksUpdate.php" ); - /** * Constructor */ diff --git a/includes/SpecialUndelete.php b/includes/SpecialUndelete.php index 
111e8ff2ae..685ba7313a 100644 --- a/includes/SpecialUndelete.php +++ b/includes/SpecialUndelete.php @@ -155,7 +155,7 @@ class PageArchive { * @return bool */ function undelete( $timestamps ) { - global $wgDeferredUpdateList, $wgLinkCache, $wgDBtype; + global $wgDeferredUpdateList, $wgParser, $wgDBtype; $fname = "doUndeleteArticle"; $restoreAll = empty( $timestamps ); @@ -224,6 +224,8 @@ class PageArchive { 'ORDER BY' => 'ar_timestamp' ) ); $revision = null; + $newRevId = $previousRevId; + while( $row = $dbw->fetchObject( $result ) ) { if( $row->ar_text_id ) { // Revision was deleted in 1.5+; text is in @@ -248,7 +250,7 @@ class PageArchive { 'minor_edit' => $row->ar_minor_edit, 'text_id' => $row->ar_text_id, ) ); - $revision->insertOn( $dbw ); + $newRevId = $revision->insertOn( $dbw ); } if( $revision ) { @@ -258,16 +260,11 @@ class PageArchive { $article->updateRevisionOn( $dbw, $revision, $previousRevId ); # Finally, clean up the link tables - $wgLinkCache = new LinkCache(); - # Select for update - $wgLinkCache->forUpdate( true ); - - # Create a dummy OutputPage to update the outgoing links - $dummyOut = new OutputPage(); - $dummyOut->addWikiText( $revision->getText() ); - - $u = new LinksUpdate( $newid, $this->title->getPrefixedDBkey() ); - array_push( $wgDeferredUpdateList, $u ); + $parserOptions = new ParserOptions; + $parserOutput = $wgParser->parse( $revision->getText(), $this->title, $parserOptions, + true, true, $newRevId ); + $u = new LinksUpdate( $this->title, $parserOutput ); + $u->doUpdate(); #TODO: SearchUpdate, etc. } @@ -278,7 +275,7 @@ class PageArchive { Article::onArticleEdit( $this->title ); } } else { - # Something went terribly worong! + # Something went terribly wrong! 
} # Now that it's safely stored, take it out of the archive diff --git a/includes/SpecialWhatlinkshere.php b/includes/SpecialWhatlinkshere.php index 087b517ceb..1ca9698332 100644 --- a/includes/SpecialWhatlinkshere.php +++ b/includes/SpecialWhatlinkshere.php @@ -10,117 +10,257 @@ * @param string $par An article name ?? */ function wfSpecialWhatlinkshere($par = NULL) { - global $wgUser, $wgOut, $wgRequest; - $fname = 'wfSpecialWhatlinkshere'; - - $target = isset($par) ? $par : $wgRequest->getVal( 'target' ); - list( $limit, $offset ) = $wgRequest->getLimitOffset(); + global $wgRequest; + $page = new WhatLinksHerePage( $wgRequest, $par ); + $page->execute(); +} - if (is_null($target)) { - $wgOut->errorpage( 'notargettitle', 'notargettext' ); - return; +class WhatLinksHerePage { + var $request, $par; + var $limit, $from, $dir, $target; + var $selfTitle, $skin; + + function WhatLinksHerePage( &$request, $par = null ) { + global $wgUser; + $this->request =& $request; + $this->skin =& $wgUser->getSkin(); + $this->par = $par; } - $nt = Title::newFromURL( $target ); - if( !$nt ) { - $wgOut->errorpage( 'notargettitle', 'notargettext' ); - return; - } - $wgOut->setPagetitle( $nt->getPrefixedText() ); - $wgOut->setSubtitle( wfMsg( 'linklistsub' ) ); + function execute() { + global $wgUser, $wgOut; + + $this->limit = min( $this->request->getInt( 'limit', 50 ), 5000 ); + if ( $this->limit <= 0 ) { + $this->limit = 50; + } + $this->from = $this->request->getInt( 'from' ); + $this->dir = $this->request->getText( 'dir', 'next' ); + if ( $this->dir != 'prev' ) { + $this->dir = 'next'; + } + + $targetString = isset($this->par) ? $this->par : $this->request->getVal( 'target' ); - $sk = $wgUser->getSkin(); - $isredir = ' (' . wfMsg( 'isredirect' ) . ")\n"; + if (is_null($targetString)) { + $wgOut->errorpage( 'notargettitle', 'notargettext' ); + return; + } - $wgOut->addHTML('< '.$sk->makeLinkObj($nt, '', 'redirect=no' )."
\n"); + $this->target = Title::newFromURL( $targetString ); + if( !$this->target ) { + $wgOut->errorpage( 'notargettitle', 'notargettext' ); + return; + } + $this->selfTitle = Title::makeTitleSafe( NS_SPECIAL, + 'Whatlinkshere/' . $this->target->getPrefixedDBkey() ); + $wgOut->setPagetitle( $this->target->getPrefixedText() ); + $wgOut->setSubtitle( wfMsg( 'linklistsub' ) ); - wfShowIndirectLinks( 0, $nt, $limit, $offset ); -} + $isredir = ' (' . wfMsg( 'isredirect' ) . ")\n"; -/** - * @param int $level - * @param Title $target - * @param int $limit - * @param int $offset - * @access private - */ -function wfShowIndirectLinks( $level, $target, $limit, $offset = 0 ) { - global $wgOut, $wgUser; - $fname = 'wfShowIndirectLinks'; + $wgOut->addHTML('< '.$this->skin->makeLinkObj($this->target, '', 'redirect=no' )."
\n"); - $dbr =& wfGetDB( DB_READ ); - - // Read one extra row as an at-end check - $queryLimit = $limit + 1; - $limitSql = ( $level == 0 ) - ? "$offset,$queryLimit" - : $queryLimit; - - $res = $dbr->select( array( 'pagelinks', 'page' ), - array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect' ), - array( - 'pl_from=page_id', - 'pl_namespace' => $target->getNamespace(), - 'pl_title' => $target->getDbKey() ), - $fname, - array( 'LIMIT' => $limitSql ) ); - - if ( 0 == $dbr->numRows( $res ) ) { - if ( 0 == $level ) { - $wgOut->addWikiText( wfMsg( 'nolinkshere' ) ); - } - return; - } - if ( 0 == $level ) { - $wgOut->addWikiText( wfMsg( 'linkshere' ) ); + $this->showIndirectLinks( 0, $this->target, $this->limit, $this->from, $this->dir ); } - $sk = $wgUser->getSkin(); - $isredir = ' (' . wfMsg( 'isredirect' ) . ")\n"; - if( $dbr->numRows( $res ) == 0 ) { - return; - } - $atend = ( $dbr->numRows( $res ) <= $limit ); - - if( $level == 0 ) { - $specialTitle = Title::makeTitle( NS_SPECIAL, 'Whatlinkshere' ); - $prevnext = wfViewPrevNext( $offset, $limit, $specialTitle, - 'target=' . urlencode( $target->getPrefixedDbKey() ), - $atend ); - $wgOut->addHTML( $prevnext ); - } - - $wgOut->addHTML( '
    ' ); - $linksShown = 0; - while ( $row = $dbr->fetchObject( $res ) ) { - if( ++$linksShown > $limit ) { - // Last row is for checks only; don't display it. - break; + /** + * @param int $level Recursion level + * @param Title $target Target title + * @param int $limit Number of entries to display + * @param Title $from Display from this article ID + * @param string $dir 'next' or 'prev', whether $fromTitle is the start or end of the list + * @access private + */ + function showIndirectLinks( $level, $target, $limit, $from = 0, $dir = 'next' ) { + global $wgOut, $wgUser; + $fname = 'WhatLinksHerePage::showIndirectLinks'; + + $dbr =& wfGetDB( DB_READ ); + + extract( $dbr->tableNames( 'pagelinks', 'templatelinks', 'page' ) ); + + // Some extra validation + $from = intval( $from ); + if ( !$from && $dir == 'prev' ) { + // Before start? No make sense + $dir = 'next'; } - $nt = Title::makeTitle( $row->page_namespace, $row->page_title ); + // Make the query + if ( $from ) { + if ( 'prev' == $dir ) { + $offsetCond = "AND page_id < $from"; + $options = 'ORDER BY page_id DESC,is_template DESC'; + } else { + $offsetCond = "AND page_id >= $from"; + $options = 'ORDER BY page_id, is_template DESC'; + } + } else { + $offsetCond = ''; + $options = 'ORDER BY page_id,is_template DESC'; + } + // Read an extra row as an at-end check + $queryLimit = $limit + 1; + $options .= ' LIMIT ' . $queryLimit; + + $ns = $dbr->addQuotes( $target->getNamespace() ); + $dbk = $dbr->addQuotes( $target->getDBkey() ); + $plCond = "page_id=pl_from AND pl_namespace=$ns AND pl_title=$dbk"; + $tlCond = "page_id=tl_from AND tl_namespace=$ns AND tl_title=$dbk"; + + // Make a union query which will read both templatelinks and pagelinks, + // with an is_template field in the output indicating which one the link + // came from + $sql = "(SELECT page_id,page_namespace, page_title, page_is_redirect, 1 as is_template " . + "FROM page, templatelinks WHERE $tlCond $offsetCond) " . 
+ "UNION (SELECT page_id,page_namespace, page_title, page_is_redirect, 0 as is_template " . + "FROM page, pagelinks WHERE $plCond $offsetCond) $options"; + $res = $dbr->query( $sql, $fname ); + $numRows = $dbr->numRows( $res ); + + if ( 0 == $numRows ) { + if ( 0 == $level ) { + $wgOut->addWikiText( wfMsg( 'nolinkshere' ) ); + } + return; + } - if ( $row->page_is_redirect ) { - $extra = 'redirect=no'; + // Read the rows into an array + $rows = array(); + while ( $row = $dbr->fetchObject( $res ) ) { + $rows[] = $row; + } + $lastRow = end( $rows ); + // Work out the start and end IDs, for prev/next links + if ( $dir == 'prev' ) { + // Descending order + if ( $numRows == $queryLimit ) { + // More rows available before these ones + // Get the ID from the last row in the result set + $prevId = $lastRow->page_id; + // Remove undisplayed row + unset( $rows[$queryLimit - 1] ); + } else { + // No more rows available before + $prevId = 0; + } + // Assume that the ID specified in $from exists, so there must be another page + $nextId = $from; + + // Reverse order + $rows = array_reverse( $rows ); } else { - $extra = ''; + // Ascending + if ( $numRows == $queryLimit ) { + // More rows available after these ones + // Get the ID from the last row in the result set + $nextId = $lastRow->page_id; + // Remove undisplayed row + unset( $rows[$queryLimit - 1] ); + } else { + // No more rows after + $nextId = false; + } + $prevId = $from; + } + + if ( 0 == $level ) { + $wgOut->addWikiText( wfMsg( 'linkshere' ) ); } + $isredir = wfMsg( 'isredirect' ); + $istemplate = wfMsg( 'istemplate' ); + + if( $level == 0 ) { + $prevnext = $this->getPrevNext( $limit, $prevId, $nextId ); + $wgOut->addHTML( $prevnext ); + } + + $wgOut->addHTML( '
      ' ); + $linksShown = 0; + $lastNs = false; + $lastDbk = false; + foreach ( $rows as $row ) { + if ( $lastNs === $row->page_namespace && $lastDbk === $row->page_title ) { + // Skip duplicates + continue; + } else { + $lastNs = $row->page_namespace; + $lastDbk = $row->page_title; + } + + $nt = Title::makeTitle( $row->page_namespace, $row->page_title ); - $link = $sk->makeKnownLinkObj( $nt, '', $extra ); - $wgOut->addHTML( '
    • '.$link ); + if ( $row->page_is_redirect ) { + $extra = 'redirect=no'; + } else { + $extra = ''; + } + + $link = $this->skin->makeKnownLinkObj( $nt, '', $extra ); + $wgOut->addHTML( '
    • '.$link ); - if ( $row->page_is_redirect ) { - $wgOut->addHTML( $isredir ); - if ( $level < 2 ) { - wfShowIndirectLinks( $level + 1, $nt, 500 ); + // Display properties (redirect or template) + $props = array(); + if ( $row->page_is_redirect ) { + $props[] = $isredir; + } + if ( $row->is_template ) { + $props[] = $istemplate; } + if ( count( $props ) ) { + // FIXME? Cultural assumption, hard-coded punctuation + $wgOut->addHTML( ' (' . implode( ', ', $props ) . ') ' ); + } + + if ( $row->page_is_redirect ) { + if ( $level < 2 ) { + $this->showIndirectLinks( $level + 1, $nt, 500 ); + } + } + $wgOut->addHTML( "
    • \n" ); + } + $wgOut->addHTML( "
    \n" ); + + if( $level == 0 ) { + $wgOut->addHTML( $prevnext ); } - $wgOut->addHTML( "\n" ); } - $wgOut->addHTML( "
\n" ); - - if( $level == 0 ) { - $wgOut->addHTML( $prevnext ); + + function makeSelfLink( $text, $query ) { + return $this->skin->makeKnownLinkObj( $this->selfTitle, $text, $query ); + } + + function getPrevNext( $limit, $prevId, $nextId ) { + global $wgLang; + $fmtLimit = $wgLang->formatNum( $limit ); + $prev = wfMsg( 'prevn', $fmtLimit ); + $next = wfMsg( 'nextn', $fmtLimit ); + + if ( 0 != $prevId ) { + $prevLink = $this->makeSelfLink( $prev, "limit={$limit}&from={$prevId}&dir=prev" ); + } else { + $prevLink = $prev; + } + if ( 0 != $nextId ) { + $nextLink = $this->makeSelfLink( $next, "limit={$limit}&from={$nextId}" ); + } else { + $nextLink = $next; + } + $nums = $this->numLink( 20, $prevId ) . ' | ' . + $this->numLink( 50, $prevId ) . ' | ' . + $this->numLink( 100, $prevId ) . ' | ' . + $this->numLink( 250, $prevId ) . ' | ' . + $this->numLink( 500, $prevId ); + + return wfMsg( 'viewprevnext', $prevLink, $nextLink, $nums ); + } + + function numLink( $limit, $from ) { + global $wgLang; + $query = "limit={$limit}&from={$from}"; + $fmtLimit = $wgLang->formatNum( $limit ); + return $this->makeSelfLink( $fmtLimit, $query ); } } diff --git a/includes/Title.php b/includes/Title.php index e93805c615..44ba6cf12c 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -1400,7 +1400,7 @@ class Title { * @return array the Title objects linking here * @access public */ - function getLinksTo( $options = '' ) { + function getLinksTo( $options = '', $table = 'pagelinks', $prefix = 'pl' ) { global $wgLinkCache; $id = $this->getArticleID(); @@ -1410,12 +1410,12 @@ class Title { $db =& wfGetDB( DB_SLAVE ); } - $res = $db->select( array( 'page', 'pagelinks' ), + $res = $db->select( array( 'page', $table ), array( 'page_namespace', 'page_title', 'page_id' ), array( - 'pl_from=page_id', - 'pl_namespace' => $this->getNamespace(), - 'pl_title' => $this->getDbKey() ), + "{$prefix}_from=page_id", + "{$prefix}_namespace" => $this->getNamespace(), + "{$prefix}_title" => 
$this->getDbKey() ), 'Title::getLinksTo', $options ); @@ -1432,6 +1432,18 @@ class Title { return $retVal; } + /** + * Get an array of Title objects using this Title as a template + * Also stores the IDs in the link cache. + * + * @param string $options may be FOR UPDATE + * @return array the Title objects linking here + * @access public + */ + function getTemplateLinksTo( $options = '' ) { + return $this->getLinksTo( $options, 'templatelinks', 'tl' ); + } + /** * Get an array of Title objects referring to non-existent articles linked from this page * diff --git a/index.php b/index.php index 360f04873c..fd8bb699f3 100644 --- a/index.php +++ b/index.php @@ -165,7 +165,7 @@ if( !$wgDisableInternalSearch && !is_null( $search ) && $search !== '' ) { unset($wgArticle); require_once( 'includes/ImagePage.php' ); $wgArticle = new ImagePage( $wgTitle ); - } elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) { + } elseif ( $ns == NS_CATEGORY ) { unset($wgArticle); require_once( 'includes/CategoryPage.php' ); $wgArticle = new CategoryPage( $wgTitle ); diff --git a/languages/Language.php b/languages/Language.php index a99bf83f9f..aa7b556b9e 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -1435,6 +1435,7 @@ to perform this function on.', 'linkshere' => 'The following pages link to here:', 'nolinkshere' => 'No pages link to here.', 'isredirect' => 'redirect page', +'istemplate' => 'inclusion', # Block/unblock IP # @@ -2399,6 +2400,7 @@ class Language { * @param int $ts the time in date('YmdHis') format * @param mixed $tz adjust the time by this amount (default false) * @return int + */ function userAdjust( $ts, $tz = false ) { global $wgUser, $wgLocalTZoffset; @@ -2566,7 +2568,9 @@ class Language { * date('YmdHis') format with wfTimestamp(TS_MW,$ts) * @param bool $adj whether to adjust the time output according to the * user configured offset ($timecorrection) - * @param mixed $format true to use user's date format preference + + * @param mixed $format what 
format to return, if it's false output the + * default one (default true) * @param string $timecorrection the time offset as returned by * validateTimeZone() in Special:Preferences * @return string diff --git a/maintenance/archives/patch-templatelinks.sql b/maintenance/archives/patch-templatelinks.sql new file mode 100644 index 0000000000..49bd9c5ef2 --- /dev/null +++ b/maintenance/archives/patch-templatelinks.sql @@ -0,0 +1,19 @@ +-- +-- Track template inclusions. +-- +CREATE TABLE /*$wgDBprefix*/templatelinks ( + -- Key to the page_id of the page containing the link. + tl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + tl_namespace int NOT NULL default '0', + tl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY tl_from(tl_from,tl_namespace,tl_title), + KEY (tl_namespace,tl_title) + +) TYPE=InnoDB; + diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc index 6c2ced5758..589fb57e0b 100644 --- a/maintenance/dumpHTML.inc +++ b/maintenance/dumpHTML.inc @@ -357,7 +357,7 @@ class DumpHTML { /** Reads the content of a title object, executes the skin and captures the result */ function getArticleHTML( &$title ) { - global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic, $wgLinkCache; + global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgLinkCache; $wgTitle = $title; if ( is_null( $wgTitle ) ) { @@ -373,7 +373,7 @@ class DumpHTML { } else { if ( $ns == NS_IMAGE ) { $wgArticle = new ImagePage( $wgTitle ); - } elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) { + } elseif ( $ns == NS_CATEGORY ) { $wgArticle = new CategoryPage( $wgTitle ); } else { $wgArticle = new Article( $wgTitle ); diff --git a/maintenance/mysql5/tables.sql b/maintenance/mysql5/tables.sql index 025514b7b1..d09a67157d 100644 --- a/maintenance/mysql5/tables.sql +++ 
b/maintenance/mysql5/tables.sql @@ -384,6 +384,26 @@ CREATE TABLE /*$wgDBprefix*/pagelinks ( ) TYPE=InnoDB, DEFAULT CHARSET=utf8; +-- +-- Track template inclusions. +-- +CREATE TABLE /*$wgDBprefix*/templatelinks ( + -- Key to the page_id of the page containing the link. + tl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + tl_namespace int NOT NULL default '0', + tl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY tl_from(tl_from,tl_namespace,tl_title), + KEY (tl_namespace,tl_title) + +) TYPE=InnoDB, DEFAULT CHARSET=utf8; + + -- -- Track links to images *used inline* -- We don't distinguish live from broken links here, so diff --git a/maintenance/refreshLinks.inc b/maintenance/refreshLinks.inc index feb91ef214..f146c24eea 100644 --- a/maintenance/refreshLinks.inc +++ b/maintenance/refreshLinks.inc @@ -65,7 +65,7 @@ function refreshLinks( $start, $newOnly = false, $maxLag = false ) { } function fixLinksFromArticle( $id ) { - global $wgTitle, $wgArticle, $wgLinkCache, $wgOut; + global $wgTitle, $wgArticle, $wgOut, $wgParser; $wgTitle = Title::newFromID( $id ); $dbw =& wfGetDB( DB_MASTER ); @@ -75,29 +75,15 @@ function fixLinksFromArticle( $id ) { } $dbw->begin(); - $wgArticle = new Article( $wgTitle ); - $text = $wgArticle->getContent( true ); - $wgLinkCache = new LinkCache; - $wgLinkCache->forUpdate( true ); - - global $wgLinkHolders; - $wgLinkHolders = array( - 'namespaces' => array(), - 'dbkeys' => array(), - 'queries' => array(), - 'texts' => array(), - 'titles' => array() - ); - - - # Parse the text and replace links with placeholders - $wgOut->addWikiText( $text ); - - # Look up the links in the DB and add them to the link cache - $wgOut->clearHTML(); + $revision = Revision::newFromTitle( $wgTitle ); + if ( !$revision ) { + return; + } - $linksUpdate = new 
LinksUpdate( $id, $wgTitle->getPrefixedDBkey() ); - $linksUpdate->doDumbUpdate(); + $options = new ParserOptions; + $parserOutput = $wgParser->parse( $revision->getText(), $wgTitle, $options, true, true, $revision->getId() ); + $update = new LinksUpdate( $wgTitle, $parserOutput ); + $update->doDumbUpdate(); $dbw->immediateCommit(); } diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 9a712f275c..8a08553d7a 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -371,6 +371,25 @@ CREATE TABLE /*$wgDBprefix*/pagelinks ( ) TYPE=InnoDB; +-- +-- Track template inclusions. +-- +CREATE TABLE /*$wgDBprefix*/templatelinks ( + -- Key to the page_id of the page containing the link. + tl_from int(8) unsigned NOT NULL default '0', + + -- Key to page_namespace/page_title of the target page. + -- The target page may or may not exist, and due to renames + -- and deletions may refer to different page records as time + -- goes by. + tl_namespace int NOT NULL default '0', + tl_title varchar(255) binary NOT NULL default '', + + UNIQUE KEY tl_from(tl_from,tl_namespace,tl_title), + KEY (tl_namespace,tl_title) + +) TYPE=InnoDB; + -- -- Track links to images *used inline* -- We don't distinguish live from broken links here, so diff --git a/maintenance/updaters.inc b/maintenance/updaters.inc index ab950d0aeb..7d64aee8b2 100644 --- a/maintenance/updaters.inc +++ b/maintenance/updaters.inc @@ -688,6 +688,56 @@ function do_page_random_update() { echo "changed $rows rows\n"; } +function do_templatelinks_update() { + global $wgDatabase, $wgLoadBalancer; + $fname = 'do_templatelinks_update'; + + if ( $wgDatabase->tableExists( 'templatelinks' ) ) { + echo "...templatelinks table already exists\n"; + return; + } + echo "Creating templatelinks table...\n"; + dbsource( archive('patch-templatelinks.sql'), $wgDatabase ); + echo "Populating...\n"; + if ( isset( $wgLoadBalancer ) && $wgLoadBalancer->getServerCount() > 1 ) { + // Slow, replication-friendly update + $res = 
$wgDatabase->select( 'pagelinks', array( 'pl_from', 'pl_namespace', 'pl_title' ), + array( 'pl_namespace' => NS_TEMPLATE ), $fname ); + $count = 0; + while ( $row = $wgDatabase->fetchObject( $res ) ) { + $count = ($count + 1) % 100; + if ( $count == 0 ) { + if ( function_exists( 'wfWaitForSlaves' ) ) { + wfWaitForSlaves( 10 ); + } else { + sleep( 1 ); + } + } + $wgDatabase->insert( 'templatelinks', + array( + 'tl_from' => $row->pl_from, + 'tl_namespace' => $row->pl_namespace, + 'tl_title' => $row->pl_title, + ), $fname + ); + + } + $wgDatabase->freeResult( $res ); + } else { + // Fast update + $wgDatabase->insertSelect( 'templatelinks', 'pagelinks', + array( + 'tl_from' => 'pl_from', + 'tl_namespace' => 'pl_namespace', + 'tl_title' => 'pl_title' + ), array( + 'pl_namespace' => NS_TEMPLATE + ), $fname + ); + } + echo "Done. Please run maintenance/refreshLinks.php for a more thorough templatelinks update.\n"; +} + function do_all_updates() { global $wgNewTables, $wgNewFields, $wgRenamedTables; @@ -724,7 +774,8 @@ function do_all_updates() { do_namespace_size(); flush(); do_pagelinks_update(); flush(); - + do_templatelinks_update(); flush(); // after pagelinks + do_drop_img_type(); flush(); do_user_unique_update(); flush(); -- 2.20.1