From: Yuri Astrakhan Date: Fri, 6 Oct 2006 01:02:14 +0000 (+0000) Subject: * API: Optimized PageSet object to avoid executing queries against page table twice. X-Git-Tag: 1.31.0-rc.0~55611 X-Git-Url: https://git.cyclocoop.org/%27.WWW_URL.%27admin/?a=commitdiff_plain;h=eb16ab605c0c84b765c056abf7a3b4d50a8a7a8a;p=lhc%2Fweb%2Fwiklou.git * API: Optimized PageSet object to avoid executing queries against page table twice. --- diff --git a/includes/api/ApiMain.php b/includes/api/ApiMain.php index 3f7611de23..f31f3e2bc3 100644 --- a/includes/api/ApiMain.php +++ b/includes/api/ApiMain.php @@ -148,7 +148,8 @@ class ApiMain extends ApiBase { header($errorCode, true, $httpRespCode); $data = array ( - 'code' => $errorCode + 'code' => $errorCode, + 'info' => $description ); ApiResult :: setContent($data, $this->makeHelpMsg()); $this->mResult->addValue(null, 'error', $data); diff --git a/includes/api/ApiPageSet.php b/includes/api/ApiPageSet.php index 9e34ce7a1e..2d4d2cd149 100644 --- a/includes/api/ApiPageSet.php +++ b/includes/api/ApiPageSet.php @@ -33,10 +33,11 @@ class ApiPageSet extends ApiQueryBase { private $mAllPages; // [ns][dbkey] => page_id or 0 when missing private $mGoodTitles, $mMissingTitles, $mMissingPageIDs, $mRedirectTitles, $mNormalizedTitles; + private $mResolveRedirects, $mPendingRedirectIDs; - private $mRequestedFields; + private $mRequestedPageFields; - public function __construct($query) { + public function __construct($query, $resolveRedirects = false) { parent :: __construct($query, __CLASS__); $this->mAllPages = array (); @@ -46,15 +47,42 @@ class ApiPageSet extends ApiQueryBase { $this->mRedirectTitles = array (); $this->mNormalizedTitles = array (); - $this->mRequestedFields = array (); + $this->mRequestedPageFields = array (); + $this->mResolveRedirects = $resolveRedirects; + if($resolveRedirects) + $this->mPendingRedirectIDs = array(); + } + + public function isResolvingRedirects() { + return $this->mResolveRedirects; } public function requestField($fieldName) { - $this->mRequestedFields[$fieldName] = null; + $this->mRequestedPageFields[$fieldName] = null; } public function getCustomField($fieldName) { - return $this->mRequestedFields[$fieldName]; + return $this->mRequestedPageFields[$fieldName]; + } + + /** + * Get fields that modules have requested from the page table + */ + public function getPageTableFields() { + // Ensure we get minimum required fields + $pageFlds = array ( + 'page_id' => null, + 'page_namespace' => null, + 'page_title' => null + ); + + // only store non-default fields + $this->mRequestedPageFields = array_diff_key($this->mRequestedPageFields, $pageFlds); + + if ($this->mResolveRedirects) + $pageFlds['page_is_redirect'] = null; + + return array_keys(array_merge($pageFlds, $this->mRequestedPageFields)); } /** @@ -65,6 +93,13 @@ class ApiPageSet extends ApiQueryBase { return $this->mGoodTitles; } + /** + * Returns the number of unique pages (not revisions) in the set. + */ + public function getGoodTitleCount() { + return count($this->getGoodTitles()); + } + /** * Title objects that were NOT found in the database. * @return array of Title objects @@ -98,13 +133,6 @@ class ApiPageSet extends ApiQueryBase { return $this->mNormalizedTitles; } - /** - * Returns the number of unique pages (not revisions) in the set. - */ - public function getGoodTitleCount() { - return count($this->getGoodTitles()); - } - /** * Get the list of revision IDs (requested with revids= parameter) */ @@ -119,6 +147,100 @@ class ApiPageSet extends ApiQueryBase { return 0; // TODO: implement } + /** + * Populate from the request parameters + */ + public function execute() { + $this->profileIn(); + $titles = $pageids = $revids = null; + extract($this->extractRequestParams()); + + // Only one of the titles/pageids/revids is allowed at the same time + $dataSource = null; + if (isset ($titles)) + $dataSource = 'titles'; + if (isset ($pageids)) { + if (isset ($dataSource)) + $this->dieUsage("Cannot use 'pageids' at the same time as '$dataSource'", 'multisource'); + $dataSource = 'pageids'; + } + if (isset ($revids)) { + if (isset ($dataSource)) + $this->dieUsage("Cannot use 'revids' at the same time as '$dataSource'", 'multisource'); + $dataSource = 'revids'; + } + + switch ($dataSource) { + case 'titles' : + $this->initFromTitles($titles); + break; + case 'pageids' : + $this->initFromPageIds($pageids); + break; + case 'revids' : + $this->initFromRevIDs($revids); + break; + default : + // Do nothing - some queries do not need any of the data sources. + break; + } + $this->profileOut(); + } + + /** + * Initialize PageSet from a list of Titles + */ + public function populateFromTitles($titles) { + $this->profileIn(); + $this->initFromTitles($titles); + $this->profileOut(); + } + + /** + * Initialize PageSet from a list of Page IDs + */ + public function populateFromPageIDs($pageIDs) { + $this->profileIn(); + $pageIDs = array_map('intval', $pageIDs); // paranoia + $this->initFromPageIds($pageIDs); + $this->profileOut(); + } + + /** + * Initialize PageSet from a rowset returned from the database + */ + public function populateFromQueryResult($db, $queryResult) { + $this->profileIn(); + $this->initFromQueryResult($db, $queryResult); + $this->profileOut(); + } + + /** + * Extract all requested fields from the row received from the database + */ + public function processDbRow($row) { + $pageId = intval($row->page_id); + + // Store Title object in various data structures + $title = Title :: makeTitle($row->page_namespace, $row->page_title); + $this->mAllPages[$row->page_namespace][$row->page_title] = $pageId; + + if ($this->mResolveRedirects && $row->page_is_redirect == '1') { + $this->mPendingRedirectIDs[$pageId] = $title; + } else { + $this->mGoodTitles[$pageId] = $title; + } + + foreach ($this->mRequestedPageFields as $fieldName => & $fieldValues) + $fieldValues[$pageId] = $row-> $fieldName; + } + + public function finishPageSetGeneration() { + $this->profileIn(); + $this->resolvePendingRedirects(); + $this->profileOut(); + } + /** * This method populates internal variables with page information * based on the given array of title strings. @@ -133,85 +255,79 @@ class ApiPageSet extends ApiQueryBase { * #5 Substitute the original LinkBatch object with the new list * #6 Repeat from step #1 */ - private function populatePages($titles, $pageids, $redirects) { - if (!is_null($titles) && !is_null($pageids)) - ApiBase :: dieDebug(__METHOD__, 'bad parameters'); - $processTitles = !is_null($titles); + private function initFromTitles($titles) { + $db = $this->getDB(); - // Ensure we get minimum required fields - $pageFlds = array ( - 'page_id' => null, - 'page_namespace' => null, - 'page_title' => null - ); + // Get validated and normalized title objects + $linkBatch = $this->processTitlesStrArray($titles); + $set = $linkBatch->constructSet('page', $db); - // only store non-default fields - $this->mRequestedFields = array_diff_key($this->mRequestedFields, $pageFlds); + // Get pageIDs data from the `page` table + $this->profileDBIn(); + $res = $db->select('page', $this->getPageTableFields(), $set, __METHOD__); + $this->profileDBOut(); - if ($redirects) - $pageFlds['page_is_redirect'] = null; + // Hack: get the ns:titles stored in array(ns => array(titles)) format + $this->initFromQueryResult($db, $res, $linkBatch->data, true); // process Titles - $pageFlds = array_keys(array_merge($pageFlds, $this->mRequestedFields)); + // Resolve any found redirects + $this->resolvePendingRedirects(); + } + private function initFromPageIds($pageids) { $db = $this->getDB(); - if ($processTitles) { - - // Get validated and normalized title objects - $linkBatch = $this->processTitlesStrArray($titles); - - $set = $linkBatch->constructSet('page', $db); - } else { - $set = array ( - 'page_id' => $pageids - ); - } - - // - // Repeat until all redirects have been resolved - // The infinite loop is prevented by keeping all known pages in $this->mAllPages - // - do { - if ($processTitles) { - // Hack: get the ns:titles stored in array(ns => array(titles)) format - $remaining = $linkBatch->data; - } else { - $remaining = array_flip($pageids); // turn pageids into keys - } + $set = array ( + 'page_id' => $pageids + ); - $redirectIds = array (); + // Get pageIDs data from the `page` table + $this->profileDBIn(); + $res = $db->select('page', $this->getPageTableFields(), $set, __METHOD__); + $this->profileDBOut(); + + $this->initFromQueryResult($db, $res, array_flip($pageids), false); // process PageIDs - // - // Get data about $linkBatch from `page` table - // - $this->profileDBIn(); - $res = $db->select('page', $pageFlds, $set, __METHOD__); - $this->profileDBOut(); - while ($row = $db->fetchObject($res)) { + // Resolve any found redirects + $this->resolvePendingRedirects(); + } + + /** + * Iterate through the result of the query on 'page' table, + * and for each row create and store title object and save any extra fields requested. + * @param $db Database + * @param $res DB Query result + * @param $remaining Array of either pageID or ns/title elements (optional). + * If given, any missing items will go to $mMissingPageIDs and $mMissingTitles + * @param $processTitles bool Must be provided together with $remaining. + * If true, treat $remaining as an array of [ns][title] + * If false, treat it as an array of [pageIDs] + * @return Array of redirect IDs (only when resolving redirects) + */ + private function initFromQueryResult($db, $res, &$remaining = null, $processTitles = null) { + if (!is_null($remaining) && is_null($processTitles)) + $this->dieDebug('Missing $processTitles parameter when $remaining is provided'); + + while ($row = $db->fetchObject($res)) { - $pageId = intval($row->page_id); + $pageId = intval($row->page_id); + // Remove found page from the list of remaining items + if (isset($remaining)) { if ($processTitles) unset ($remaining[$row->page_namespace][$row->page_title]); else unset ($remaining[$pageId]); - - $title = Title :: makeTitle($row->page_namespace, $row->page_title); - $this->mAllPages[$row->page_namespace][$row->page_title] = $pageId; - - if ($redirects && $row->page_is_redirect == '1') { - $redirectIds[$pageId] = $title; - } else { - $this->mGoodTitles[$pageId] = $title; - } - - foreach ($this->mRequestedFields as $fieldName => & $fieldValues) { - $fieldValues[$pageId] = $row-> $fieldName; - } } - $db->freeResult($res); - - if ($processTitles) { + + // Store any extra fields requested by modules + $this->processDbRow($row); + } + $db->freeResult($res); + + if(isset($remaining)) { + // Any items left in the $remaining list are added as missing + if($processTitles) { // The remaining titles in $remaining are non-existant pages foreach ($remaining as $ns => $dbkeys) { foreach ($dbkeys as $dbkey => $nothing) { @@ -219,29 +335,55 @@ class ApiPageSet extends ApiQueryBase { $this->mAllPages[$ns][$dbkey] = 0; } } - } else { - // The remaining pageids in $remaining do not exist - foreach ($remaining as $pageid => $ignore) { - $this->mMissingPageIDs[] = $pageid; - } } + else + { + // The remaining pageids do not exist + if(empty($this->mMissingPageIDs)) + $this->mMissingPageIDs = array_keys($remaining); + else + $this->mMissingPageIDs = array_merge($this->mMissingPageIDs, array_keys($remaining)); + } + } + } - if (!$redirects || empty ($redirectIds)) - break; + private function initFromRevIDs($revids) { + $this->dieUsage(__METHOD__ . ' is not implemented', 'notimplemented'); + } - // - // Resolve redirects by querying the pagelinks table, and repeat the process - // Create a new linkBatch object for the next pass - // - $linkBatch = $this->resolveRedirectList($redirectIds); + private function resolvePendingRedirects() { - // Redirects are always titles - $processTitles = true; + if($this->mResolveRedirects) { + $db = $this->getDB(); + $pageFlds = $this->getPageTableFields(); + + // Repeat until all redirects have been resolved + // The infinite loop is prevented by keeping all known pages in $this->mAllPages + while (!empty ($this->mPendingRedirectIDs)) { + + // Resolve redirects by querying the pagelinks table, and repeat the process + // Create a new linkBatch object for the next pass + $linkBatch = $this->getRedirectTargets(); + + if ($linkBatch->isEmpty()) + break; + + $set = $linkBatch->constructSet('page', $db); + if(false === $set) + break; + + // Get pageIDs data from the `page` table + $this->profileDBIn(); + $res = $db->select('page', $pageFlds, $set, __METHOD__); + $this->profileDBOut(); + + // Hack: get the ns:titles stored in array(ns => array(titles)) format + $this->initFromQueryResult($db, $res, $linkBatch->data, true); + } } - while (false !== ($set = $linkBatch->constructSet('page', $db))); } - private function resolveRedirectList($redirectIds) { + private function getRedirectTargets() { $linkBatch = new LinkBatch(); $db = $this->getDB(); @@ -253,7 +395,7 @@ class ApiPageSet extends ApiQueryBase { 'pl_namespace', 'pl_title' ), array ( - 'pl_from' => array_keys($redirectIds + 'pl_from' => array_keys($this->mPendingRedirectIDs )), __METHOD__); $this->profileDBOut(); @@ -265,11 +407,11 @@ class ApiPageSet extends ApiQueryBase { // ( http://bugzilla.wikipedia.org/show_bug.cgi?id=7304 ) // A redirect page may have more than one link. // This code will only use the first link returned. - if (isset ($redirectIds[$plfrom])) { // remove line when bug 7304 is fixed + if (isset ($this->mPendingRedirectIDs[$plfrom])) { // remove line when bug 7304 is fixed - $titleStrFrom = $redirectIds[$plfrom]->getPrefixedText(); + $titleStrFrom = $this->mPendingRedirectIDs[$plfrom]->getPrefixedText(); $titleStrTo = Title :: makeTitle($row->pl_namespace, $row->pl_title)->getPrefixedText(); - unset ($redirectIds[$plfrom]); // remove line when bug 7304 is fixed + unset ($this->mPendingRedirectIDs[$plfrom]); // remove line when bug 7304 is fixed // Avoid an infinite loop by checking if we have already processed this target if (!isset ($this->mAllPages[$row->pl_namespace][$row->pl_title])) { @@ -299,6 +441,10 @@ class ApiPageSet extends ApiQueryBase { } $db->freeResult($res); + // All IDs must exist in the page table + if (!empty($this->mPendingRedirectIDs[$plfrom])) + $this->dieDebug('Invalid redirect IDs were found'); + return $linkBatch; } @@ -337,55 +483,6 @@ class ApiPageSet extends ApiQueryBase { return $linkBatch; } - private function populateRevIDs($revids) { - $this->dieUsage(__METHOD__ . ' is not implemented', 'notimplemented'); - } - - public function execute() { - $this->profileIn(); - $titles = $pageids = $revids = $redirects = null; - extract($this->extractRequestParams()); - - // Only one of the titles/pageids/revids is allowed at the same time - $dataSource = null; - if (isset ($titles)) - $dataSource = 'titles'; - if (isset ($pageids)) { - if (isset ($dataSource)) - $this->dieUsage("Cannot use 'pageids' at the same time as '$dataSource'", 'multisource'); - $dataSource = 'pageids'; - } - if (isset ($revids)) { - if (isset ($dataSource)) - $this->dieUsage("Cannot use 'revids' at the same time as '$dataSource'", 'multisource'); - $dataSource = 'revids'; - } - - switch ($dataSource) { - case 'titles' : - case 'pageids' : - $this->populatePages($titles, $pageids, $redirects); - break; - case 'revids' : - $this->populateRevIDs($revids); - break; - default : - // Do nothing - some queries do not need any of the data sources. - break; - } - $this->profileOut(); - } - - /** - * This method is used by generators to pass the list of pageIDs internaly - */ - public function executeForPageIDs($pageIDs) { - $this->profileIn(); - $pageIDs = array_map( 'intval', $pageIDs ); // paranoia - $this->populatePages(null, $pageIDs, $this->getParameter('redirects')); - $this->profileOut(); - } - protected function getAllowedParams() { return array ( 'titles' => array ( @@ -398,8 +495,7 @@ class ApiPageSet extends ApiQueryBase { 'revids' => array ( ApiBase :: PARAM_TYPE => 'integer', ApiBase :: PARAM_ISMULTI => true - ), - 'redirects' => false + ) ); } @@ -407,8 +503,7 @@ class ApiPageSet extends ApiQueryBase { return array ( 'titles' => 'A list of titles to work on', 'pageids' => 'A list of page IDs to work on', - 'revids' => 'A list of revision IDs to work on', - 'redirects' => 'Automatically resolve redirects' + 'revids' => 'A list of revision IDs to work on' ); } diff --git a/includes/api/ApiQuery.php b/includes/api/ApiQuery.php index 92c8284360..be57b06dc2 100644 --- a/includes/api/ApiQuery.php +++ b/includes/api/ApiQuery.php @@ -96,13 +96,13 @@ class ApiQuery extends ApiBase { * #5 Execute all requested modules */ public function execute() { - $prop = $list = $meta = $generator = null; + $prop = $list = $meta = $generator = $redirects = null; extract($this->extractRequestParams()); // // Create PageSet // - $this->mPageSet = new ApiPageSet($this); + $this->mPageSet = new ApiPageSet($this, $redirects); // Instantiate required modules $modules = array (); @@ -126,7 +126,7 @@ class ApiQuery extends ApiBase { // If given, execute generator to substitute user supplied data with generated data. // if (isset ($generator)) - $this->executeGenerator($generator); + $this->executeGeneratorModule($generator, $redirects); // // Populate page information for the given pageSet @@ -212,7 +212,7 @@ class ApiQuery extends ApiBase { } } - protected function executeGenerator($generatorName) { + protected function executeGeneratorModule($generatorName, $redirects) { // Find class that implements requested generator if (isset ($this->mQueryListModules[$generatorName])) { @@ -226,7 +226,7 @@ class ApiQuery extends ApiBase { // Use current pageset as the result, and create a new one just for the generator $resultPageSet = $this->mPageSet; - $this->mPageSet = new ApiPageSet($this); + $this->mPageSet = new ApiPageSet($this, $redirects); // Create and execute the generator $generator = new $className ($this, $generatorName); @@ -242,6 +242,7 @@ class ApiQuery extends ApiBase { // populate resultPageSet with the generator output $generator->profileIn(); $generator->executeGenerator($resultPageSet); + $resultPageSet->finishPageSetGeneration(); $generator->profileOut(); // Swap the resulting pageset back in @@ -264,7 +265,8 @@ class ApiQuery extends ApiBase { ), 'generator' => array ( ApiBase :: PARAM_TYPE => $this->mAllowedGenerators - ) + ), + 'redirects' => false ); } @@ -322,7 +324,8 @@ class ApiQuery extends ApiBase { 'prop' => 'Which properties to get for the titles/revisions/pageids', 'list' => 'Which lists to get', 'meta' => 'Which meta data to get about the site', - 'generator' => 'Use the output of a list as the input for other prop/list/meta items' + 'generator' => 'Use the output of a list as the input for other prop/list/meta items', + 'redirects' => 'Automatically resolve redirects' ); } diff --git a/includes/api/ApiQueryAllpages.php b/includes/api/ApiQueryAllpages.php index 55b5bdb6c6..89b22662a3 100644 --- a/includes/api/ApiQueryAllpages.php +++ b/includes/api/ApiQueryAllpages.php @@ -40,6 +40,9 @@ class ApiQueryAllpages extends ApiQueryGeneratorBase { } public function executeGenerator($resultPageSet) { + if ($resultPageSet->isResolvingRedirects()) + $this->dieUsage('Use "gapfilterredir=nonredirects" option instead of "redirects" when using allpages as a generator', 'params'); + $this->run($resultPageSet); } @@ -52,9 +55,11 @@ class ApiQueryAllpages extends ApiQueryGeneratorBase { $where = array ( 'page_namespace' => $namespace ); + if (isset ($from)) { $where[] = 'page_title>=' . $db->addQuotes(ApiQueryBase :: titleToKey($from)); } + if ($filterredir === 'redirects') { $where['page_is_redirect'] = 1; } @@ -62,12 +67,18 @@ class ApiQueryAllpages extends ApiQueryGeneratorBase { $where['page_is_redirect'] = 0; } + if (is_null($resultPageSet)) { + $fields = array ( + 'page_id', + 'page_namespace', + 'page_title' + ); + } else { + $fields = $resultPageSet->getPageTableFields(); + } + $this->profileDBIn(); - $res = $db->select('page', array ( - 'page_id', - 'page_namespace', - 'page_title' - ), $where, __CLASS__ . '::' . __METHOD__, array ( + $res = $db->select('page', $fields, $where, __CLASS__ . '::' . __METHOD__, array ( 'USE INDEX' => 'name_title', 'LIMIT' => $limit +1, 'ORDER BY' => 'page_namespace, page_title' @@ -80,7 +91,8 @@ class ApiQueryAllpages extends ApiQueryGeneratorBase { if (++ $count > $limit) { // We've reached the one extra which shows that there are additional pages to be had. Stop here... $msg = array ( - 'continue' => $this->encodeParamName('from') . '='. ApiQueryBase :: keyToTitle($row->page_title)); + 'continue' => $this->encodeParamName('from' + ) . '=' . ApiQueryBase :: keyToTitle($row->page_title)); $this->getResult()->addValue('query-status', 'allpages', $msg); break; } @@ -88,18 +100,12 @@ class ApiQueryAllpages extends ApiQueryGeneratorBase { $title = Title :: makeTitle($row->page_namespace, $row->page_title); // skip any pages that user has no rights to read if ($title->userCanRead()) { - $id = intval($row->page_id); if (is_null($resultPageSet)) { - $pagedata = array (); - $pagedata['id'] = $id; - if ($title->getNamespace() !== 0) - $pagedata['ns'] = $title->getNamespace(); - $pagedata['title'] = $title->getPrefixedText(); - - $data[$id] = $pagedata; - } else { + $id = intval($row->page_id); $data[] = $id; // in generator mode, just assemble a list of page IDs. + } else { + $resultPageSet->processDbRow($row); } } } @@ -108,8 +114,6 @@ class ApiQueryAllpages extends ApiQueryGeneratorBase { if (is_null($resultPageSet)) { ApiResult :: setIndexedTagName($data, 'p'); $this->getResult()->addValue('query', 'allpages', $data); - } else { - $resultPageSet->executeForPageIDs($data); } } @@ -161,9 +165,14 @@ class ApiQueryAllpages extends ApiQueryGeneratorBase { protected function getExamples() { return array ( - 'api.php?action=query&list=allpages', - 'api.php?action=query&list=allpages&apfrom=B&aplimit=5', - 'api.php?action=query&generator=allpages&gaplimit=4&prop=info (generator)' + 'Simple Use', + ' api.php?action=query&list=allpages', + ' api.php?action=query&list=allpages&apfrom=B&aplimit=5', + 'Using as Generator', + ' Show info about 4 pages starting at the letter "T"', + ' api.php?action=query&generator=allpages&gaplimit=4&gapfrom=T&prop=info', + ' Show content of first 2 non-redirect pages begining at "Re"', + ' api.php?action=query&generator=allpages&gaplimit=2&gapfilterredir=nonredirects&gapfrom=Re&prop=revisions&rvprop=content' ); }