private $mAllPages; // [ns][dbkey] => page_id or 0 when missing
private $mGoodTitles, $mMissingTitles, $mMissingPageIDs, $mRedirectTitles, $mNormalizedTitles;
+ private $mResolveRedirects, $mPendingRedirectIDs;
- private $mRequestedFields;
+ private $mRequestedPageFields;
+ /**
+ * Create an empty page set.
+ * @param $query passed on to the parent constructor
+ * @param $resolveRedirects bool when true, redirect pages found while populating
+ * the set are queued in $mPendingRedirectIDs instead of being stored as good titles
+ */
- public function __construct($query) {
+ public function __construct($query, $resolveRedirects = false) {
parent :: __construct($query, __CLASS__);
$this->mAllPages = array ();
$this->mRedirectTitles = array ();
$this->mNormalizedTitles = array ();
- $this->mRequestedFields = array ();
+ $this->mRequestedPageFields = array ();
+ $this->mResolveRedirects = $resolveRedirects;
+ // The pending-redirect queue only exists when redirects are being resolved
+ if ($resolveRedirects) {
+ $this->mPendingRedirectIDs = array ();
+ }
+ }
+
+ /**
+ * Tell whether this page set was created with redirect resolution turned on.
+ * @return the $resolveRedirects value given to the constructor
+ */
+ public function isResolvingRedirects() {
+ return $this->mResolveRedirects;
}
+ /**
+ * Register an extra field to be collected for every page added to the set.
+ * The field starts out with no values; processDbRow() stores one value per page ID.
+ * @param $fieldName string name of the field to collect
+ */
public function requestField($fieldName) {
- $this->mRequestedFields[$fieldName] = null;
+ $this->mRequestedPageFields[$fieldName] = null;
}
+ /**
+ * Get whatever has been stored for a field registered through requestField().
+ * @param $fieldName string name of the field
+ * @return the stored entry: null until a row has been processed, afterwards the
+ * [page_id] => value array filled in by processDbRow()
+ */
public function getCustomField($fieldName) {
- return $this->mRequestedFields[$fieldName];
+ return $this->mRequestedPageFields[$fieldName];
+ }
+
+ /**
+ * List the page table columns that must be selected to fill this set:
+ * page_id, page_namespace and page_title, then page_is_redirect when redirects
+ * are being resolved, then every field registered through requestField().
+ * Side effect: the three always-present columns are removed from the
+ * registered field list, so they are not stored a second time per page.
+ * @return array of column names
+ */
+ public function getPageTableFields() {
+ // Columns every page query has to return
+ $requiredFields = array (
+ 'page_id' => null,
+ 'page_namespace' => null,
+ 'page_title' => null
+ );
+
+ // Keep only the non-default fields in the registered list
+ $extraFields = array_diff_key($this->mRequestedPageFields, $requiredFields);
+ $this->mRequestedPageFields = $extraFields;
+
+ // Redirect resolution needs to know which rows are redirects
+ if ($this->mResolveRedirects) {
+ $requiredFields['page_is_redirect'] = null;
+ }
+
+ $allFields = array_merge($requiredFields, $extraFields);
+ return array_keys($allFields);
}
/**
return $this->mGoodTitles;
}
+ /**
+ * Returns the number of unique pages (not revisions) in the set.
+ * @return int number of elements in the array returned by getGoodTitles()
+ */
+ public function getGoodTitleCount() {
+ return count($this->getGoodTitles());
+ }
+
/**
* Title objects that were NOT found in the database.
* @return array of Title objects
return $this->mNormalizedTitles;
}
- /**
- * Returns the number of unique pages (not revisions) in the set.
- */
- public function getGoodTitleCount() {
- return count($this->getGoodTitles());
- }
-
/**
* Get the list of revision IDs (requested with revids= parameter)
*/
return 0; // TODO: implement
}
+ /**
+ * Populate the set from the request parameters.
+ * At most one of titles, pageids and revids may be present; supplying more
+ * than one ends the request with a 'multisource' usage error. When none is
+ * present the set is left as it is.
+ */
+ public function execute() {
+ $this->profileIn();
+ $params = $this->extractRequestParams();
+
+ // Only one of the titles/pageids/revids is allowed at the same time
+ $dataSource = null;
+ if (isset ($params['titles'])) {
+ $dataSource = 'titles';
+ }
+ if (isset ($params['pageids'])) {
+ if ($dataSource !== null) {
+ $this->dieUsage("Cannot use 'pageids' at the same time as '$dataSource'", 'multisource');
+ }
+ $dataSource = 'pageids';
+ }
+ if (isset ($params['revids'])) {
+ if ($dataSource !== null) {
+ $this->dieUsage("Cannot use 'revids' at the same time as '$dataSource'", 'multisource');
+ }
+ $dataSource = 'revids';
+ }
+
+ // Hand the chosen list to the matching initializer
+ if ($dataSource === 'titles') {
+ $this->initFromTitles($params['titles']);
+ } elseif ($dataSource === 'pageids') {
+ $this->initFromPageIds($params['pageids']);
+ } elseif ($dataSource === 'revids') {
+ $this->initFromRevIDs($params['revids']);
+ }
+ // Otherwise do nothing - some queries do not need any of the data sources.
+
+ $this->profileOut();
+ }
+
+ /**
+ * Initialize PageSet from a list of Titles
+ * Profiled public wrapper around initFromTitles().
+ * @param $titles array handed unchanged to initFromTitles()
+ * (presumably title strings - TODO confirm against processTitlesStrArray())
+ */
+ public function populateFromTitles($titles) {
+ $this->profileIn();
+ $this->initFromTitles($titles);
+ $this->profileOut();
+ }
+
+ /**
+ * Initialize PageSet from a list of Page IDs
+ * Profiled public wrapper around initFromPageIds().
+ * @param $pageIDs array of page IDs; every element is cast with intval()
+ * before the list is used
+ */
+ public function populateFromPageIDs($pageIDs) {
+ $this->profileIn();
+ $pageIDs = array_map('intval', $pageIDs); // paranoia
+ $this->initFromPageIds($pageIDs);
+ $this->profileOut();
+ }
+
+ /**
+ * Initialize PageSet from a rowset returned from the database
+ * Profiled public wrapper around initFromQueryResult(). No list of expected
+ * items is passed along, so nothing is recorded as missing by this call.
+ * Redirects queued while reading the rows are not resolved here; that happens
+ * in finishPageSetGeneration().
+ * @param $db database object used to read and free $queryResult
+ * @param $queryResult result of a query on the page table
+ */
+ public function populateFromQueryResult($db, $queryResult) {
+ $this->profileIn();
+ $this->initFromQueryResult($db, $queryResult);
+ $this->profileOut();
+ }
+
+ /**
+ * Record one row of the page table in the set.
+ * The row must carry page_id, page_namespace and page_title, every field
+ * registered through requestField(), and page_is_redirect when redirects are
+ * being resolved; these are the columns listed by getPageTableFields().
+ * @param $row object database row
+ */
+ public function processDbRow($row) {
+ $ns = $row->page_namespace;
+ $dbkey = $row->page_title;
+ $id = intval($row->page_id);
+
+ // Remember the page as known and build its Title object
+ $this->mAllPages[$ns][$dbkey] = $id;
+ $titleObj = Title :: makeTitle($ns, $dbkey);
+
+ // page_is_redirect is only looked at while resolving redirects
+ $isPendingRedirect = $this->mResolveRedirects && $row->page_is_redirect == '1';
+ if ($isPendingRedirect) {
+ $this->mPendingRedirectIDs[$id] = $titleObj;
+ } else {
+ $this->mGoodTitles[$id] = $titleObj;
+ }
+
+ // Save this page's value of every extra field requested by modules
+ foreach (array_keys($this->mRequestedPageFields) as $fieldName) {
+ $this->mRequestedPageFields[$fieldName][$id] = $row->$fieldName;
+ }
+ }
+
+ /**
+ * Finish a set that was filled row by row through processDbRow() or
+ * populateFromQueryResult(): resolve the redirects queued so far.
+ * Does nothing beyond profiling when redirects are not being resolved.
+ */
+ public function finishPageSetGeneration() {
+ $this->profileIn();
+ $this->resolvePendingRedirects();
+ $this->profileOut();
+ }
+
/**
* This method populates internal variables with page information
* based on the given array of title strings.
* #5 Substitute the original LinkBatch object with the new list
* #6 Repeat from step #1
*/
- private function populatePages($titles, $pageids, $redirects) {
- if (!is_null($titles) && !is_null($pageids))
- ApiBase :: dieDebug(__METHOD__, 'bad parameters');
- $processTitles = !is_null($titles);
+ private function initFromTitles($titles) {
+ $db = $this->getDB();
- // Ensure we get minimum required fields
- $pageFlds = array (
- 'page_id' => null,
- 'page_namespace' => null,
- 'page_title' => null
- );
+ // Get validated and normalized title objects
+ $linkBatch = $this->processTitlesStrArray($titles);
+
+ // An empty batch gives no usable condition set (constructSet() can return false,
+ // which resolvePendingRedirects() also guards against). Never run the page query
+ // without a condition: skip the lookup when there is nothing to look up.
+ $set = $linkBatch->isEmpty() ? false : $linkBatch->constructSet('page', $db);
- // only store non-default fields
- $this->mRequestedFields = array_diff_key($this->mRequestedFields, $pageFlds);
+ if (false !== $set) {
+ // Get pageIDs data from the `page` table
+ $this->profileDBIn();
+ $res = $db->select('page', $this->getPageTableFields(), $set, __METHOD__);
+ $this->profileDBOut();
- if ($redirects)
- $pageFlds['page_is_redirect'] = null;
+ // Hack: get the ns:titles stored in array(ns => array(titles)) format
+ $this->initFromQueryResult($db, $res, $linkBatch->data, true); // process Titles
+ }
- $pageFlds = array_keys(array_merge($pageFlds, $this->mRequestedFields));
+ // Resolve any found redirects
+ $this->resolvePendingRedirects();
+ }
+ /**
+ * Populate the set from a list of page IDs.
+ * Selects the matching rows from the page table, stores every page found,
+ * records the IDs that returned no row as missing (see initFromQueryResult())
+ * and finally resolves any queued redirects.
+ * @param $pageids array of page IDs
+ */
+ private function initFromPageIds($pageids) {
$db = $this->getDB();
- if ($processTitles) {
-
- // Get validated and normalized title objects
- $linkBatch = $this->processTitlesStrArray($titles);
-
- $set = $linkBatch->constructSet('page', $db);
- } else {
- $set = array (
- 'page_id' => $pageids
- );
- }
-
- //
- // Repeat until all redirects have been resolved
- // The infinite loop is prevented by keeping all known pages in $this->mAllPages
- //
- do {
- if ($processTitles) {
- // Hack: get the ns:titles stored in array(ns => array(titles)) format
- $remaining = $linkBatch->data;
- } else {
- $remaining = array_flip($pageids); // turn pageids into keys
- }
+ $set = array (
+ 'page_id' => $pageids
+ );
- $redirectIds = array ();
+ // Get pageIDs data from the `page` table
+ $this->profileDBIn();
+ $res = $db->select('page', $this->getPageTableFields(), $set, __METHOD__);
+ $this->profileDBOut();
+
+ // initFromQueryResult() takes $remaining by reference, so it must be given
+ // a real variable rather than the result of a function call
+ $remaining = array_flip($pageids); // turn pageids into keys
+ $this->initFromQueryResult($db, $res, $remaining, false); // process PageIDs
- //
- // Get data about $linkBatch from `page` table
- //
- $this->profileDBIn();
- $res = $db->select('page', $pageFlds, $set, __METHOD__);
- $this->profileDBOut();
- while ($row = $db->fetchObject($res)) {
+ // Resolve any found redirects
+ $this->resolvePendingRedirects();
+ }
+
+ /**
+ * Iterate through the result of the query on 'page' table,
+ * and for each row create and store title object and save any extra fields requested.
+ * The result is freed once all rows have been read.
+ * @param $db Database
+ * @param $res DB Query result
+ * @param $remaining Array of either pageID or ns/title elements (optional).
+ * Passed by reference: every item found in $res is removed from it.
+ * If given, items still left afterwards are recorded as missing: titles are
+ * stored in $mAllPages with a page ID of 0, page IDs are added to $mMissingPageIDs
+ * @param $processTitles bool Must be provided together with $remaining.
+ * If true, treat $remaining as an array of [ns][title]
+ * If false, treat it as an array of [pageIDs]
+ * Nothing is returned: redirects found while resolving redirects are queued
+ * in $mPendingRedirectIDs by processDbRow().
+ */
+ private function initFromQueryResult($db, $res, &$remaining = null, $processTitles = null) {
+ if (!is_null($remaining) && is_null($processTitles))
+ ApiBase :: dieDebug(__METHOD__, 'Missing $processTitles parameter when $remaining is provided');
+
+ while ($row = $db->fetchObject($res)) {
- $pageId = intval($row->page_id);
+ $pageId = intval($row->page_id);
+ // Remove found page from the list of remaining items
+ if (isset($remaining)) {
if ($processTitles)
unset ($remaining[$row->page_namespace][$row->page_title]);
else
unset ($remaining[$pageId]);
-
- $title = Title :: makeTitle($row->page_namespace, $row->page_title);
- $this->mAllPages[$row->page_namespace][$row->page_title] = $pageId;
-
- if ($redirects && $row->page_is_redirect == '1') {
- $redirectIds[$pageId] = $title;
- } else {
- $this->mGoodTitles[$pageId] = $title;
- }
-
- foreach ($this->mRequestedFields as $fieldName => & $fieldValues) {
- $fieldValues[$pageId] = $row-> $fieldName;
- }
}
- $db->freeResult($res);
-
- if ($processTitles) {
+
+ // Store any extra fields requested by modules
+ $this->processDbRow($row);
+ }
+ $db->freeResult($res);
+
+ if(isset($remaining)) {
+ // Any items left in the $remaining list are added as missing
+ if($processTitles) {
// The remaining titles in $remaining are non-existant pages
foreach ($remaining as $ns => $dbkeys) {
foreach ($dbkeys as $dbkey => $nothing) {
$this->mAllPages[$ns][$dbkey] = 0;
}
}
- } else {
- // The remaining pageids in $remaining do not exist
- foreach ($remaining as $pageid => $ignore) {
- $this->mMissingPageIDs[] = $pageid;
- }
}
+ else
+ {
+ // The remaining pageids do not exist
+ if(empty($this->mMissingPageIDs))
+ $this->mMissingPageIDs = array_keys($remaining);
+ else
+ $this->mMissingPageIDs = array_merge($this->mMissingPageIDs, array_keys($remaining));
+ }
+ }
+ }
- if (!$redirects || empty ($redirectIds))
- break;
+ /**
+ * Placeholder for populating the set from revision IDs.
+ * Always ends the request with a 'notimplemented' usage error.
+ * @param $revids array of revision IDs (unused)
+ */
+ private function initFromRevIDs($revids) {
+ $this->dieUsage(__METHOD__ . ' is not implemented', 'notimplemented');
+ }
- //
- // Resolve redirects by querying the pagelinks table, and repeat the process
- // Create a new linkBatch object for the next pass
- //
- $linkBatch = $this->resolveRedirectList($redirectIds);
+ /**
+ * Follow every redirect queued in $mPendingRedirectIDs until none are left.
+ * Each pass asks getRedirectTargets() for the targets of the queued redirects,
+ * looks those targets up in the page table and stores the rows found; targets
+ * that are redirects themselves get queued for the next pass.
+ * Does nothing when the set was not created to resolve redirects.
+ */
+ private function resolvePendingRedirects() {
- // Redirects are always titles
- $processTitles = true;
+ // Nothing to do unless redirect resolution was requested
+ if (!$this->mResolveRedirects) {
+ return;
+ }
+
+ $db = $this->getDB();
+ $pageFlds = $this->getPageTableFields();
+
+ // Repeat until all redirects have been resolved
+ // The infinite loop is prevented by keeping all known pages in $this->mAllPages
+ while (!empty ($this->mPendingRedirectIDs)) {
+ // Resolve redirects by querying the pagelinks table;
+ // the returned batch holds the titles to look up in this pass
+ $targets = $this->getRedirectTargets();
+
+ // Stop as soon as there is nothing new to look up
+ if ($targets->isEmpty()) {
+ break;
+ }
+ $where = $targets->constructSet('page', $db);
+ if ($where === false) {
+ break;
+ }
+
+ // Get pageIDs data from the `page` table
+ $this->profileDBIn();
+ $rows = $db->select('page', $pageFlds, $where, __METHOD__);
+ $this->profileDBOut();
+
+ // Hack: get the ns:titles stored in array(ns => array(titles)) format
+ $this->initFromQueryResult($db, $rows, $targets->data, true);
}
- while (false !== ($set = $linkBatch->constructSet('page', $db)));
}
- private function resolveRedirectList($redirectIds) {
+ private function getRedirectTargets() {
$linkBatch = new LinkBatch();
$db = $this->getDB();
'pl_namespace',
'pl_title'
), array (
- 'pl_from' => array_keys($redirectIds
+ 'pl_from' => array_keys($this->mPendingRedirectIDs
)), __METHOD__);
$this->profileDBOut();
// ( http://bugzilla.wikipedia.org/show_bug.cgi?id=7304 )
// A redirect page may have more than one link.
// This code will only use the first link returned.
- if (isset ($redirectIds[$plfrom])) { // remove line when bug 7304 is fixed
+ if (isset ($this->mPendingRedirectIDs[$plfrom])) { // remove line when bug 7304 is fixed
- $titleStrFrom = $redirectIds[$plfrom]->getPrefixedText();
+ $titleStrFrom = $this->mPendingRedirectIDs[$plfrom]->getPrefixedText();
$titleStrTo = Title :: makeTitle($row->pl_namespace, $row->pl_title)->getPrefixedText();
- unset ($redirectIds[$plfrom]); // remove line when bug 7304 is fixed
+ unset ($this->mPendingRedirectIDs[$plfrom]); // remove line when bug 7304 is fixed
// Avoid an infinite loop by checking if we have already processed this target
if (!isset ($this->mAllPages[$row->pl_namespace][$row->pl_title])) {
}
$db->freeResult($res);
+ // All IDs must exist in the page table
+ if (!empty($this->mPendingRedirectIDs[$plfrom]))
+ $this->dieDebug('Invalid redirect IDs were found');
+
return $linkBatch;
}
return $linkBatch;
}
- private function populateRevIDs($revids) {
- $this->dieUsage(__METHOD__ . ' is not implemented', 'notimplemented');
- }
-
- public function execute() {
- $this->profileIn();
- $titles = $pageids = $revids = $redirects = null;
- extract($this->extractRequestParams());
-
- // Only one of the titles/pageids/revids is allowed at the same time
- $dataSource = null;
- if (isset ($titles))
- $dataSource = 'titles';
- if (isset ($pageids)) {
- if (isset ($dataSource))
- $this->dieUsage("Cannot use 'pageids' at the same time as '$dataSource'", 'multisource');
- $dataSource = 'pageids';
- }
- if (isset ($revids)) {
- if (isset ($dataSource))
- $this->dieUsage("Cannot use 'revids' at the same time as '$dataSource'", 'multisource');
- $dataSource = 'revids';
- }
-
- switch ($dataSource) {
- case 'titles' :
- case 'pageids' :
- $this->populatePages($titles, $pageids, $redirects);
- break;
- case 'revids' :
- $this->populateRevIDs($revids);
- break;
- default :
- // Do nothing - some queries do not need any of the data sources.
- break;
- }
- $this->profileOut();
- }
-
- /**
- * This method is used by generators to pass the list of pageIDs internaly
- */
- public function executeForPageIDs($pageIDs) {
- $this->profileIn();
- $pageIDs = array_map( 'intval', $pageIDs ); // paranoia
- $this->populatePages(null, $pageIDs, $this->getParameter('redirects'));
- $this->profileOut();
- }
-
protected function getAllowedParams() {
return array (
'titles' => array (
'revids' => array (
ApiBase :: PARAM_TYPE => 'integer',
ApiBase :: PARAM_ISMULTI => true
- ),
- 'redirects' => false
+ )
);
}
return array (
'titles' => 'A list of titles to work on',
'pageids' => 'A list of page IDs to work on',
- 'revids' => 'A list of revision IDs to work on',
- 'redirects' => 'Automatically resolve redirects'
+ 'revids' => 'A list of revision IDs to work on'
);
}
}
+ /**
+ * Run this module as a generator, feeding the pages found into $resultPageSet.
+ * A page set that resolves redirects is refused with a 'params' usage error.
+ * @param $resultPageSet page set that receives the generated pages
+ */
public function executeGenerator($resultPageSet) {
+ if ($resultPageSet->isResolvingRedirects()) {
+ $this->dieUsage('Use "gapfilterredir=nonredirects" option instead of "redirects" when using allpages as a generator', 'params');
+ }
+
$this->run($resultPageSet);
}
$where = array (
'page_namespace' => $namespace
);
+
if (isset ($from)) {
$where[] = 'page_title>=' . $db->addQuotes(ApiQueryBase :: titleToKey($from));
}
+
if ($filterredir === 'redirects') {
$where['page_is_redirect'] = 1;
}
$where['page_is_redirect'] = 0;
}
+ if (is_null($resultPageSet)) {
+ $fields = array (
+ 'page_id',
+ 'page_namespace',
+ 'page_title'
+ );
+ } else {
+ $fields = $resultPageSet->getPageTableFields();
+ }
+
$this->profileDBIn();
- $res = $db->select('page', array (
- 'page_id',
- 'page_namespace',
- 'page_title'
- ), $where, __CLASS__ . '::' . __METHOD__, array (
+ $res = $db->select('page', $fields, $where, __CLASS__ . '::' . __METHOD__, array (
'USE INDEX' => 'name_title',
'LIMIT' => $limit +1,
'ORDER BY' => 'page_namespace, page_title'
if (++ $count > $limit) {
// We've reached the one extra which shows that there are additional pages to be had. Stop here...
$msg = array (
- 'continue' => $this->encodeParamName('from') . '='. ApiQueryBase :: keyToTitle($row->page_title));
+ 'continue' => $this->encodeParamName('from'
+ ) . '=' . ApiQueryBase :: keyToTitle($row->page_title));
$this->getResult()->addValue('query-status', 'allpages', $msg);
break;
}
$title = Title :: makeTitle($row->page_namespace, $row->page_title);
// skip any pages that user has no rights to read
if ($title->userCanRead()) {
- $id = intval($row->page_id);
if (is_null($resultPageSet)) {
- $pagedata = array ();
- $pagedata['id'] = $id;
- if ($title->getNamespace() !== 0)
- $pagedata['ns'] = $title->getNamespace();
- $pagedata['title'] = $title->getPrefixedText();
-
- $data[$id] = $pagedata;
- } else {
+ $id = intval($row->page_id);
$data[] = $id; // in generator mode, just assemble a list of page IDs.
+ } else {
+ $resultPageSet->processDbRow($row);
}
}
}
if (is_null($resultPageSet)) {
ApiResult :: setIndexedTagName($data, 'p');
$this->getResult()->addValue('query', 'allpages', $data);
- } else {
- $resultPageSet->executeForPageIDs($data);
}
}
+ /**
+ * Usage examples shown in the API help for this module.
+ * Unindented entries are headings; indented entries are descriptions and
+ * example request URLs.
+ * @return array of strings
+ */
protected function getExamples() {
return array (
- 'api.php?action=query&list=allpages',
- 'api.php?action=query&list=allpages&apfrom=B&aplimit=5',
- 'api.php?action=query&generator=allpages&gaplimit=4&prop=info (generator)'
+ 'Simple Use',
+ ' api.php?action=query&list=allpages',
+ ' api.php?action=query&list=allpages&apfrom=B&aplimit=5',
+ 'Using as Generator',
+ ' Show info about 4 pages starting at the letter "T"',
+ ' api.php?action=query&generator=allpages&gaplimit=4&gapfrom=T&prop=info',
+ ' Show content of first 2 non-redirect pages beginning at "Re"',
+ ' api.php?action=query&generator=allpages&gaplimit=2&gapfilterredir=nonredirects&gapfrom=Re&prop=revisions&rvprop=content'
);
}