From fc6ec50f94a2cee1f1dee1888b7732bf2efe4097 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Mon, 25 Sep 2006 04:12:07 +0000 Subject: [PATCH] * API: A new ApiPageSet class to retrieve page data and resolve redirects. --- api.php | 58 ++++++++--- includes/api/ApiFormatBase.php | 4 + includes/api/ApiFormatXml.php | 4 + includes/api/ApiMain.php | 6 +- includes/api/ApiPageSet.php | 182 +++++++++++++++++++++++++++++++++ includes/api/ApiQuery.php | 147 ++++++++++++++++---------- includes/api/ApiQueryBase.php | 18 +++- includes/api/ApiResult.php | 36 +++---- 8 files changed, 362 insertions(+), 93 deletions(-) create mode 100644 includes/api/ApiPageSet.php diff --git a/api.php b/api.php index a286134574..25191ca8a5 100644 --- a/api.php +++ b/api.php @@ -24,36 +24,68 @@ $apiStartTime = microtime(true); +/** + * When no format parameter is given, this format will be used + */ +define('API_DEFAULT_FORMAT', 'xmlfm'); + +$apidir = 'includes/api'; /** * List of classes and containing files. */ $apiAutoloadClasses = array ( - 'ApiBase' => 'includes/api/ApiBase.php', - 'ApiMain' => 'includes/api/ApiMain.php', - 'ApiResult' => 'includes/api/ApiResult.php', - - // Available modules - should match the $apiModules list - 'ApiHelp' => 'includes/api/ApiHelp.php', - 'ApiLogin' => 'includes/api/ApiLogin.php', - 'ApiQuery' => 'includes/api/ApiQuery.php' + + 'ApiMain' => "$apidir/ApiMain.php", + + // Utility classes + 'ApiBase' => "$apidir/ApiBase.php", + 'ApiQueryBase' => "$apidir/ApiQueryBase.php", + 'ApiResult' => "$apidir/ApiResult.php", + + // Formats + 'ApiFormatBase' => "$apidir/ApiFormatBase.php", + 'ApiFormatYaml' => "$apidir/ApiFormatYaml.php", + 'ApiFormatXml' => "$apidir/ApiFormatXml.php", + 'ApiFormatJson' => "$apidir/ApiFormatJson.php", + + // Modules (action=...) - should match the $apiModules list + 'ApiHelp' => "$apidir/ApiHelp.php", + 'ApiLogin' => "$apidir/ApiLogin.php", + 'ApiQuery' => "$apidir/ApiQuery.php", + + // Query items (what/list=...) + 'ApiQueryContent' => "$apidir/ApiQueryContent.php", + + 'ApiPageSet' => "$apidir/ApiPageSet.php" ); /** * List of available modules: action name => module class * The class must also be listed in the $apiAutoloadClasses array. - */ + */ $apiModules = array ( 'help' => 'ApiHelp', 'login' => 'ApiLogin', 'query' => 'ApiQuery' ); +/** + * List of available formats: format name => format class + * The class must also be listed in the $apiAutoloadClasses array. + */ +$apiFormats = array ( + 'json' => 'ApiFormatJson', + 'jsonfm' => 'ApiFormatJson', + 'xml' => 'ApiFormatXml', + 'xmlfm' => 'ApiFormatXml', + 'yaml' => 'ApiFormatYaml', + 'yamlfm' => 'ApiFormatYaml' +); // Initialise common code require_once ('./includes/WebStart.php'); wfProfileIn('api.php'); - // Verify that the API has not been disabled // The next line should be // if (isset ($wgEnableAPI) && !$wgEnableAPI) { @@ -64,16 +96,14 @@ if (!isset ($wgEnableAPI) || !$wgEnableAPI) { die(-1); } - ApiInitAutoloadClasses($apiAutoloadClasses); -$processor = new ApiMain($apiStartTime, $apiModules); +$processor = new ApiMain($apiStartTime, $apiModules, $apiFormats); $processor->Execute(); wfProfileOut('api.php'); wfLogProfilingData(); exit; // Done! - function ApiInitAutoloadClasses($apiAutoloadClasses) { // Append $apiAutoloadClasses to $wgAutoloadClasses @@ -84,4 +114,4 @@ function ApiInitAutoloadClasses($apiAutoloadClasses) { $wgAutoloadClasses = $apiAutoloadClasses; } } -?> +?> \ No newline at end of file diff --git a/includes/api/ApiFormatBase.php b/includes/api/ApiFormatBase.php index a6847e3e4e..c26d62a7bc 100644 --- a/includes/api/ApiFormatBase.php +++ b/includes/api/ApiFormatBase.php @@ -55,6 +55,10 @@ abstract class ApiFormatBase extends ApiBase { */ public abstract function GetMimeType(); + public function GetNeedsRawData() { + return false; + } + /** * Returns true when an HTML filtering printer should be used. * The default implementation assumes that formats ending with 'fm' diff --git a/includes/api/ApiFormatXml.php b/includes/api/ApiFormatXml.php index 3c30e75416..301130b6c8 100644 --- a/includes/api/ApiFormatXml.php +++ b/includes/api/ApiFormatXml.php @@ -38,6 +38,10 @@ class ApiFormatXml extends ApiFormatBase { public function GetMimeType() { return 'text/xml'; } + + public function GetNeedsRawData() { + return true; + } public function Execute() { $xmlindent = null; diff --git a/includes/api/ApiMain.php b/includes/api/ApiMain.php index 2a6da70ca1..2b61f25483 100644 --- a/includes/api/ApiMain.php +++ b/includes/api/ApiMain.php @@ -114,6 +114,8 @@ class ApiMain extends ApiBase { */ private function PrintResult($isError) { $this->mPrinter->InitPrinter($isError); + if (!$this->mPrinter->GetNeedsRawData()) + $this->GetResult()->SanitizeData(); $this->mPrinter->Execute(); $this->mPrinter->ClosePrinter(); } @@ -148,7 +150,7 @@ class ApiMain extends ApiBase { $msg = parent :: MakeHelpMsg(); $astriks = str_repeat('*** ', 10); - $msg .= "\n\n$astriks Modules $astriks\n\n"; + $msg .= "\n\n$astriks Modules $astriks\n\n"; foreach ($this->mModules as $moduleName => $moduleClass) { $msg .= "* action=$moduleName *"; $module = new $this->mModules[$moduleName] ($this, $moduleName); @@ -158,7 +160,7 @@ class ApiMain extends ApiBase { $msg .= "\n"; } - $msg .= "\n$astriks Formats $astriks\n\n"; + $msg .= "\n$astriks Formats $astriks\n\n"; foreach ($this->mFormats as $moduleName => $moduleClass) { $msg .= "* format=$moduleName *"; $module = new $this->mFormats[$moduleName] ($this, $moduleName); diff --git a/includes/api/ApiPageSet.php b/includes/api/ApiPageSet.php new file mode 100644 index 0000000000..20fae3d310 --- /dev/null +++ b/includes/api/ApiPageSet.php @@ -0,0 +1,182 @@ + + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * http://www.gnu.org/copyleft/gpl.html + */ + +if (!defined('MEDIAWIKI')) { + // Eclipse helper - will be ignored in production + require_once ("ApiBase.php"); +} + +class ApiPageSet { + + private $allPages; // [ns][dbkey] => page_id or 0 when missing + private $db, $resolveRedirs; + private $goodTitles, $missingTitles, $redirectTitles; + + public function __construct($db, $resolveRedirs) { + $this->db = $db; + $this->resolveRedirs = $resolveRedirs; + + $this->allPages = array (); + $this->goodTitles = array (); + $this->missingTitles = array (); + + // only when resolving redirects: + if ($resolveRedirs) { + $this->redirectTitles = array (); + } + } + + /** + * Title objects that were found in the database. + * @return array page_id (int) => Title (obj) + */ + public function GetGoodTitles() { + return $this->goodTitles; + } + + /** + * Title objects that were NOT found in the database. + * @return array of Title objects + */ + public function GetMissingTitles() { + return $this->missingTitles; + } + + /** + * Get a list of redirects when doing redirect resolution + * @return array prefixed_title (string) => prefixed_title (string) + */ + public function GetRedirectTitles() { + return $this->redirectTitles; + } + + /** + * This method populates internal variables with page information + * based on the list of page titles given as a LinkBatch object. + * + * Steps: + * #1 For each title, get data from `page` table + * #2 If page was not found in the DB, store it as missing + * + * Additionally, when resolving redirects: + * #3 If no more redirects left, stop. + * #4 For each redirect, get its links from `pagelinks` table. + * #5 Substitute the original LinkBatch object with the new list + * #6 Repeat from step #1 + */ + public function PopulateTitles($linkBatch) { + $pageFlds = array ( + 'page_id', + 'page_namespace', + 'page_title' + ); + if ($this->resolveRedirs) { + $pageFlds[] = 'page_is_redirect'; + } + + // + // Repeat until all redirects have been resolved + // + while (false !== ($set = $linkBatch->constructSet('page', $this->db))) { + + // Hack: Get the ns:titles stored in array(ns => array(titles)) format + $remaining = $linkBatch->data; + + if ($this->resolveRedirs) + $redirectIds = array (); + + // + // Get data about $linkBatch from `page` table + // + $res = $this->db->select('page', $pageFlds, $set, __CLASS__ . '::' . __FUNCTION__); + while ($row = $this->db->fetchObject($res)) { + + unset ($remaining[$row->page_namespace][$row->page_title]); + $title = Title :: makeTitle($row->page_namespace, $row->page_title); + $this->allPages[$row->page_namespace][$row->page_title] = $row->page_id; + + if ($this->resolveRedirs && $row->page_is_redirect == '1') { + $redirectIds[$row->page_id] = $title; + } else { + $this->goodTitles[$row->page_id] = $title; + } + } + $this->db->freeResult($res); + + // + // The remaining titles in $remaining are non-existant pages + // + foreach ($remaining as $ns => $dbkeys) { + foreach ($dbkeys as $dbkey => $nothing) { + $this->missingTitles[] = Title :: makeTitle($ns, $dbkey); + $this->allPages[$ns][$dbkey] = 0; + } + } + + if (!$this->resolveRedirs || empty ($redirectIds)) + break; + + // + // Resolve redirects by querying the pagelinks table, and repeat the process + // + + // Create a new linkBatch object for the next pass + $linkBatch = new LinkBatch(); + + // find redirect targets for all redirect pages + $res = $this->db->select('pagelinks', array ( + 'pl_from', + 'pl_namespace', + 'pl_title' + ), array ( + 'pl_from' => array_keys($redirectIds + )), __CLASS__ . '::' . __FUNCTION__); + + while ($row = $this->db->fetchObject($res)) { + + // Bug 7304 workaround + // ( http://bugzilla.wikipedia.org/show_bug.cgi?id=7304 ) + // A redirect page may have more than one link. + // This code will only use the first link returned. + if (isset ($redirectIds[$row->pl_from])) { // remove line when 7304 is fixed + + $titleStrFrom = $redirectIds[$row->pl_from]->getPrefixedText(); + $titleStrTo = Title :: makeTitle($row->pl_namespace, $row->pl_title)->getPrefixedText(); + $this->redirectTitles[$titleStrFrom] = $titleStrTo; + + unset ($redirectIds[$row->pl_from]); // remove line when 7304 is fixed + + // Avoid an infinite loop by checking if we have already processed this target + if (!isset ($this->allPages[$row->pl_namespace][$row->pl_title])) { + $linkBatch->add($row->pl_namespace, $row->pl_title); + } + } + } + $this->db->freeResult($res); + } + } +} +?> \ No newline at end of file diff --git a/includes/api/ApiQuery.php b/includes/api/ApiQuery.php index 501ebb0787..bc73f4874c 100644 --- a/includes/api/ApiQuery.php +++ b/includes/api/ApiQuery.php @@ -34,32 +34,38 @@ class ApiQuery extends ApiBase { var $mMetaModuleNames, $mPropModuleNames, $mListModuleNames; private $mQueryMetaModules = array ( -// 'siteinfo' => 'ApiQuerySiteinfo', -// 'userinfo' => 'ApiQueryUserinfo' + 'siteinfo' => 'ApiQuerySiteinfo', + //'userinfo' => 'ApiQueryUserinfo' + + ); + private $mQueryPropModules = array ( -// 'info' => 'ApiQueryInfo', -// 'categories' => 'ApiQueryCategories', -// 'imageinfo' => 'ApiQueryImageinfo', -// 'langlinks' => 'ApiQueryLanglinks', -// 'links' => 'ApiQueryLinks', -// 'templates' => 'ApiQueryTemplates', -// 'revisions' => 'ApiQueryRevisions', + // 'info' => 'ApiQueryInfo', + // 'categories' => 'ApiQueryCategories', + // 'imageinfo' => 'ApiQueryImageinfo', + // 'langlinks' => 'ApiQueryLanglinks', + // 'links' => 'ApiQueryLinks', + // 'templates' => 'ApiQueryTemplates', + // 'revisions' => 'ApiQueryRevisions', // Should be removed - 'content' => 'ApiQueryContent' + 'content' => 'ApiQueryContent' ); + private $mQueryListModules = array ( -// 'allpages' => 'ApiQueryAllpages', -// 'backlinks' => 'ApiQueryBacklinks', -// 'categorymembers' => 'ApiQueryCategorymembers', -// 'embeddedin' => 'ApiQueryEmbeddedin', -// 'imagelinks' => 'ApiQueryImagelinks', -// 'logevents' => 'ApiQueryLogevents', -// 'recentchanges' => 'ApiQueryRecentchanges', -// 'usercontribs' => 'ApiQueryUsercontribs', -// 'users' => 'ApiQueryUsers', -// 'watchlist' => 'ApiQueryWatchlist', + 'allpages' => 'ApiQueryAllpages', + // 'backlinks' => 'ApiQueryBacklinks', + // 'categorymembers' => 'ApiQueryCategorymembers', + // 'embeddedin' => 'ApiQueryEmbeddedin', + // 'imagelinks' => 'ApiQueryImagelinks', + // 'logevents' => 'ApiQueryLogevents', + // 'recentchanges' => 'ApiQueryRecentchanges', + // 'usercontribs' => 'ApiQueryUsercontribs', + // 'users' => 'ApiQueryUsers', + // 'watchlist' => 'ApiQueryWatchlist' + + ); private $mSlaveDB = null; @@ -69,8 +75,8 @@ class ApiQuery extends ApiBase { $this->mMetaModuleNames = array_keys($this->mQueryMetaModules); $this->mPropModuleNames = array_keys($this->mQueryPropModules); $this->mListModuleNames = array_keys($this->mQueryListModules); - - $this->mAllowedGenerators = array_merge( $this->mListModuleNames, $this->mPropModuleNames); + + $this->mAllowedGenerators = array_merge($this->mListModuleNames, $this->mPropModuleNames); } public function GetDB() { @@ -88,46 +94,75 @@ class ApiQuery extends ApiBase { // Only one of the titles/pageids/revids is allowed at the same time // $dataSource = null; - if (isset($titles)) + if (isset ($titles)) $dataSource = 'titles'; - if (isset($pageids)) { - if (isset($dataSource)) + if (isset ($pageids)) { + if (isset ($dataSource)) $this->DieUsage("Cannot use 'pageids' at the same time as '$dataSource'", 'multisource'); $dataSource = 'pageids'; } - if (isset($revids)) { - if (isset($dataSource)) + if (isset ($revids)) { + if (isset ($dataSource)) $this->DieUsage("Cannot use 'revids' at the same time as '$dataSource'", 'multisource'); $dataSource = 'revids'; } - - // + + if (isset($dataSource) && $dataSource !== 'titles') + $this->DieUsage('Currently only titles= parameter is supported.', 'notimplemented'); + // Normalize titles - // - if ($dataSource === 'titles') { - $linkBatch = new LinkBatch; - foreach ( $titles as &$titleString ) { - $titleObj = &Title::newFromText( $titleString ); - - // Validation - if (!$titleObj) - $this->dieUsage( "bad title $titleString", 'pi_invalidtitle' ); - if ($titleObj->getNamespace() < 0) - $this->dieUsage( "No support for special page $titleString has been implemented", 'pi_unsupportednamespace' ); - if (!$titleObj->userCanRead()) - $this->dieUsage( "No read permission for $titleString", 'pi_titleaccessdenied' ); - - $linkBatch->addObj( $titleObj ); - - // Make sure we remember the original title that was given to us - // This way the caller can correlate new titles with the originally requested, i.e. namespace is localized or capitalization - if( $titleString !== $titleObj->getPrefixedText() ) { - $this->GetResult()->AddMessage('query', 'normalized', array($titleString => $titleObj->getPrefixedText())); - } + $linkBatch = $this->ProcessTitles($titles); + + // Get titles info from DB + $data = new ApiPageSet($this->GetDB(), $redirects); + $data->PopulateTitles($linkBatch); + + // Show redirects information + if ($redirects) { + foreach ($data->GetRedirectTitles() as $titleStrFrom => $titleStrTo) { + $this->GetResult()->AddMessage('query', 'redirects', array ( + 'from' => $titleStrFrom, + 'to' => $titleStrTo + ), 'r'); } } } + /** + * Given an array of title strings, convert them into Title objects. + * This method validates access rights for the title, + * and appends normalization values to the output. + * @return LinkBatch of title objects. + */ + protected function ProcessTitles($titles) { + + $linkBatch = new LinkBatch(); + + foreach ($titles as $titleString) { + $titleObj = Title :: newFromText($titleString); + + // Validation + if (!$titleObj) + $this->dieUsage("bad title $titleString", 'invalidtitle'); + if ($titleObj->getNamespace() < 0) + $this->dieUsage("No support for special page $titleString has been implemented", 'unsupportednamespace'); + if (!$titleObj->userCanRead()) + $this->dieUsage("No read permission for $titleString", 'titleaccessdenied'); + + $linkBatch->addObj($titleObj); + + // Make sure we remember the original title that was given to us + // This way the caller can correlate new titles with the originally requested, i.e. namespace is localized or capitalization + if ($titleString !== $titleObj->getPrefixedText()) { + $this->GetResult()->AddMessage('query', 'normalized', array ( + 'from' => $titleString, + 'to' => $titleObj->getPrefixedText()), 'n'); + } + } + + return $linkBatch; + } + protected function GetAllowedParams() { return array ( 'meta' => array ( @@ -155,7 +190,8 @@ class ApiQuery extends ApiBase { 'revids' => array ( GN_ENUM_TYPE => 'integer', GN_ENUM_ISMULTI => true - ) + ), + 'redirects' => false ); } @@ -172,10 +208,11 @@ class ApiQuery extends ApiBase { } protected function GetDescription() { - return array( - 'Query API module allows applications to get needed pieces of data from the MediaWiki databases,', - 'and is loosely based on the Query API interface currently available on all MediaWiki servers.', - 'All data modifications will first have to use query to acquire a token to prevent abuse from malicious sites.'); + return array ( + 'Query API module allows applications to get needed pieces of data from the MediaWiki databases,', + 'and is loosely based on the Query API interface currently available on all MediaWiki servers.', + 'All data modifications will first have to use query to acquire a token to prevent abuse from malicious sites.' + ); } protected function GetExamples() { diff --git a/includes/api/ApiQueryBase.php b/includes/api/ApiQueryBase.php index 78d98ffc9b..0b5879a3ff 100644 --- a/includes/api/ApiQueryBase.php +++ b/includes/api/ApiQueryBase.php @@ -33,16 +33,28 @@ abstract class ApiQueryBase extends ApiBase { private $mQueryModule; - /** - * Constructor - */ public function __construct($main, $query) { parent :: __construct($main); $this->mQueryModule = $query; } + /** + * Get the name of the query being executed by this instance + */ public function GetQuery() { return $this->mQueryModule; } + + /** + * Derived classes return true when they can be used as title generators for other query modules. + */ + protected static abstract function GetCanGenerate(); + + /** + * Return true if this instance is being used as a generator. + */ + protected function GetIsGenerator() { + return false; + } } ?> \ No newline at end of file diff --git a/includes/api/ApiResult.php b/includes/api/ApiResult.php index 0a5abd3bae..f99273bd10 100644 --- a/includes/api/ApiResult.php +++ b/includes/api/ApiResult.php @@ -49,14 +49,7 @@ class ApiResult extends ApiBase { return $this->mData; } - /* function addPage($title) - { - if (!isset($this->mPages)) - $this->mPages &= $this->mData['pages']; - } - */ - - function AddMessage($mainSection, $subSection, $value, $preserveXmlSpacing = false) { + function AddMessage($mainSection, $subSection, $value, $multiitem = false, $preserveXmlSpacing = false) { if (!array_key_exists($mainSection, $this->mData)) { $this->mData[$mainSection] = array (); } @@ -68,19 +61,24 @@ class ApiResult extends ApiBase { } else { $element = & $this->mData[$mainSection]; } - if (is_array($value)) { - $element = array_merge($element, $value); - if (!array_key_exists('*', $element)) { - $element['*'] = ''; - } + if( $multiitem ) { + $element['_element'] = $multiitem; + $element[] = $value; } else { - if (array_key_exists('*', $element)) { - $element['*'] .= $value; + if (is_array($value)) { + $element = array_merge($element, $value); + if (!array_key_exists('*', $element)) { + $element['*'] = ''; + } } else { - $element['*'] = $value; - } - if ($preserveXmlSpacing) { - $element['xml:space'] = 'preserve'; + if (array_key_exists('*', $element)) { + $element['*'] .= $value; + } else { + $element['*'] = $value; + } + if ($preserveXmlSpacing) { + $element['xml:space'] = 'preserve'; + } } } } -- 2.20.1