3 * Created on Dec 01, 2007
5 * Copyright © 2007 Yuri Astrakhan "<Firstname><Lastname>@gmail.com"
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
28 class ApiParse
extends ApiBase
{
30 /** @var string $section */
31 private $section = null;
33 /** @var Content $content */
34 private $content = null;
36 /** @var Content $pstContent */
37 private $pstContent = null;
/**
 * Entry point of the parse module: reads the request parameters, obtains a
 * parser output either for a stored revision/page (oldid, pageid, page) or for
 * caller-supplied text, and adds the requested pieces ("prop") to the API
 * result under this module's name.
 *
 * NOTE(review): this block appears to have lost lines in extraction (for
 * example the else-branches, closing braces and some call openers that the
 * visible statements imply), and each line carries a stray leading number.
 * Restore it from the canonical file before relying on exact control flow.
 */
39 public function execute() {
40 // The data is hot but user-dependent, like page views, so we set vary cookies
41 $this->getMain()->setCacheMode( 'anon-public-user-private' );
44 $params = $this->extractRequestParams();
45 $text = $params['text'];
46 $title = $params['title'];
// $titleProvided records whether the caller passed a title; it is used below
// for the page/title conflict check and for choosing the default content model.
47 if ( $title === null ) {
48 $titleProvided = false;
49 // A title is needed for parsing, so arbitrarily choose one
52 $titleProvided = true;
55 $page = $params['page'];
56 $pageid = $params['pageid'];
57 $oldid = $params['oldid'];
59 $model = $params['contentmodel'];
60 $format = $params['contentformat'];
62 if ( !is_null( $page ) && ( !is_null( $text ) ||
$titleProvided ) ) {
64 'The page parameter cannot be used together with the text and title parameters',
// Flip the requested prop names into keys so each can be tested with isset().
69 $prop = array_flip( $params['prop'] );
// $this->section holds the requested section, or false when none was asked
// for; the "!== false" checks further down rely on that.
71 if ( isset( $params['section'] ) ) {
72 $this->section
= $params['section'];
74 $this->section
= false;
77 // The parser needs $wgTitle to be set, apparently the
78 // $title parameter in Parser::parse isn't enough *sigh*
79 // TODO: Does this still need $wgTitle?
80 global $wgParser, $wgTitle;
85 $result = $this->getResult();
// First case: the content comes from a stored revision or page.
87 if ( !is_null( $oldid ) ||
!is_null( $pageid ) ||
!is_null( $page ) ) {
88 if ( !is_null( $oldid ) ) {
89 // Don't use the parser cache
90 $rev = Revision
::newFromID( $oldid );
92 $this->dieUsage( "There is no revision ID $oldid", 'missingrev' );
94 if ( !$rev->userCan( Revision
::DELETED_TEXT
, $this->getUser() ) ) {
95 $this->dieUsage( "You don't have permission to view deleted revisions", 'permissiondenied' );
98 $titleObj = $rev->getTitle();
100 $pageObj = WikiPage
::factory( $titleObj );
101 $popts = $this->makeParserOptions( $pageObj, $params );
103 // If for some reason the "oldid" is actually the current revision, it may be cached
104 if ( $rev->isCurrent() ) {
105 // May get from/save to parser cache
106 $p_result = $this->getParsedContent( $pageObj, $popts,
107 $pageid, isset( $prop['wikitext'] ) );
108 } else { // This is an old revision, so get the text differently
109 $this->content
= $rev->getContent( Revision
::FOR_THIS_USER
, $this->getUser() );
111 if ( $this->section
!== false ) {
112 $this->content
= $this->getSectionContent( $this->content
, 'r' . $rev->getId() );
115 // Should we save old revision parses to the parser cache?
116 $p_result = $this->content
->getParserOutput( $titleObj, $rev->getId(), $popts );
118 } else { // Not $oldid, but $pageid or $page
// With 'redirects' set, an internal request (FauxRequest run through ApiMain)
// is built and the 'redirects' entry of its query result is read back.
119 if ( $params['redirects'] ) {
124 if ( !is_null( $pageid ) ) {
125 $reqParams['pageids'] = $pageid;
127 $reqParams['titles'] = $page;
129 $req = new FauxRequest( $reqParams );
130 $main = new ApiMain( $req );
132 $data = $main->getResultData();
133 $redirValues = isset( $data['query']['redirects'] )
134 ?
$data['query']['redirects']
137 foreach ( (array)$redirValues as $r ) {
140 $pageParams = array( 'title' => $to );
141 } elseif ( !is_null( $pageid ) ) {
142 $pageParams = array( 'pageid' => $pageid );
144 $pageParams = array( 'title' => $page );
147 $pageObj = $this->getTitleOrPageId( $pageParams, 'fromdb' );
148 $titleObj = $pageObj->getTitle();
149 if ( !$titleObj ||
!$titleObj->exists() ) {
150 $this->dieUsage( "The page you specified doesn't exist", 'missingtitle' );
152 $wgTitle = $titleObj;
// The revision id is only looked up when the caller asked for it.
154 if ( isset( $prop['revid'] ) ) {
155 $oldid = $pageObj->getLatest();
158 $popts = $this->makeParserOptions( $pageObj, $params );
160 // Potentially cached
161 $p_result = $this->getParsedContent( $pageObj, $popts, $pageid,
162 isset( $prop['wikitext'] ) );
164 } else { // Not $oldid, $pageid, $page. Hence based on $text
165 $titleObj = Title
::newFromText( $title );
166 if ( !$titleObj ||
$titleObj->isExternal() ) {
167 $this->dieUsageMsg( array( 'invalidtitle', $title ) );
169 $wgTitle = $titleObj;
170 if ( $titleObj->canExist() ) {
171 $pageObj = WikiPage
::factory( $titleObj );
173 // Do like MediaWiki::initializeArticle()
174 $article = Article
::newFromTitle( $titleObj, $this->getContext() );
175 $pageObj = $article->getPage();
178 $popts = $this->makeParserOptions( $pageObj, $params );
179 $textProvided = !is_null( $text );
181 if ( !$textProvided ) {
182 if ( $titleProvided && ( $prop ||
$params['generatexml'] ) ) {
184 "'title' used without 'text', and parsed page properties were requested " .
185 "(did you mean to use 'page' instead of 'title'?)"
188 // Prevent warning from ContentHandler::makeContent()
192 // If we are parsing text, do not use the content model of the default
193 // API title, but default to wikitext to keep BC.
194 if ( $textProvided && !$titleProvided && is_null( $model ) ) {
195 $model = CONTENT_MODEL_WIKITEXT
;
196 $this->setWarning( "No 'title' or 'contentmodel' was given, assuming $model." );
200 $this->content
= ContentHandler
::makeContent( $text, $titleObj, $model, $format );
201 } catch ( MWContentSerializationException
$ex ) {
202 $this->dieUsage( $ex->getMessage(), 'parseerror' );
205 if ( $this->section
!== false ) {
206 $this->content
= $this->getSectionContent( $this->content
, $titleObj->getPrefixedText() );
// Both 'pst' and 'onlypst' need the pre-save-transformed content; with
// 'onlypst' it is serialized into the result without being parsed.
209 if ( $params['pst'] ||
$params['onlypst'] ) {
210 $this->pstContent
= $this->content
->preSaveTransform( $titleObj, $this->getUser(), $popts );
212 if ( $params['onlypst'] ) {
213 // Build a result and bail out
214 $result_array = array();
215 $result_array['text'] = array();
216 ApiResult
::setContent( $result_array['text'], $this->pstContent
->serialize( $format ) );
217 if ( isset( $prop['wikitext'] ) ) {
218 $result_array['wikitext'] = array();
219 ApiResult
::setContent( $result_array['wikitext'], $this->content
->serialize( $format ) );
221 $result->addValue( null, $this->getModuleName(), $result_array );
226 // Not cached (save or load)
227 if ( $params['pst'] ) {
228 $p_result = $this->pstContent
->getParserOutput( $titleObj, null, $popts );
230 $p_result = $this->content
->getParserOutput( $titleObj, null, $popts );
// Assemble the response; each optional piece below is gated on its prop key
// or on a request flag.
234 $result_array = array();
236 $result_array['title'] = $titleObj->getPrefixedText();
238 if ( !is_null( $oldid ) ) {
239 $result_array['revid'] = intval( $oldid );
242 if ( $params['redirects'] && !is_null( $redirValues ) ) {
243 $result_array['redirects'] = $redirValues;
246 if ( $params['disabletoc'] ) {
247 $p_result->setTOCEnabled( false );
250 if ( isset( $prop['text'] ) ) {
251 $result_array['text'] = array();
252 ApiResult
::setContent( $result_array['text'], $p_result->getText() );
255 if ( !is_null( $params['summary'] ) ) {
256 $result_array['parsedsummary'] = array();
257 ApiResult
::setContent(
258 $result_array['parsedsummary'],
259 Linker
::formatComment( $params['summary'], $titleObj )
263 if ( isset( $prop['langlinks'] ) ) {
264 $langlinks = $p_result->getLanguageLinks();
266 if ( $params['effectivelanglinks'] ) {
267 // Link flags are ignored for now, but may in the future be
268 // included in the result.
269 $linkFlags = array();
270 wfRunHooks( 'LanguageLinks', array( $titleObj, &$langlinks, &$linkFlags ) );
276 if ( isset( $prop['langlinks'] ) ) {
277 $result_array['langlinks'] = $this->formatLangLinks( $langlinks );
279 if ( isset( $prop['categories'] ) ) {
280 $result_array['categories'] = $this->formatCategoryLinks( $p_result->getCategories() );
282 if ( isset( $prop['categorieshtml'] ) ) {
283 $categoriesHtml = $this->categoriesHtml( $p_result->getCategories() );
284 $result_array['categorieshtml'] = array();
285 ApiResult
::setContent( $result_array['categorieshtml'], $categoriesHtml );
287 if ( isset( $prop['links'] ) ) {
288 $result_array['links'] = $this->formatLinks( $p_result->getLinks() );
290 if ( isset( $prop['templates'] ) ) {
291 $result_array['templates'] = $this->formatLinks( $p_result->getTemplates() );
293 if ( isset( $prop['images'] ) ) {
294 $result_array['images'] = array_keys( $p_result->getImages() );
296 if ( isset( $prop['externallinks'] ) ) {
297 $result_array['externallinks'] = array_keys( $p_result->getExternalLinks() );
299 if ( isset( $prop['sections'] ) ) {
300 $result_array['sections'] = $p_result->getSections();
// Fall back to the prefixed title text when the parser output has no
// display title.
303 if ( isset( $prop['displaytitle'] ) ) {
304 $result_array['displaytitle'] = $p_result->getDisplayTitle() ?
305 $p_result->getDisplayTitle() :
306 $titleObj->getPrefixedText();
// Both head-related props first load the parser output's metadata into the
// context's output object.
309 if ( isset( $prop['headitems'] ) ||
isset( $prop['headhtml'] ) ) {
310 $context = $this->getContext();
311 $context->setTitle( $titleObj );
312 $context->getOutput()->addParserOutputMetadata( $p_result );
314 if ( isset( $prop['headitems'] ) ) {
315 $headItems = $this->formatHeadItems( $p_result->getHeadItems() );
317 $css = $this->formatCss( $context->getOutput()->buildCssLinksArray() );
319 $scripts = array( $context->getOutput()->getHeadScripts() );
321 $result_array['headitems'] = array_merge( $headItems, $css, $scripts );
324 if ( isset( $prop['headhtml'] ) ) {
325 $result_array['headhtml'] = array();
326 ApiResult
::setContent(
327 $result_array['headhtml'],
328 $context->getOutput()->headElement( $context->getSkin() )
333 if ( isset( $prop['modules'] ) ) {
334 $result_array['modules'] = array_values( array_unique( $p_result->getModules() ) );
335 $result_array['modulescripts'] = array_values( array_unique( $p_result->getModuleScripts() ) );
336 $result_array['modulestyles'] = array_values( array_unique( $p_result->getModuleStyles() ) );
337 $result_array['modulemessages'] = array_values( array_unique( $p_result->getModuleMessages() ) );
340 if ( isset( $prop['iwlinks'] ) ) {
341 $result_array['iwlinks'] = $this->formatIWLinks( $p_result->getInterwikiLinks() );
344 if ( isset( $prop['wikitext'] ) ) {
345 $result_array['wikitext'] = array();
346 ApiResult
::setContent( $result_array['wikitext'], $this->content
->serialize( $format ) );
347 if ( !is_null( $this->pstContent
) ) {
348 $result_array['psttext'] = array();
349 ApiResult
::setContent( $result_array['psttext'], $this->pstContent
->serialize( $format ) );
352 if ( isset( $prop['properties'] ) ) {
353 $result_array['properties'] = $this->formatProperties( $p_result->getProperties() );
356 if ( isset( $prop['limitreportdata'] ) ) {
357 $result_array['limitreportdata'] =
358 $this->formatLimitReportData( $p_result->getLimitReportData() );
360 if ( isset( $prop['limitreporthtml'] ) ) {
361 $limitreportHtml = EditPage
::getPreviewLimitReport( $p_result );
362 $result_array['limitreporthtml'] = array();
363 ApiResult
::setContent( $result_array['limitreporthtml'], $limitreportHtml );
// The parse tree is taken from the preprocessor DOM: saveXML() when the DOM
// object offers it, otherwise __toString().
366 if ( $params['generatexml'] ) {
367 if ( $this->content
->getModel() != CONTENT_MODEL_WIKITEXT
) {
368 $this->dieUsage( "generatexml is only supported for wikitext content", "notwikitext" );
371 $wgParser->startExternalParse( $titleObj, $popts, OT_PREPROCESS
);
372 $dom = $wgParser->preprocessToDom( $this->content
->getNativeData() );
373 if ( is_callable( array( $dom, 'saveXML' ) ) ) {
374 $xml = $dom->saveXML();
376 $xml = $dom->__toString();
378 $result_array['parsetree'] = array();
379 ApiResult
::setContent( $result_array['parsetree'], $xml );
// Result key => tag name, handed to setIndexedTagNames() below.
382 $result_mapping = array(
385 'categories' => 'cl',
389 'externallinks' => 'el',
394 'modulescripts' => 'm',
395 'modulestyles' => 'm',
396 'modulemessages' => 'm',
397 'properties' => 'pp',
398 'limitreportdata' => 'lr',
400 $this->setIndexedTagNames( $result_array, $result_mapping );
401 $result->addValue( null, $this->getModuleName(), $result_array );
/**
 * Builds the ParserOptions used for this request.
 *
 * Starts from the page's own parser options for the current context and then
 * applies the request switches: limit report, preview / section preview and
 * section edit links.
 *
 * @param WikiPage $pageObj Page whose parser options serve as the base
 * @param array $params Extracted request parameters
 *
 * @return ParserOptions
 */
protected function makeParserOptions( WikiPage $pageObj, array $params ) {
	wfProfileIn( __METHOD__ );

	$options = $pageObj->makeParserOptions( $this->getContext() );

	// "disable*" switches are inverted into the corresponding "enable" calls.
	$options->enableLimitReport( !$params['disablepp'] );

	// A section preview is also a preview.
	$isPreview = $params['preview'] || $params['sectionpreview'];
	$options->setIsPreview( $isPreview );
	$options->setIsSectionPreview( $params['sectionpreview'] );

	$options->setEditSection( !$params['disableeditsection'] );

	wfProfileOut( __METHOD__ );

	return $options;
}
/**
 * Loads the page's content into $this->content and returns its parser output.
 *
 * When a section was requested and the page has content, only that section is
 * parsed (never through the parser cache). Otherwise the whole page is parsed
 * via WikiPage::getParserOutput(), which may use the parser cache.
 *
 * @param WikiPage $page
 * @param ParserOptions $popts
 * @param int|null $pageId Used instead of the title to name the page in section errors
 * @param bool $getWikitext Whether $this->content must hold the page content afterwards
 * @return ParserOutput
 */
private function getParsedContent( WikiPage $page, $popts, $pageId = null, $getWikitext = false ) {
	$this->content = $page->getContent( Revision::RAW ); //XXX: really raw?

	if ( $this->section === false || $this->content === null ) {
		// Try the parser cache first
		// getParserOutput will save to Parser cache if able
		$parsed = $page->getParserOutput( $popts );
		if ( !$parsed ) {
			$this->dieUsage( "There is no revision ID {$page->getLatest()}", 'missingrev' );
		}
		if ( $getWikitext ) {
			$this->content = $page->getContent( Revision::RAW );
		}

		return $parsed;
	}

	// Name the page by id when one was given, otherwise by its prefixed title.
	$what = is_null( $pageId )
		? $page->getTitle()->getPrefixedText()
		: 'page id ' . $pageId;
	$this->content = $this->getSectionContent( $this->content, $what );

	// Not cached (save or load)
	return $this->content->getParserOutput( $page->getTitle(), null, $popts );
}
/**
 * Extracts the section selected by $this->section from the given content,
 * reporting a usage error when the section is missing or unsupported.
 *
 * @param Content $content
 * @param string $what Identifies the content in error messages, e.g. page title.
 * @return Content|bool
 */
private function getSectionContent( Content $content, $what ) {
	// Not cached (save or load)
	$extracted = $content->getSection( $this->section );

	if ( $extracted === null ) {
		$this->dieUsage( "Sections are not supported by " . $what, 'nosuchsection' );
	} elseif ( $extracted === false ) {
		$this->dieUsage( "There is no section {$this->section} in " . $what, 'nosuchsection' );
	}

	return $extracted;
}
/**
 * Formats language links for the API result.
 *
 * Each link is split at its first colon into a language prefix and a target.
 * Every entry carries the prefix ('lang') and the target as content; when the
 * link parses to a title, the entry also carries its full URL, the language
 * name localised into the request language, and the language's own name.
 *
 * @param array $links List of link strings
 * @return array List of result entries
 */
private function formatLangLinks( $links ) {
	$result = array();
	foreach ( $links as $link ) {
		$entry = array();
		$bits = explode( ':', $link, 2 );
		$title = Title::newFromText( $link );

		$entry['lang'] = $bits[0];
		// Title::newFromText() may not produce an object, so the
		// title-derived fields are only added when one was created.
		if ( $title ) {
			$entry['url'] = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT );
			// localised language name in 'uselang' language
			$entry['langname'] = Language::fetchLanguageName(
				$title->getInterwiki(),
				$this->getLanguage()->getCode()
			);

			// native language name
			$entry['autonym'] = Language::fetchLanguageName( $title->getInterwiki() );
		}
		// A link without a colon has no second part; pass null rather than
		// reading an undefined offset.
		ApiResult::setContent( $entry, isset( $bits[1] ) ? $bits[1] : null );
		$result[] = $entry;
	}

	return $result;
}
/**
 * Formats category links for the API result: one entry per category carrying
 * its sort key, the category name as content, and a 'missing' or 'hidden'
 * marker where applicable.
 *
 * NOTE(review): the lines that create $lb and the result list, some arguments
 * of the select() call, and the return statement are not visible in this
 * block — confirm against the canonical file.
 *
 * @param array $links Map of category name => sort key (as iterated below)
 */
504 private function formatCategoryLinks( $links ) {
511 // Fetch hiddencat property
513 $lb->setArray( array( NS_CATEGORY
=> $links ) );
514 $db = $this->getDB();
515 $res = $db->select( array( 'page', 'page_props' ),
516 array( 'page_title', 'pp_propname' ),
517 $lb->constructSet( 'page', $db ),
520 array( 'page_props' => array(
521 'LEFT JOIN', array( 'pp_propname' => 'hiddencat', 'pp_page = page_id' )
524 $hiddencats = array();
// Because of the LEFT JOIN, pp_propname is only set on rows whose page has a
// 'hiddencat' property; isset() turns that into a boolean per category title.
525 foreach ( $res as $row ) {
526 $hiddencats[$row->page_title
] = isset( $row->pp_propname
);
529 foreach ( $links as $link => $sortkey ) {
531 $entry['sortkey'] = $sortkey;
532 ApiResult
::setContent( $entry, $link );
// No row for this title means no matching page row was returned by the query.
533 if ( !isset( $hiddencats[$link] ) ) {
534 $entry['missing'] = '';
535 } elseif ( $hiddencats[$link] ) {
536 $entry['hidden'] = '';
/**
 * Renders the given categories as HTML by adding them to the context's output
 * object and asking the skin for its category markup.
 *
 * @param array $categories Categories as passed to addCategoryLinks()
 * @return string Whatever the skin's getCategories() returns
 */
private function categoriesHtml( $categories ) {
	$ctx = $this->getContext();

	$output = $ctx->getOutput();
	$output->addCategoryLinks( $categories );

	$skin = $ctx->getSkin();

	return $skin->getCategories();
}
/**
 * Formats a namespace => ( title => id ) link map for the API result, one
 * entry per link with the title's full text as content.
 *
 * NOTE(review): $id is not referenced in the visible lines and 'exists' is set
 * unconditionally here; presumably it is meant to depend on $id. The result
 * list and return statement are not visible either — confirm against the
 * canonical file.
 *
 * @param array $links
 */
551 private function formatLinks( $links ) {
553 foreach ( $links as $ns => $nslinks ) {
554 foreach ( $nslinks as $title => $id ) {
// The content is the full title text built from namespace and title key.
557 ApiResult
::setContent( $entry, Title
::makeTitle( $ns, $title )->getFullText() );
559 $entry['exists'] = '';
/**
 * Formats interwiki links for the API result.
 *
 * Every entry carries the interwiki prefix and the link text as content; when
 * "prefix:title" parses to a title, the entry also carries the expanded URL
 * and the content is the title's full text.
 *
 * @param array $iw Map of interwiki prefix => ( title => anything )
 * @return array List of result entries
 */
private function formatIWLinks( $iw ) {
	$result = array();
	foreach ( $iw as $prefix => $titles ) {
		foreach ( array_keys( $titles ) as $title ) {
			$entry = array();
			$entry['prefix'] = $prefix;

			$linkText = "{$prefix}:{$title}";
			$titleObj = Title::newFromText( $linkText );
			// Title::newFromText() may not produce an object; in that case
			// keep the raw "prefix:title" text instead of calling methods
			// on a non-object.
			if ( $titleObj ) {
				$entry['url'] = wfExpandUrl( $titleObj->getFullURL(), PROTO_CURRENT );
				$linkText = $titleObj->getFullText();
			}

			ApiResult::setContent( $entry, $linkText );
			$result[] = $entry;
		}
	}

	return $result;
}
/**
 * Turns a tag => content map of head items into a list of result entries,
 * each holding the tag name under 'tag' and the item as its content.
 *
 * @param array $headItems Map of tag => content
 * @return array List of result entries
 */
private function formatHeadItems( $headItems ) {
	$formatted = array();

	foreach ( $headItems as $tag => $content ) {
		$item = array( 'tag' => $tag );
		ApiResult::setContent( $item, $content );
		$formatted[] = $item;
	}

	return $formatted;
}
/**
 * Turns a name => value property map into a list of result entries, each
 * holding the property name under 'name' and the value as its content.
 *
 * @param array $properties Map of property name => value
 * @return array List of result entries
 */
private function formatProperties( $properties ) {
	$formatted = array();

	foreach ( $properties as $name => $value ) {
		$property = array( 'name' => $name );
		ApiResult::setContent( $property, $value );
		$formatted[] = $property;
	}

	return $formatted;
}
/**
 * Turns a file => link map of CSS links into a list of result entries, each
 * holding the file under 'file' and the link as its content.
 *
 * @param array $css Map of file => link
 * @return array List of result entries
 */
private function formatCss( $css ) {
	$formatted = array();

	foreach ( $css as $file => $link ) {
		$stylesheet = array( 'file' => $file );
		ApiResult::setContent( $stylesheet, $link );
		$formatted[] = $stylesheet;
	}

	return $formatted;
}
/**
 * Formats limit report data for the API result.
 *
 * Each report row becomes one entry: its name under 'name', merged with the
 * row's value(s), which are tagged as 'param' elements.
 *
 * @param array $limitReportData Map of row name => scalar or array of values
 * @return array List of result entries
 */
private function formatLimitReportData( $limitReportData ) {
	$apiResult = $this->getResult();
	$rows = array();

	foreach ( $limitReportData as $name => $value ) {
		// A non-array value is wrapped so every row is handled as a list.
		$values = is_array( $value ) ? $value : array( $value );

		$apiResult->setIndexedTagName( $values, 'param' );
		$apiResult->setIndexedTagName_recursive( $values, 'param' );

		$rows[] = array_merge( array( 'name' => $name ), $values );
	}

	return $rows;
}
/**
 * Applies indexed tag names to the entries of $array that are present.
 *
 * @param array &$array Result array whose sub-arrays get tagged in place
 * @param array $mapping Map of key in $array => tag name to set
 */
private function setIndexedTagNames( &$array, $mapping ) {
	foreach ( $mapping as $field => $tagName ) {
		// Keys that were not produced for this request are skipped.
		if ( !isset( $array[$field] ) ) {
			continue;
		}

		$this->getResult()->setIndexedTagName( $array[$field], $tagName );
	}
}
/**
 * Declares the parameters this module accepts: each entry is either a plain
 * default value (false for the switches visible below) or a set of
 * ApiBase::PARAM_* settings.
 *
 * NOTE(review): most parameter keys, the list of allowed prop values and the
 * enclosing array syntax are not visible in this block — confirm against the
 * canonical file.
 */
651 public function getAllowedParams() {
658 ApiBase
::PARAM_TYPE
=> 'integer',
660 'redirects' => false,
662 ApiBase
::PARAM_TYPE
=> 'integer',
// Multi-valued parameter with a default list of values; presumably this is
// the 'prop' entry — its key line is not visible, confirm.
665 ApiBase
::PARAM_DFLT
=> 'text|langlinks|categories|links|templates|' .
666 'images|externallinks|sections|revid|displaytitle|iwlinks|properties',
667 ApiBase
::PARAM_ISMULTI
=> true,
668 ApiBase
::PARAM_TYPE
=> array(
692 'effectivelanglinks' => false,
694 'disablepp' => false,
695 'disableeditsection' => false,
696 'generatexml' => false,
698 'sectionpreview' => false,
699 'disabletoc' => false,
// The allowed formats and models are obtained from ContentHandler when this
// method runs.
700 'contentformat' => array(
701 ApiBase
::PARAM_TYPE
=> ContentHandler
::getAllContentFormats(),
703 'contentmodel' => array(
704 ApiBase
::PARAM_TYPE
=> ContentHandler
::getContentModels(),
/**
 * Help text for each parameter, keyed by parameter name. A value is either a
 * single string or a list of lines. Parameter names mentioned inside the text
 * are prefixed with the module prefix.
 *
 * @return array
 */
public function getParamDescription() {
	$p = $this->getModulePrefix();
	$wikitext = CONTENT_MODEL_WIKITEXT;

	return array(
		'text' => "Text to parse. Use {$p}title or {$p}contentmodel to control the content model",
		'summary' => 'Summary to parse',
		'redirects' => "If the {$p}page or the {$p}pageid parameter is set to a redirect, resolve it",
		'title' => "Title of page the text belongs to. " .
			"If omitted, {$p}contentmodel must be specified, and \"API\" will be used as the title",
		'page' => "Parse the content of this page. Cannot be used together with {$p}text and {$p}title",
		'pageid' => "Parse the content of this page. Overrides {$p}page",
		'oldid' => "Parse the content of this revision. Overrides {$p}page and {$p}pageid",
		'prop' => array(
			'Which pieces of information to get',
			' text - Gives the parsed text of the wikitext',
			' langlinks - Gives the language links in the parsed wikitext',
			' categories - Gives the categories in the parsed wikitext',
			' categorieshtml - Gives the HTML version of the categories',
			' links - Gives the internal links in the parsed wikitext',
			' templates - Gives the templates in the parsed wikitext',
			' images - Gives the images in the parsed wikitext',
			' externallinks - Gives the external links in the parsed wikitext',
			' sections - Gives the sections in the parsed wikitext',
			' revid - Adds the revision ID of the parsed page',
			' displaytitle - Adds the title of the parsed wikitext',
			' headitems - Gives items to put in the <head> of the page',
			' headhtml - Gives parsed <head> of the page',
			' modules - Gives the ResourceLoader modules used on the page',
			' iwlinks - Gives interwiki links in the parsed wikitext',
			' wikitext - Gives the original wikitext that was parsed',
			' properties - Gives various properties defined in the parsed wikitext',
			' limitreportdata - Gives the limit report in a structured way.',
			" Gives no data, when {$p}disablepp is set.",
			' limitreporthtml - Gives the HTML version of the limit report.',
			" Gives no data, when {$p}disablepp is set.",
		),
		'effectivelanglinks' => array(
			'Includes language links supplied by extensions',
			'(for use with prop=langlinks)',
		),
		'pst' => array(
			'Do a pre-save transform on the input before parsing it',
			"Only valid when used with {$p}text",
		),
		'onlypst' => array(
			'Do a pre-save transform (PST) on the input, but don\'t parse it',
			'Returns the same wikitext, after a PST has been applied.',
			"Only valid when used with {$p}text",
		),
		'section' => 'Only retrieve the content of this section number',
		'disablepp' => 'Disable the PP Report from the parser output',
		'disableeditsection' => 'Disable edit section links from the parser output',
		'generatexml' => "Generate XML parse tree (requires contentmodel=$wikitext)",
		'preview' => 'Parse in preview mode',
		'sectionpreview' => 'Parse in section preview mode (enables preview mode too)',
		'disabletoc' => 'Disable table of contents in output',
		'contentformat' => array(
			'Content serialization format used for the input text',
			"Only valid when used with {$p}text",
		),
		'contentmodel' => array(
			// {$p} rather than ${p}: same output, consistent with the rest of
			// this method, and not deprecated in newer PHP versions.
			"Content model of the input text. If omitted, {$p}title must be specified, " .
				"and default will be the model of the specified {$p}title",
			"Only valid when used with {$p}text",
		),
	);
}
/**
 * Help text describing the module, as a list of lines. Parameter names
 * mentioned inside the text are prefixed with the module prefix.
 *
 * @return array
 */
public function getDescription() {
	$p = $this->getModulePrefix();

	return array(
		'Parses content and returns parser output.',
		// The trailing space keeps "current" and "version" apart once the two
		// halves are concatenated.
		'See the various prop-Modules of action=query to get information from the current ' .
			'version of a page.',
		'There are several ways to specify the text to parse:',
		"1) Specify a page or revision, using {$p}page, {$p}pageid, or {$p}oldid.",
		"2) Specify content explicitly, using {$p}text, {$p}title, and {$p}contentmodel.",
		"3) Specify only a summary to parse. {$p}prop should be given an empty value.",
	);
}
/**
 * Example requests for this module, as a map of request URL => description.
 *
 * @return array
 */
public function getExamples() {
	$examples = array();

	$examples['api.php?action=parse&page=Project:Sandbox'] = 'Parse a page';
	$examples['api.php?action=parse&text={{Project:Sandbox}}&contentmodel=wikitext'] = 'Parse wikitext';
	$examples['api.php?action=parse&text={{PAGENAME}}&title=Test'] =
		'Parse wikitext, specifying the page title';
	$examples['api.php?action=parse&summary=Some+[[link]]&prop='] = 'Parse a summary';

	return $examples;
}
/**
 * Location of the online documentation for this module.
 *
 * @return string
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/API:Parsing_wikitext#parse';

	return $helpUrl;
}