Merge "Make SpecialPage::getLinkRenderer() public"
[lhc/web/wiklou.git] / includes / content / ContentHandler.php
1 <?php
2
3 use MediaWiki\Search\ParserOutputSearchDataExtractor;
4
5 /**
6 * Base class for content handling.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @since 1.21
24 *
25 * @file
26 * @ingroup Content
27 *
28 * @author Daniel Kinzler
29 */
30 /**
31 * A content handler knows how to deal with a specific type of content on a wiki
32 * page. Content is stored in the database in a serialized form (using a
33 * serialization format a.k.a. MIME type) and is unserialized into its native
34 * PHP representation (the content model), which is wrapped in an instance of
35 * the appropriate subclass of Content.
36 *
37 * ContentHandler instances are stateless singletons that serve, among other
38 * things, as a factory for Content objects. Generally, there is one subclass
39 * of ContentHandler and one subclass of Content for every type of content model.
40 *
41 * Some content types have a flat model, that is, their native representation
42 * is the same as their serialized form. Examples would be JavaScript and CSS
43 * code. As of now, this also applies to wikitext (MediaWiki's default content
44 * type), but wikitext content may be represented by a DOM or AST structure in
45 * the future.
46 *
47 * @ingroup Content
48 */
49 abstract class ContentHandler {
50 /**
51 * Convenience function for getting flat text from a Content object. This
52 * should only be used in the context of backwards compatibility with code
53 * that is not yet able to handle Content objects!
54 *
55 * If $content is null, this method returns the empty string.
56 *
57 * If $content is an instance of TextContent, this method returns the flat
58 * text as returned by $content->getNativeData().
59 *
60 * If $content is not a TextContent object, the behavior of this method
61 * depends on the global $wgContentHandlerTextFallback:
62 * - If $wgContentHandlerTextFallback is 'fail' and $content is not a
63 * TextContent object, an MWException is thrown.
64 * - If $wgContentHandlerTextFallback is 'serialize' and $content is not a
65 * TextContent object, $content->serialize() is called to get a string
66 * form of the content.
67 * - If $wgContentHandlerTextFallback is 'ignore' and $content is not a
68 * TextContent object, this method returns null.
69 * - otherwise, the behavior is undefined.
70 *
71 * @since 1.21
72 *
73 * @param Content $content
74 *
75 * @throws MWException If the content is not an instance of TextContent and
76 * wgContentHandlerTextFallback was set to 'fail'.
77 * @return string|null Textual form of the content, if available.
78 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	// No content at all is reported as empty text.
	if ( $content === null ) {
		return '';
	}

	// Text-based content hands back its native string directly.
	if ( $content instanceof TextContent ) {
		return $content->getNativeData();
	}

	wfDebugLog( 'ContentHandler', 'Accessing ' . $content->getModel() . ' content as text!' );

	// Compare strictly: with a loose comparison a non-string setting such as
	// boolean true compares equal to 'fail' and would throw unexpectedly.
	if ( $wgContentHandlerTextFallback === 'fail' ) {
		throw new MWException(
			"Attempt to get text from Content with model " .
			$content->getModel()
		);
	}

	if ( $wgContentHandlerTextFallback === 'serialize' ) {
		return $content->serialize();
	}

	// 'ignore' (and any unrecognised setting): no textual form is available.
	return null;
}
105
106 /**
107 * Convenience function for creating a Content object from a given textual
108 * representation.
109 *
110 * $text will be deserialized into a Content object of the model specified
111 * by $modelId (or, if that is not given, $title->getContentModel()) using
112 * the given format.
113 *
114 * @since 1.21
115 *
116 * @param string $text The textual representation, will be
117 * unserialized to create the Content object
118 * @param Title $title The title of the page this text belongs to.
119 * Required if $modelId is not provided.
120 * @param string $modelId The model to deserialize to. If not provided,
121 * $title->getContentModel() is used.
122 * @param string $format The format to use for deserialization. If not
123 * given, the model's default format is used.
124 *
125 * @throws MWException If model ID or format is not supported or if the text can not be
126 * unserialized using the format.
127 * @return Content A Content object representing the text.
128 */
public static function makeContent( $text, Title $title = null,
	$modelId = null, $format = null
) {
	// Without an explicit model, fall back to the model of the given title.
	if ( $modelId === null ) {
		if ( $title === null ) {
			throw new MWException( "Must provide a Title object or a content model ID." );
		}

		$modelId = $title->getContentModel();
	}

	// The handler for the model performs the actual deserialization.
	return self::getForModelID( $modelId )->unserializeContent( $text, $format );
}
143
144 /**
145 * Returns the name of the default content model to be used for the page
146 * with the given title.
147 *
148 * Note: There should rarely be need to call this method directly.
149 * To determine the actual content model for a given page, use
150 * Title::getContentModel().
151 *
152 * Which model is to be used by default for the page is determined based
153 * on several factors:
154 * - The global setting $wgNamespaceContentModels specifies a content model
155 * per namespace.
156 * - The hook ContentHandlerDefaultModelFor may be used to override the page's default
157 * model.
158 * - Pages in NS_MEDIAWIKI and subpages in NS_USER default to the JavaScript, CSS or
159 * JSON model if their title ends in .js, .css or .json, respectively.
160 * - Pages in NS_MEDIAWIKI default to the wikitext model otherwise.
161 * - The hook TitleIsCssOrJsPage may be used to force a page to use the CSS
162 * or JavaScript model. This is a compatibility feature. The ContentHandlerDefaultModelFor
163 * hook should be used instead if possible.
164 * - The hook TitleIsWikitextPage may be used to force a page to use the
165 * wikitext model. This is a compatibility feature. The ContentHandlerDefaultModelFor
166 * hook should be used instead if possible.
167 *
168 * If none of the above applies, the wikitext model is used.
169 *
170 * Note: this is used by, and may thus not use, Title::getContentModel()
171 *
172 * @since 1.21
173 *
174 * @param Title $title
175 *
176 * @return string Default model name for the page given by $title
177 */
public static function getDefaultModelFor( Title $title ) {
	// NOTE: this method must not rely on $title->getContentModel() directly or indirectly,
	// because it is used to initialize the mContentModel member.

	$namespace = $title->getNamespace();
	$extension = false;
	$matches = null;
	$model = MWNamespace::getNamespaceContentModel( $namespace );

	// A handler that aborts the hook run and leaves a model set decides the result.
	if ( !Hooks::run( 'ContentHandlerDefaultModelFor', [ $title, &$model ] )
		&& $model !== null
	) {
		return $model;
	}

	// MediaWiki-namespace pages whose title ends in a code extension.
	$isCodePage = NS_MEDIAWIKI == $namespace
		&& preg_match( '!\.(css|js|json)$!u', $title->getText(), $matches );
	if ( $isCodePage ) {
		$extension = $matches[1];
	}

	// Compatibility hook: may force the page to be treated as code.
	Hooks::run( 'TitleIsCssOrJsPage', [ $title, &$isCodePage ], '1.21' );

	// User-namespace subpages whose title ends in a code extension.
	$isCodeSubpage = NS_USER == $namespace
		&& !$isCodePage
		&& preg_match( "/\\/.*\\.(js|css|json)$/", $title->getText(), $matches );
	if ( $isCodeSubpage ) {
		$extension = $matches[1];
	}

	// Wikitext only when the namespace/hook model permits it and the title is not code.
	$isWikitext = ( $model === null || $model == CONTENT_MODEL_WIKITEXT )
		&& !$isCodePage
		&& !$isCodeSubpage;

	// Compatibility hook: may override the wikitext decision.
	Hooks::run( 'TitleIsWikitextPage', [ $title, &$isWikitext ], '1.21' );

	if ( $isWikitext ) {
		return CONTENT_MODEL_WIKITEXT;
	}

	// Not wikitext: pick the model from the detected extension, if any.
	switch ( $extension ) {
		case 'js':
			return CONTENT_MODEL_JAVASCRIPT;
		case 'css':
			return CONTENT_MODEL_CSS;
		case 'json':
			return CONTENT_MODEL_JSON;
		default:
			return $model === null ? CONTENT_MODEL_TEXT : $model;
	}
}
236
237 /**
238 * Returns the appropriate ContentHandler singleton for the given title.
239 *
240 * @since 1.21
241 *
242 * @param Title $title
243 *
244 * @return ContentHandler
245 */
public static function getForTitle( Title $title ) {
	// The title knows its content model; the model ID selects the handler.
	return self::getForModelID( $title->getContentModel() );
}
251
252 /**
253 * Returns the appropriate ContentHandler singleton for the given Content
254 * object.
255 *
256 * @since 1.21
257 *
258 * @param Content $content
259 *
260 * @return ContentHandler
261 */
public static function getForContent( Content $content ) {
	// The content object reports its own model; the model ID selects the handler.
	return self::getForModelID( $content->getModel() );
}
267
268 /**
269 * @var array A Cache of ContentHandler instances by model id
270 */
271 protected static $handlers;
272
273 /**
274 * Returns the ContentHandler singleton for the given model ID. Use the
275 * CONTENT_MODEL_XXX constants to identify the desired content model.
276 *
277 * ContentHandler singletons are taken from the global $wgContentHandlers
278 * array. Keys in that array are model names, the values are either
279 * ContentHandler singleton objects, or strings specifying the appropriate
280 * subclass of ContentHandler.
281 *
282 * If a class name is encountered when looking up the singleton for a given
283 * model name, the class is instantiated (a callable is called instead) and
284 * the resulting singleton is cached in ContentHandler::$handlers.
285 *
286 * If no ContentHandler is defined for the desired $modelId, the
287 * ContentHandler may be provided by the ContentHandlerForModelID hook.
288 * If no ContentHandler can be determined, an MWException is raised.
289 *
290 * @since 1.21
291 *
292 * @param string $modelId The ID of the content model for which to get a
293 * handler. Use CONTENT_MODEL_XXX constants.
294 *
295 * @throws MWException For internal errors and problems in the configuration.
296 * @throws MWUnknownContentModelException If no handler is known for the model ID.
297 * @return ContentHandler The ContentHandler singleton for handling the model given by the ID.
298 */
public static function getForModelID( $modelId ) {
	global $wgContentHandlers;

	// Serve the cached singleton if this model has been resolved before.
	if ( isset( ContentHandler::$handlers[$modelId] ) ) {
		return ContentHandler::$handlers[$modelId];
	}

	if ( empty( $wgContentHandlers[$modelId] ) ) {
		// Not configured: let hook handlers supply a handler instance.
		$handler = null;

		Hooks::run( 'ContentHandlerForModelID', [ $modelId, &$handler ] );

		if ( $handler === null ) {
			throw new MWUnknownContentModelException( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			throw new MWException( "ContentHandlerForModelID must supply a ContentHandler instance" );
		}
	} else {
		$classOrCallback = $wgContentHandlers[$modelId];

		// Configured entries are either a callable factory or a class name.
		if ( is_callable( $classOrCallback ) ) {
			$handler = call_user_func( $classOrCallback, $modelId );
		} else {
			$handler = new $classOrCallback( $modelId );
		}

		if ( !( $handler instanceof ContentHandler ) ) {
			// Closures and array callables cannot be interpolated into a string,
			// so build a printable description instead of failing inside the
			// error path itself.
			if ( is_string( $classOrCallback ) ) {
				$source = $classOrCallback;
			} elseif ( is_array( $classOrCallback ) ) {
				$target = $classOrCallback[0];
				$source = ( is_object( $target ) ? get_class( $target ) : $target )
					. '::' . $classOrCallback[1];
			} else {
				$source = get_class( $classOrCallback );
			}

			throw new MWException( "$source from \$wgContentHandlers is not " .
				"compatible with ContentHandler" );
		}
	}

	wfDebugLog( 'ContentHandler', 'Created handler for ' . $modelId
		. ': ' . get_class( $handler ) );

	// Cache the instance so later lookups return the same singleton.
	ContentHandler::$handlers[$modelId] = $handler;

	return ContentHandler::$handlers[$modelId];
}
340
341 /**
342 * Returns the localized name for a given content model.
343 *
344 * Model names are localized using system messages. Message keys
345 * have the form content-model-$name, where $name is getContentModelName( $id ).
346 *
347 * @param string $name The content model ID, as given by a CONTENT_MODEL_XXX
348 * constant or returned by Revision::getContentModel().
349 * @param Language|null $lang The language to parse the message in (since 1.26)
350 *
351 * @throws MWException If the model ID isn't known.
352 * @return string The content model's localized name.
353 */
public static function getLocalizedName( $name, Language $lang = null ) {
	// Messages: content-model-wikitext, content-model-text,
	// content-model-javascript, content-model-css
	$message = wfMessage( "content-model-$name" );
	if ( $lang ) {
		$message->inLanguage( $lang );
	}

	// Fall back to the raw model ID when no such message exists.
	if ( !$message->exists() ) {
		return $name;
	}

	return $message->plain();
}
366
/**
 * Lists the content model IDs that have an entry in $wgContentHandlers.
 *
 * Only the keys of the global configuration array are reported.
 *
 * @return array The keys of $wgContentHandlers
 */
public static function getContentModels() {
	global $wgContentHandlers;

	$modelIds = array_keys( $wgContentHandlers );

	return $modelIds;
}
372
/**
 * Collects the serialization formats supported by the handlers of all
 * content models that have an entry in $wgContentHandlers.
 *
 * @return array The distinct formats; duplicates are removed with
 *  array_unique(), which keeps the keys of the surviving entries.
 */
public static function getAllContentFormats() {
	global $wgContentHandlers;

	$allFormats = [];

	// Only the model IDs are needed; each handler is resolved on demand.
	foreach ( array_keys( $wgContentHandlers ) as $modelId ) {
		$supported = self::getForModelID( $modelId )->getSupportedFormats();
		$allFormats = array_merge( $allFormats, $supported );
	}

	return array_unique( $allFormats );
}
387
388 // ------------------------------------------------------------------------
389
390 /**
391 * @var string
392 */
393 protected $mModelID;
394
395 /**
396 * @var string[]
397 */
398 protected $mSupportedFormats;
399
400 /**
401 * Constructor, initializing the ContentHandler instance with its model ID
402 * and a list of supported formats. Values for the parameters are typically
403 * provided as literals by subclass's constructors.
404 *
405 * @param string $modelId (use CONTENT_MODEL_XXX constants).
406 * @param string[] $formats List for supported serialization formats
407 * (typically as MIME types)
408 */
public function __construct( $modelId, $formats ) {
	// Stored as given; no validation is performed here.
	$this->mSupportedFormats = $formats;
	$this->mModelID = $modelId;
}
413
414 /**
415 * Serializes a Content object of the type supported by this ContentHandler.
416 *
417 * @since 1.21
418 *
419 * @param Content $content The Content object to serialize
420 * @param string $format The desired serialization format
421 *
422 * @return string Serialized form of the content
423 */
424 abstract public function serializeContent( Content $content, $format = null ); // abstract: no default implementation, concrete handlers must define it
425
426 /**
427 * Applies transformations on export (returns the blob unchanged per default).
428 * Subclasses may override this to perform transformations such as conversion
429 * of legacy formats or filtering of internal meta-data.
430 *
431 * @param string $blob The blob to be exported
432 * @param string|null $format The blob's serialization format
433 *
434 * @return string
435 */
public function exportTransform( $blob, $format = null ) {
	// Default: no export transformation; $format is not consulted.
	return $blob;
}
439
440 /**
441 * Unserializes a Content object of the type supported by this ContentHandler.
442 *
443 * @since 1.21
444 *
445 * @param string $blob Serialized form of the content
446 * @param string $format The format used for serialization
447 *
448 * @return Content The Content object created by deserializing $blob
449 */
450 abstract public function unserializeContent( $blob, $format = null ); // abstract: no default implementation, concrete handlers must define it
451
452 /**
453 * Apply import transformation (per default, returns $blob unchanged).
454 * This gives subclasses an opportunity to transform data blobs on import.
455 *
456 * @since 1.24
457 *
458 * @param string $blob
459 * @param string|null $format
460 *
461 * @return string
462 */
public function importTransform( $blob, $format = null ) {
	// Default: no import transformation; $format is not consulted.
	return $blob;
}
466
467 /**
468 * Creates an empty Content object of the type supported by this
469 * ContentHandler.
470 *
471 * @since 1.21
472 *
473 * @return Content
474 */
475 abstract public function makeEmptyContent(); // abstract: no default implementation, concrete handlers must define it
476
477 /**
478 * Creates a new Content object that acts as a redirect to the given page,
479 * or null if redirects are not supported by this content model.
480 *
481 * This default implementation always returns null. Subclasses supporting redirects
482 * must override this method.
483 *
484 * Note that subclasses that override this method to return a Content object
485 * should also override supportsRedirects() to return true.
486 *
487 * @since 1.21
488 *
489 * @param Title $destination The page to redirect to.
490 * @param string $text Text to include in the redirect, if possible.
491 *
492 * @return Content|null Always null in this default implementation.
493 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	// Redirects are unsupported by default; both arguments are ignored.
	return null;
}
497
498 /**
499 * Returns the model id that identifies the content model this
500 * ContentHandler can handle. Use with the CONTENT_MODEL_XXX constants.
501 *
502 * @since 1.21
503 *
504 * @return string The model ID
505 */
public function getModelID() {
	// The ID that was passed to the constructor.
	$modelId = $this->mModelID;

	return $modelId;
}
509
510 /**
511 * @since 1.21
512 *
513 * @param string $model_id The model to check
514 *
515 * @throws MWException If the model ID is not the ID of the content model supported by this
516 * ContentHandler.
517 */
protected function checkModelID( $model_id ) {
	// Identical IDs (strict comparison) pass silently.
	if ( $model_id === $this->mModelID ) {
		return;
	}

	throw new MWException(
		"Bad content model: expected {$this->mModelID} but got $model_id."
	);
}
525
526 /**
527 * Returns a list of serialization formats supported by the
528 * serializeContent() and unserializeContent() methods of this
529 * ContentHandler.
530 *
531 * @since 1.21
532 *
533 * @return string[] List of serialization formats as MIME type like strings
534 */
public function getSupportedFormats() {
	// The list that was passed to the constructor, returned unchanged.
	$formats = $this->mSupportedFormats;

	return $formats;
}
538
539 /**
540 * The format used for serialization/deserialization by default by this
541 * ContentHandler.
542 *
543 * This default implementation will return the first element of the array
544 * of formats that was passed to the constructor.
545 *
546 * @since 1.21
547 *
548 * @return string The name of the default serialization format as a MIME type
549 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;

	// The entry at index 0 of the constructor-supplied list is the default.
	return $formats[0];
}
553
554 /**
555 * Returns true if $format is a serialization format supported by this
556 * ContentHandler, and false otherwise.
557 *
558 * Note that if $format is null, this method always returns true, because
559 * null means "use the default format".
560 *
561 * @since 1.21
562 *
563 * @param string $format The serialization format to check
564 *
565 * @return bool
566 */
public function isSupportedFormat( $format ) {
	// A falsy format (null, '', ...) means "use the default", which is always
	// supported; anything else must appear in the list of supported formats.
	return !$format || in_array( $format, $this->mSupportedFormats );
}
574
575 /**
576 * Convenient for checking whether a format provided as a parameter is actually supported.
577 *
578 * @param string $format The serialization format to check
579 *
580 * @throws MWException If the format is not supported by this content handler.
581 */
protected function checkFormat( $format ) {
	// Supported formats (including "no format given") pass silently.
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	throw new MWException(
		"Format $format is not supported for content model " . $this->getModelID()
	);
}
590
591 /**
592 * Returns overrides for action handlers.
593 * Classes listed here will be used instead of the default one when
594 * (and only when) $wgActions[$action] === true. This allows subclasses
595 * to override the default action handlers.
596 *
597 * @since 1.21
598 *
599 * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
600 * either the full qualified class name of an Action class, a callable taking ( Page $page,
601 * IContextSource $context = null ) as parameters and returning an Action object, or an actual
602 * Action object. An empty array in this default implementation.
603 *
604 * @see Action::factory
605 */
public function getActionOverrides() {
	// No overrides by default; subclasses return their own mapping.
	$overrides = [];

	return $overrides;
}
609
610 /**
611 * Factory for creating an appropriate DifferenceEngine for this content model.
612 *
613 * @since 1.21
614 *
615 * @param IContextSource $context Context to use, anything else will be ignored.
616 * @param int $old Revision ID we want to show and diff with.
617 * @param int|string $new Either a revision ID or one of the strings 'cur', 'prev' or 'next'.
618 * @param int $rcid FIXME: Deprecated, no longer used. Defaults to 0.
619 * @param bool $refreshCache If set, refreshes the diff cache. Defaults to false.
620 * @param bool $unhide If set, allow viewing deleted revs. Defaults to false.
621 *
622 * @return DifferenceEngine
623 */
public function createDifferenceEngine( IContextSource $context, $old = 0, $new = 0,
	$rcid = 0, // FIXME: Deprecated, no longer used
	$refreshCache = false, $unhide = false
) {
	// A hook handler may abort the run and supply its own engine through the
	// by-reference slot; in that case whatever it left there is returned.
	$differenceEngine = null;
	$hookAborted = !Hooks::run( 'GetDifferenceEngine',
		[ $context, $old, $new, $refreshCache, $unhide, &$differenceEngine ]
	);
	if ( $hookAborted ) {
		return $differenceEngine;
	}

	// Otherwise instantiate the engine class chosen by this handler.
	$engineClass = $this->getDiffEngineClass();

	return new $engineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
}
638
639 /**
640 * Get the language in which the content of the given page is written.
641 *
642 * This default implementation just returns $wgContLang (except for pages
643 * in the MediaWiki namespace)
644 *
645 * Note that the pages language is not cacheable, since it may in some
646 * cases depend on user settings.
647 *
648 * Also note that the page language may or may not depend on the actual content of the page,
649 * that is, this method may load the content in order to determine the language.
650 *
651 * @since 1.21
652 *
653 * @param Title $title The page to determine the language for.
654 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
655 *
656 * @return Language The page's language
657 */
public function getPageLanguage( Title $title, Content $content = null ) {
	global $wgContLang, $wgLang;

	if ( $title->getNamespace() == NS_MEDIAWIKI ) {
		// Parse mediawiki messages with correct target language: the second
		// element returned by figureMessage() is the language code.
		$messageParts = MessageCache::singleton()->figureMessage( $title->getText() );
		$pageLang = Language::factory( $messageParts[1] );
	} else {
		$pageLang = $wgContLang;
	}

	// Hook handlers may replace the language via the by-reference argument.
	Hooks::run( 'PageContentLanguage', [ $title, &$pageLang, $wgLang ] );

	return wfGetLangObj( $pageLang );
}
672
673 /**
674 * Get the language in which the content of this page is written when
675 * viewed by user. Defaults to $this->getPageLanguage(), but if the user
676 * specified a preferred variant, the variant will be used.
677 *
678 * This default implementation just returns $this->getPageLanguage( $title, $content ) unless
679 * the user specified a preferred variant.
680 *
681 * Note that the pages view language is not cacheable, since it depends on user settings.
682 *
683 * Also note that the page language may or may not depend on the actual content of the page,
684 * that is, this method may load the content in order to determine the language.
685 *
686 * @since 1.21
687 *
688 * @param Title $title The page to determine the language for.
689 * @param Content $content The page's content, if you have it handy, to avoid reloading it.
690 *
691 * @return Language The page's language for viewing
692 */
public function getPageViewLanguage( Title $title, Content $content = null ) {
	$pageLang = $this->getPageLanguage( $title, $content );

	// Pages in the MediaWiki namespace are never switched to a variant.
	if ( $title->getNamespace() === NS_MEDIAWIKI ) {
		return $pageLang;
	}

	// If the user chooses a variant, the content is actually
	// in a language whose code is the variant code.
	$variant = $pageLang->getPreferredVariant();

	return $pageLang->getCode() !== $variant
		? Language::factory( $variant )
		: $pageLang;
}
707
708 /**
709 * Determines whether the content type handled by this ContentHandler
710 * can be used on the given page.
711 *
712 * This default implementation always returns true.
713 * Subclasses may override this to restrict the use of this content model to specific locations,
714 * typically based on the namespace or some other aspect of the title, such as a special suffix
715 * (e.g. ".svg" for SVG content).
716 *
717 * @note this calls the ContentModelCanBeUsedOn hook which may be used to override which
718 * content model can be used where.
719 *
720 * @param Title $title The page's title.
721 *
722 * @return bool True if content of this kind can be used on the given page, false otherwise.
723 */
public function canBeUsedOn( Title $title ) {
	// Allowed unless a hook handler flips the by-reference flag.
	$ok = true;
	$modelId = $this->getModelID();

	Hooks::run( 'ContentModelCanBeUsedOn', [ $modelId, $title, &$ok ] );

	return $ok;
}
731
732 /**
733 * Returns the name of the diff engine to use.
734 *
735 * @since 1.21
736 *
737 * @return string
738 */
protected function getDiffEngineClass() {
	// Default engine; createDifferenceEngine() instantiates the returned class.
	$engineClass = DifferenceEngine::class;

	return $engineClass;
}
742
743 /**
744 * Attempts to merge differences between three versions. Returns a new
745 * Content object for a clean merge and false for failure or a conflict.
746 *
747 * This default implementation always returns false.
748 *
749 * @since 1.21
750 *
751 * @param Content $oldContent The page's previous content.
752 * @param Content $myContent One of the page's conflicting contents.
753 * @param Content $yourContent One of the page's conflicting contents.
754 *
755 * @return Content|bool Always false.
756 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	// Merging is unsupported by default; all three arguments are ignored.
	return false;
}
760
761 /**
762 * Return an applicable auto-summary if one exists for the given edit.
763 *
764 * @since 1.21
765 *
766 * @param Content $oldContent The previous text of the page.
767 * @param Content $newContent The submitted text of the page.
768 * @param int $flags Bit mask: a bit mask of flags submitted for the edit.
769 *
770 * @return string An appropriate auto-summary, or an empty string.
771 */
public function getAutosummary( Content $oldContent = null, Content $newContent = null,
	$flags
) {
	// Decide what kind of auto-summary is needed.

	// Redirect auto-summaries

	/**
	 * @var $ot Title
	 * @var $rt Title
	 */
	$ot = $oldContent !== null ? $oldContent->getRedirectTarget() : null;
	$rt = $newContent !== null ? $newContent->getRedirectTarget() : null;

	// Only summarise a redirect when its target (or fragment) actually changed.
	if ( is_object( $rt ) ) {
		if ( !is_object( $ot )
			|| !$rt->equals( $ot )
			|| $ot->getFragment() != $rt->getFragment()
		) {
			// Leave room for the message text and the target title.
			$truncatedtext = $newContent->getTextForSummary(
				250
					- strlen( wfMessage( 'autoredircomment' )->inContentLanguage()->text() )
					- strlen( $rt->getFullText() ) );

			return wfMessage( 'autoredircomment', $rt->getFullText() )
				->rawParams( $truncatedtext )->inContentLanguage()->text();
		}
	}

	// $newContent is declared nullable, but every remaining check calls
	// methods on it. Without new content no summary applies, so bail out
	// instead of dereferencing null.
	if ( $newContent === null ) {
		return '';
	}

	// New page auto-summaries
	if ( ( $flags & EDIT_NEW ) && $newContent->getSize() > 0 ) {
		// If they're making a new article, give its text, truncated, in
		// the summary.

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMessage( 'autosumm-new' )->inContentLanguage()->text() ) );

		return wfMessage( 'autosumm-new' )->rawParams( $truncatedtext )
			->inContentLanguage()->text();
	}

	// Blanking auto-summaries
	if ( $oldContent !== null && $oldContent->getSize() > 0 && $newContent->getSize() == 0 ) {
		return wfMessage( 'autosumm-blank' )->inContentLanguage()->text();
	} elseif ( $oldContent !== null
		&& $oldContent->getSize() > 10 * $newContent->getSize()
		&& $newContent->getSize() < 500
	) {
		// Removing more than 90% of the article

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMessage( 'autosumm-replace' )->inContentLanguage()->text() ) );

		return wfMessage( 'autosumm-replace' )->rawParams( $truncatedtext )
			->inContentLanguage()->text();
	}

	// New blank article auto-summary
	if ( ( $flags & EDIT_NEW ) && $newContent->isEmpty() ) {
		return wfMessage( 'autosumm-newblank' )->inContentLanguage()->text();
	}

	// If we reach this point, there's no applicable auto-summary for our
	// case, so our auto-summary is empty.
	return '';
}
838
839 /**
840 * Auto-generates a deletion reason
841 *
842 * @since 1.21
843 *
844 * @param Title $title The page's title
845 * @param bool &$hasHistory Whether the page has a history
846 *
847 * @return mixed String containing deletion reason or empty string, or
848 * boolean false if no revision occurred
849 *
850 * @todo &$hasHistory is extremely ugly, it's here because
851 * WikiPage::getAutoDeleteReason() and Article::generateReason()
852 * have it / want it.
853 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbr = wfGetDB( DB_REPLICA );

	// Start from the latest revision of the page.
	$rev = Revision::newFromTitle( $title );
	if ( $rev === null ) {
		return false;
	}

	$content = $rev->getContent();
	$usedPrevious = false;

	// If the page is blank, use the text from the previous revision,
	// which can only be blank if there's a move/import/protect dummy
	// revision involved
	if ( !$content || $content->isEmpty() ) {
		$prev = $rev->getPrevious();
		if ( $prev ) {
			$rev = $prev;
			$content = $prev->getContent();
			$usedPrevious = true;
		}
	}

	// Throws if the revision in use has a different content model.
	$this->checkModelID( $rev->getContentModel() );

	// Find out if there was only one contributor.
	// Only scan up to 20 revisions whose author is not hidden.
	$conds = [
		'rev_page' => $title->getArticleID(),
		$dbr->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0'
	];
	$res = $dbr->select( 'revision', 'rev_user_text', $conds, __METHOD__, [ 'LIMIT' => 20 ] );

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );

	// Stays false if the only contributor is hidden (no row) or if a second
	// contributor turns up among the scanned rows.
	$onlyAuthor = false;
	$row = $dbr->fetchObject( $res );
	if ( $row ) {
		$onlyAuthor = $row->rev_user_text;
		foreach ( $res as $row ) {
			if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999
				$onlyAuthor = false;
				break;
			}
		}
	}

	// Generate the summary with a '$1' placeholder
	if ( $usedPrevious ) {
		// The latest revision was blank, so the reason is built from the
		// content of the revision before it.
		$reason = wfMessage( 'exbeforeblank', '$1' )->inContentLanguage()->text();
	} elseif ( $onlyAuthor ) {
		$reason = wfMessage( 'excontentauthor', '$1', $onlyAuthor )
			->inContentLanguage()->text();
	} else {
		$reason = wfMessage( 'excontent', '$1' )->inContentLanguage()->text();
	}

	if ( $reason == '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$text = '';
	if ( $content ) {
		$text = $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) );
	}

	// Now replace the '$1' placeholder
	return str_replace( '$1', $text, $reason );
}
945
946 /**
947 * Get the Content object that needs to be saved in order to undo all revisions
948 * between $undo and $undoafter. Revisions must belong to the same page,
949 * must exist and must not be deleted.
950 *
951 * @since 1.21
952 *
953 * @param Revision $current The current text
954 * @param Revision $undo The revision to undo
955 * @param Revision $undoafter Must be an earlier revision than $undo
956 *
 * @return mixed Content on success, false on failure
958 */
public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter ) {
	// Without current content there is no page to apply the undo to.
	$contentOfCurrent = $current->getContent();
	if ( !$contentOfCurrent ) {
		return false;
	}

	// Both ends of the range being undone must have retrievable content.
	$contentToUndo = $undo->getContent();
	$contentToRestore = $undoafter->getContent();
	if ( !( $contentToUndo && $contentToRestore ) ) {
		return false;
	}

	try {
		// checkModelID() throws MWException when this handler rejects a model.
		$this->checkModelID( $contentOfCurrent->getModel() );
		$this->checkModelID( $contentToUndo->getModel() );

		// Undoing the most recent revision may revert a content model change,
		// so the restored model is only validated when the undone revision
		// sits in the middle of the history, where a model switch would be
		// confusing.
		$undoingOlderRevision = ( $current->getId() !== $undo->getId() );
		if ( $undoingOlderRevision ) {
			$this->checkModelID( $contentToRestore->getModel() );
		}
	} catch ( MWException $e ) {
		// Mismatching content models: the undo cannot be performed.
		return false;
	}

	// When the current content equals the content being undone this is a
	// plain revert, so no merge is needed; otherwise do a three-way merge.
	return $contentOfCurrent->equals( $contentToUndo )
		? $contentToRestore
		: $this->merge3( $contentToUndo, $contentToRestore, $contentOfCurrent );
}
998
999 /**
1000 * Get parser options suitable for rendering and caching the article
1001 *
1002 * @param IContextSource|User|string $context One of the following:
1003 * - IContextSource: Use the User and the Language of the provided
1004 * context
1005 * - User: Use the provided User object and $wgLang for the language,
1006 * so use an IContextSource object if possible.
1007 * - 'canonical': Canonical options (anonymous user with default
1008 * preferences and content language).
1009 *
1010 * @throws MWException
1011 * @return ParserOptions
1012 */
public function makeParserOptions( $context ) {
	global $wgContLang, $wgEnableParserLimitReporting;

	if ( $context instanceof IContextSource ) {
		// User and language both come from the context.
		$options = ParserOptions::newFromContext( $context );
	} elseif ( $context instanceof User ) { // settings per user (even anons)
		$options = ParserOptions::newFromUser( $context );
	} elseif ( $context === 'canonical' ) { // canonical settings
		$options = ParserOptions::newFromUserAndLang( new User, $wgContLang );
	} else {
		// Build a printable description of the rejected value. Interpolating
		// $context directly would itself fail for arrays and for objects
		// without __toString(), masking the MWException we want to raise.
		if ( is_scalar( $context ) || $context === null
			|| ( is_object( $context ) && method_exists( $context, '__toString' ) )
		) {
			// Same text the plain interpolation would have produced.
			$description = (string)$context;
		} elseif ( is_object( $context ) ) {
			$description = get_class( $context );
		} else {
			$description = gettype( $context );
		}

		throw new MWException( "Bad context for parser options: $description" );
	}

	$options->enableLimitReport( $wgEnableParserLimitReporting ); // show inclusion/loop reports
	$options->setTidy( true ); // fix bad HTML

	return $options;
}
1031
1032 /**
1033 * Returns true for content models that support caching using the
1034 * ParserCache mechanism. See WikiPage::shouldCheckParserCache().
1035 *
1036 * @since 1.21
1037 *
1038 * @return bool Always false.
1039 */
public function isParserCacheSupported() {
	// This base implementation never reports ParserCache support.
	return false;
}
1043
1044 /**
1045 * Returns true if this content model supports sections.
1046 * This default implementation returns false.
1047 *
1048 * Content models that return true here should also implement
 * Content::getSection, Content::replaceSection, etc. to handle sections.
1050 *
1051 * @return bool Always false.
1052 */
public function supportsSections() {
	// Section support is off in this base implementation.
	return false;
}
1056
1057 /**
1058 * Returns true if this content model supports categories.
1059 * The default implementation returns true.
1060 *
1061 * @return bool Always true.
1062 */
public function supportsCategories() {
	// Categories are supported by default.
	return true;
}
1066
1067 /**
1068 * Returns true if this content model supports redirects.
1069 * This default implementation returns false.
1070 *
1071 * Content models that return true here should also implement
1072 * ContentHandler::makeRedirectContent to return a Content object.
1073 *
1074 * @return bool Always false.
1075 */
public function supportsRedirects() {
	// Redirect support is off in this base implementation.
	return false;
}
1079
1080 /**
1081 * Return true if this content model supports direct editing, such as via EditPage.
1082 *
 * @return bool Default is false, and true for TextContent and its derivatives.
1084 */
public function supportsDirectEditing() {
	// Direct editing is off in this base implementation.
	return false;
}
1088
1089 /**
1090 * Whether or not this content model supports direct editing via ApiEditPage
1091 *
1092 * @return bool Default is false, and true for TextContent and derivatives.
1093 */
public function supportsDirectApiEditing() {
	// API editing simply mirrors the direct-editing capability.
	$directlyEditable = $this->supportsDirectEditing();

	return $directlyEditable;
}
1097
1098 /**
1099 * Call a legacy hook that uses text instead of Content objects.
1100 * Will log a warning when a matching hook function is registered.
1101 * If the textual representation of the content is changed by the
1102 * hook function, a new Content object is constructed from the new
1103 * text.
1104 *
1105 * @param string $event Event name
1106 * @param array $args Parameters passed to hook functions
1107 * @param string|null $deprecatedVersion Emit a deprecation notice
1108 * when the hook is run for the provided version
1109 *
1110 * @return bool True if no handler aborted the hook
1111 */
public static function runLegacyHooks( $event, $args = [],
	$deprecatedVersion = null
) {
	// No handler registered for this event: nothing to convert or call.
	if ( !Hooks::isRegistered( $event ) ) {
		return true;
	}

	// Replace every Content argument with its serialized text, remembering
	// both the original object and the text that was handed to the hook.
	$originalObjects = [];
	$serializedTexts = [];

	foreach ( $args as $key => $value ) {
		if ( !( $value instanceof Content ) ) {
			continue;
		}

		/* @var Content $value */
		$originalObjects[$key] = $value;

		$text = $value->serialize();
		$serializedTexts[$key] = $text;
		$args[$key] = $text;
	}

	// Run the hook handlers on the text-based arguments.
	$result = Hooks::run( $event, $args, $deprecatedVersion );

	// Put Content objects back into the argument list. Where a handler
	// changed the text, build a new Content object from the new text.
	foreach ( $serializedTexts as $key => $textBefore ) {
		/* @var Content $original */
		$textAfter = $args[$key];
		$original = $originalObjects[$key];

		$args[$key] = ( $textAfter === $textBefore )
			? $original
			: $original->getContentHandler()->unserializeContent( $textAfter );
	}

	return $result;
}
1156
1157 /**
1158 * Get fields definition for search index
1159 *
1160 * @todo Expose title, redirect, namespace, text, source_text, text_bytes
1161 * field mappings here. (see T142670 and T143409)
1162 *
1163 * @param SearchEngine $engine
1164 * @return SearchIndexField[] List of fields this content handler can provide.
1165 * @since 1.28
1166 */
public function getFieldsForSearchIndex( SearchEngine $engine ) {
	// Categories: text field, flagged for case folding.
	$categoryField = $engine->makeSearchFieldMapping(
		'category',
		SearchIndexField::INDEX_TYPE_TEXT
	);
	$categoryField->setFlag( SearchIndexField::FLAG_CASEFOLD );

	// Link fields: plain keyword fields.
	$externalLinkField = $engine->makeSearchFieldMapping(
		'external_link',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);
	$outgoingLinkField = $engine->makeSearchFieldMapping(
		'outgoing_link',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);

	// Templates: keyword field, flagged for case folding.
	$templateField = $engine->makeSearchFieldMapping(
		'template',
		SearchIndexField::INDEX_TYPE_KEYWORD
	);
	$templateField->setFlag( SearchIndexField::FLAG_CASEFOLD );

	// Keyed by field name, in the order the mappings were created.
	return [
		'category' => $categoryField,
		'external_link' => $externalLinkField,
		'outgoing_link' => $outgoingLinkField,
		'template' => $templateField,
	];
}
1194
1195 /**
1196 * Add new field definition to array.
1197 * @param SearchIndexField[] $fields
1198 * @param SearchEngine $engine
1199 * @param string $name
1200 * @param int $type
1201 * @return SearchIndexField[] new field defs
1202 * @since 1.28
1203 */
protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
	// Ask the engine for the mapping, then register it under its own name.
	// $fields is taken by reference, so the caller's array is updated too.
	$mapping = $engine->makeSearchFieldMapping( $name, $type );
	$fields[$name] = $mapping;

	return $fields;
}
1208
1209 /**
1210 * Return fields to be indexed by search engine
1211 * as representation of this document.
1212 * Overriding class should call parent function or take care of calling
1213 * the SearchDataForIndex hook.
1214 * @param WikiPage $page Page to index
1215 * @param ParserOutput $output
1216 * @param SearchEngine $engine Search engine for which we are indexing
1217 * @return array Map of name=>value for fields
1218 * @since 1.28
1219 */
public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
	SearchEngine $engine ) {
	$fieldData = [];
	$pageContent = $page->getContent();

	// Only pages that actually have content contribute built-in fields.
	if ( $pageContent ) {
		$extractor = new ParserOutputSearchDataExtractor();

		// Fields derived from the parser output.
		$fieldData = [
			'category' => $extractor->getCategories( $output ),
			'external_link' => $extractor->getExternalLinks( $output ),
			'outgoing_link' => $extractor->getOutgoingLinks( $output ),
			'template' => $extractor->getTemplates( $output ),
		];

		// Fields derived from the content itself; the same text is used for
		// both 'text' and 'source_text'.
		$indexText = $pageContent->getTextForSearchIndex();
		$fieldData['text'] = $indexText;
		$fieldData['source_text'] = $indexText;
		$fieldData['text_bytes'] = $pageContent->getSize();
	}

	// The hook always runs, even for empty data; $fieldData is passed by
	// reference so handlers can change it.
	Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );

	return $fieldData;
}
1243
1244 /**
1245 * Produce page output suitable for indexing.
1246 *
1247 * Specific content handlers may override it if they need different content handling.
1248 *
1249 * @param WikiPage $page
 * @param ParserCache|null $cache Optional cache to read the output from and save it to
1251 * @return ParserOutput
1252 */
public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
	$options = $page->makeParserOptions( 'canonical' );
	$revisionId = $page->getRevision()->getId();

	// Try the cache first, when one was supplied.
	$output = $cache ? $cache->get( $page, $options ) : null;
	if ( $output ) {
		return $output;
	}

	// Nothing usable from the cache: generate the output from the page content.
	$output = $page->getContent()->getParserOutput( $page->getTitle(), $revisionId, $options );

	// Store the fresh output when a cache is available.
	if ( $cache ) {
		$cache->save( $output, $page, $options );
	}

	return $output;
}
1268
1269 }