Merge branch 'Wikidata' of ssh://gerrit.wikimedia.org:29418/mediawiki/core into Wikidata
[lhc/web/wiklou.git] / includes / ContentHandler.php
1 <?php
2
/**
 * Exception class for content handling; judging by its name it is meant to signal
 * problems while serializing or unserializing content (not thrown anywhere in this file).
 *
 * It adds no behaviour to MWException; it only provides a distinct type to throw and catch.
 */
class MWContentSerializationException extends MWException {}
6
7
/**
 * A content handler knows how to deal with a specific type of content on a wiki page.
 * Content is stored in the database in a serialized form (using a serialization format, a.k.a. MIME type)
 * and is unserialized into its native PHP representation (the content model), which is wrapped in
 * an instance of the appropriate subclass of Content.
 *
 * Some content types have a flat model, that is, their native representation is the
 * same as their serialized form. Examples would be JavaScript and CSS code. As of now,
 * this also applies to wikitext (MediaWiki's default content type), but wikitext
 * content may be represented by a DOM or AST structure in the future.
 *
 * TODO: add documentation
 */
21 abstract class ContentHandler {
22
23
/**
 * NOTE: this comment documents getContentText(), which is defined further down in this class.
 *
 * Convenience function for getting flat text from a Content object. This should only
 * be used in the context of backwards compatibility with code that is not yet able
 * to handle Content objects!
 *
 * If $content is null, this method returns the empty string.
 *
 * If $content is an instance of TextContent, this method returns the flat text as returned by $content->getNativeData().
 *
 * If $content is not a TextContent object, the behaviour of this method depends on the global $wgContentHandlerTextFallback:
 * If $wgContentHandlerTextFallback is 'fail' and $content is not a TextContent object, an MWException is thrown.
 * If $wgContentHandlerTextFallback is 'serialize' and $content is not a TextContent object, $content->serialize()
 * is called to get a string form of the content.
 * Otherwise, this method returns null.
 *
 * @static
 * @param Content|null $content
 * @return null|string the textual form of $content, if available
 * @throws MWException if $content is not an instance of TextContent and $wgContentHandlerTextFallback was set to 'fail'.
 */
44
/**
 * Turns the given Content object into its serialized (string) form.
 *
 * @abstract
 * @param Content $content the Content object to serialize
 * @param null|String $format the serialization format to use; defaults to null
 * @return String the serialized form of $content
 */
abstract public function serialize( Content $content, $format = null );
52
/**
 * Builds a Content object from its serialized (string) form.
 *
 * TODO: calling unserialize on a ContentHandler returns a Content?!! Something looks wrong here...
 *
 * @abstract
 * @param String $blob the serialized content
 * @param null|String $format the serialization format $blob is in; defaults to null
 * @return Content the Content object represented by $blob
 */
abstract public function unserialize( $blob, $format = null );
62
/**
 * Creates a new, empty Content object (the concrete handlers in this file return a
 * fresh content object wrapping the empty string).
 *
 * FIXME: bad method name: suggests it empties the content of an instance rather than creating a new empty one
 *
 * @abstract
 * @return Content
 */
abstract public function emptyContent();
67
68
/**
 * Backwards-compatibility helper: returns the flat text of a Content object.
 *
 * @static
 * @param Content|null $content the content to get text from
 * @return null|string '' for null content, the native data of a TextContent, and for any
 *         other content either its serialized form (fallback mode 'serialize') or null
 * @throws MWException for non-text content when $wgContentHandlerTextFallback is 'fail'
 */
public static function getContentText( Content $content = null ) {
	global $wgContentHandlerTextFallback;

	if ( $content === null ) {
		return '';
	}

	if ( !( $content instanceof TextContent ) ) {
		// Not text based: what happens next is decided by the configured fallback mode.
		if ( $wgContentHandlerTextFallback == 'fail' ) {
			throw new MWException( "Attempt to get text from Content with model " . $content->getModelName() );
		}

		return $wgContentHandlerTextFallback == 'serialize' ? $content->serialize() : null;
	}

	return $content->getNativeData();
}
90
/**
 * Convenience function for creating a Content object from a given textual representation.
 *
 * The handler for $modelName (or, if that is null, for $title->getContentModelName())
 * is looked up and asked to unserialize $text using $format.
 *
 * @static
 * @param String $text the textual representation, will be unserialized to create the Content object
 * @param Title $title the title of the page this text belongs to; only consulted for its content model when $modelName is null
 * @param null|String $modelName the model to deserialize to. If not provided, $title->getContentModelName() is used.
 * @param null|String $format the format to use for deserialization; passed on to the handler unchanged.
 *
 * @return Content a Content object representing $text
 */
public static function makeContent( $text, Title $title, $modelName = null, $format = null ) {
	if ( $modelName === null ) {
		$modelName = $title->getContentModelName();
	}

	return self::getForModelName( $modelName )->unserialize( $text, $format );
}
114
/**
 * Returns the name of the default content model to be used for the page with the given title.
 *
 * Note: There should rarely be need to call this method directly.
 * To determine the actual content model for a given page, use Title::getContentModelName().
 *
 * The decision is made in this order:
 * * $wgNamespaceContentModels may name a model for the title's namespace.
 * * The hook DefaultModelFor may change that model; if the hook aborts (returns false)
 *   and a model is set, that model is returned immediately.
 * * NS_MEDIAWIKI pages whose name ends in .css or .js count as CSS/JS pages.
 * * The hook TitleIsCssOrJsPage may override that CSS/JS flag.
 * * NS_USER subpages (a slash followed by a name ending in .js or .css) count as CSS/JS pages, too.
 * * The hook TitleIsWikitextPage may override the resulting "is wikitext" decision.
 *
 * A page that is not wikitext gets the JavaScript or CSS model according to the matched
 * extension; without a matched extension it gets the model determined so far, or
 * CONTENT_MODEL_TEXT if there is none. Everything else is wikitext.
 *
 * Note: this is used by, and may thus not use, Title::getContentModelName()
 *
 * @static
 * @param Title $title
 * @return null|string default model name for the page given by $title
 */
public static function getDefaultModelFor( Title $title ) {
	global $wgNamespaceContentModels;

	// NOTE: this method must not rely on $title->getContentModelName() directly or indirectly,
	//       because it is used to initialize the mContentModelName member.

	$namespace = $title->getNamespace();

	// Start from the per-namespace configuration, if there is any.
	$model = null;
	if ( !empty( $wgNamespaceContentModels[ $namespace ] ) ) {
		$model = $wgNamespaceContentModels[ $namespace ];
	}

	// A hook can determine the default model; an aborting hook with a model set is final.
	$hookPassed = wfRunHooks( 'DefaultModelFor', array( $title, &$model ) ); #FIXME: document new hook!
	if ( !$hookPassed && !is_null( $model ) ) {
		return $model;
	}

	$extension = false;
	$matches = null;

	// Could this page contain custom CSS or JavaScript, based on the title?
	$isCssOrJsPage = NS_MEDIAWIKI == $namespace && preg_match( '!\.(css|js)$!u', $title->getText(), $matches );
	if ( $isCssOrJsPage ) {
		$extension = $matches[1];
	}

	// A hook can force js/css.
	wfRunHooks( 'TitleIsCssOrJsPage', array( $title, &$isCssOrJsPage ) );

	// Is this a .js or .css subpage of a user page?
	$isJsCssSubpage = NS_USER == $namespace && !$isCssOrJsPage && preg_match( "/\\/.*\\.(js|css)$/", $title->getText(), $matches );
	if ( $isJsCssSubpage ) {
		$extension = $matches[1];
	}

	// Is this wikitext, according to $wgNamespaceContentModels or the DefaultModelFor hook?
	$isWikitext = ( is_null( $model ) || $model == CONTENT_MODEL_WIKITEXT )
		&& !$isCssOrJsPage
		&& !$isJsCssSubpage;

	// A hook can override $isWikitext.
	wfRunHooks( 'TitleIsWikitextPage', array( $title, &$isWikitext ) );

	if ( $isWikitext ) {
		return CONTENT_MODEL_WIKITEXT;
	}

	// Not wikitext: pick the model by the extension found above, if any.
	if ( $extension == 'js' ) {
		return CONTENT_MODEL_JAVASCRIPT;
	}
	if ( $extension == 'css' ) {
		return CONTENT_MODEL_CSS;
	}

	return is_null( $model ) ? CONTENT_MODEL_TEXT : $model;
}
197
/**
 * Returns the ContentHandler for the content model of the given title,
 * as reported by $title->getContentModelName().
 *
 * @static
 * @param Title $title
 * @return ContentHandler
 */
public static function getForTitle( Title $title ) {
	return self::getForModelName( $title->getContentModelName() );
}
209
/**
 * Returns the ContentHandler for the content model of the given Content object,
 * as reported by $content->getModelName().
 *
 * @static
 * @param Content $content
 * @return ContentHandler
 */
public static function getForContent( Content $content ) {
	return self::getForModelName( $content->getModelName() );
}
221
/**
 * Returns the ContentHandler singleton for the given model name. Use the CONTENT_MODEL_XXX constants to
 * identify the desired content model.
 *
 * ContentHandler singletons are taken from the global $wgContentHandlers array. Keys in that array are
 * model names, the values are either ContentHandler objects or strings naming the appropriate
 * subclass of ContentHandler.
 *
 * If a class name is encountered when looking up the entry for a model name, the class is
 * instantiated (with the model name as its only constructor argument) and the class name is
 * replaced by the resulting object in $wgContentHandlers.
 *
 * If $wgContentHandlers has no (non-empty) entry for $modelName, the ContentHandlerForModelName hook
 * is given the chance to supply one. If it does not, an MWException is raised.
 *
 * @static
 * @param $modelName String the name of the content model for which to get a handler. Use CONTENT_MODEL_XXX constants.
 * @return ContentHandler the ContentHandler singleton for handling the model given by $modelName
 * @throws MWException if no handler is known for $modelName.
 */
public static function getForModelName( $modelName ) {
	global $wgContentHandlers;

	if ( empty( $wgContentHandlers[$modelName] ) ) {
		$fromHook = null;

		// TODO: document new hook
		wfRunHooks( 'ContentHandlerForModelName', array( $modelName, &$fromHook ) );

		if ( !$fromHook ) {
			throw new MWException( "No handler for model $modelName registered in \$wgContentHandlers" );
		}

		// NOTE: may be a string or an object, either is fine!
		$wgContentHandlers[$modelName] = $fromHook;
	}

	$handler = $wgContentHandlers[$modelName];

	if ( is_string( $handler ) ) {
		// A class name: instantiate it once and store the instance for later lookups.
		$className = $handler;
		$handler = new $className( $modelName );
		$wgContentHandlers[$modelName] = $handler;
	}

	return $handler;
}
264
265 // ----------------------------------------------------------------------------------------------------------
266
/**
 * Constructor: stores the handler's model name and its list of supported formats.
 * Values for the parameters are typically provided as literals by subclasses' constructors.
 *
 * @param String $modelName (use CONTENT_MODEL_XXX constants).
 * @param array $formats list of supported serialization formats (typically as MIME types);
 *        getDefaultFormat() returns the entry at index 0.
 */
public function __construct( $modelName, $formats ) {
	$this->mSupportedFormats = $formats;
	$this->mModelName = $modelName;
}
278
/**
 * Returns the name of the content model this handler was constructed for.
 *
 * @return String the model name
 */
public function getModelName() {
	// e.g. for wikitext: wikitext (in the future: wikiast, wikidom?); for wikidata: wikidata
	return $this->mModelName;
}
288
/**
 * Asserts that the given model name is the one this handler was constructed for.
 *
 * The comparison is strict (!==), so the value must be identical to the stored model name.
 *
 * @param String $modelName the model name to check
 * @throws MWException if $modelName differs from this handler's model name
 */
protected function checkModelName( $modelName ) {
	if ( $modelName !== $this->mModelName ) {
		throw new MWException( "Bad content model: expected " . $this->mModelName . " but got " . $modelName );
	}
}
294
/**
 * Returns the list of serialization formats passed to the constructor.
 *
 * @return array the supported formats
 */
public function getSupportedFormats() {
	// e.g. for wikitext: "text/x-mediawiki-1", "text/x-mediawiki-2", etc
	//      for wikidata: "application/json", "application/x-php", etc
	return $this->mSupportedFormats;
}
300
/**
 * Returns the default serialization format, i.e. the entry at index 0 of the supported formats.
 *
 * @return String the default format
 */
public function getDefaultFormat() {
	$formats = $this->mSupportedFormats;

	return $formats[0];
}
304
/**
 * Tells whether the given serialization format can be used with this handler.
 *
 * @param null|String $format the format to check; any falsy value means "use the default"
 * @return bool true if $format is falsy or is found in the list of supported formats
 */
public function isSupportedFormat( $format ) {
	// A falsy format means "use the default", which is always acceptable.
	return !$format || in_array( $format, $this->mSupportedFormats );
}
313
/**
 * Asserts that the given serialization format is acceptable according to isSupportedFormat().
 *
 * @param null|String $format the format to check
 * @throws MWException if isSupportedFormat( $format ) is false
 */
protected function checkFormat( $format ) {
	if ( $this->isSupportedFormat( $format ) ) {
		return;
	}

	throw new MWException( "Format $format is not supported for content model " . $this->getModelName() );
}
319
/**
 * Creates a plain Article object for viewing the page with the given title.
 *
 * NOTE: does *not* do special handling for Image and Category pages!
 *       Use Article::newFromTitle() for that!
 *
 * @param Title $title
 * @return Article
 * @throws MWException if the title's content model is not this handler's model
 * @todo Article is being refactored into an action class, keep track of that
 */
public function createArticle( Title $title ) {
	$this->checkModelName( $title->getContentModelName() );

	return new Article( $title );
}
336
/**
 * Creates an EditPage object for editing the page shown by the given article.
 *
 * @param Article $article
 * @return EditPage
 * @throws MWException if the article's content model is not this handler's model
 */
public function createEditPage( Article $article ) {
	$this->checkModelName( $article->getContentModelName() );

	return new EditPage( $article );
}
349
/**
 * Creates an ExternalEdit object for the page that is the title of the given context.
 *
 * @param IContextSource $context
 * @return ExternalEdit
 * @throws MWException if the content model of the context's title is not this handler's model
 */
public function createExternalEdit( IContextSource $context ) {
	// Title exposes its model via getContentModelName(), as used by createArticle() and
	// getForTitle(); getModelName() is the accessor of handlers and Content objects.
	$this->checkModelName( $context->getTitle()->getContentModelName() );

	return new ExternalEdit( $context );
}
362
/**
 * Factory: creates the difference engine for this content model.
 *
 * The class to instantiate is given by getDiffEngineClass(); all parameters are handed
 * to its constructor unchanged and in the same order.
 *
 * FIXME: use everywhere!
 *
 * @param $context IContextSource context to use, anything else will be ignored
 * @param $old Integer old ID we want to show and diff with.
 * @param $new String either 'prev' or 'next'.
 * @param $rcid Integer ??? FIXME (default 0)
 * @param $refreshCache boolean If set, refreshes the diff cache
 * @param $unhide boolean If set, allow viewing deleted revs
 *
 * @return DifferenceEngine
 * @throws MWException if the content model of the context's title is not this handler's model
 */
public function getDifferenceEngine( IContextSource $context, $old = 0, $new = 0, $rcid = 0,
	$refreshCache = false, $unhide = false ) {

	// Title exposes its model via getContentModelName(), as used by createArticle() and
	// getForTitle(); getModelName() is the accessor of handlers and Content objects.
	$this->checkModelName( $context->getTitle()->getContentModelName() );

	$diffEngineClass = $this->getDiffEngineClass();

	return new $diffEngineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
}
383
/**
 * Names the class getDifferenceEngine() instantiates. This base implementation
 * names the generic 'DifferenceEngine'.
 *
 * @since 0.1
 *
 * @return string the diff engine's class name
 */
protected function getDiffEngineClass() {
	$engineClass = 'DifferenceEngine';

	return $engineClass;
}
394
/**
 * Attempts to merge differences between three versions.
 * Meant to return a new Content object for a clean merge and false for failure or a conflict.
 *
 * This default implementation never merges: it always returns false.
 *
 * @param $oldContent Content
 * @param $myContent Content
 * @param $yourContent Content
 * @return Content|Bool
 */
public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
	// No generic merge strategy is implemented here; report failure.
	return false;
}
409
/**
 * Return an applicable autosummary if one exists for the given edit.
 *
 * Checked in this order:
 * * the new content redirects somewhere the old content did not (or there is no old content);
 * * the EDIT_NEW flag is set and the new content is not empty;
 * * old content existed and the new content is empty (blanking);
 * * the old content was more than ten times the size of the new content, and the new
 *   content is smaller than 500 (replacement).
 *
 * @param $oldContent Content|null: the previous text of the page.
 * @param $newContent Content|null: The submitted text of the page.
 * @param $flags Int bitmask: a bitmask of flags submitted for the edit.
 *
 * @return string An appropriate autosummary, or an empty string (also when $newContent is null).
 */
public function getAutosummary( Content $oldContent = null, Content $newContent = null, $flags ) {
	// Every summary below is derived from the new content; without it there is nothing to say.
	if ( is_null( $newContent ) ) {
		return '';
	}

	// Redirect autosummaries
	$ot = !is_null( $oldContent ) ? $oldContent->getRedirectTarget() : false;
	$rt = $newContent->getRedirectTarget();

	if ( is_object( $rt ) && ( !is_object( $ot ) || !$rt->equals( $ot ) || $ot->getFragment() != $rt->getFragment() ) ) {

		$truncatedtext = $newContent->getTextForSummary(
			250
				- strlen( wfMsgForContent( 'autoredircomment' ) )
				- strlen( $rt->getFullText() ) );

		return wfMsgForContent( 'autoredircomment', $rt->getFullText(), $truncatedtext );
	}

	$newSize = $newContent->getSize();

	// New page autosummaries
	if ( ( $flags & EDIT_NEW ) && $newSize > 0 ) {
		// If they're making a new article, give its text, truncated, in the summary.

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMsgForContent( 'autosumm-new' ) ) );

		return wfMsgForContent( 'autosumm-new', $truncatedtext );
	}

	// Blanking and replacement compare against the old content, so they need one.
	if ( is_null( $oldContent ) ) {
		return '';
	}

	$oldSize = $oldContent->getSize();

	// Blanking autosummaries
	if ( $oldSize > 0 && $newSize == 0 ) {
		return wfMsgForContent( 'autosumm-blank' );
	} elseif ( $oldSize > 10 * $newSize && $newSize < 500 ) {
		// Removing more than 90% of the article

		$truncatedtext = $newContent->getTextForSummary(
			200 - strlen( wfMsgForContent( 'autosumm-replace' ) ) );

		return wfMsgForContent( 'autosumm-replace', $truncatedtext );
	}

	// If we reach this point, there's no applicable autosummary for our case, so our
	// autosummary is empty.

	return '';
}
466
/**
 * Auto-generates a deletion reason
 *
 * The reason quotes (truncated) page content. If the latest revision is empty and a
 * previous revision exists, the previous revision's content is quoted instead. If all of
 * the scanned, non-hidden revisions (at most 20) are by one user, that user is named.
 *
 * @param $title Title: the page's title
 * @param &$hasHistory Boolean: set to whether the revision query returned more than one row
 * @return mixed String containing deletion reason or empty string, or boolean false
 *    if no revision occurred
 */
public function getAutoDeleteReason( Title $title, &$hasHistory ) {
	$dbw = wfGetDB( DB_MASTER );

	// Get the last revision
	$rev = Revision::newFromTitle( $title );

	if ( is_null( $rev ) ) {
		return false;
	}

	// Get the article's contents
	$content = $rev->getContent();
	$blank = false;

	// If the page is blank, use the text from the previous revision,
	// which can only be blank if there's a move/import/protect dummy revision involved
	if ( $content->getSize() == 0 ) {
		$prev = $rev->getPrevious();

		if ( $prev ) {
			// Take the content of the *previous* revision; re-reading $rev would just
			// yield the same blank content again.
			$content = $prev->getContent();
			$blank = true;
		}
	}

	// Find out if there was only one contributor
	// Only scan the last 20 revisions
	$res = $dbw->select( 'revision', 'rev_user_text',
		array( 'rev_page' => $title->getArticleID(), $dbw->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0' ),
		__METHOD__,
		array( 'LIMIT' => 20 )
	);

	if ( $res === false ) {
		// This page has no revisions, which is very weird
		return false;
	}

	$hasHistory = ( $res->numRows() > 1 );
	$row = $dbw->fetchObject( $res );

	if ( $row ) { // $row is false if the only contributor is hidden
		$onlyAuthor = $row->rev_user_text;
		// Try to find a second contributor
		foreach ( $res as $row ) {
			if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999
				$onlyAuthor = false;
				break;
			}
		}
	} else {
		$onlyAuthor = false;
	}

	// Generate the summary with a '$1' placeholder
	if ( $blank ) {
		// The current revision is blank; the reason quotes the content of the
		// revision before it.
		$reason = wfMsgForContent( 'exbeforeblank', '$1' );
	} elseif ( $onlyAuthor ) {
		$reason = wfMsgForContent( 'excontentauthor', '$1', $onlyAuthor );
	} else {
		$reason = wfMsgForContent( 'excontent', '$1' );
	}

	if ( $reason == '-' ) {
		// Allow these UI messages to be blanked out cleanly
		return '';
	}

	// Max content length = max comment length - length of the comment (excl. $1)
	$text = $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) );

	// Now replace the '$1' placeholder
	$reason = str_replace( '$1', $text, $reason );

	return $reason;
}
555
/**
 * Get the Content object that needs to be saved in order to undo all revisions
 * between $undo and $undoafter. Revisions must belong to the same page,
 * must exist and must not be deleted
 *
 * If the current content equals the content of $undo, this is a straight revert and the
 * content of $undoafter is returned as is. Otherwise the result of merge3() is returned.
 *
 * @param $current Revision the current revision of the page
 * @param $undo Revision the (latest) revision to undo
 * @param $undoafter null|Revision Must be an earlier revision than $undo. If null, the
 *        revision preceding $undo (as given by $undo->getPrevious()) is used.
 * @return Content|Bool the content to save on success, false on failure (no current content,
 *         no revision to go back to, or a failed merge)
 */
public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter = null ) {
	$cur_content = $current->getContent();

	if ( empty( $cur_content ) ) {
		return false; // no page
	}

	if ( is_null( $undoafter ) ) {
		// No explicit target given: undo just $undo, i.e. go back to the revision before it.
		$undoafter = $undo->getPrevious();

		if ( !$undoafter ) {
			return false; // nothing to go back to
		}
	}

	$undo_content = $undo->getContent();
	$undoafter_content = $undoafter->getContent();

	if ( $cur_content->equals( $undo_content ) ) {
		// No use doing a merge if it's just a straight revert.
		return $undoafter_content;
	}

	// Three-way merge with $undo as the base, $undoafter as "mine" and the current content as "yours".
	$undone_content = $this->merge3( $undo_content, $undoafter_content, $cur_content );

	return $undone_content;
}
583 }
584
585
/**
 * Base class for handlers of content whose serialized form is the same as its native
 * data: serialize() simply returns $content->getNativeData().
 */
abstract class TextContentHandler extends ContentHandler {

	/**
	 * Passes the model name and supported formats on to ContentHandler::__construct().
	 *
	 * @param String $modelName (use CONTENT_MODEL_XXX constants).
	 * @param array $formats list of supported serialization formats
	 */
	public function __construct( $modelName, $formats ) {
		parent::__construct( $modelName, $formats );
	}

	/**
	 * Returns the native data of the given content as its serialized form.
	 *
	 * @param Content $content the content to serialize
	 * @param null|String $format the format to serialize to; only validated, it does not influence the result
	 * @return String the value of $content->getNativeData()
	 * @throws MWException if $format is not supported by this handler
	 */
	public function serialize( Content $content, $format = null ) {
		$this->checkFormat( $format );
		return $content->getNativeData();
	}

	/**
	 * Attempts to merge differences between three versions.
	 * Returns a new Content object for a clean merge and false for failure or a conflict.
	 *
	 * This text-based implementation serializes all three versions using the default
	 * format and hands them to wfMerge().
	 *
	 * @param $oldContent Content the common base version
	 * @param $myContent Content
	 * @param $yourContent Content
	 * @return Content|Bool
	 * @throws MWException if $oldContent does not use this handler's content model
	 */
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
		$this->checkModelName( $oldContent->getModelName() );
		#TODO: check that all Content objects have the same content model! #XXX: what to do if they don't?

		$format = $this->getDefaultFormat();

		$old = $this->serialize( $oldContent, $format );
		$mine = $this->serialize( $myContent, $format );
		$yours = $this->serialize( $yourContent, $format );

		$ok = wfMerge( $old, $mine, $yours, $result );

		if ( !$ok ) {
			return false;
		}

		// Compare against the empty string explicitly: a plain truthiness test would also
		// treat a merge result of "0" as empty and silently discard it.
		if ( strval( $result ) === '' ) {
			return $this->emptyContent();
		}

		$mergedContent = $this->unserialize( $result, $format );
		return $mergedContent;
	}

}
/**
 * Content handler for wikitext: wraps text in WikitextContent objects.
 */
class WikitextContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName the model name to handle, CONTENT_MODEL_WIKITEXT by default
	 */
	public function __construct( $modelName = CONTENT_MODEL_WIKITEXT ) {
		$formats = array( 'application/x-wikitext' ); #FIXME: mime

		parent::__construct( $modelName, $formats );
	}

	/**
	 * Wraps the given text in a WikitextContent object.
	 *
	 * @param String $text the wikitext
	 * @param null|String $format the format $text is in; only validated
	 * @return WikitextContent
	 * @throws MWException if $format is not supported by this handler
	 */
	public function unserialize( $text, $format = null ) {
		$this->checkFormat( $format );

		$content = new WikitextContent( $text );
		return $content;
	}

	/**
	 * @return WikitextContent a new content object wrapping the empty string
	 */
	public function emptyContent() {
		$content = new WikitextContent( '' );
		return $content;
	}

}
652
653 #TODO: make ScriptContentHandler base class with plugin interface for syntax highlighting!
654
/**
 * Content handler for JavaScript code: wraps text in JavaScriptContent objects.
 */
class JavaScriptContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName the model name to handle, CONTENT_MODEL_JAVASCRIPT by default
	 */
	public function __construct( $modelName = CONTENT_MODEL_JAVASCRIPT ) {
		#XXX: or use $wgJsMimeType? this is for internal storage, not HTTP...
		parent::__construct( $modelName, array( 'text/javascript' ) );
	}

	/**
	 * Wraps the given text in a JavaScriptContent object.
	 *
	 * @param String $text the JavaScript code
	 * @param null|String $format the format $text is in; only validated
	 * @return JavaScriptContent
	 * @throws MWException if $format is not supported by this handler
	 */
	public function unserialize( $text, $format = null ) {
		// Validate the format like serialize() and the wikitext handler do.
		$this->checkFormat( $format );

		return new JavaScriptContent( $text );
	}

	/**
	 * @return JavaScriptContent a new content object wrapping the empty string
	 */
	public function emptyContent() {
		return new JavaScriptContent( '' );
	}
}
669
/**
 * Content handler for CSS code: wraps text in CssContent objects.
 */
class CssContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName the model name to handle, CONTENT_MODEL_CSS by default
	 */
	public function __construct( $modelName = CONTENT_MODEL_CSS ) {
		parent::__construct( $modelName, array( 'text/css' ) );
	}

	/**
	 * Wraps the given text in a CssContent object.
	 *
	 * @param String $text the CSS code
	 * @param null|String $format the format $text is in; only validated
	 * @return CssContent
	 * @throws MWException if $format is not supported by this handler
	 */
	public function unserialize( $text, $format = null ) {
		// Validate the format like serialize() and the wikitext handler do.
		$this->checkFormat( $format );

		return new CssContent( $text );
	}

	/**
	 * @return CssContent a new content object wrapping the empty string
	 */
	public function emptyContent() {
		return new CssContent( '' );
	}

}