Merge branch 'Wikidata' of ssh://review/mediawiki/core into Wikidata
[lhc/web/wiklou.git] / includes / ContentHandler.php
1 <?php
2
/**
 * Exception type for failures while serializing or unserializing content.
 *
 * Adds nothing to MWException; it only exists so that callers can catch
 * this kind of failure specifically.
 */
class MWContentSerializationException extends MWException {}
6
7
/**
 * A content handler knows how to deal with a specific type of content on a wiki page.
 * Content is stored in the database in a serialized form (using a serialization format,
 * a.k.a. MIME type) and is unserialized into its native PHP representation (the content model).
 *
 * Some content types have a flat model, that is, their native representation is the
 * same as their serialized form. Examples would be JavaScript and CSS code. As of now,
 * this also applies to wikitext (MediaWiki's default content type), but wikitext
 * content may be represented by a DOM or AST structure in the future.
 */
abstract class ContentHandler {

	/**
	 * Name of the content model this handler is responsible for.
	 * Public only because it used to be created implicitly by the constructor;
	 * treat it as read-only and use getModelName().
	 *
	 * @var String
	 */
	public $mModelName;

	/**
	 * Serialization formats supported by this handler; the first entry is the default.
	 * Public only because it used to be created implicitly by the constructor;
	 * treat it as read-only and use getSupportedFormats().
	 *
	 * @var Array
	 */
	public $mSupportedFormats;

	/**
	 * Returns the textual form of the given content, for code that can only handle text.
	 *
	 * A null $content yields the empty string and a TextContent yields its native data.
	 * For any other content the result depends on $wgContentHandlerTextFallback:
	 * 'fail' throws an MWException, 'serialize' returns the serialized content,
	 * and any other value makes this method return null.
	 *
	 * @static
	 * @param $content Content|null
	 * @return String|null
	 * @throws MWException if the content is not text and the fallback mode is 'fail'
	 */
	public static function getContentText( Content $content = null ) {
		global $wgContentHandlerTextFallback;

		if ( is_null( $content ) ) {
			return '';
		}

		if ( $content instanceof TextContent ) {
			return $content->getNativeData();
		}

		if ( $wgContentHandlerTextFallback == 'fail' ) {
			throw new MWException( "Attempt to get text from Content with model " . $content->getModelName() );
		}

		if ( $wgContentHandlerTextFallback == 'serialize' ) {
			return $content->serialize();
		}

		return null;
	}

	/**
	 * Creates a Content object from serialized text.
	 *
	 * @static
	 * @param $text String the serialized content
	 * @param $title Title the title the content belongs to; supplies the model if $modelName is null
	 * @param $modelName String|null the content model name; defaults to the title's content model
	 * @param $format String|null the serialization format; null means the handler's default
	 * @return Content
	 */
	public static function makeContent( $text, Title $title, $modelName = null, $format = null ) {

		if ( is_null( $modelName ) ) {
			$modelName = $title->getContentModelName();
		}

		$handler = ContentHandler::getForModelName( $modelName );
		return $handler->unserialize( $text, $format );
	}

	/**
	 * Determines the default content model for the given title, based on
	 * $wgNamespaceContentModels, the DefaultModelFor, TitleIsCssOrJsPage and
	 * TitleIsWikitextPage hooks, and the .js/.css title suffix conventions of the
	 * MediaWiki and User namespaces.
	 *
	 * @static
	 * @param $title Title
	 * @return String the name of the default content model
	 */
	public static function getDefaultModelFor( Title $title ) {
		global $wgNamespaceContentModels;

		// NOTE: this method must not rely on $title->getContentModelName() directly or indirectly,
		//       because it is used to initialize the mContentModelName member.

		$ns = $title->getNamespace();

		$ext = false;
		$m = null;
		$model = null;

		if ( !empty( $wgNamespaceContentModels[ $ns ] ) ) {
			$model = $wgNamespaceContentModels[ $ns ];
		}

		// hook can determine the default model; if it aborts with a model set, that model wins
		if ( !wfRunHooks( 'DefaultModelFor', array( $title, &$model ) ) ) { #FIXME: document new hook!
			if ( !is_null( $model ) ) {
				return $model;
			}
		}

		// Could this page contain custom CSS or JavaScript, based on the title?
		$isCssOrJsPage = NS_MEDIAWIKI == $ns && preg_match( '!\.(css|js)$!u', $title->getText(), $m );
		if ( $isCssOrJsPage ) {
			$ext = $m[1];
		}

		// hook can force js/css
		wfRunHooks( 'TitleIsCssOrJsPage', array( $title, &$isCssOrJsPage ) );

		// Is this a .js or .css subpage of a user page?
		$isJsCssSubpage = NS_USER == $ns && !$isCssOrJsPage && preg_match( "/\\/.*\\.(js|css)$/", $title->getText(), $m );
		if ( $isJsCssSubpage ) {
			$ext = $m[1];
		}

		// is this wikitext, according to $wgNamespaceContentModels or the DefaultModelFor hook?
		$isWikitext = is_null( $model ) || $model == CONTENT_MODEL_WIKITEXT;
		$isWikitext = $isWikitext && !$isCssOrJsPage && !$isJsCssSubpage;

		// hook can override $isWikitext
		wfRunHooks( 'TitleIsWikitextPage', array( $title, &$isWikitext ) );

		if ( !$isWikitext ) {
			switch ( $ext ) {
				case 'js':
					return CONTENT_MODEL_JAVASCRIPT;
				case 'css':
					return CONTENT_MODEL_CSS;
				default:
					// no recognized extension: use the configured model, or plain text if there is none
					return is_null( $model ) ? CONTENT_MODEL_TEXT : $model;
			}
		}

		// we established that it must be wikitext

		return CONTENT_MODEL_WIKITEXT;
	}

	/**
	 * Returns the handler for the content model of the given title.
	 *
	 * @static
	 * @param $title Title
	 * @return ContentHandler
	 * @throws MWException if no handler is known for the title's content model
	 */
	public static function getForTitle( Title $title ) {
		$modelName = $title->getContentModelName();
		return ContentHandler::getForModelName( $modelName );
	}

	/**
	 * Returns the handler for the content model of the given Content object.
	 *
	 * @static
	 * @param $content Content
	 * @return ContentHandler
	 * @throws MWException if no handler is known for the content's model
	 */
	public static function getForContent( Content $content ) {
		$modelName = $content->getModelName();
		return ContentHandler::getForModelName( $modelName );
	}

	/**
	 * Returns the handler registered in $wgContentHandlers for the given model name.
	 * If none is registered, the ContentHandlerForModelName hook is asked for one.
	 * Entries given as class names are instantiated on first use and the instance
	 * is stored back into $wgContentHandlers.
	 *
	 * @static
	 * @param $modelName String the name of the content model for which to get a handler. Use CONTENT_MODEL_XXX constants.
	 * @return ContentHandler
	 * @throws MWException if no handler is registered and the hook does not supply one
	 */
	public static function getForModelName( $modelName ) {
		global $wgContentHandlers;

		if ( empty( $wgContentHandlers[$modelName] ) ) {
			$handler = null;

			// TODO: document new hook
			wfRunHooks( 'ContentHandlerForModelName', array( $modelName, &$handler ) );

			if ( $handler ) { // NOTE: may be a string or an object, either is fine!
				$wgContentHandlers[$modelName] = $handler;
			} else {
				throw new MWException( "No handler for model $modelName registered in \$wgContentHandlers" );
			}
		}

		if ( is_string( $wgContentHandlers[$modelName] ) ) {
			// lazily replace the class name with an instance
			$class = $wgContentHandlers[$modelName];
			$wgContentHandlers[$modelName] = new $class( $modelName );
		}

		return $wgContentHandlers[$modelName];
	}

	// ----------------------------------------------------------------------------------------------------------

	/**
	 * @param $modelName String the name of the content model handled by this handler
	 * @param $formats Array the supported serialization formats; the first one is the default
	 */
	public function __construct( $modelName, $formats ) {
		$this->mModelName = $modelName;
		$this->mSupportedFormats = $formats;
	}

	/**
	 * Returns the name of the content model handled by this handler.
	 *
	 * @return String
	 */
	public function getModelName() {
		// for wikitext: wikitext; in the future: wikiast, wikidom?
		// for wikidata: wikidata
		return $this->mModelName;
	}

	/**
	 * Asserts that the given model name is the one handled by this handler.
	 *
	 * @param $modelName String
	 * @throws MWException if $modelName is not identical to this handler's model name
	 */
	protected function checkModelName( $modelName ) {
		if ( $modelName !== $this->mModelName ) {
			throw new MWException( "Bad content model: expected " . $this->mModelName . " but got " . $modelName );
		}
	}

	/**
	 * Returns the serialization formats supported by this handler.
	 *
	 * @return Array
	 */
	public function getSupportedFormats() {
		// for wikitext: "text/x-mediawiki-1", "text/x-mediawiki-2", etc
		// for wikidata: "application/json", "application/x-php", etc
		return $this->mSupportedFormats;
	}

	/**
	 * Returns the default serialization format, i.e. the first supported format.
	 *
	 * @return String
	 */
	public function getDefaultFormat() {
		return $this->mSupportedFormats[0];
	}

	/**
	 * Tells whether the given format is supported by this handler.
	 *
	 * @param $format String|null the format to check; an empty value means "use the default"
	 * @return Boolean
	 */
	public function isSupportedFormat( $format ) {

		if ( !$format ) {
			return true; // this means "use the default"
		}

		return in_array( $format, $this->mSupportedFormats );
	}

	/**
	 * Asserts that the given format is supported by this handler.
	 *
	 * @param $format String|null
	 * @throws MWException if the format is not supported
	 */
	protected function checkFormat( $format ) {
		if ( !$this->isSupportedFormat( $format ) ) {
			throw new MWException( "Format $format is not supported for content model " . $this->getModelName() );
		}
	}

	/**
	 * Serializes the given content into the given format.
	 *
	 * @abstract
	 * @param $content Content
	 * @param $format String|null the serialization format; null means the default format
	 * @return String
	 */
	public abstract function serialize( Content $content, $format = null );

	/**
	 * Creates a Content object from its serialized form.
	 *
	 * @abstract
	 * @param $blob String
	 * @param $format String|null the serialization format; null means the default format
	 * @return Content
	 */
	public abstract function unserialize( $blob, $format = null );

	/**
	 * Creates an empty Content object of the model handled by this handler.
	 *
	 * @abstract
	 * @return Content
	 */
	public abstract function emptyContent();

	/**
	 * Return an Article object suitable for viewing the given object
	 *
	 * NOTE: does *not* do special handling for Image and Category pages!
	 *       Use Article::newFromTitle() for that!
	 *
	 * @param $title Title
	 * @return Article
	 * @throws MWException if the title's content model is not the one handled here
	 * @todo Article is being refactored into an action class, keep track of that
	 */
	public function createArticle( Title $title ) {
		$this->checkModelName( $title->getContentModelName() );

		$article = new Article( $title );
		return $article;
	}

	/**
	 * Return an EditPage object suitable for editing the given object
	 *
	 * @param $article Article
	 * @return EditPage
	 * @throws MWException if the article's content model is not the one handled here
	 */
	public function createEditPage( Article $article ) {
		$this->checkModelName( $article->getContentModelName() );

		$editPage = new EditPage( $article );
		return $editPage;
	}

	/**
	 * Return an ExternalEdit object suitable for editing the given object
	 *
	 * @param $context IContextSource
	 * @return ExternalEdit
	 * @throws MWException if the content model of the context's title is not the one handled here
	 */
	public function createExternalEdit( IContextSource $context ) {
		// Title exposes its model via getContentModelName(), as used everywhere else in this class.
		$this->checkModelName( $context->getTitle()->getContentModelName() );

		$externalEdit = new ExternalEdit( $context );
		return $externalEdit;
	}

	/**
	 * Factory
	 * @param $context IContextSource context to use, anything else will be ignored
	 * @param $old Integer old ID we want to show and diff with.
	 * @param $new String either 'prev' or 'next'.
	 * @param $rcid Integer ??? FIXME (default 0)
	 * @param $refreshCache boolean If set, refreshes the diff cache
	 * @param $unhide boolean If set, allow viewing deleted revs
	 *
	 * @return DifferenceEngine
	 * @throws MWException if the content model of the context's title is not the one handled here
	 */
	public function getDifferenceEngine( IContextSource $context, $old = 0, $new = 0, $rcid = 0, #FIXME: use everywhere!
										$refreshCache = false, $unhide = false ) {

		// Title exposes its model via getContentModelName(), as used everywhere else in this class.
		$this->checkModelName( $context->getTitle()->getContentModelName() );

		return new DifferenceEngine( $context, $old, $new, $rcid, $refreshCache, $unhide );
	}

	/**
	 * Attempts to merge differences between three versions.
	 * Returns a new Content object for a clean merge and false for failure or a conflict.
	 *
	 * This default implementation always returns false.
	 *
	 * @param $oldContent Content
	 * @param $myContent Content
	 * @param $yourContent Content
	 * @return Content|Bool
	 */
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
		return false;
	}

	/**
	 * Return an applicable autosummary if one exists for the given edit.
	 *
	 * @param $oldContent Content|null: the previous content of the page, if any.
	 * @param $newContent Content|null: the submitted content of the page.
	 * @param $flags Int bitmask: a bitmask of flags submitted for the edit.
	 *
	 * @return string An appropriate autosummary, or an empty string.
	 */
	public function getAutosummary( Content $oldContent = null, Content $newContent = null, $flags ) {
		// Without new content there is nothing to summarize.
		if ( is_null( $newContent ) ) {
			return '';
		}

		// Decide what kind of autosummary is needed.

		// Redirect autosummaries

		$ot = !is_null( $oldContent ) ? $oldContent->getRedirectTarget() : false;
		$rt = $newContent->getRedirectTarget();

		if ( is_object( $rt ) && ( !is_object( $ot ) || !$rt->equals( $ot ) || $ot->getFragment() != $rt->getFragment() ) ) {

			$truncatedtext = $newContent->getTextForSummary(
				250
					- strlen( wfMsgForContent( 'autoredircomment' ) )
					- strlen( $rt->getFullText() ) );

			return wfMsgForContent( 'autoredircomment', $rt->getFullText(), $truncatedtext );
		}

		$newSize = $newContent->getSize();

		// New page autosummaries
		if ( ( $flags & EDIT_NEW ) && $newSize > 0 ) {
			// If they're making a new article, give its text, truncated, in the summary.

			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMsgForContent( 'autosumm-new' ) ) );

			return wfMsgForContent( 'autosumm-new', $truncatedtext );
		}

		// Blanking autosummaries; these compare against the old content, so they need one
		if ( !is_null( $oldContent ) ) {
			$oldSize = $oldContent->getSize();

			if ( $oldSize > 0 && $newSize == 0 ) {
				return wfMsgForContent( 'autosumm-blank' );
			} elseif ( $oldSize > 10 * $newSize && $newSize < 500 ) {
				// Removing more than 90% of the article

				$truncatedtext = $newContent->getTextForSummary(
					200 - strlen( wfMsgForContent( 'autosumm-replace' ) ) );

				return wfMsgForContent( 'autosumm-replace', $truncatedtext );
			}
		}

		// If we reach this point, there's no applicable autosummary for our case, so our
		// autosummary is empty.

		return '';
	}

	/**
	 * Auto-generates a deletion reason
	 *
	 * @param $title Title: the page's title
	 * @param &$hasHistory Boolean: whether the page has a history
	 * @return mixed String containing deletion reason or empty string, or boolean false
	 *    if no revision occurred
	 */
	public function getAutoDeleteReason( Title $title, &$hasHistory ) {
		$dbw = wfGetDB( DB_MASTER );

		// Get the last revision
		$rev = Revision::newFromTitle( $title );

		if ( is_null( $rev ) ) {
			return false;
		}

		// Get the article's contents
		$content = $rev->getContent();
		$blank = false;

		// If the page is blank, use the text from the previous revision,
		// which can only be blank if there's a move/import/protect dummy revision involved
		if ( $content->getSize() == 0 ) {
			$prev = $rev->getPrevious();

			if ( $prev ) {
				$content = $prev->getContent();
				$blank = true;
			}
		}

		// Find out if there was only one contributor
		// Only scan the last 20 revisions
		$res = $dbw->select( 'revision', 'rev_user_text',
			array( 'rev_page' => $title->getArticleID(), $dbw->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0' ),
			__METHOD__,
			array( 'LIMIT' => 20 )
		);

		if ( $res === false ) {
			// This page has no revisions, which is very weird
			return false;
		}

		$hasHistory = ( $res->numRows() > 1 );
		$row = $dbw->fetchObject( $res );

		if ( $row ) { // $row is false if the only contributor is hidden
			$onlyAuthor = $row->rev_user_text;
			// Try to find a second contributor
			foreach ( $res as $row ) {
				if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999
					$onlyAuthor = false;
					break;
				}
			}
		} else {
			$onlyAuthor = false;
		}

		// Generate the summary with a '$1' placeholder
		if ( $blank ) {
			// The current revision is blank, so the summary quotes the content
			// of the previous revision instead.
			$reason = wfMsgForContent( 'exbeforeblank', '$1' );
		} else {
			if ( $onlyAuthor ) {
				$reason = wfMsgForContent( 'excontentauthor', '$1', $onlyAuthor );
			} else {
				$reason = wfMsgForContent( 'excontent', '$1' );
			}
		}

		if ( $reason == '-' ) {
			// Allow these UI messages to be blanked out cleanly
			return '';
		}

		// Max content length = max comment length - length of the comment (excl. $1)
		$text = $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) );

		// Now replace the '$1' placeholder
		$reason = str_replace( '$1', $text, $reason );

		return $reason;
	}

	/**
	 * Get the Content object that needs to be saved in order to undo all revisions
	 * between $undo and $undoafter. Revisions must belong to the same page,
	 * must exist and must not be deleted
	 *
	 * @param $current Revision the current revision of the page
	 * @param $undo Revision
	 * @param $undoafter null|Revision Must be an earlier revision than $undo
	 * @return mixed Content on success, false on failure
	 */
	public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter = null ) {
		$cur_content = $current->getContent();

		if ( empty( $cur_content ) ) {
			return false; // no page
		}

		if ( is_null( $undoafter ) ) {
			return false; // no revision to go back to, so nothing can be undone
		}

		$undo_content = $undo->getContent();
		$undoafter_content = $undoafter->getContent();

		if ( $cur_content->equals( $undo_content ) ) {
			// No use doing a merge if it's just a straight revert.
			return $undoafter_content;
		}

		$undone_content = $this->merge3( $undo_content, $undoafter_content, $cur_content );

		return $undone_content;
	}
}
466
467
/**
 * Base class for handlers of text based content, where the serialized form
 * of a Content object is simply its native data.
 */
abstract class TextContentHandler extends ContentHandler {

	/**
	 * @param $modelName String the name of the content model handled by this handler
	 * @param $formats Array the supported serialization formats; the first one is the default
	 */
	public function __construct( $modelName, $formats ) {
		parent::__construct( $modelName, $formats );
	}

	/**
	 * Returns the native data of the given content as its serialized form.
	 *
	 * @param $content Content
	 * @param $format String|null the serialization format; null means the default format
	 * @return String
	 * @throws MWException if the format is not supported
	 */
	public function serialize( Content $content, $format = null ) {
		$this->checkFormat( $format );
		return $content->getNativeData();
	}

	/**
	 * Attempts to merge differences between three versions.
	 * Returns a new Content object for a clean merge and false for failure or a conflict.
	 *
	 * This text-based implementation uses wfMerge().
	 *
	 * @param $oldContent Content
	 * @param $myContent Content
	 * @param $yourContent Content
	 * @return Content|Bool
	 * @throws MWException if any of the three Content objects uses a model other than this handler's
	 */
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
		// All three versions get serialized by this handler, so all of them must use its model.
		$this->checkModelName( $oldContent->getModelName() );
		$this->checkModelName( $myContent->getModelName() );
		$this->checkModelName( $yourContent->getModelName() );

		$format = $this->getDefaultFormat();

		$old = $this->serialize( $oldContent, $format );
		$mine = $this->serialize( $myContent, $format );
		$yours = $this->serialize( $yourContent, $format );

		$ok = wfMerge( $old, $mine, $yours, $result );

		if ( !$ok ) {
			return false;
		}

		if ( !$result ) {
			// the merge result is empty
			return $this->emptyContent();
		}

		$mergedContent = $this->unserialize( $result, $format );
		return $mergedContent;
	}

}
/**
 * Content handler for wikitext pages.
 */
class WikitextContentHandler extends TextContentHandler {

	/**
	 * @param $modelName String the content model name; defaults to CONTENT_MODEL_WIKITEXT
	 */
	public function __construct( $modelName = CONTENT_MODEL_WIKITEXT ) {
		$supportedFormats = array( 'application/x-wikitext' ); #FIXME: mime

		parent::__construct( $modelName, $supportedFormats );
	}

	/**
	 * Wraps the given wikitext in a WikitextContent object.
	 *
	 * @param $text String
	 * @param $format String|null the serialization format; null means the default format
	 * @return WikitextContent
	 * @throws MWException if the format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		$this->checkFormat( $format );

		$content = new WikitextContent( $text );
		return $content;
	}

	/**
	 * Creates a WikitextContent object holding the empty string.
	 *
	 * @return WikitextContent
	 */
	public function emptyContent() {
		$content = new WikitextContent( '' );
		return $content;
	}

}
534
535 #TODO: make ScriptContentHandler base class with plugin interface for syntax highlighting!
536
/**
 * Content handler for JavaScript pages.
 */
class JavaScriptContentHandler extends TextContentHandler {

	/**
	 * @param $modelName String the content model name; defaults to CONTENT_MODEL_JAVASCRIPT
	 */
	public function __construct( $modelName = CONTENT_MODEL_JAVASCRIPT ) {
		parent::__construct( $modelName, array( 'text/javascript' ) ); #XXX: or use $wgJsMimeType? this is for internal storage, not HTTP...
	}

	/**
	 * Wraps the given JavaScript code in a JavaScriptContent object.
	 *
	 * @param $text String
	 * @param $format String|null the serialization format; null means the default format
	 * @return JavaScriptContent
	 * @throws MWException if the format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		// reject unsupported formats, like the wikitext handler does
		$this->checkFormat( $format );

		return new JavaScriptContent( $text );
	}

	/**
	 * Creates a JavaScriptContent object holding the empty string.
	 *
	 * @return JavaScriptContent
	 */
	public function emptyContent() {
		return new JavaScriptContent( '' );
	}
}
551
/**
 * Content handler for CSS pages.
 */
class CssContentHandler extends TextContentHandler {

	/**
	 * @param $modelName String the content model name; defaults to CONTENT_MODEL_CSS
	 */
	public function __construct( $modelName = CONTENT_MODEL_CSS ) {
		parent::__construct( $modelName, array( 'text/css' ) );
	}

	/**
	 * Wraps the given CSS code in a CssContent object.
	 *
	 * @param $text String
	 * @param $format String|null the serialization format; null means the default format
	 * @return CssContent
	 * @throws MWException if the format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		// reject unsupported formats, like the wikitext handler does
		$this->checkFormat( $format );

		return new CssContent( $text );
	}

	/**
	 * Creates a CssContent object holding the empty string.
	 *
	 * @return CssContent
	 */
	public function emptyContent() {
		return new CssContent( '' );
	}

}