minor cleanup
[lhc/web/wiklou.git] / includes / ContentHandler.php
1 <?php
2
/**
 * Marker exception: a subclass of MWException that adds no members of its own.
 *
 * NOTE(review): presumably meant to be thrown when content cannot be
 * (un)serialized; nothing visible in this file throws it, so confirm against callers.
 */
class MWContentSerializationException extends MWException {}
6
7
/**
 * A content handler knows how to deal with a specific type of content on a wiki page.
 * Content is stored in the database in a serialized form (using a serialization format,
 * a.k.a. MIME type) and is unserialized into its native PHP representation (the content model).
 *
 * Some content types have a flat model, that is, their native representation is the
 * same as their serialized form. Examples would be JavaScript and CSS code. As of now,
 * this also applies to wikitext (MediaWiki's default content type), but wikitext
 * content may be represented by a DOM or AST structure in the future.
 *
 * TODO: add documentation
 */
abstract class ContentHandler {

	/**
	 * @var String name of the content model this handler was constructed for
	 */
	protected $mModelName;

	/**
	 * @var Array serialization formats given to the constructor; index 0 is used as the default
	 */
	protected $mSupportedFormats;

	/**
	 * Serializes the given Content object into a string.
	 *
	 * @abstract
	 * @param Content $content the content to serialize
	 * @param String|null $format the serialization format; null means "use the default"
	 * @return String
	 */
	abstract public function serialize( Content $content, $format = null );

	/**
	 * Turns a serialized blob back into a Content object.
	 *
	 * TODO: calling unserialize on a ContentHandler returns a Content?!! Something looks wrong here...
	 *
	 * @abstract
	 * @param String $blob the serialized content
	 * @param String|null $format the serialization format; null means "use the default"
	 * @return Content
	 */
	abstract public function unserialize( $blob, $format = null );

	/**
	 * Creates a new, empty Content object (see the concrete handlers in this file).
	 *
	 * FIXME: bad method name: suggests it empties the content of an instance rather than creating a new empty one
	 *
	 * @abstract
	 * @return Content
	 */
	abstract public function emptyContent();

	/**
	 * Convenience accessor for the text of a Content object.
	 *
	 * - null content yields an empty string;
	 * - TextContent yields its native data;
	 * - for anything else the result depends on $wgContentHandlerTextFallback:
	 *   'fail' throws, 'serialize' returns the serialized form, any other value returns null.
	 *
	 * @static
	 * @param Content|null $content
	 * @return String|null
	 * @throws MWException if the content is not text based and the fallback mode is 'fail'
	 */
	public static function getContentText( Content $content = null ) {
		global $wgContentHandlerTextFallback;

		if ( is_null( $content ) ) {
			return '';
		}

		if ( $content instanceof TextContent ) {
			return $content->getNativeData();
		}

		if ( $wgContentHandlerTextFallback == 'fail' ) {
			throw new MWException( "Attempt to get text from Content with model " . $content->getModelName() );
		}

		if ( $wgContentHandlerTextFallback == 'serialize' ) {
			return $content->serialize();
		}

		return null;
	}

	/**
	 * Builds a Content object from serialized text by delegating to the handler
	 * registered for the content model.
	 *
	 * @static
	 * @param String $text the serialized content
	 * @param Title $title used to look up the content model when $modelName is null
	 * @param String|null $modelName the content model; null means "use the title's model"
	 * @param String|null $format the serialization format, passed through to unserialize()
	 * @return Content
	 */
	public static function makeContent( $text, Title $title, $modelName = null, $format = null ) {

		if ( is_null( $modelName ) ) {
			$modelName = $title->getContentModelName();
		}

		$handler = ContentHandler::getForModelName( $modelName );
		return $handler->unserialize( $text, $format );
	}

	/**
	 * Determines the default content model for the given title, based on
	 * $wgNamespaceContentModels, the title's namespace and extension, and several hooks.
	 *
	 * @static
	 * @param Title $title
	 * @return String one of the CONTENT_MODEL_XXX constants, or a model name taken
	 *   from $wgNamespaceContentModels / the DefaultModelFor hook
	 */
	public static function getDefaultModelFor( Title $title ) {
		global $wgNamespaceContentModels;

		// NOTE: this method must not rely on $title->getContentModelName() directly or indirectly,
		//       because it is used to initialize the mContentModelName member.

		$ns = $title->getNamespace();

		$ext = false;
		$m = null;
		$model = null;

		if ( !empty( $wgNamespaceContentModels[ $ns ] ) ) {
			$model = $wgNamespaceContentModels[ $ns ];
		}

		// hook can determine default model; the model is only returned right away
		// if the hook run returned false and a model is set
		if ( !wfRunHooks( 'DefaultModelFor', array( $title, &$model ) ) ) { #FIXME: document new hook!
			if ( !is_null( $model ) ) {
				return $model;
			}
		}

		// Could this page contain custom CSS or JavaScript, based on the title?
		$isCssOrJsPage = NS_MEDIAWIKI == $ns && preg_match( '!\.(css|js)$!u', $title->getText(), $m );
		if ( $isCssOrJsPage ) {
			$ext = $m[1];
		}

		// hook can force js/css
		wfRunHooks( 'TitleIsCssOrJsPage', array( $title, &$isCssOrJsPage ) );

		// Is this a .js or .css subpage of a user page?
		$isJsCssSubpage = NS_USER == $ns && !$isCssOrJsPage && preg_match( "/\\/.*\\.(js|css)$/", $title->getText(), $m );
		if ( $isJsCssSubpage ) {
			$ext = $m[1];
		}

		// is this wikitext, according to $wgNamespaceContentModels or the DefaultModelFor hook?
		$isWikitext = is_null( $model ) || $model == CONTENT_MODEL_WIKITEXT;
		$isWikitext = $isWikitext && !$isCssOrJsPage && !$isJsCssSubpage;

		// hook can override $isWikitext
		wfRunHooks( 'TitleIsWikitextPage', array( $title, &$isWikitext ) );

		if ( !$isWikitext ) {
			switch ( $ext ) {
				case 'js':
					return CONTENT_MODEL_JAVASCRIPT;
				case 'css':
					return CONTENT_MODEL_CSS;
				default:
					// no recognized extension (e.g. a hook forced the decision):
					// fall back to the configured model, or plain text
					return is_null( $model ) ? CONTENT_MODEL_TEXT : $model;
			}
		}

		// we established that it must be wikitext

		return CONTENT_MODEL_WIKITEXT;
	}

	/**
	 * Returns the handler for the content model of the given title.
	 *
	 * @static
	 * @param Title $title
	 * @return ContentHandler
	 */
	public static function getForTitle( Title $title ) {
		$modelName = $title->getContentModelName();
		return ContentHandler::getForModelName( $modelName );
	}

	/**
	 * Returns the handler for the content model of the given Content object.
	 *
	 * @static
	 * @param Content $content
	 * @return ContentHandler
	 */
	public static function getForContent( Content $content ) {
		$modelName = $content->getModelName();
		return ContentHandler::getForModelName( $modelName );
	}

	/**
	 * Returns the handler registered in $wgContentHandlers for the given model.
	 * Entries given as class names are instantiated on first use and the instance
	 * is stored back into $wgContentHandlers.
	 *
	 * @static
	 * @param $modelName String the name of the content model for which to get a handler. Use CONTENT_MODEL_XXX constants.
	 * @return ContentHandler
	 * @throws MWException if no handler is registered and the hook supplies none
	 */
	public static function getForModelName( $modelName ) {
		global $wgContentHandlers;

		if ( empty( $wgContentHandlers[$modelName] ) ) {
			$handler = null;

			// TODO: document new hook
			wfRunHooks( 'ContentHandlerForModelName', array( $modelName, &$handler ) );

			if ( $handler ) { // NOTE: may be a string or an object, either is fine!
				$wgContentHandlers[$modelName] = $handler;
			} else {
				throw new MWException( "No handler for model $modelName registered in \$wgContentHandlers" );
			}
		}

		if ( is_string( $wgContentHandlers[$modelName] ) ) {
			$class = $wgContentHandlers[$modelName];
			$wgContentHandlers[$modelName] = new $class( $modelName );
		}

		return $wgContentHandlers[$modelName];
	}

	// ----------------------------------------------------------------------------------------------------------

	/**
	 * @param String $modelName name of the content model handled by this instance
	 * @param Array $formats supported serialization formats; the first one is the default
	 */
	public function __construct( $modelName, $formats ) {
		$this->mModelName = $modelName;
		$this->mSupportedFormats = $formats;
	}

	/**
	 * @return String the model name given to the constructor
	 */
	public function getModelName() {
		// for wikitext: wikitext; in the future: wikiast, wikidom?
		// for wikidata: wikidata
		return $this->mModelName;
	}

	/**
	 * Throws if the given model name is not (strictly) the one this handler was built for.
	 *
	 * @param String $modelName
	 * @throws MWException
	 */
	protected function checkModelName( $modelName ) {
		if ( $modelName !== $this->mModelName ) {
			throw new MWException( "Bad content model: expected " . $this->mModelName . " but got " . $modelName );
		}
	}

	/**
	 * @return Array the formats given to the constructor
	 */
	public function getSupportedFormats() {
		// for wikitext: "text/x-mediawiki-1", "text/x-mediawiki-2", etc
		// for wikidata: "application/json", "application/x-php", etc
		return $this->mSupportedFormats;
	}

	/**
	 * @return String the first of the supported formats
	 */
	public function getDefaultFormat() {
		return $this->mSupportedFormats[0];
	}

	/**
	 * @param String|null $format
	 * @return bool true if $format is falsy (meaning "use the default") or listed as supported
	 */
	public function isSupportedFormat( $format ) {

		if ( !$format ) {
			return true; // this means "use the default"
		}

		return in_array( $format, $this->mSupportedFormats );
	}

	/**
	 * Throws if the given format is not supported by this handler.
	 *
	 * @param String|null $format
	 * @throws MWException
	 */
	protected function checkFormat( $format ) {
		if ( !$this->isSupportedFormat( $format ) ) {
			throw new MWException( "Format $format is not supported for content model " . $this->getModelName() );
		}
	}

	/**
	 * Return an Article object suitable for viewing the given object
	 *
	 * NOTE: does *not* do special handling for Image and Category pages!
	 *       Use Article::newFromTitle() for that!
	 *
	 * @param Title $title
	 * @return Article
	 * @throws MWException if the title's content model does not match this handler
	 * @todo Article is being refactored into an action class, keep track of that
	 */
	public function createArticle( Title $title ) {
		$this->checkModelName( $title->getContentModelName() );

		$article = new Article( $title );
		return $article;
	}

	/**
	 * Return an EditPage object suitable for editing the given object
	 *
	 * @param Article $article
	 * @return EditPage
	 * @throws MWException if the article's content model does not match this handler
	 */
	public function createEditPage( Article $article ) {
		$this->checkModelName( $article->getContentModelName() );

		$editPage = new EditPage( $article );
		return $editPage;
	}

	/**
	 * Return an ExternalEdit object suitable for editing the given object
	 *
	 * @param IContextSource $context
	 * @return ExternalEdit
	 * @throws MWException if the context title's content model does not match this handler
	 */
	public function createExternalEdit( IContextSource $context ) {
		// Title exposes its model via getContentModelName(), as used everywhere else in this class
		$this->checkModelName( $context->getTitle()->getContentModelName() );

		$externalEdit = new ExternalEdit( $context );
		return $externalEdit;
	}

	/**
	 * Factory
	 * @param $context IContextSource context to use, anything else will be ignored
	 * @param $old Integer old ID we want to show and diff with.
	 * @param $new String either 'prev' or 'next'.
	 * @param $rcid Integer ??? FIXME (default 0)
	 * @param $refreshCache boolean If set, refreshes the diff cache
	 * @param $unhide boolean If set, allow viewing deleted revs
	 *
	 * @return DifferenceEngine
	 * @throws MWException if the context title's content model does not match this handler
	 */
	public function getDifferenceEngine( IContextSource $context, $old = 0, $new = 0, $rcid = 0, #FIXME: use everywhere!
										 $refreshCache = false, $unhide = false ) {

		// Title exposes its model via getContentModelName(), as used everywhere else in this class
		$this->checkModelName( $context->getTitle()->getContentModelName() );

		$diffEngineClass = $this->getDiffEngineClass();

		return new $diffEngineClass( $context, $old, $new, $rcid, $refreshCache, $unhide );
	}

	/**
	 * Returns the name of the diff engine to use.
	 *
	 * @since 0.1
	 *
	 * @return string
	 */
	protected function getDiffEngineClass() {
		return 'DifferenceEngine';
	}

	/**
	 * attempts to merge differences between three versions.
	 * Returns a new Content object for a clean merge and false for failure or a conflict.
	 *
	 * This default implementation always returns false.
	 *
	 * @param $oldContent Content
	 * @param $myContent Content
	 * @param $yourContent Content
	 * @return Content|Bool
	 */
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
		return false;
	}

	/**
	 * Return an applicable autosummary if one exists for the given edit.
	 *
	 * A null content object is treated as having no redirect target and a size of 0.
	 *
	 * @param $oldContent Content|null: the previous text of the page.
	 * @param $newContent Content|null: The submitted text of the page.
	 * @param $flags Int bitmask: a bitmask of flags submitted for the edit.
	 *
	 * @return string An appropriate autosummary, or an empty string.
	 */
	public function getAutosummary( Content $oldContent = null, Content $newContent = null, $flags ) {
		// Decide what kind of autosummary is needed.

		// Redirect autosummaries
		// (check the content objects themselves, not the not-yet-assigned $ot/$rt)
		$ot = !is_null( $oldContent ) ? $oldContent->getRedirectTarget() : false;
		$rt = !is_null( $newContent ) ? $newContent->getRedirectTarget() : false;

		if ( is_object( $rt ) && ( !is_object( $ot ) || !$rt->equals( $ot ) || $ot->getFragment() != $rt->getFragment() ) ) {

			$truncatedtext = $newContent->getTextForSummary(
				250
				- strlen( wfMsgForContent( 'autoredircomment' ) )
				- strlen( $rt->getFullText() ) );

			return wfMsgForContent( 'autoredircomment', $rt->getFullText(), $truncatedtext );
		}

		// Missing content counts as empty, so the checks below never dereference null.
		// Every branch that touches $newContent requires $newSize > 0, i.e. non-null content.
		$oldSize = is_null( $oldContent ) ? 0 : $oldContent->getSize();
		$newSize = is_null( $newContent ) ? 0 : $newContent->getSize();

		// New page autosummaries
		if ( $newSize > 0 && ( $flags & EDIT_NEW ) ) {
			// If they're making a new article, give its text, truncated, in the summary.

			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMsgForContent( 'autosumm-new' ) ) );

			return wfMsgForContent( 'autosumm-new', $truncatedtext );
		}

		// Blanking autosummaries
		if ( $oldSize > 0 && $newSize == 0 ) {
			return wfMsgForContent( 'autosumm-blank' );
		} elseif ( $oldSize > 10 * $newSize && $newSize < 500 ) {
			// Removing more than 90% of the article

			$truncatedtext = $newContent->getTextForSummary(
				200 - strlen( wfMsgForContent( 'autosumm-replace' ) ) );

			return wfMsgForContent( 'autosumm-replace', $truncatedtext );
		}

		// If we reach this point, there's no applicable autosummary for our case, so our
		// autosummary is empty.

		return '';
	}

	/**
	 * Auto-generates a deletion reason
	 *
	 * @param $title Title: the page's title
	 * @param &$hasHistory Boolean: whether the page has a history
	 * @return mixed String containing deletion reason or empty string, or boolean false
	 *    if no revision occurred
	 */
	public function getAutoDeleteReason( Title $title, &$hasHistory ) {
		$dbw = wfGetDB( DB_MASTER );

		// Get the last revision
		$rev = Revision::newFromTitle( $title );

		if ( is_null( $rev ) ) {
			return false;
		}

		// Get the article's contents
		$content = $rev->getContent();
		$blank = false;

		// If the page is blank, use the text from the previous revision,
		// which can only be blank if there's a move/import/protect dummy revision involved
		if ( $content->getSize() == 0 ) {
			$prev = $rev->getPrevious();

			if ( $prev ) {
				// take the content of the *previous* revision, not the blank current one again
				$content = $prev->getContent();
				$blank = true;
			}
		}

		// Find out if there was only one contributor
		// Only scan the last 20 revisions
		$res = $dbw->select( 'revision', 'rev_user_text',
			array( 'rev_page' => $title->getArticleID(), $dbw->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0' ),
			__METHOD__,
			array( 'LIMIT' => 20 )
		);

		if ( $res === false ) {
			// This page has no revisions, which is very weird
			return false;
		}

		$hasHistory = ( $res->numRows() > 1 );
		$row = $dbw->fetchObject( $res );

		if ( $row ) { // $row is false if the only contributor is hidden
			$onlyAuthor = $row->rev_user_text;
			// Try to find a second contributor
			foreach ( $res as $row ) {
				if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999
					$onlyAuthor = false;
					break;
				}
			}
		} else {
			$onlyAuthor = false;
		}

		// Generate the summary with a '$1' placeholder
		if ( $blank ) {
			// The current revision is blank, so the summary quotes the content
			// of the revision before it
			$reason = wfMsgForContent( 'exbeforeblank', '$1' );
		} else {
			if ( $onlyAuthor ) {
				$reason = wfMsgForContent( 'excontentauthor', '$1', $onlyAuthor );
			} else {
				$reason = wfMsgForContent( 'excontent', '$1' );
			}
		}

		if ( $reason == '-' ) {
			// Allow these UI messages to be blanked out cleanly
			return '';
		}

		// Max content length = max comment length - length of the comment (excl. $1)
		$text = $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) );

		// Now replace the '$1' placeholder
		$reason = str_replace( '$1', $text, $reason );

		return $reason;
	}

	/**
	 * Get the Content object that needs to be saved in order to undo all revisions
	 * between $undo and $undoafter. Revisions must belong to the same page,
	 * must exist and must not be deleted
	 *
	 * @param $current Revision the revision whose content the undo is applied to
	 * @param $undo Revision
	 * @param $undoafter null|Revision Must be an earlier revision than $undo
	 * @return Content|Bool the resulting content on success, false on failure
	 *   (including a missing $undoafter or a failed three-way merge)
	 */
	public function getUndoContent( Revision $current, Revision $undo, Revision $undoafter = null ) {
		$cur_content = $current->getContent();

		if ( empty( $cur_content ) ) {
			return false; // no page
		}

		if ( is_null( $undoafter ) ) {
			// nothing to revert to; report failure instead of calling a method on null
			return false;
		}

		$undo_content = $undo->getContent();
		$undoafter_content = $undoafter->getContent();

		if ( $cur_content->equals( $undo_content ) ) {
			// No use doing a merge if it's just a straight revert.
			return $undoafter_content;
		}

		$undone_content = $this->merge3( $undo_content, $undoafter_content, $cur_content );

		return $undone_content;
	}
}
485
486
/**
 * Base class for handlers of text based content: serialization simply returns
 * the content's native data, and three-way merges are done on the serialized text.
 */
abstract class TextContentHandler extends ContentHandler {

	/**
	 * @param String $modelName name of the content model handled by this instance
	 * @param Array $formats supported serialization formats
	 */
	public function __construct( $modelName, $formats ) {
		parent::__construct( $modelName, $formats );
	}

	/**
	 * Returns the native data of the given content after validating the format.
	 *
	 * @param Content $content
	 * @param String|null $format the serialization format; null means "use the default"
	 * @return String
	 * @throws MWException if the format is not supported
	 */
	public function serialize( Content $content, $format = null ) {
		$this->checkFormat( $format );
		return $content->getNativeData();
	}

	/**
	 * attempts to merge differences between three versions.
	 * Returns a new Content object for a clean merge and false for failure or a conflict.
	 *
	 * This text-based implementation uses wfMerge().
	 *
	 * @param $oldContent Content
	 * @param $myContent Content
	 * @param $yourContent Content
	 * @return Content|Bool
	 * @throws MWException if $oldContent does not use this handler's content model
	 */
	public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
		$this->checkModelName( $oldContent->getModelName() );
		#TODO: check that all Content objects have the same content model! #XXX: what to do if they don't?

		$format = $this->getDefaultFormat();

		$old = $this->serialize( $oldContent, $format );
		$mine = $this->serialize( $myContent, $format );
		$yours = $this->serialize( $yourContent, $format );

		$result = null; // filled in by wfMerge()
		$ok = wfMerge( $old, $mine, $yours, $result );

		if ( !$ok ) {
			return false;
		}

		// Only a genuinely empty merge result maps to empty content; a plain
		// truthiness test would also discard a merged text of "0".
		if ( is_null( $result ) || $result === false || $result === '' ) {
			return $this->emptyContent();
		}

		$mergedContent = $this->unserialize( $result, $format );
		return $mergedContent;
	}

}
/**
 * Content handler for wikitext: produces WikitextContent objects and
 * registers 'application/x-wikitext' as its only serialization format.
 */
class WikitextContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName content model name, CONTENT_MODEL_WIKITEXT by default
	 */
	public function __construct( $modelName = CONTENT_MODEL_WIKITEXT ) {
		$formats = array( 'application/x-wikitext' ); #FIXME: mime
		parent::__construct( $modelName, $formats );
	}

	/**
	 * Wraps the given text in a WikitextContent after validating the format.
	 *
	 * @param String $text
	 * @param String|null $format the serialization format; null means "use the default"
	 * @return WikitextContent
	 * @throws MWException if the format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		$this->checkFormat( $format );

		$content = new WikitextContent( $text );
		return $content;
	}

	/**
	 * @return WikitextContent a new content object holding the empty string
	 */
	public function emptyContent() {
		$blank = '';
		return new WikitextContent( $blank );
	}

}
553
554 #TODO: make ScriptContentHandler base class with plugin interface for syntax highlighting!
555
/**
 * Content handler for JavaScript: produces JavaScriptContent objects and
 * registers 'text/javascript' as its only serialization format.
 */
class JavaScriptContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName content model name, CONTENT_MODEL_JAVASCRIPT by default
	 */
	public function __construct( $modelName = CONTENT_MODEL_JAVASCRIPT ) {
		parent::__construct( $modelName, array( 'text/javascript' ) ); #XXX: or use $wgJsMimeType? this is for internal storage, not HTTP...
	}

	/**
	 * Wraps the given text in a JavaScriptContent after validating the format,
	 * mirroring WikitextContentHandler::unserialize().
	 *
	 * @param String $text
	 * @param String|null $format the serialization format; null means "use the default"
	 * @return JavaScriptContent
	 * @throws MWException if the format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		$this->checkFormat( $format );

		return new JavaScriptContent( $text );
	}

	/**
	 * @return JavaScriptContent a new content object holding the empty string
	 */
	public function emptyContent() {
		return new JavaScriptContent( '' );
	}
}
570
/**
 * Content handler for CSS: produces CssContent objects and
 * registers 'text/css' as its only serialization format.
 */
class CssContentHandler extends TextContentHandler {

	/**
	 * @param String $modelName content model name, CONTENT_MODEL_CSS by default
	 */
	public function __construct( $modelName = CONTENT_MODEL_CSS ) {
		parent::__construct( $modelName, array( 'text/css' ) );
	}

	/**
	 * Wraps the given text in a CssContent after validating the format,
	 * mirroring WikitextContentHandler::unserialize().
	 *
	 * @param String $text
	 * @param String|null $format the serialization format; null means "use the default"
	 * @return CssContent
	 * @throws MWException if the format is not supported
	 */
	public function unserialize( $text, $format = null ) {
		$this->checkFormat( $format );

		return new CssContent( $text );
	}

	/**
	 * @return CssContent a new content object holding the empty string
	 */
	public function emptyContent() {
		return new CssContent( '' );
	}

}