merged from master
[lhc/web/wiklou.git] / includes / Content.php
1 <?php
2
3 /**
4 * A content object represents page content, e.g. the text to show on a page.
5 * Content objects have no knowledge about how they relate to Wiki pages.
6 *
7 * @since 1.WD
8 */
abstract class Content {

	/**
	 * Name of the content model this Content object represents.
	 * Use with CONTENT_MODEL_XXX constants.
	 *
	 * @var String $model_name
	 */
	protected $model_name;

	/**
	 * @return String a string representing the content in a way useful for building a full text search index.
	 *         If no useful representation exists, this method returns an empty string.
	 */
	public abstract function getTextForSearchIndex( );

	/**
	 * @return String|bool the wikitext to include when another page includes this content, or false if the
	 *         content is not includable in a wikitext page.
	 *
	 * @TODO: allow native handling, bypassing wikitext representation, like for includable special pages.
	 * @TODO: use in parser, etc!
	 */
	public abstract function getWikitextForTransclusion( );

	/**
	 * Returns a textual representation of the content suitable for use in edit summaries and log messages.
	 *
	 * @param int $maxlength maximum length of the summary text
	 * @return String the summary text
	 */
	public abstract function getTextForSummary( $maxlength = 250 );

	/**
	 * Returns the native representation of the data. Interpretation depends on the content model
	 * used, as given by getModelName().
	 *
	 * @return mixed the native representation of the content. Could be a string, a nested array
	 *         structure, an object, a binary blob... anything, really.
	 *
	 * @NOTE: review all calls carefully, caller must be aware of content model!
	 */
	public abstract function getNativeData( );

	/**
	 * Returns the content's nominal size in bogo-bytes.
	 *
	 * @return int
	 */
	public abstract function getSize( );

	/**
	 * @param String|null $model_name name of the content model (see the CONTENT_MODEL_XXX constants)
	 */
	public function __construct( $model_name = null ) {
		$this->model_name = $model_name;
	}

	/**
	 * Returns the name of the content model used by this Content object.
	 * Corresponds to the CONTENT_MODEL_XXX constants.
	 *
	 * @return String the model name
	 */
	public function getModelName() {
		return $this->model_name;
	}

	/**
	 * Throws an MWException if $model_name is not the name of the content model
	 * supported by this Content object. The comparison is strict (===).
	 *
	 * @param String $model_name the model to check
	 * @throws MWException if the model names differ
	 */
	protected function checkModelName( $model_name ) {
		if ( $model_name !== $this->model_name ) {
			throw new MWException( "Bad content model: expected " . $this->model_name . " but got " . $model_name );
		}
	}

	/**
	 * Convenience method that returns the ContentHandler singleton for handling the content
	 * model this Content object uses.
	 *
	 * Shorthand for ContentHandler::getForContent( $this )
	 *
	 * @return ContentHandler
	 */
	public function getContentHandler() {
		return ContentHandler::getForContent( $this );
	}

	/**
	 * Convenience method that returns the default serialization format for the content
	 * model this Content object uses.
	 *
	 * Shorthand for $this->getContentHandler()->getDefaultFormat()
	 *
	 * @return String the default serialization format, as reported by the ContentHandler
	 */
	public function getDefaultFormat() {
		return $this->getContentHandler()->getDefaultFormat();
	}

	/**
	 * Convenience method that returns the list of serialization formats supported
	 * for the content model this Content object uses.
	 *
	 * Shorthand for $this->getContentHandler()->getSupportedFormats()
	 *
	 * @return array of supported serialization formats
	 */
	public function getSupportedFormats() {
		return $this->getContentHandler()->getSupportedFormats();
	}

	/**
	 * Returns true if $format is a supported serialization format for this Content object,
	 * false if it isn't.
	 *
	 * Note that this will always return true if $format is null (or otherwise empty), because
	 * that stands for the default serialization. In that case the ContentHandler is not consulted.
	 *
	 * Shorthand for $this->getContentHandler()->isSupportedFormat( $format )
	 *
	 * @param String $format the format to check
	 * @return bool whether the format is supported
	 */
	public function isSupportedFormat( $format ) {
		if ( !$format ) {
			return true; // this means "use the default"
		}

		return $this->getContentHandler()->isSupportedFormat( $format );
	}

	/**
	 * Throws an MWException if $this->isSupportedFormat( $format ) doesn't return true.
	 *
	 * @param String $format the format to check
	 * @throws MWException
	 */
	protected function checkFormat( $format ) {
		if ( !$this->isSupportedFormat( $format ) ) {
			throw new MWException( "Format $format is not supported for content model " . $this->getModelName() );
		}
	}

	/**
	 * Convenience method for serializing this Content object.
	 *
	 * Shorthand for $this->getContentHandler()->serializeContent( $this, $format )
	 *
	 * @param null|String $format the desired serialization format (or null for the default format).
	 * @return String serialized form of this Content object
	 */
	public function serialize( $format = null ) {
		return $this->getContentHandler()->serializeContent( $this, $format );
	}

	/**
	 * Returns true if this Content object represents empty content, that is,
	 * if getSize() reports zero.
	 *
	 * @return bool whether this Content object is empty
	 */
	public function isEmpty() {
		return $this->getSize() == 0;
	}

	/**
	 * Returns true if this Content object is conceptually equivalent to the given Content object.
	 *
	 * Will return false if $that is null.
	 * Will return true if $that === $this.
	 * Will return false if $that->getModelName() != $this->getModelName().
	 * Will return false if $that->getNativeData() is not equal to $this->getNativeData(),
	 * where the meaning of "equal" depends on the actual data model. This default
	 * implementation compares the native data with the strict === operator.
	 *
	 * Implementations should be careful to make equals() symmetric and transitive:
	 *
	 * * $a->equals( $b ) <=> $b->equals( $a )
	 * * $a->equals( $b ) && $b->equals( $c ) ==> $a->equals( $c )
	 *
	 * @param Content $that the Content object to compare to
	 * @return bool true if this Content object is equal to $that, false otherwise.
	 */
	public function equals( Content $that = null ) {
		if ( is_null( $that ) ) {
			return false;
		}

		if ( $that === $this ) {
			return true;
		}

		if ( $that->getModelName() !== $this->getModelName() ) {
			return false;
		}

		return $this->getNativeData() === $that->getNativeData();
	}

	/**
	 * Return a copy of this Content object. The following must be true for the object returned
	 * if $copy = $original->copy()
	 *
	 * * get_class($original) === get_class($copy)
	 * * $original->getModelName() === $copy->getModelName()
	 * * $original->equals( $copy )
	 *
	 * If and only if the Content object is immutable, the copy() method can and should
	 * return $this. That is, $copy === $original may be true, but only for immutable content
	 * objects.
	 *
	 * @return Content A copy of this object
	 */
	public abstract function copy( );

	/**
	 * Returns true if this content is countable as a "real" wiki page, provided
	 * that it's also in a countable location (e.g. a current revision in the main namespace).
	 *
	 * @param $hasLinks Bool: if it is known whether this content contains links, provide this information here,
	 *        to avoid redundant parsing to find out.
	 * @return boolean
	 */
	public abstract function isCountable( $hasLinks = null );

	/**
	 * @param IContextSource $context
	 * @param null $revId
	 * @param null|ParserOptions $options
	 * @param Boolean $generateHtml whether to generate Html (default: true). If false,
	 *        the result of calling getText() on the ParserOutput object returned by
	 *        this method is undefined.
	 *
	 * @return ParserOutput
	 */
	public abstract function getParserOutput( IContextSource $context, $revId = null, ParserOptions $options = null, $generateHtml = true );

	/**
	 * Construct the redirect destination from this content and return an
	 * array of Titles, or null if this content doesn't represent a redirect.
	 * The last element in the array is the final destination after all redirects
	 * have been resolved (up to $wgMaxRedirects times).
	 *
	 * The default implementation returns null (not a redirect).
	 *
	 * @return Array|null of Titles, with the destination last
	 */
	public function getRedirectChain() {
		return null;
	}

	/**
	 * Construct the redirect destination from this content and return the
	 * Title, or null if this content doesn't represent a redirect.
	 * This will only return the immediate redirect target, useful for
	 * the redirect table and other checks that don't need full recursion.
	 *
	 * The default implementation returns null (not a redirect).
	 *
	 * @return Title|null The corresponding Title
	 */
	public function getRedirectTarget() {
		return null;
	}

	/**
	 * Construct the redirect destination from this content and return the
	 * Title, or null if this content doesn't represent a redirect.
	 * This will recurse down $wgMaxRedirects times or until a non-redirect target is hit
	 * in order to provide (hopefully) the Title of the final destination instead of another redirect.
	 *
	 * The default implementation returns null (not a redirect).
	 *
	 * @return Title|null
	 */
	public function getUltimateRedirectTarget() {
		return null;
	}

	/**
	 * Returns whether this content represents a redirect, i.e. whether
	 * getRedirectTarget() yields a target.
	 *
	 * @return bool
	 */
	public function isRedirect() {
		return $this->getRedirectTarget() !== null;
	}

	/**
	 * Returns the section with the given id.
	 *
	 * The default implementation returns null.
	 *
	 * @param String $sectionId the section's id, given as a numeric string. The id "0" retrieves the section before
	 *        the first heading, "1" the text between the first heading (included) and the second heading (excluded), etc.
	 * @return Content|Boolean|null the section, or false if no such section exist, or null if sections are not supported
	 */
	public function getSection( $sectionId ) {
		return null;
	}

	/**
	 * Replaces a section of the content and returns a Content object with the section replaced.
	 *
	 * The default implementation returns null.
	 *
	 * @param $section empty/null/false or a section number (0, 1, 2, T1, T2...), or "new"
	 * @param $with Content: new content of the section
	 * @param $sectionTitle String: new section's subject, only if $section is 'new'
	 * @return Content|null Complete article content, or null if error
	 */
	public function replaceSection( $section, Content $with, $sectionTitle = '' ) {
		return null;
	}

	/**
	 * Returns a Content object with pre-save transformations applied (or this object if no transformations apply).
	 *
	 * @param Title $title
	 * @param User $user
	 * @param null|ParserOptions $popts
	 * @return Content
	 */
	public function preSaveTransform( Title $title, User $user, ParserOptions $popts = null ) {
		return $this;
	}

	/**
	 * Returns a new Content object with the given section heading prepended, if supported.
	 * The default implementation just returns this Content object unmodified, ignoring the section header.
	 *
	 * @param $header String
	 * @return Content
	 */
	public function addSectionHeader( $header ) {
		return $this;
	}

	/**
	 * Returns a Content object with preload transformations applied (or this object if no transformations apply).
	 *
	 * @param Title $title
	 * @param null|ParserOptions $popts
	 * @return Content
	 */
	public function preloadTransform( Title $title, ParserOptions $popts = null ) {
		return $this;
	}

	# TODO: handle ImagePage and CategoryPage
	# TODO: make sure we cover lucene search / wikisearch.
	# TODO: make sure ReplaceTemplates still works
	# FUTURE: nice&sane integration of GeSHi syntax highlighting
	#   [11:59] <vvv> Hooks are ugly; make CodeHighlighter interface and a config to set the class which handles syntax highlighting
	#   [12:00] <vvv> And default it to a DummyHighlighter

	# TODO: make sure we cover the external editor interface (does anyone actually use that?!)

	# TODO: tie into API to provide contentModel for Revisions
	# TODO: tie into API to provide serialized version and contentFormat for Revisions
	# TODO: tie into API edit interface
	# FUTURE: make EditForm plugin for EditPage
}
362 # FUTURE: special type for redirects?!
363 # FUTURE: MultipartMultipart < WikipageContent (Main + Links + X)
364 # FUTURE: LinksContent < LanguageLinksContent, CategoriesContent
365
366 /**
367 * Content object implementation for representing flat text.
368 *
369 * TextContent instances are immutable.
370 */
abstract class TextContent extends Content {

	/**
	 * The raw text wrapped by this object, as passed to the constructor.
	 *
	 * Declared explicitly instead of being created dynamically in the constructor.
	 * Kept public so that access is no more restricted than it was for the former
	 * dynamic property; treat it as read-only, since TextContent is meant to be immutable.
	 *
	 * @var String|null
	 */
	public $mText;

	/**
	 * @param String|null $text the text this object represents
	 * @param String|null $model_name name of the content model, passed on to Content::__construct()
	 */
	public function __construct( $text, $model_name = null ) {
		parent::__construct( $model_name );

		$this->mText = $text;
	}

	/**
	 * Returns this very object rather than a clone.
	 *
	 * @return Content $this
	 */
	public function copy() {
		return $this; #NOTE: this is ok since TextContent are immutable.
	}

	/**
	 * Returns the text with line breaks replaced by spaces, truncated by the
	 * content language ($wgContLang) to at most $maxlength.
	 *
	 * @param int $maxlength maximum length of the summary text; negative values are treated as 0
	 * @return String the summary text
	 */
	public function getTextForSummary( $maxlength = 250 ) {
		global $wgContLang;

		$singleLine = preg_replace( "/[\n\r]/", ' ', $this->getNativeData() );

		return $wgContLang->truncate( $singleLine, max( 0, $maxlength ) );
	}

	/**
	 * Returns the text's size in bytes.
	 *
	 * @return int the size
	 */
	public function getSize( ) {
		return strlen( $this->getNativeData( ) );
	}

	/**
	 * Returns true if this content is not a redirect, and $wgArticleCountMethod is "any".
	 *
	 * @param $hasLinks Bool: if it is known whether this content contains links, provide this information here,
	 *        to avoid redundant parsing to find out. Not used by this implementation.
	 *
	 * @return bool true if the content is countable
	 */
	public function isCountable( $hasLinks = null ) {
		global $wgArticleCountMethod;

		if ( $this->isRedirect( ) ) {
			return false;
		}

		return $wgArticleCountMethod === 'any';
	}

	/**
	 * Returns the text represented by this Content object, as a string.
	 *
	 * @return String the raw text
	 */
	public function getNativeData( ) {
		return $this->mText;
	}

	/**
	 * Returns the text represented by this Content object, as a string.
	 *
	 * @return String the raw text
	 */
	public function getTextForSearchIndex( ) {
		return $this->getNativeData();
	}

	/**
	 * Returns the text represented by this Content object, as a string.
	 *
	 * @return String the raw text
	 */
	public function getWikitextForTransclusion( ) {
		return $this->getNativeData();
	}

	/**
	 * Returns a generic ParserOutput object, wrapping the HTML returned by getHtml().
	 *
	 * $context and $revId are not used by this generic implementation.
	 *
	 * @param IContextSource $context
	 * @param null $revId
	 * @param null|ParserOptions $options handed to getHtml(); note that getHtml() as declared
	 *        here takes no parameters, so implementations using that signature ignore it
	 * @param Boolean $generateHtml if false, getHtml() is not called and the output wraps an empty string
	 *
	 * @return ParserOutput representing the HTML form of the text
	 */
	public function getParserOutput( IContextSource $context, $revId = null, ParserOptions $options = null, $generateHtml = true ) {
		# generic implementation, relying on $this->getHtml()

		$html = $generateHtml ? $this->getHtml( $options ) : '';

		return new ParserOutput( $html );
	}

	/**
	 * Returns the HTML used by getParserOutput() to build the ParserOutput.
	 *
	 * @return String
	 */
	protected abstract function getHtml( );

}
474
/**
 * Content object for wikitext: text content using the CONTENT_MODEL_WIKITEXT model,
 * with parsing, section handling and transformations delegated to $wgParser.
 */
class WikitextContent extends TextContent {

	/**
	 * Lazily created default parser options, see getDefaultParserOptions().
	 *
	 * Declared explicitly instead of being created dynamically in the constructor;
	 * kept public so access is no more restricted than for the former dynamic property.
	 *
	 * @var ParserOptions|null
	 */
	public $mDefaultParserOptions = null; #TODO: use per-class static member?!

	/**
	 * @param String $text the wikitext
	 */
	public function __construct( $text ) {
		parent::__construct( $text, CONTENT_MODEL_WIKITEXT );

		$this->mDefaultParserOptions = null;
	}

	/**
	 * Not supported for wikitext; always throws.
	 *
	 * @throws MWException
	 */
	protected function getHtml( ) {
		throw new MWException( "getHtml() not implemented for wikitext. Use getParserOutput()->getText()." );
	}

	/**
	 * Returns the parser options used when the caller supplies none. They are built from
	 * $wgUser and $wgContLang on first use and then cached on this object.
	 *
	 * @return ParserOptions
	 */
	public function getDefaultParserOptions() {
		global $wgUser, $wgContLang;

		if ( !$this->mDefaultParserOptions ) { #TODO: use per-class static member?!
			$this->mDefaultParserOptions = ParserOptions::newFromUserAndLang( $wgUser, $wgContLang );
		}

		return $this->mDefaultParserOptions;
	}

	/**
	 * Returns a ParserOutput object resulting from parsing the content's text using $wgParser.
	 *
	 * @since WikiData1
	 *
	 * @param IContextSource $context supplies the title to parse against
	 * @param null $revId
	 * @param null|ParserOptions $options defaults to getDefaultParserOptions()
	 * @param bool $generateHtml not used by this implementation; the text is always parsed
	 *
	 * @return ParserOutput representing the HTML form of the text
	 */
	public function getParserOutput( IContextSource $context, $revId = null, ParserOptions $options = null, $generateHtml = true ) {
		global $wgParser;

		if ( !$options ) {
			$options = $this->getDefaultParserOptions();
		}

		return $wgParser->parse( $this->mText, $context->getTitle(), $options, true, true, $revId );
	}

	/**
	 * Returns the section with the given id.
	 *
	 * @param String $section the section's id
	 * @return Content|false|null the section, or false if no such section exist, or null if sections are not supported
	 */
	public function getSection( $section ) {
		global $wgParser;

		# false is passed as the value to get back when the section cannot be found
		$sect = $wgParser->getSection( $this->getNativeData(), $section, false );

		if ( $sect === false ) {
			# don't wrap the "not found" marker in a Content object
			return false;
		}

		return new WikitextContent( $sect );
	}

	/**
	 * Replaces a section in the wikitext
	 *
	 * @param $section empty/null/false or a section number (0, 1, 2, T1, T2...), or "new"
	 * @param $with Content: new content of the section
	 * @param $sectionTitle String: new section's subject, only if $section is 'new'
	 * @return Content Complete article content, or null if error
	 * @throws MWException if $with uses a different content model than this object
	 */
	public function replaceSection( $section, Content $with, $sectionTitle = '' ) {
		wfProfileIn( __METHOD__ );

		$myModelName = $this->getModelName();
		$sectionModelName = $with->getModelName();

		if ( $sectionModelName != $myModelName ) {
			wfProfileOut( __METHOD__ ); # keep profiling calls balanced on every exit path
			throw new MWException( "Incompatible content model for section: document uses $myModelName, section uses $sectionModelName." );
		}

		# empty string, null and false all mean "no section", i.e. replace everything
		if ( strval( $section ) === '' ) {
			wfProfileOut( __METHOD__ );
			return $with; #XXX: copy first?
		}

		$oldtext = $this->getNativeData();
		$text = $with->getNativeData();

		# strict comparison: with ==, the integer 0 would match 'new' on PHP < 8
		if ( $section === 'new' ) {
			# Inserting a new section
			$subject = $sectionTitle ? wfMsgForContent( 'newsectionheaderdefaultlevel', $sectionTitle ) . "\n\n" : '';
			# hook handlers receive $text by reference and may place the section themselves
			if ( wfRunHooks( 'PlaceNewSection', array( $this, $oldtext, $subject, &$text ) ) ) {
				$text = strlen( trim( $oldtext ) ) > 0
					? "{$oldtext}\n\n{$subject}{$text}"
					: "{$subject}{$text}";
			}
		} else {
			# Replacing an existing section; roll out the big guns
			global $wgParser;

			$text = $wgParser->replaceSection( $oldtext, $section, $text );
		}

		$newContent = new WikitextContent( $text );

		wfProfileOut( __METHOD__ );
		return $newContent;
	}

	/**
	 * Returns a new WikitextContent object with the given section heading prepended.
	 *
	 * @param $header String
	 * @return Content
	 */
	public function addSectionHeader( $header ) {
		$heading = wfMsgForContent( 'newsectionheaderdefaultlevel', $header );

		return new WikitextContent( $heading . "\n\n" . $this->getNativeData() );
	}

	/**
	 * Returns a Content object with pre-save transformations applied.
	 *
	 * @param Title $title
	 * @param User $user
	 * @param null|ParserOptions $popts defaults to getDefaultParserOptions()
	 * @return Content
	 */
	public function preSaveTransform( Title $title, User $user, ParserOptions $popts = null ) {
		global $wgParser;

		if ( $popts === null ) {
			$popts = $this->getDefaultParserOptions();
		}

		$pst = $wgParser->preSaveTransform( $this->getNativeData(), $title, $user, $popts );

		return new WikitextContent( $pst );
	}

	/**
	 * Returns a Content object with preload transformations applied.
	 *
	 * @param Title $title
	 * @param null|ParserOptions $popts defaults to getDefaultParserOptions()
	 * @return Content
	 */
	public function preloadTransform( Title $title, ParserOptions $popts = null ) {
		global $wgParser;

		if ( $popts === null ) {
			$popts = $this->getDefaultParserOptions();
		}

		$plt = $wgParser->getPreloadText( $this->getNativeData(), $title, $popts );

		return new WikitextContent( $plt );
	}

	/**
	 * @return Array|null result of Title::newFromRedirectArray() for this text
	 */
	public function getRedirectChain() {
		return Title::newFromRedirectArray( $this->getNativeData() );
	}

	/**
	 * @return Title|null result of Title::newFromRedirect() for this text
	 */
	public function getRedirectTarget() {
		return Title::newFromRedirect( $this->getNativeData() );
	}

	/**
	 * @return Title|null result of Title::newFromRedirectRecurse() for this text
	 */
	public function getUltimateRedirectTarget() {
		return Title::newFromRedirectRecurse( $this->getNativeData() );
	}

	/**
	 * Returns true if this content is not a redirect, and this content's text is countable according to
	 * the criteria defined by $wgArticleCountMethod.
	 *
	 * @param Bool $hasLinks if it is known whether this content contains links, provide this information here,
	 *        to avoid redundant parsing to find out.
	 * @param IContextSource $context context for parsing if necessary
	 *
	 * @return bool true if the content is countable; false for unrecognized count methods
	 */
	public function isCountable( $hasLinks = null, IContextSource $context = null ) {
		global $wgArticleCountMethod, $wgRequest;

		if ( $this->isRedirect( ) ) {
			return false;
		}

		$text = $this->getNativeData();

		switch ( $wgArticleCountMethod ) {
			case 'any':
				return true;
			case 'comma':
				return strpos( $text, ',' ) !== false;
			case 'link':
				if ( $hasLinks === null ) { # not known, find out
					if ( !$context ) { # make dummy context
						//XXX: caller of this method often knows the title, but not a context...
						$context = new RequestContext( $wgRequest );
					}

					$po = $this->getParserOutput( $context, null, null, false );
					$links = $po->getLinks();
					$hasLinks = !empty( $links );
				}

				return $hasLinks;
		}

		# unknown count method: report "not countable" instead of falling off the end with null
		return false;
	}

	/**
	 * Like TextContent::getTextForSummary(), but additionally unwraps a link that
	 * was left unfinished at the end of the truncated text.
	 *
	 * @param int $maxlength maximum length of the summary text
	 * @return String the summary text
	 */
	public function getTextForSummary( $maxlength = 250 ) {
		$truncatedtext = parent::getTextForSummary( $maxlength );

		#clean up unfinished links
		#XXX: make this optional? wasn't there in autosummary, but required for deletion summary.
		return preg_replace( '/\[\[([^\]]*)\]?$/', '$1', $truncatedtext );
	}

}
695
/**
 * Content object wrapping an interface message. The text is not stored; it is
 * fetched through wfMsgExt() whenever it is requested.
 */
class MessageContent extends TextContent {

	# The properties below used to be created dynamically in the constructor. They are
	# declared public so access is no more restricted than it was before.

	/** @var String key of the message */
	public $mMessageKey;

	/** @var mixed parameters handed to wfMsgExt(), as given to the constructor */
	public $mParameters;

	/** @var array options handed to wfMsgExt() */
	public $mOptions;

	/** @var mixed set to null by the constructor; not used elsewhere in this class */
	public $mHtmlOptions;

	/**
	 * @param String $msg_key key of the message
	 * @param mixed $params message parameters, passed through to wfMsgExt()
	 * @param array|String|null $options wfMsgExt() options; a single string is wrapped in an
	 *        array, null means no options
	 */
	public function __construct( $msg_key, $params = null, $options = null ) {
		parent::__construct( null, CONTENT_MODEL_WIKITEXT ); #XXX: messages may be wikitext, html or plain text! and maybe even something else entirely.

		$this->mMessageKey = $msg_key;
		$this->mParameters = $params;

		if ( is_null( $options ) ) {
			$options = array();
		} elseif ( is_string( $options ) ) {
			$options = array( $options );
		}

		$this->mOptions = $options;
		$this->mHtmlOptions = null;
	}

	/**
	 * Returns the message as rendered HTML, using the options supplied to the constructor plus "parse".
	 *
	 * @return String
	 */
	protected function getHtml( ) {
		$opt = array_merge( $this->mOptions, array( 'parse' ) );

		return wfMsgExt( $this->mMessageKey, $this->mParameters, $opt );
	}

	/**
	 * Returns the message as raw text, using the options supplied to the constructor minus "parse" and "parseinline".
	 *
	 * @return String
	 */
	public function getNativeData( ) {
		$opt = array_diff( $this->mOptions, array( 'parse', 'parseinline' ) );

		return wfMsgExt( $this->mMessageKey, $this->mParameters, $opt );
	}

}
736
737
/**
 * Text content using the CONTENT_MODEL_JAVASCRIPT model.
 */
class JavaScriptContent extends TextContent {

	/**
	 * @param String $text the JavaScript source
	 */
	public function __construct( $text ) {
		parent::__construct( $text, CONTENT_MODEL_JAVASCRIPT );
	}

	/**
	 * Returns the HTML-escaped source wrapped in a <pre> element.
	 *
	 * @return String
	 */
	protected function getHtml( ) {
		$escaped = htmlspecialchars( $this->getNativeData() );

		return "<pre class=\"mw-code mw-js\" dir=\"ltr\">\n" . $escaped . "\n</pre>\n";
	}

}
753
/**
 * Text content using the CONTENT_MODEL_CSS model.
 */
class CssContent extends TextContent {

	/**
	 * @param String $text the CSS source
	 */
	public function __construct( $text ) {
		parent::__construct( $text, CONTENT_MODEL_CSS );
	}

	/**
	 * Returns the HTML-escaped source wrapped in a <pre> element.
	 *
	 * @return String
	 */
	protected function getHtml( ) {
		$escaped = htmlspecialchars( $this->getNativeData() );

		return "<pre class=\"mw-code mw-css\" dir=\"ltr\">\n" . $escaped . "\n</pre>\n";
	}
}