3 * Content object implementation for representing flat text.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
25 * @author Daniel Kinzler
28 use MediaWiki\MediaWikiServices
;
31 * Content object implementation for representing flat text.
33 * TextContent instances are immutable
37 class TextContent
extends AbstractContent
{
46 * @param string $model_id
49 public function __construct( $text, $model_id = CONTENT_MODEL_TEXT
) {
50 parent
::__construct( $model_id );
52 if ( $text === null ||
$text === false ) {
53 wfWarn( "TextContent constructed with \$text = " . var_export( $text, true ) . "! "
54 . "This may indicate an error in the caller's scope.", 2 );
59 if ( !is_string( $text ) ) {
60 throw new MWException( "TextContent expects a string in the constructor." );
/**
 * Hands back this very instance instead of a clone.
 *
 * @note Mutable subclasses MUST override this to return a copy!
 *
 * @return Content $this
 */
public function copy() {
	// Sharing the instance is fine: TextContent objects are immutable.
	return $this;
}
/**
 * Produces a single-line, truncated rendition of the text.
 *
 * Every "\n" and "\r" is replaced by a space, then the result is handed to
 * the content language's truncateForDatabase().
 *
 * @param int $maxlength Length limit passed to truncateForDatabase();
 *   values below 0 are clamped to 0.
 *
 * @return string The value produced by truncateForDatabase()
 *   (presumably a string - confirm against Language).
 */
public function getTextForSummary( $maxlength = 250 ) {
	$rawText = $this->getText();
	$contLang = MediaWikiServices::getInstance()->getContentLanguage();

	$singleLine = preg_replace( "/[\n\r]/", ' ', $rawText );
	$limit = max( 0, $maxlength );

	return $contLang->truncateForDatabase( $singleLine, $limit );
}
/**
 * Returns the text's size in bytes.
 *
 * @return int Length of getText() as measured by strlen().
 */
public function getSize() {
	return strlen( $this->getText() );
}
/**
 * Returns true if this content is not a redirect, and $wgArticleCountMethod
 * is "any".
 *
 * @param bool|null $hasLinks If it is known whether this content contains links,
 *   provide this information here, to avoid redundant parsing to find out.
 *   This implementation does not consult it.
 *
 * @return bool
 */
public function isCountable( $hasLinks = null ) {
	global $wgArticleCountMethod;

	// Redirects are never counted, whatever the configured count method is.
	if ( $this->isRedirect() ) {
		return false;
	}

	// Every exit path yields an explicit boolean; no path falls through
	// without a return value.
	return $wgArticleCountMethod === 'any';
}
/**
 * Returns the text represented by this Content object, as a string.
 *
 * Thin alias that forwards to getText().
 *
 * @deprecated since 1.33 use getText() instead.
 *
 * @return string The raw text. Subclasses may guarantee a specific syntax here.
 */
public function getNativeData() {
	$text = $this->getText();

	return $text;
}
130 * Returns the text represented by this Content object, as a string.
134 * @return string The raw text.
136 public function getText() {
/**
 * Returns the text represented by this Content object, as a string.
 *
 * The value is exactly what getText() returns.
 *
 * @return string The raw text.
 */
public function getTextForSearchIndex() {
	$text = $this->getText();

	return $text;
}
/**
 * Attempts to convert this content object to wikitext,
 * and then returns the text string. The conversion may be lossy.
 *
 * @note this allows any text-based content to be transcluded as if it was wikitext.
 *
 * @return string|bool The raw text, or false if the conversion failed.
 */
public function getWikitextForTransclusion() {
	/** @var WikitextContent|bool $wikitext */
	$wikitext = $this->convert( CONTENT_MODEL_WIKITEXT, 'lossy' );

	// convert() reports an unsupported conversion by returning false; pass
	// that on to the caller instead of calling a method on a non-object.
	if ( !$wikitext ) {
		return false;
	}
	'@phan-var WikitextContent $wikitext';

	return $wikitext->getText();
}
/**
 * Do a "\r\n" -> "\n" and "\r" -> "\n" transformation
 * as well as trim trailing whitespace
 *
 * This was formerly part of Parser::preSaveTransform, but
 * for non-wikitext content models they probably still want
 * to normalize line endings without all of the other PST
 * changes.
 *
 * @param string $text
 *
 * @return string The text with its trailing whitespace removed and every
 *   line ending expressed as "\n".
 */
public static function normalizeLineEndings( $text ) {
	// Trimming happens first, on the text as a whole (not per line).
	$trimmed = rtrim( $text );
	$lineBreaks = [ "\r\n", "\r" ];

	return str_replace( $lineBreaks, "\n", $trimmed );
}
/**
 * Returns a Content object with pre-save transformations applied.
 *
 * At a minimum, subclasses should make sure to call TextContent::normalizeLineEndings()
 * either directly or part of Parser::preSaveTransform().
 *
 * This implementation only normalizes line endings; none of the three
 * parameters is consulted.
 *
 * @param Title $title
 * @param User $user
 * @param ParserOptions $popts
 *
 * @return Content $this when normalization changes nothing, otherwise a new
 *   instance of the called class carrying the normalized text and the same model.
 */
public function preSaveTransform( Title $title, User $user, ParserOptions $popts ) {
	$original = $this->getText();
	$normalized = self::normalizeLineEndings( $original );

	// Nothing changed: reuse the current object.
	if ( $original === $normalized ) {
		return $this;
	}

	return new static( $normalized, $this->getModel() );
}
/**
 * Diff this content object with another content object.
 *
 * @param Content $that The other content object to compare this content object to.
 * @param Language|null $lang The language object to use for text segmentation.
 *    If not given, the content language is used.
 *
 * @return Diff A diff representing the changes that would have to be
 *    made to this content object to make it equal to $that.
 */
public function diff( Content $that, Language $lang = null ) {
	$this->checkModelID( $that->getModel() );
	/** @var self $that */
	'@phan-var self $that';
	// @todo could implement this in DifferenceEngine and just delegate here?

	// Fall back to the content language only when the caller supplied none,
	// so that an explicitly passed $lang is honoured.
	if ( $lang === null ) {
		$lang = MediaWikiServices::getInstance()->getContentLanguage();
	}

	$otext = $this->getText();
	$ntext = $that->getText();

	# Note: Use native PHP diff, external engines don't give us abstract output
	$ota = explode( "\n", $lang->segmentForDiff( $otext ) );
	$nta = explode( "\n", $lang->segmentForDiff( $ntext ) );

	return new Diff( $ota, $nta );
}
/**
 * Fills the provided ParserOutput object with information derived from the content.
 * Unless $generateHtml was false, this includes an HTML representation of the content
 * provided by getHtml().
 *
 * For content models listed in $wgTextModelsToParse, this method will call the MediaWiki
 * wikitext parser on the text to extract any (wikitext) links, magic words, etc.
 *
 * Subclasses may override this to provide custom content processing.
 * For custom HTML generation alone, it is sufficient to override getHtml().
 *
 * @param Title $title Context title for parsing
 * @param int $revId Revision ID (for {{REVISIONID}})
 * @param ParserOptions $options
 * @param bool $generateHtml Whether or not to generate HTML
 * @param ParserOutput &$output The output object to fill (reference).
 */
protected function fillParserOutput( Title $title, $revId,
	ParserOptions $options, $generateHtml, ParserOutput &$output
) {
	global $wgTextModelsToParse;

	if ( in_array( $this->getModel(), $wgTextModelsToParse ) ) {
		// parse just to get links etc into the database, HTML is replaced below.
		// Note that this swaps the caller's $output for the parser's result.
		$output = MediaWikiServices::getInstance()->getParser()
			->parse( $this->getText(), $title, $options, true, true, $revId );
	}

	// Always define $html: when no HTML was requested, store an empty string
	// instead of passing an undefined variable to setText().
	$html = $generateHtml ? $this->getHtml() : '';

	$output->clearWrapperDivClass();
	$output->setText( $html );
}
/**
 * Generates an HTML version of the content, for display. Used by
 * fillParserOutput() to provide HTML for the ParserOutput object.
 *
 * Subclasses may override this to provide a custom HTML rendering.
 * If further information is to be derived from the content (such as
 * categories), the fillParserOutput() method can be overridden instead.
 *
 * For backwards-compatibility, this default implementation just calls
 * getHighlightHtml().
 *
 * @return string An HTML representation of the content
 */
protected function getHtml() {
	$html = $this->getHighlightHtml();

	return $html;
}
/**
 * Generates an HTML version of the content, for display.
 *
 * This default implementation returns an HTML-escaped version
 * of the raw text content.
 *
 * @note The functionality of this method should really be implemented
 * in getHtml(), and subclasses should override getHtml() if needed.
 * getHighlightHtml() is kept around for backward compatibility with
 * extensions that already override it.
 *
 * @deprecated since 1.24. Use getHtml() instead. In particular, subclasses overriding
 * getHighlightHtml() should override getHtml() instead.
 *
 * @return string An HTML representation of the content
 */
protected function getHighlightHtml() {
	$rawText = $this->getText();

	// Escaping uses htmlspecialchars() with its default flags.
	return htmlspecialchars( $rawText );
}
315 * This implementation provides lossless conversion between content models based
318 * @param string $toModel The desired content model, use the CONTENT_MODEL_XXX flags.
319 * @param string $lossy Flag, set to "lossy" to allow lossy conversion. If lossy conversion is not
320 * allowed, full round-trip conversion is expected to work without losing information.
322 * @return Content|bool A content object with the content model $toModel, or false if that
323 * conversion is not supported.
325 * @see Content::convert()
327 public function convert( $toModel, $lossy = '' ) {
328 $converted = parent
::convert( $toModel, $lossy );
330 if ( $converted !== false ) {
334 $toHandler = ContentHandler
::getForModelID( $toModel );
336 if ( $toHandler instanceof TextContentHandler
) {
337 // NOTE: ignore content serialization format - it's just text anyway.
338 $text = $this->getText();
339 $converted = $toHandler->unserializeContent( $text );