d7a15aff23e5ae000b3d606d965ee84c65a81974
[lhc/web/wiklou.git] / includes / parser / Parser.php
1 <?php
2 /**
3 * PHP parser that converts wiki markup to HTML.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23 use MediaWiki\BadFileLookup;
24 use MediaWiki\Config\ServiceOptions;
25 use MediaWiki\Linker\LinkRenderer;
26 use MediaWiki\Linker\LinkRendererFactory;
27 use MediaWiki\Linker\LinkTarget;
28 use MediaWiki\MediaWikiServices;
29 use MediaWiki\Special\SpecialPageFactory;
30 use Psr\Log\NullLogger;
31 use Wikimedia\ScopedCallback;
32 use Psr\Log\LoggerInterface;
33
34 /**
35 * @defgroup Parser Parser
36 */
37
38 /**
39 * PHP Parser - Processes wiki markup (which uses a more user-friendly
40 * syntax, such as "[[link]]" for making links), and provides a one-way
41 * transformation of that wiki markup into (X)HTML output / markup
42 * (which in turn the browser understands, and can display).
43 *
44 * There are seven main entry points into the Parser class:
45 *
46 * - Parser::parse()
47 * produces HTML output
48 * - Parser::preSaveTransform()
49 * produces altered wiki markup
50 * - Parser::preprocess()
51 * removes HTML comments and expands templates
52 * - Parser::cleanSig() and Parser::cleanSigInSig()
53 * cleans a signature before saving it to preferences
54 * - Parser::getSection()
55 * return the content of a section from an article for section editing
56 * - Parser::replaceSection()
57 * replaces a section by number inside an article
58 * - Parser::getPreloadText()
59 * removes <noinclude> sections and <includeonly> tags
60 *
61 * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
62 *
63 * @par Settings:
64 * $wgNamespacesWithSubpages
65 *
66 * @par Settings only within ParserOptions:
67 * $wgAllowExternalImages
68 * $wgAllowSpecialInclusion
69 * $wgInterwikiMagic
70 * $wgMaxArticleSize
71 *
72 * @ingroup Parser
73 */
74 class Parser {
75 /**
76 * Update this version number when the ParserOutput format
77 * changes in an incompatible way, so the parser cache
78 * can automatically discard old data.
79 */
80 const VERSION = '1.6.4';
81
82 /**
83 * Update this version number when the output of serialiseHalfParsedText()
84 * changes in an incompatible way
85 */
86 const HALF_PARSED_VERSION = 2;
87
88 # Flags for Parser::setFunctionHook
89 const SFH_NO_HASH = 1;
90 const SFH_OBJECT_ARGS = 2;
91
92 # Constants needed for external link processing
93 # Everything except bracket, space, or control characters
94 # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
95 # as well as U+3000 IDEOGRAPHIC SPACE (T21052)
96 # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
97 # uses to replace invalid HTML characters.
98 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
99 # Simplified expression to match an IPv4 or IPv6 address, or
100 # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
101 const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
102 # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
103 // phpcs:ignore Generic.Files.LineLength
104 const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
105 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
106
107 # Regular expression for a non-newline space
108 const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
109
110 # Flags for preprocessToDom
111 const PTD_FOR_INCLUSION = 1;
112
113 # Allowed values for $this->mOutputType
114 # Parameter to startExternalParse().
115 const OT_HTML = 1; # like parse()
116 const OT_WIKI = 2; # like preSaveTransform()
117 const OT_PREPROCESS = 3; # like preprocess()
118 const OT_MSG = 3;
119 const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
120
121 /**
122 * @var string Prefix and suffix for temporary replacement strings
123 * for the multipass parser.
124 *
125 * \x7f should never appear in input as it's disallowed in XML.
126 * Using it at the front also gives us a little extra robustness
127 * since it shouldn't match when butted up against identifier-like
128 * string constructs.
129 *
130 * Must not consist of all title characters, or else it will change
131 * the behavior of <nowiki> in a link.
132 *
133 * Must have a character that needs escaping in attributes, otherwise
134 * someone could put a strip marker in an attribute, to get around
135 * escaping quote marks, and break out of the attribute. Thus we add
136 * `'".
137 */
138 const MARKER_SUFFIX = "-QINU`\"'\x7f";
139 const MARKER_PREFIX = "\x7f'\"`UNIQ-";
140
141 # Markers used for wrapping the table of contents
142 const TOC_START = '<mw:toc>';
143 const TOC_END = '</mw:toc>';
144
145 /** @var int Assume that no output will later be saved this many seconds after parsing */
146 const MAX_TTS = 900;
147
148 # Persistent:
149 public $mTagHooks = [];
150 public $mTransparentTagHooks = [];
151 public $mFunctionHooks = [];
152 public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
153 public $mFunctionTagHooks = [];
154 public $mStripList = [];
155 public $mDefaultStripList = [];
156 public $mVarCache = [];
157 public $mImageParams = [];
158 public $mImageParamsMagicArray = [];
159 public $mMarkerIndex = 0;
160 /**
161 * @var bool Whether firstCallInit still needs to be called
162 */
163 public $mFirstCall = true;
164
165 # Initialised by initialiseVariables()
166
167 /**
168 * @var MagicWordArray
169 */
170 public $mVariables;
171
172 /**
173 * @var MagicWordArray
174 */
175 public $mSubstWords;
176
177 /**
178 * @deprecated since 1.34, there should be no need to use this
179 * @var array
180 */
181 public $mConf;
182
183 # Initialised in constructor
184 public $mExtLinkBracketedRegex, $mUrlProtocols;
185
186 # Initialized in getPreprocessor()
187 /** @var Preprocessor */
188 public $mPreprocessor;
189
190 # Cleared with clearState():
191 /**
192 * @var ParserOutput
193 */
194 public $mOutput;
195 public $mAutonumber;
196
197 /**
198 * @var StripState
199 */
200 public $mStripState;
201
202 public $mIncludeCount;
203 /**
204 * @var LinkHolderArray
205 */
206 public $mLinkHolders;
207
208 public $mLinkID;
209 public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
210 public $mDefaultSort;
211 public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
212 public $mExpensiveFunctionCount; # number of expensive parser function calls
213 public $mShowToc, $mForceTocPosition;
214 /** @var array */
215 public $mTplDomCache;
216
217 /**
218 * @var User
219 */
220 public $mUser; # User object; only used when doing pre-save transform
221
222 # Temporary
223 # These are variables reset at least once per parse regardless of $clearState
224
225 /**
226 * @var ParserOptions
227 */
228 public $mOptions;
229
230 /**
231 * @var Title
232 */
233 public $mTitle; # Title context, used for self-link rendering and similar things
234 public $mOutputType; # Output type, one of the OT_xxx constants
235 public $ot; # Shortcut alias, see setOutputType()
236 public $mRevisionObject; # The revision object of the specified revision ID
237 public $mRevisionId; # ID to display in {{REVISIONID}} tags
238 public $mRevisionTimestamp; # The timestamp of the specified revision ID
239 public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
240 public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
241 public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
242 public $mInputSize = false; # For {{PAGESIZE}} on current page.
243
244 /**
245 * @var array Array with the language name of each language link (i.e. the
246 * interwiki prefix) in the key, value arbitrary. Used to avoid sending
247 * duplicate language links to the ParserOutput.
248 */
249 public $mLangLinkLanguages;
250
251 /**
252 * @var MapCacheLRU|null
253 * @since 1.24
254 *
255 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
256 */
257 public $currentRevisionCache;
258
259 /**
260 * @var bool|string Recursive call protection.
261 * This variable should be treated as if it were private.
262 */
263 public $mInParse = false;
264
265 /** @var SectionProfiler */
266 protected $mProfiler;
267
268 /**
269 * @var LinkRenderer
270 */
271 protected $mLinkRenderer;
272
273 /** @var MagicWordFactory */
274 private $magicWordFactory;
275
276 /** @var Language */
277 private $contLang;
278
279 /** @var ParserFactory */
280 private $factory;
281
282 /** @var SpecialPageFactory */
283 private $specialPageFactory;
284
285 /**
286 * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
287 * $mOptions, which is public and widely used, and also with the local variable $options used
288 * for ParserOptions throughout this file.
289 *
290 * @var ServiceOptions
291 */
292 private $svcOptions;
293
294 /** @var LinkRendererFactory */
295 private $linkRendererFactory;
296
297 /** @var NamespaceInfo */
298 private $nsInfo;
299
300 /** @var LoggerInterface */
301 private $logger;
302
303 /** @var BadFileLookup */
304 private $badFileLookup;
305
306 /**
307 * TODO Make this a const when HHVM support is dropped (T192166)
308 *
309 * @var array
310 * @since 1.33
311 */
312 public static $constructorOptions = [
313 // See $wgParserConf documentation
314 'class',
315 'preprocessorClass',
316 // See documentation for the corresponding config options
317 'ArticlePath',
318 'EnableScaryTranscluding',
319 'ExtraInterlanguageLinkPrefixes',
320 'FragmentMode',
321 'LanguageCode',
322 'MaxSigChars',
323 'MaxTocLevel',
324 'MiserMode',
325 'ScriptPath',
326 'Server',
327 'ServerName',
328 'ShowHostnames',
329 'Sitename',
330 'StylePath',
331 'TranscludeCacheExpiry',
332 ];
333
/**
 * Constructing parsers directly is deprecated! Use a ParserFactory.
 *
 * Two calling conventions are accepted:
 *  - Current: a ServiceOptions object comes first, followed by the services
 *    in the order documented below.
 *  - Pre-1.34: the first parameter is the ParserConf array (or nothing), the
 *    seventh is a Config, the eighth a LinkRendererFactory and the ninth is
 *    used as the NamespaceInfo. No logger is taken in this convention.
 *
 * Every service that is not supplied is fetched from MediaWikiServices,
 * except the logger, which falls back to a NullLogger.
 *
 * @param ServiceOptions|array|null $svcOptions Options object (current
 *   convention) or ParserConf array / null (pre-1.34 convention)
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols As returned from wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param LinkRendererFactory|null $linkRendererFactory
 * @param NamespaceInfo|null $nsInfo
 * @param LoggerInterface|null $logger
 * @param BadFileLookup|null $badFileLookup
 */
public function __construct(
	$svcOptions = null,
	MagicWordFactory $magicWordFactory = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null,
	BadFileLookup $badFileLookup = null
) {
	if ( !$svcOptions || is_array( $svcOptions ) ) {
		// Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
		// Config, and the eighth is LinkRendererFactory.
		$this->mConf = (array)$svcOptions;
		if ( empty( $this->mConf['class'] ) ) {
			$this->mConf['class'] = self::class;
		}
		if ( empty( $this->mConf['preprocessorClass'] ) ) {
			$this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
		}
		$this->svcOptions = new ServiceOptions( self::$constructorOptions,
			$this->mConf, func_num_args() > 6
				? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
		);
		// The legacy positions are shifted by one relative to the declared
		// parameters, so each value is read from its legacy position.
		$linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
		$nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
		// Position nine has just been consumed as the NamespaceInfo. It is
		// also the declared $logger parameter, so clear it here; otherwise
		// the NamespaceInfo object would be installed as the logger below.
		$logger = null;
	} else {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::$constructorOptions );
		// $this->mConf is public, so we'll keep those two options there as well for
		// compatibility until it's removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
			'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
		];
		$this->svcOptions = $svcOptions;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	$this->magicWordFactory = $magicWordFactory ??
		MediaWikiServices::getInstance()->getMagicWordFactory();

	$this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();

	$this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
	$this->specialPageFactory = $spFactory ??
		MediaWikiServices::getInstance()->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory ??
		MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
	$this->logger = $logger ?: new NullLogger();
	$this->badFileLookup = $badFileLookup ??
		MediaWikiServices::getInstance()->getBadFileLookup();
}
408
/**
 * Drop every property when the parser is destroyed, to lessen the memory
 * impact of circular references.
 */
public function __destruct() {
	// The link holder array is released explicitly before the generic sweep.
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $property => $unused ) {
		unset( $this->{$property} );
	}
}
421
/**
 * Reset per-instance state on the clone and let extensions clean up via the
 * 'ParserCloned' hook.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: creating a reference "to" an object field turns the field
	// itself into a reference (for as long as the other reference lives),
	// and a clone copies such fields as references. Old hooks from the PHP4
	// era still pass these fields by reference, so the clone could end up
	// sharing them with the original. Break the link for each affected field
	// by copying its value and rebinding the field to that fresh copy.
	$fieldsToDetach = [ 'mStripState', 'mVarCache' ];
	foreach ( $fieldsToDetach as $field ) {
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
444
/**
 * Name of the preprocessor class used when none is configured explicitly.
 *
 * @since 1.34
 * @deprecated since 1.34, removing configurability of preprocessor
 * @return string Class name
 */
public static function getDefaultPreprocessorClass() {
	$defaultClass = Preprocessor_Hash::class;
	return $defaultClass;
}
455
/**
 * One-time initialisation, performed the first time the parser is used.
 *
 * Registers the core parser functions and tag hooks, initialises the
 * variables and then runs the 'ParserFirstCallInit' hook. Later calls are
 * no-ops because $mFirstCall is cleared on the first run.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		$this->mFirstCall = false;

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// $this cannot be passed by reference directly without a PHP 7.1
		// warning, so hand the hook a local alias instead.
		$self = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$self ] );
	}
}
473
/**
 * Reset the parser to a pristine per-parse state.
 *
 * Makes sure first-call initialisation has happened, installs a fresh
 * ParserOutput, clears all counters, caches and revision fields, and finally
 * runs the 'ParserClearState' hook.
 *
 * @private
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();

	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Forget everything known about the revision being rendered
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor that points at a different parser instance
	# is discarded
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// $this cannot be passed by reference directly without a PHP 7.1
	// warning, so hand the hook a local alias instead.
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
523
/**
 * Replace the current ParserOutput with a fresh one and register its
 * recordOption() method as a watcher on the current ParserOptions.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput();
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
531
/**
 * Convert wikitext to HTML.
 * Do not call this function recursively.
 *
 * The revision-related fields of the parser are saved before parsing and
 * put back afterwards, so a $revid passed here only applies to this call.
 *
 * @param string $text Text we want to parse
 * @param-taint $text escapes_htmlnoent
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid ID of the revision being rendered. This is used to render
 *  REVISION* magic words. 0 means that any current revision will be used. Null means
 *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 *  use the current timestamp.
 * @return ParserOutput A ParserOutput
 * @return-taint escaped
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built around U+007F DELETE, so that character
		// must not be present in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so that it lives until this method returns
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision fields so they can be put back at the end
	$savedRevisionState = [
		'mRevisionId' => $this->mRevisionId,
		'mRevisionObject' => $this->mRevisionObject,
		'mRevisionTimestamp' => $this->mRevisionTimestamp,
		'mRevisionUser' => $this->mRevisionUser,
		'mRevisionSize' => $this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// $this cannot be passed by reference directly without a PHP 7.1
	// warning, so hand the hooks a local alias instead.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	/**
	 * A converted title is put into the output object only when title and
	 * content conversion are enabled, the article text carries no
	 * conversion-suppressing double-underscore tag, and there is no
	 * {{DISPLAYTITLE:...}}. DISPLAYTITLE takes precedence over automatic
	 * link conversion.
	 */
	$mayConvertTitle = !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false;
	if ( $mayConvertTitle ) {
		$titleText = $this->getTargetLanguage()->getConvRuleTitle();
		if ( !$titleText ) {
			$titleText = $this->getTargetLanguage()->convertTitle( $title );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put the revision fields back the way they were found
	foreach ( $savedRevisionState as $field => $savedValue ) {
		$this->$field = $savedValue;
	}
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
649
/**
 * Record the limit report data on the current ParserOutput and build the
 * HTML comments (limit report plus transclusion timing report) that
 * describe it.
 *
 * @return string HTML comments to append to the parsed text
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime', sprintf( "%.3f", $cpuSeconds )
		);
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime', sprintf( "%.3f", $wallSeconds )
	);

	// Each entry pairs the value reached during this parse with its limit
	$usageVersusLimit = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-ppgeneratednodes' =>
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageVersusLimit as $reportKey => $pair ) {
		$this->mOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
		$this->mOutput->setLimitReportData( $key, $value );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	// Header lines of the textual report
	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamicFlag = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamicFlag . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	// One line per recorded datum, unless the format hook aborts the
	// default formatting for it.
	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		$useDefaultFormat = Hooks::run( 'ParserLimitReportFormat',
			[ $key, &$value, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}

		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}

	// This is not really HTML output, so decode the entities and then
	// re-encode only what has to be hidden inside an HTML comment.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n" . $limitReport . "-->\n";

	// Template profiling data, readable by humans and machines alike:
	// the ten entries with the largest 'real' time, largest first.
	$dataByFunc = $this->mProfiler->getFunctionStats();
	uasort( $dataByFunc, function ( $left, $right ) {
		return $right['real'] <=> $left['real']; // descending order
	} );
	$profileReport = [];
	foreach ( array_slice( $dataByFunc, 0, 10 ) as $entry ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$entry['%real'],
			$entry['real'],
			$entry['calls'],
			htmlspecialchars( $entry['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp', $this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl', $this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent', $this->mOutput->hasDynamicContent()
	);

	// Log pages that generate more than a tenth of the allowed node count
	if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
		wfDebugLog(
			'generated-pp-node-count',
			$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
		);
	}

	return $text;
}
765
/**
 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 * can be called from an extension tag hook.
 *
 * The result IS NOT SAFE PARSED HTML. It is only "half-parsed": lists and
 * links have not been fully parsed yet and strip markers are still present.
 * Use recursiveTagParseFully() to obtain output-safe HTML instead.
 *
 * This is the function to use from a parser tag hook that wants to parse
 * wikitext before or after applying its own transformations and then
 * *return the result as hook output*, so that it goes through the rest of
 * the parsing process automatically.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string UNSAFE half-parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParse( $text, $frame = false ) {
	// $this cannot be passed by reference directly without a PHP 7.1
	// warning, so hand the hooks a local alias instead.
	$self = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$self, &$text, &$this->mStripState ] );
	}
	return $this->internalParse( $text, false, $frame );
}
798
/**
 * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 * point can be called from an extension tag hook.
 *
 * The result is fully-parsed HTML that is safe for output. A parser tag
 * hook may prefer recursiveTagParse() instead.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @since 1.25
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string Fully parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
823
/**
 * Expand templates and variables in the text, producing valid, static wikitext.
 * Also removes comments.
 * Do not call this function recursively.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Stored as the parser's revision ID when not null
 * @param bool|PPFrame $frame
 * @return mixed|string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held in a local so that it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	// $this cannot be passed by reference directly without a PHP 7.1
	// warning, so hand the hooks a local alias instead.
	$self = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$self, &$text, &$this->mStripState ] );
	}

	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
851
/**
 * Recursive parser entry point that can be called from an extension tag
 * hook. Expands variables in the text and then unstrips the result.
 *
 * @param string $text Text to be expanded
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string
 * @since 1.19
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
866
/**
 * Process the wikitext for the "?preload=" feature. (T7210)
 *
 * "<noinclude>", "<includeonly>" etc. are parsed as for template
 * transclusion, comments, templates, arguments, tags hooks and parser
 * functions are untouched.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Parameters substituted into $text via RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	// Substitute the parameters first, treating the text as a raw message
	$rawMessage = new RawMessage( $text );
	$text = $rawMessage->params( $params )->plain();

	# Parser (re)initialisation
	// Held in a local so that it lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom, $expandFlags );

	return $this->mStripState->unstripBoth( $expanded );
}
894
/**
 * Set the current user.
 * Should only be used when doing pre-save transform.
 *
 * @param User|null $user User object, or null to reset
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
904
/**
 * Set the context title.
 *
 * A falsy value is replaced by the title "NO TITLE". The title is stored
 * without its fragment.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment() ? $t->createFragmentTarget( '' ) : $t;
}
922
/**
 * Get the context title, as stored by setTitle().
 *
 * @return Title|null
 */
public function getTitle() {
	return $this->mTitle;
}
931
/**
 * Combined getter/setter for the Title object.
 *
 * Delegates to wfSetVar() on $this->mTitle; note that, unlike setTitle(),
 * no fragment stripping is done here.
 *
 * @param Title|null $x Title object or null to just get the current one
 * @return Title
 */
public function Title( $x = null ) {
	return wfSetVar( $this->mTitle, $x );
}
941
/**
 * Set the output type.
 *
 * Also rebuilds $this->ot, a map of boolean flags ('html', 'wiki', 'pre',
 * 'plain') telling which of the OT_* constants the new type equals.
 *
 * @param int $ot New value
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$typesByKey = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $typesByKey as $key => $type ) {
		$flags[$key] = ( $ot == $type );
	}
	$this->ot = $flags;
}
957
/**
 * Combined getter/setter for the output type.
 *
 * Delegates to wfSetVar() on $this->mOutputType; the $this->ot flag map
 * maintained by setOutputType() is not touched here.
 *
 * @param int|null $x New value or null to just get the current one
 * @return int
 */
public function OutputType( $x = null ) {
	return wfSetVar( $this->mOutputType, $x );
}
967
/**
 * Get the ParserOutput object held in $this->mOutput.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
976
/**
 * Get the ParserOptions object held in $this->mOptions.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
985
/**
 * Combined getter/setter for the ParserOptions object.
 *
 * Delegates to wfSetVar() on $this->mOptions.
 *
 * @param ParserOptions|null $x New value or null to just get the current one
 * @return ParserOptions Current ParserOptions object
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
995
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
1002
/**
 * Set the value that the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1009
/**
 * Get a language object for use in parser functions such as {{FORMATNUM:}}.
 * This is simply the target language, see getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1017
/**
 * Get the target language for the content being parsed. This is usually the
 * language that the content is in.
 *
 * Resolution order: the target language set in the ParserOptions, then the
 * user language if parsing an interface message, then the page language of
 * the context title.
 *
 * @since 1.19
 *
 * @throws MWException If none of the options apply and no title is set
 * @return Language
 */
public function getTargetLanguage() {
	$explicit = $this->mOptions->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
1040
/**
 * Get the language object for language conversion.
 *
 * Plain alias of getTargetLanguage(); it returns (and throws) whatever
 * that method does.
 *
 * @deprecated since 1.32, just use getTargetLanguage()
 * @throws MWException
 * @return Language
 */
public function getConverterLanguage() {
	return $this->getTargetLanguage();
}
1049
/**
 * Get the user to parse for: the one set through setUser() if there is
 * one, otherwise the user of the ParserOptions object.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser !== null
		? $this->mUser
		: $this->mOptions->getUser();
}
1062
/**
 * Get the preprocessor object, creating it on first use.
 *
 * The class to instantiate is taken from the 'preprocessorClass' service
 * option; the instance is then cached in $this->mPreprocessor.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $preprocessorClass( $this );

	return $this->mPreprocessor;
}
1075
/**
 * Get a LinkRenderer instance to make links with.
 *
 * The renderer is created on first use, configured with the stub
 * threshold of the current ParserOptions, and then reused for every
 * later call.
 *
 * @since 1.28
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX We make the LinkRenderer with current options and then cache it forever
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$stubThreshold = $this->getOptions()->getStubThreshold();
	$this->mLinkRenderer->setStubThreshold( $stubThreshold );

	return $this->mLinkRenderer;
}
1093
/**
 * Get the MagicWordFactory that this Parser is using.
 *
 * @since 1.32
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1103
/**
 * Get the content language that this Parser is using.
 *
 * @since 1.32
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1113
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a random marker and returns the next text. The output
 * parameter $matches will be an associative array filled with data in
 * the form:
 *
 * @code
 *   'UNIQ-xxxxx' => [
 *     'element',
 *     'tag content',
 *     [ 'param' => 'x' ],
 *     '<element param="x">tag content</element>' ]
 * @endcode
 *
 * Element names are matched literally and case-insensitively. For a
 * self-closing tag the content is null; for a comment the element is '!--'.
 * A tag or comment without an end marker runs to the end of the text.
 *
 * @param array $elements List of element names. Comments are always extracted.
 * @param string $text Source text string.
 * @param array &$matches Out parameter, Array: extracted tags
 * @return string Stripped text
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	static $n = 1;
	$stripped = '';
	$matches = [];

	# Element names are literal strings, so escape them before putting them
	# into the pattern: a regex metacharacter or the "/" delimiter in a name
	# would otherwise corrupt the expression.
	$quotedNames = [];
	foreach ( $elements as $name ) {
		$quotedNames[] = preg_quote( (string)$name, '/' );
	}
	# With no element names an empty alternation would match "<>" or "< x>"
	# as a tag with an empty name; (?!) makes the tag branch never match
	# while keeping the capture group numbering intact.
	$taglist = $quotedNames ? implode( '|', $quotedNames ) : '(?!)';
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			# No further tag or comment
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment: groups 1-3 are empty, group 4 holds '!--'
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			list( , $element, $attributes, $close, $inside ) = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				# $element is the text as matched in the source, escape it too
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				list( , $tail, $text ) = $q;
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
1190
/**
 * Get the list of strippable XML-like elements held in $this->mStripList.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1199
/**
 * Get the StripState currently used by this parser.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1208
/**
 * Add an item to the strip state.
 *
 * Returns the unique marker which must be inserted into the stripped
 * text; the marker will be replaced with the original text in unstrip().
 * Each call consumes one value of the marker index counter.
 *
 * @param string $text
 *
 * @return string
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1224
/**
 * Parse the wiki syntax used to render tables.
 *
 * The text is handled line by line. Lines starting a table ("{|", with
 * optional leading ":" for indentation), ending one ("|}"), starting a row
 * ("|-") or holding cells ("|", "!", "|+") are converted to HTML; all other
 * lines are passed through. Per-table state is kept on parallel stacks so
 * that tables may be nested. Tables still open at the end of the text are
 * closed automatically.
 *
 * @private
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {
	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	# The *_history arrays are stacks with one entry per currently open table
	$td_history = []; # Is currently a td tag open?
	$last_tag_history = []; # Save history of last tag activated (td, th or caption)
	$tr_history = []; # Is currently a tr tag open?
	$tr_attributes = []; # history of tr attributes
	$has_opened_tr = []; # Did this table open a <tr> element?
	# NOTE(review): this is a single value shared by all nesting levels; a
	# nested table overwrites the indent level of its enclosing table.
	$indent_level = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$first_character = $line[0];
		$first_two = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
			array_push( $tr_history, false );
			array_push( $tr_attributes, '' );
			array_push( $has_opened_tr, false );
		} elseif ( count( $td_history ) == 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $first_two === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$last_tag = array_pop( $last_tag_history );

			if ( !array_pop( $has_opened_tr ) ) {
				# A table without any row gets an empty one
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop( $tr_attributes );
			if ( $indent_level > 0 ) {
				$outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
			} else {
				$outLine = $line;
			}
		} elseif ( $first_two === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			# The <tr> itself is only emitted once the first cell shows up,
			# so remember its attributes until then
			array_pop( $tr_attributes );
			array_push( $tr_attributes, $attributes );

			$line = '';
			$last_tag = array_pop( $last_tag_history );
			array_pop( $has_opened_tr );
			array_push( $has_opened_tr, true );

			if ( array_pop( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			$outLine = $line;
			array_push( $tr_history, false );
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
		} elseif ( $first_character === '|'
			|| $first_character === '!'
			|| $first_two === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $first_two === '|+' ) {
				$first_character = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $first_character === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $first_character !== '+' ) {
					# Cells (but not captions) live in a row; open one if
					# none is open, using any pending "|-" attributes
					$tr_after = array_pop( $tr_attributes );
					if ( !array_pop( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					array_push( $tr_history, true );
					array_push( $tr_attributes, '' );
					array_pop( $has_opened_tr );
					array_push( $has_opened_tr, true );
				}

				$last_tag = array_pop( $last_tag_history );

				if ( array_pop( $td_history ) ) {
					# Close the previously open cell first
					$previous = "</{$last_tag}>\n{$previous}";
				}

				if ( $first_character === '|' ) {
					$last_tag = 'td';
				} elseif ( $first_character === '!' ) {
					$last_tag = 'th';
				} elseif ( $first_character === '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				array_push( $last_tag_history, $last_tag );

				# A cell could contain both parameters and data
				$cell_data = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
					$cell = "{$previous}<{$last_tag}>" . trim( $cell );
				} elseif ( count( $cell_data ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
				} else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
				}

				$outLine .= $cell;
				array_push( $td_history, true );
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $td_history ) > 0 ) {
		# Close the cell with the tag that actually opened it (td, th or
		# caption), as the "|}" and "|-" branches above do
		$last_tag = array_pop( $last_tag_history );
		if ( array_pop( $td_history ) ) {
			$out .= "</{$last_tag}>\n";
		}
		if ( array_pop( $tr_history ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $has_opened_tr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1428
/**
 * Helper function for parse() that transforms wiki markup into half-parsed
 * HTML. Only called for $mOutputType == self::OT_HTML.
 *
 * The steps run in a fixed order: variable/template expansion, HTML
 * sanitizing, tables, horizontal rules, double underscores, headings,
 * internal links, quotes, external links, magic links and finally heading
 * formatting. Hooks are run before the first step, before sanitizing and
 * before link handling.
 *
 * @private
 *
 * @param string $text The text to parse
 * @param-taint $text escapes_html
 * @param bool $isMain Whether this is being called from the main parse() function
 * @param PPFrame|bool $frame A pre-processor frame
 *
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# if $frame is provided, then use $frame for replacing any variables.
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1504
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * Unstrips general markers, builds block level structure, replaces link
 * holders, optionally runs language conversion, unstrips nowiki markers,
 * replaces transparent tags and finally tidies the result. The
 * ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy hooks are only
 * run when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$patterns = array_keys( $tidyregs );
		$replacements = array_values( $tidyregs );
		$text = preg_replace( $patterns, $replacements, $text );
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1599
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * One regular expression finds existing anchors, other HTML elements,
 * free external links, "RFC n" / "PMID n" and ISBNs; each match is handed
 * to magicLinkCallback(), which decides what to emit.
 *
 * DML
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlCharClass = self::EXT_LINK_URL_CLASS;
	$address = self::EXT_LINK_ADDR;
	$sp = self::SPACE_NOT_NL; # non-newline space
	$spOrDash = "(?:-|$sp)"; # a dash or a non-newline space
	$spRun = "$sp++"; # possessive match of 1 or more spaces

	# The pattern uses the /x flag, so whitespace and "#" comments inside
	# it are not part of what is matched
	return preg_replace_callback(
		'!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
			(<.*?>) |                    # m[2]: Skip stuff inside HTML elements' . "
			(\b                          # m[3]: Free external links
				(?i:$protocols)
				($address$urlCharClass*) # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) $spRun        # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spRun (              # m[6]: ISBN, capture number
				(?: 97[89] $spOrDash? )?   # optional 13-digit ISBN prefix
				(?: [0-9] $spOrDash? ){9}  # 9 digits with opt. delimiters
				[0-9Xx]                  # check digit
			)\b
		)!xu",
		[ $this, 'magicLinkCallback' ],
		$text
	);
}
1636
/**
 * Callback for doMagicLinks(): turn one regex match into its replacement.
 *
 * Existing anchors and HTML elements are returned unchanged. Free URLs go
 * to makeFreeExternalLink(). RFC, PMID and ISBN references become links
 * (and add a tracking category) only if the matching ParserOptions switch
 * is on; otherwise the matched text is returned as is.
 *
 * @throws MWException If group 5 matched but the text starts with neither RFC nor PMID
 * @param array $m Match array; see the group numbering in doMagicLinks()
 * @return string HTML
 */
public function magicLinkCallback( $m ) {
	$matched = function ( $group ) use ( $m ) {
		return isset( $m[$group] ) && $m[$group] !== '';
	};

	if ( $matched( 1 ) || $matched( 2 ) ) {
		# Skip anchor / skip HTML element
		return $m[0];
	}

	if ( $matched( 3 ) ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	if ( $matched( 5 ) ) {
		# RFC or PMID
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			return $m[0];
		}

		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	if ( $matched( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		# ISBN
		$space = self::SPACE_NOT_NL; # non-newline space
		# Normalise every kind of matched space to a plain one for display
		$isbn = preg_replace( "/$space/", ' ', $m[6] );
		# The link target uses the bare number with an upper case check digit
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1704
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Text that should not belong to the URL (an escaped "<", ">" or
 * non-breaking space and anything after it, plus trailing punctuation) is
 * split off and appended after the link. If nothing but the protocol is
 * left, the input is returned unlinked. The URL is rendered as an external
 * image if maybeMakeExternalImage() accepts it, otherwise as a link which
 * is also registered in the parser output.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @private
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityMatch = [];
	$hasTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityMatch,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasTerminator ) {
		$cutAt = $entityMatch[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversedUrl = strrev( $url );
	$sepCount = strspn( $reversedUrl, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison
	if ( $sepCount && substr_compare( $url, ";", -$sepCount, 1 ) === 0 ) {
		# more optimization: instead of running preg_match with a $
		# anchor, which can be slow, do the match on the reversed
		# string starting at the desired offset.
		# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
		$entityTail = [];
		if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversedUrl, $entityTail, 0, $sepCount ) ) {
			$sepCount--;
		}
	}
	if ( $sepCount ) {
		$trail = substr( $url, -$sepCount ) . $trail;
		$url = substr( $url, 0, -$sepCount );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$label,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1779
/**
 * Parse headings ("== Title ==") and return HTML.
 *
 * Levels are tried from 6 down to 1 so that the longest run of equals
 * signs wins. A heading must fill a whole line; spaces and tabs around the
 * title are dropped.
 *
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$equals = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:$equals)[ \\t]*(.+?)[ \\t]*(?:$equals)\\s*$/m";
		$text = preg_replace( $pattern, "<h$level>\\1</h$level>", $text );
	}
	return $text;
}
1798
/**
 * Replace single quotes with HTML markup.
 *
 * The text is split into lines and each line is converted on its own by
 * doQuotes(), so bold/italic markup never spans a line break.
 *
 * @private
 *
 * @param string $text
 *
 * @return string The altered text
 */
public function doAllQuotes( $text ) {
	$converted = '';
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted .= $this->doQuotes( $line ) . "\n";
	}
	# Drop the newline that was appended after the last line
	return substr( $converted, 0, -1 );
}
1816
/**
 * Helper function for doAllQuotes(): convert the apostrophe markup of a
 * single line into <i> and <b> tags.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * Runs of four, or of more than five, apostrophes keep their surplus
 * leading apostrophes as text. Tags still open at the end of the line are
 * closed.
 *
 * @param string $text A single line of text
 *
 * @return string
 */
public function doQuotes( $text ) {
	# Even indexes of $arr hold plain text, odd indexes hold apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second to last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the open tags in the order they were opened ('', 'i',
	// 'b', 'bi', 'ib'); 'both' means a ''''' was seen whose tag order is
	// not decided yet, so the text after it is collected in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags.  Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against the empty string rather than testing truthiness:
	// buffered text of "0" is falsy in PHP and would otherwise be dropped.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1996
/**
 * Replace external links (REL)
 *
 * Splits the text with $this->mExtLinkBracketedRegex, turns every bracketed
 * external link into an HTML anchor and registers its URL in the parser
 * output.
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @private
 *
 * @param string $text
 *
 * @throws MWException If preg_split() fails on the bracketed link regex
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	// Everything before the first link is copied through untouched.
	$result = array_shift( $pieces );

	// The remaining pieces are consumed four at a time:
	// URL, protocol (unused here), link text, and the text following the link.
	$pieceCount = count( $pieces );
	for ( $pos = 0; $pos < $pieceCount; $pos += 4 ) {
		$url = $pieces[$pos];
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entity[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$dtrail = '';
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			# Link type is used for CSS
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$attribs = $this->getExternalLinkAttribs( $url );
		$anchor = Linker::makeExternalLink( $url, $label, false, $linktype, $attribs, $this->mTitle );
		$result .= $anchor . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $result;
}
2078
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled and neither the
 * namespace of $title nor the domain of $url is listed as an exception.
 *
 * @since 1.21
 * @param string|bool $url Optional URL, to extract the domain from for rel =>
 *  nofollow if appropriate
 * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
 * @return string|null Rel attribute for $url, or null if none applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	// Namespace exceptions can only apply when the page is known. A missing
	// title must not be looked up as `false`: in_array() compares loosely, so
	// `false` would match NS_MAIN (0) and wrongly exempt the link.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2098
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link. This currently may include rel => nofollow
 * (depending on configuration, namespace, and the URL's domain) and/or a
 * target attribute (depending on configuration).
 *
 * The 'rel' key is always present in the result; its value is null when no
 * rel token applies.
 *
 * @param string $url URL to extract the domain from for rel =>
 *  nofollow if appropriate
 * @return array Associative array of HTML attributes
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() yields null (not '') when there is no rel,
			// so check both to avoid producing a value with a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2129
/**
 * Replace unusual escape codes in a URL with their equivalent characters
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings.
 *
 * The URL is processed from the end: first the fragment, then the query,
 * then everything before them, each with its own set of characters that
 * must stay percent-encoded. A bracketed IPv6 host that IP::isValid()
 * accepts gets its literal brackets restored at the end.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	# Host characters: hex digits and colons, either literal or percent-encoded
	# (%30-%39 digits, %3A colon, %41-%46 / %61-%66 hex letters)
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-9A]|%[46][1-6])+)\\]';
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IP::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	} else {
		$isIPv6 = false;
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $m ) {
			return rawurlencode( $m[0] );
		},
		$url
	);

	# Characters that must stay percent-encoded in every part of the URL
	$alwaysUnsafe = '"#%<>[\]^`{|}';

	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	$start = strpos( $url, '#' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), $alwaysUnsafe ) . $ret;
		$end = $start;
	}

	# Query part - 'query' minus &=+;
	# (a '?' located inside the fragment is past $end and therefore ignored)
	$start = strpos( $url, '?' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), $alwaysUnsafe . '&=+;' ) . $ret;
		$end = $start;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), $alwaysUnsafe . '/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above, put them back
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2196
/**
 * Normalize the percent-escapes found in one component of a URL.
 *
 * An escape is replaced by the character it encodes when that character has
 * a byte value between 33 and 126 and is not listed in $unsafe. Every other
 * escape is kept, with its hex digits uppercased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$printable = $code > 32 && $code < 127;
			if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2211
/**
 * Make an image if it's allowed, either through the global
 * option, through the exception, or through the on-wiki whitelist
 *
 * @param string $url
 *
 * @return string|false Image HTML, or false if $url may not be rendered as an image
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# The option may hold either a single prefix or an array of prefixes;
	# the URL qualifies when it starts with any of them.
	$prefixMatched = false;
	if ( !empty( $allowedFrom ) ) {
		$prefixes = is_array( $allowedFrom ) ? $allowedFrom : [ $allowedFrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	# Fall back to the on-wiki whitelist: one regex fragment per line
	$entries = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $entries as $fragment ) {
		# Ignore blank entries/comments
		if ( $fragment === '' || strpos( $fragment, '#' ) === 0 ) {
			continue;
		}
		# Sanitize the regex fragment and make it case-insensitive
		$pattern = '/' . str_replace( '/', '\\/', $fragment ) . '/i';
		if ( preg_match( $pattern, $url ) ) {
			# Image matches a whitelist entry
			$html = Linker::makeExternalImage( $url );
			break;
		}
	}

	return $html;
}
2269
/**
 * Process [[ ]] wikilinks
 *
 * Delegates to replaceInternalLinks2() and merges the link holders it
 * returns into $this->mLinkHolders.
 *
 * @param string $s
 *
 * @return string Processed text
 *
 * @private
 */
public function replaceInternalLinks( $s ) {
	// $s is passed by reference and rewritten in place by the call below
	$newHolders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $newHolders );

	return $s;
}
2283
/**
 * Process [[ ]] wikilinks (RIL)
 *
 * The text is split on "[[" and each resulting piece is examined in turn.
 * Depending on what the link points to, the piece is either copied through
 * unchanged (invalid links), consumed with a side effect on the parser output
 * (language links, categories), rendered immediately (files, media, self
 * links, always-known titles) or replaced with a link holder.
 *
 * @param string &$s Text to process; replaced in place with the processed text
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray The link holders created while processing $s
 *
 * @private
 */
public function replaceInternalLinks2( &$s ) {
	# The regexes only depend on Title::legalChars(), so they are built once
	# and kept in static variables across calls.
	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $this->contLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		# Remember the prefix found in front of the first link; it is
		# applied on the first loop iteration below.
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			# Move the link prefix from the end of the text processed so far
			# into $prefix
			if ( preg_match( $e2, $s, $m ) ) {
				list( , $s, $prefix ) = $m;
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks,
			# the real problem is with the $e1 regex
			# See T1500.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See T4095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = ltrim( $m[1], ' ' );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		// \x7f isn't a default legal title char, so most likely strip
		// markers will force us into the "invalid form" path above. But,
		// just in case, let's assert that xmlish tags aren't valid in
		// the title position.
		$unstrip = $this->mStripState->killMarkers( $link );
		$noMarkers = ( $unstrip === $link );

		$nt = $noMarkers ? Title::newFromText( $link ) : null;
		if ( $nt === null ) {
			# Not a valid title; output the link markup unchanged
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		# A leading ':' forces an ordinary link (no special interwiki, file or
		# category handling below)
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			# No alternate text given; use the link target itself
			$text = $link;
			if ( !$noforce ) {
				# Strip off leading ':'
				$text = substr( $text, 1 );
			}
		} else {
			# T6598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
				)
			) {
				# T26502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				/**
				 * Strip the whitespace interwiki links produce, see T10897
				 */
				$s = rtrim( $s . $prefix ) . $trail; # T175416
				continue;
			}

			if ( $ns == NS_FILE ) {
				# Files reported as bad by the lookup are not rendered here;
				# they fall through to the generic link handling below.
				if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
					continue;
				}
			} elseif ( $ns == NS_CATEGORY ) {
				/**
				 * Strip the whitespace Category links produce, see T2087
				 */
				$s = rtrim( $s . $prefix ) . $trail; # T2087, T87753

				# The alternate text of a category link is its sort key
				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
2590
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 * The trail has to be split so that its leading part ends up inside the
 * link while the remainder is appended after the armored link HTML.
 *
 * @param Title $nt
 * @param string $text Link text; the prefixed title text is used when empty
 * @param string $trail
 * @param string $prefix
 * @return string HTML-wikitext mix oh yuck
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$remainder = $trailParts[1];

	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$renderer = $this->getLinkRenderer();
	$link = $renderer->makeKnownLink( $nt, new HtmlArmor( $prefix . $label . $inside ) );

	return $this->armorLinks( $link ) . $remainder;
}
2617
/**
 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
 * going to go through further parsing steps before inline URL expansion.
 *
 * Every URL protocol that starts at a word boundary gets the parser marker
 * prefix followed by "NOPARSE" put in front of it.
 *
 * Not needed quite as much as it used to be since free links are a bit
 * more sensible these days. But bracketed links are still an issue.
 *
 * @param string $text More-or-less HTML
 * @return string Less-or-more HTML with NOPARSE bits
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armored = self::MARKER_PREFIX . 'NOPARSE$1';

	return preg_replace( $protocolPattern, $armored, $text );
}
2632
/**
 * Return true if subpage links should be expanded on this page.
 *
 * @return bool
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages, so ask about the current title's one
	$currentNs = $this->mTitle->getNamespace();

	return $this->nsInfo->hasSubpages( $currentNs );
}
2641
/**
 * Handle link to subpage if necessary
 *
 * Thin wrapper around Linker::normalizeSubpageLink() using the title
 * currently being parsed as the context.
 *
 * @param string $target The source of the link
 * @param string &$text The link text, modified as necessary
 * @return string The full name of the link
 * @private
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$fullName = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );

	return $fullName;
}
2653
/**
 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
 *
 * Delegates all the work to BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Whether or not this is at the start of a line.
 * @private
 * @return string The lists rendered as HTML
 */
public function doBlockLevels( $text, $linestart ) {
	$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $rendered;
}
2665
2666 /**
2667 * Return value of a magic variable (like PAGENAME)
2668 *
2669 * @private
2670 *
2671 * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2672 * @param bool|PPFrame $frame
2673 *
2674 * @throws MWException
2675 * @return string
2676 */
2677 public function getVariableValue( $index, $frame = false ) {
2678 if ( is_null( $this->mTitle ) ) {
2679 // If no title set, bad things are going to happen
2680 // later. Title should always be set since this
2681 // should only be called in the middle of a parse
2682 // operation (but the unit-tests do funky stuff)
2683 throw new MWException( __METHOD__ . ' Should only be '
2684 . ' called while parsing (no title set)' );
2685 }
2686
2687 // Avoid PHP 7.1 warning from passing $this by reference
2688 $parser = $this;
2689
2690 /**
2691 * Some of these require message or data lookups and can be
2692 * expensive to check many times.
2693 */
2694 if (
2695 Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
2696 isset( $this->mVarCache[$index] )
2697 ) {
2698 return $this->mVarCache[$index];
2699 }
2700
2701 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2702 Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2703
2704 $pageLang = $this->getFunctionLang();
2705
2706 switch ( $index ) {
2707 case '!':
2708 $value = '|';
2709 break;
2710 case 'currentmonth':
2711 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
2712 break;
2713 case 'currentmonth1':
2714 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
2715 break;
2716 case 'currentmonthname':
2717 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2718 break;
2719 case 'currentmonthnamegen':
2720 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2721 break;
2722 case 'currentmonthabbrev':
2723 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2724 break;
2725 case 'currentday':
2726 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
2727 break;
2728 case 'currentday2':
2729 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
2730 break;
2731 case 'localmonth':
2732 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
2733 break;
2734 case 'localmonth1':
2735 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
2736 break;
2737 case 'localmonthname':
2738 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2739 break;
2740 case 'localmonthnamegen':
2741 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2742 break;
2743 case 'localmonthabbrev':
2744 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2745 break;
2746 case 'localday':
2747 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
2748 break;
2749 case 'localday2':
2750 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
2751 break;
2752 case 'pagename':
2753 $value = wfEscapeWikiText( $this->mTitle->getText() );
2754 break;
2755 case 'pagenamee':
2756 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2757 break;
2758 case 'fullpagename':
2759 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2760 break;
2761 case 'fullpagenamee':
2762 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2763 break;
2764 case 'subpagename':
2765 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2766 break;
2767 case 'subpagenamee':
2768 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2769 break;
2770 case 'rootpagename':
2771 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2772 break;
2773 case 'rootpagenamee':
2774 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2775 ' ',
2776 '_',
2777 $this->mTitle->getRootText()
2778 ) ) );
2779 break;
2780 case 'basepagename':
2781 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2782 break;
2783 case 'basepagenamee':
2784 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2785 ' ',
2786 '_',
2787 $this->mTitle->getBaseText()
2788 ) ) );
2789 break;
2790 case 'talkpagename':
2791 if ( $this->mTitle->canHaveTalkPage() ) {
2792 $talkPage = $this->mTitle->getTalkPage();
2793 $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2794 } else {
2795 $value = '';
2796 }
2797 break;
2798 case 'talkpagenamee':
2799 if ( $this->mTitle->canHaveTalkPage() ) {
2800 $talkPage = $this->mTitle->getTalkPage();
2801 $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2802 } else {
2803 $value = '';
2804 }
2805 break;
2806 case 'subjectpagename':
2807 $subjPage = $this->mTitle->getSubjectPage();
2808 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2809 break;
2810 case 'subjectpagenamee':
2811 $subjPage = $this->mTitle->getSubjectPage();
2812 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2813 break;
2814 case 'pageid': // requested in T25427
2815 # Inform the edit saving system that getting the canonical output
2816 # after page insertion requires a parse that used that exact page ID
2817 $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
2818 $value = $this->mTitle->getArticleID();
2819 if ( !$value ) {
2820 $value = $this->mOptions->getSpeculativePageId();
2821 if ( $value ) {
2822 $this->mOutput->setSpeculativePageIdUsed( $value );
2823 }
2824 }
2825 break;
2826 case 'revisionid':
2827 if (
2828 $this->svcOptions->get( 'MiserMode' ) &&
2829 !$this->mOptions->getInterfaceMessage() &&
2830 // @TODO: disallow this word on all namespaces
2831 $this->nsInfo->isContent( $this->mTitle->getNamespace() )
2832 ) {
2833 // Use a stub result instead of the actual revision ID in order to avoid
2834 // double parses on page save but still allow preview detection (T137900)
2835 if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
2836 $value = '-';
2837 } else {
2838 $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
2839 $value = '';
2840 }
2841 } else {
2842 # Inform the edit saving system that getting the canonical output after
2843 # revision insertion requires a parse that used that exact revision ID
2844 $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
2845 $value = $this->getRevisionId();
2846 if ( $value === 0 ) {
2847 $rev = $this->getRevisionObject();
2848 $value = $rev ? $rev->getId() : $value;
2849 }
2850 if ( !$value ) {
2851 $value = $this->mOptions->getSpeculativeRevId();
2852 if ( $value ) {
2853 $this->mOutput->setSpeculativeRevIdUsed( $value );
2854 }
2855 }
2856 }
2857 break;
2858 case 'revisionday':
2859 $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2860 break;
2861 case 'revisionday2':
2862 $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2863 break;
2864 case 'revisionmonth':
2865 $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2866 break;
2867 case 'revisionmonth1':
2868 $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2869 break;
2870 case 'revisionyear':
2871 $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
2872 break;
2873 case 'revisiontimestamp':
2874 $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
2875 break;
2876 case 'revisionuser':
2877 # Inform the edit saving system that getting the canonical output after
2878 # revision insertion requires a parse that used the actual user ID
2879 $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
2880 $value = $this->getRevisionUser();
2881 break;
2882 case 'revisionsize':
2883 $value = $this->getRevisionSize();
2884 break;
2885 case 'namespace':
2886 $value = str_replace( '_', ' ',
2887 $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2888 break;
2889 case 'namespacee':
2890 $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2891 break;
2892 case 'namespacenumber':
2893 $value = $this->mTitle->getNamespace();
2894 break;
2895 case 'talkspace':
2896 $value = $this->mTitle->canHaveTalkPage()
2897 ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2898 : '';
2899 break;
2900 case 'talkspacee':
2901 $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2902 break;
2903 case 'subjectspace':
2904 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2905 break;
2906 case 'subjectspacee':
2907 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2908 break;
2909 case 'currentdayname':
2910 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2911 break;
2912 case 'currentyear':
2913 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2914 break;
2915 case 'currenttime':
2916 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2917 break;
2918 case 'currenthour':
2919 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2920 break;
2921 case 'currentweek':
2922 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2923 # int to remove the padding
2924 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2925 break;
2926 case 'currentdow':
2927 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2928 break;
2929 case 'localdayname':
2930 $value = $pageLang->getWeekdayName(
2931 (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2932 );
2933 break;
2934 case 'localyear':
2935 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2936 break;
2937 case 'localtime':
2938 $value = $pageLang->time(
2939 MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2940 false,
2941 false
2942 );
2943 break;
2944 case 'localhour':
2945 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2946 break;
2947 case 'localweek':
2948 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2949 # int to remove the padding
2950 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2951 break;
2952 case 'localdow':
2953 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2954 break;
2955 case 'numberofarticles':
2956 $value = $pageLang->formatNum( SiteStats::articles() );
2957 break;
2958 case 'numberoffiles':
2959 $value = $pageLang->formatNum( SiteStats::images() );
2960 break;
2961 case 'numberofusers':
2962 $value = $pageLang->formatNum( SiteStats::users() );
2963 break;
2964 case 'numberofactiveusers':
2965 $value = $pageLang->formatNum( SiteStats::activeUsers() );
2966 break;
2967 case 'numberofpages':
2968 $value = $pageLang->formatNum( SiteStats::pages() );
2969 break;
2970 case 'numberofadmins':
2971 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2972 break;
2973 case 'numberofedits':
2974 $value = $pageLang->formatNum( SiteStats::edits() );
2975 break;
2976 case 'currenttimestamp':
2977 $value = wfTimestamp( TS_MW, $ts );
2978 break;
2979 case 'localtimestamp':
2980 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2981 break;
2982 case 'currentversion':
2983 $value = SpecialVersion::getVersion();
2984 break;
2985 case 'articlepath':
2986 return $this->svcOptions->get( 'ArticlePath' );
2987 case 'sitename':
2988 return $this->svcOptions->get( 'Sitename' );
2989 case 'server':
2990 return $this->svcOptions->get( 'Server' );
2991 case 'servername':
2992 return $this->svcOptions->get( 'ServerName' );
2993 case 'scriptpath':
2994 return $this->svcOptions->get( 'ScriptPath' );
2995 case 'stylepath':
2996 return $this->svcOptions->get( 'StylePath' );
2997 case 'directionmark':
2998 return $pageLang->getDirMark();
2999 case 'contentlanguage':
3000 return $this->svcOptions->get( 'LanguageCode' );
3001 case 'pagelanguage':
3002 $value = $pageLang->getCode();
3003 break;
3004 case 'cascadingsources':
3005 $value = CoreParserFunctions::cascadingsources( $this );
3006 break;
3007 default:
3008 $ret = null;
3009 Hooks::run(
3010 'ParserGetVariableValueSwitch',
3011 [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
3012 );
3013
3014 return $ret;
3015 }
3016
3017 if ( $index ) {
3018 $this->mVarCache[$index] = $value;
3019 }
3020
3021 return $value;
3022 }
3023
3024 /**
3025 * @param int $start
3026 * @param int $len
3027 * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
3028 * @param string $variable Parser variable name
3029 * @return string
3030 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	// Slice of the timestamp that this parse is rendering with
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	// A revision object is already associated with this parse: no vary flag is considered
	if ( $this->getRevisionObject() ) {
		return $current;
	}

	// Same slice, taken from the adjusted timestamp $mtts seconds from now.
	// "Now" is the wall clock here, not the timestamp carried by the parser options.
	$futureStamp = wfTimestamp( TS_MW, time() + $mtts );
	$adjustedFuture = $this->contLang->userAdjust( $futureStamp, '' );
	$projected = substr( $adjustedFuture, $start, $len );

	if ( $projected !== $current ) {
		// The rendered value could differ once the revision is actually inserted, so
		// tell the edit saving system that canonical output needs the real timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $current;
}
3055
3056 /**
3057 * initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers
3058 *
3059 * @private
3060 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	// Collect both ID lists before building either magic word array
	$varIds = $factory->getVariableIDs();
	$substIds = $factory->getSubstIDs();

	// Matcher for variable magic words, stored for later lookups
	$this->mVariables = $factory->newArray( $varIds );
	// Matcher for the substitution modifier magic words
	$this->mSubstWords = $factory->newArray( $substIds );
}
3068
3069 /**
3070 * Preprocess some wikitext and return the document tree.
3071 * This is the ghost of replace_variables().
3072 *
3073 * @param string $text The text to parse
3074 * @param int $flags Bitwise combination of:
3075 * - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
3076 * included. Default is to assume a direct page view.
3077 *
3078 * The generated DOM tree must depend only on the input text and the flags.
3079 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
3080 *
3081 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
3082 * change in the DOM tree for a given text, must be passed through the section identifier
3083 * in the section edit link and thus back to extractSections().
3084 *
3085 * The output of this function is currently only cached in process memory, but a persistent
3086 * cache may be implemented at a later date which takes further advantage of these strict
3087 * dependency requirements.
3088 *
3089 * @return PPNode
3090 */
public function preprocessToDom( $text, $flags = 0 ) {
	// Pure delegation: the preprocessor builds the tree from the text and flags
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3095
3096 /**
3097 * Return a three-element array: leading whitespace, string contents, trailing whitespace
3098 *
3099 * @param string $s
3100 *
3101 * @return array
3102 */
public static function splitWhitespace( $s ) {
	// Strip the left side first; the length difference is the leading whitespace run
	$core = ltrim( $s );
	$leadLen = strlen( $s ) - strlen( $core );
	$leading = substr( $s, 0, $leadLen );

	// Then strip the right side of what remains
	$body = rtrim( $core );
	$trailLen = strlen( $core ) - strlen( $body );

	// substr() with a negative zero offset would return the whole string, hence the guard
	$trailing = $trailLen > 0 ? substr( $core, -$trailLen ) : '';

	return [ $leading, $body, $trailing ];
}
3115
3116 /**
3117 * Replace magic variables, templates, and template arguments
3118 * with the appropriate text. Templates are substituted recursively,
3119 * taking care to avoid infinite loops.
3120 *
3121 * Note that the substitution depends on value of $mOutputType:
3122 * self::OT_WIKI: only {{subst:}} templates
3123 * self::OT_PREPROCESS: templates but not extension tags
3124 * self::OT_HTML: all templates and extension tags
3125 *
3126 * @param string $text The text to transform
3127 * @param false|PPFrame|array $frame Object describing the arguments passed to the
3128 * template. Arguments may also be provided as an associative array, as
3129 * was the usual case before MW1.12. Providing arguments this way may be
3130 * useful for extensions wishing to perform variable replacement
3131 * explicitly.
3132 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
3133 * double-brace expansion.
3134 * @return string
3135 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	$length = strlen( $text );

	// Nothing to do for empty text
	if ( $length < 1 ) {
		return $text;
	}
	// Oversized input is returned untouched rather than expanded
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		// No frame supplied: start from a fresh one
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		// Anything else that is not a PPFrame is handed to newCustomFrame()
		$this->logger->debug(
			__METHOD__ . " called using plain parameters instead of " .
			"a PPFrame instance. Creating custom frame."
		);
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	// NO_TEMPLATES restricts expansion to triple-brace arguments
	return $frame->expand(
		$this->preprocessToDom( $text ),
		$argsOnly ? PPFrame::NO_TEMPLATES : 0
	);
}
3159
3160 /**
3161 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
3162 *
3163 * @param array $args
3164 *
3165 * @return array
3166 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	// Next numeric key for arguments that have no "name=" part (1-based)
	$index = 1;

	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			// Positional argument: stored as-is, without trimming
			$assocArgs[$index++] = $arg;
			continue;
		}

		// Named argument: split on the first '=' and trim both sides.
		// trim() always returns a string, so no false-checks are needed here;
		// an empty name or value is simply stored as ''.
		$name = trim( substr( $arg, 0, $eqpos ) );
		$value = trim( substr( $arg, $eqpos + 1 ) );
		$assocArgs[$name] = $value;
	}

	return $assocArgs;
}
3188
3189 /**
3190 * Warn the user when a parser limitation is reached
 * Will warn the user at most once per limitation type
3192 *
3193 * The results are shown during preview and run through the Parser (See EditPage.php)
3194 *
3195 * @param string $limitationType Should be one of:
3196 * 'expensive-parserfunction' (corresponding messages:
3197 * 'expensive-parserfunction-warning',
3198 * 'expensive-parserfunction-category')
3199 * 'post-expand-template-argument' (corresponding messages:
3200 * 'post-expand-template-argument-warning',
3201 * 'post-expand-template-argument-category')
3202 * 'post-expand-template-inclusion' (corresponding messages:
3203 * 'post-expand-template-inclusion-warning',
3204 * 'post-expand-template-inclusion-category')
3205 * 'node-count-exceeded' (corresponding messages:
3206 * 'node-count-exceeded-warning',
3207 * 'node-count-exceeded-category')
3208 * 'expansion-depth-exceeded' (corresponding messages:
3209 * 'expansion-depth-exceeded-warning',
3210 * 'expansion-depth-exceeded-category')
3211 * @param string|int|null $current Current value
3212 * @param string|int|null $max Maximum allowed, when an explicit limit has been
3213 * exceeded, provide the values (optional)
3214 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	// $current and $max are always passed along; a message that does not use them
	// is unaffected. The user language is deliberately not applied: the warning is
	// only shown during preview and doing so would split the parser cache needlessly.
	$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );
	$warningText = $message->text();

	$this->mOutput->addWarning( $warningText );
	$this->addTrackingCategory( "$limitationType-category" );
}
3224
3225 /**
3226 * Return the text of a template, after recursively
3227 * replacing any variables or templates within the template.
3228 *
3229 * @param array $piece The parts of the template
3230 * $piece['title']: the title, i.e. the part before the |
3231 * $piece['parts']: the parameter array
3232 * $piece['lineStart']: whether the brace was at the start of a line
3233 * @param PPFrame $frame The current frame, contains template arguments
3234 * @throws Exception
3235 * @return string|array The text of the template
3236 */
public function braceSubstitution( $piece, $frame ) {
	// Overview of the stages below, each skipped once $found is set:
	//   1. subst:/safesubst: handling
	//   2. magic variables (only when there are no arguments)
	//   3. msg:/msgnw:/raw: prefixes
	//   4. parser functions ("name:arg")
	//   5. title resolution and recursion depth check
	//   6. loading the template (special page, local page, interwiki) and loop check
	// followed by expansion of DOM results and post-processing of the text.

	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			# The text after the colon is the first function argument
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					// Only arguments with an empty 'index' (i.e. named ones) are forwarded
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string rather than modifying its
			# subject, so the result must be assigned back; otherwise a title
			# that already starts with ':' would yield "[[::...]]".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	# 'object' marks a result still to be expanded in the current frame,
	# 'text' a finished string
	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3569
3570 /**
3571 * Call a parser function and return an array with text and flags.
3572 *
3573 * The returned array will always contain a boolean 'found', indicating
3574 * whether the parser function was found or not. It may also contain the
3575 * following:
3576 * text: string|object, resulting wikitext or PP DOM object
3577 * isHTML: bool, $text is HTML, armour it against wikitext transformation
3578 * isChildObj: bool, $text is a DOM node needing expansion in a child frame
3579 * isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3580 * nowiki: bool, wiki markup in $text should be escaped
3581 *
3582 * @since 1.21
3583 * @param PPFrame $frame The current frame, contains template arguments
3584 * @param string $function Function name
3585 * @param array $args Arguments to the function
3586 * @throws MWException
3587 * @return array
3588 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Resolve the function name to its registered ID.
	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			# Unknown name: not a parser function
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				# Go through the getPreprocessor() accessor, as the other methods of
				# this class do, rather than reading the mPreprocessor property directly
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				# Non-integer (or negative) key: pass it as "name=value" text
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = $callback( ...$allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only adds 'found' when the hook did not set it itself
		$result += [
			'found' => true,
		];
	}

	# Hooks may request that their output is preprocessed by setting 'noparse' => false
	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3672
3673 /**
3674 * Get the semi-parsed DOM representation of a template with a given title,
3675 * and its redirect destination title. Cached.
3676 *
3677 * @param Title $title
3678 *
3679 * @return array
3680 */
public function getTemplateDom( $title ) {
	// Remember what the caller asked for; $title may be swapped for a redirect target
	$requested = $title;
	$key = $title->getPrefixedDBkey();

	// Follow a previously recorded redirect for this title, if any
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		$target = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $target[0], $target[1] );
		$key = $title->getPrefixedDBkey();
	}

	// Cached result (a stored false, meaning "no text", also counts as set)
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	$fetched = $this->fetchTemplateAndTitle( $title );
	$wikitext = $fetched[0];
	$title = $fetched[1];

	if ( $wikitext === false ) {
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $wikitext, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $dom;

	// The fetch ended on a different title: record the mapping for later lookups
	if ( !$title->equals( $requested ) ) {
		$this->mTplRedirCache[$requested->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3712
3713 /**
3714 * Fetch the current revision of a given title. Note that the revision
3715 * (and even the title) may not exist in the database, so everything
3716 * contributing to the output of the parser should use this method
3717 * where possible, rather than getting the revisions themselves. This
3718 * method also caches its results, so using it benefits performance.
3719 *
3720 * @since 1.24
3721 * @param Title $title
3722 * @return Revision
3723 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$key = $title->getPrefixedDBkey();

	// Create the LRU cache on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}

	if ( $this->currentRevisionCache->has( $key ) ) {
		return $this->currentRevisionCache->get( $key );
	}

	// Not cached yet: ask the configured callback
	// (defaults to Parser::statelessFetchRevision()) and remember its answer
	$this->currentRevisionCache->set(
		$key,
		call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
	);

	return $this->currentRevisionCache->get( $key );
}
3737
3738 /**
3739 * @param Title $title
3740 * @return bool
3741 * @since 1.34
3742 */
public function isCurrentRevisionOfTitleCached( $title ) {
	// The key must be the one fetchCurrentRevisionOfTitle() stores under, which is
	// the prefixed DB key. The prefixed text form uses spaces instead of underscores,
	// so it would never match for titles containing spaces.
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $title->getPrefixedDBkey() )
	);
}
3749
3750 /**
 * Wrapper around Revision::newKnownCurrent to allow passing additional parameters
 * without passing them on to it.
3753 *
3754 * @since 1.24
3755 * @param Title $title
3756 * @param Parser|bool $parser
3757 * @return Revision|bool False if missing
3758 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	// $parser is accepted for callback compatibility but is not used here
	return Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
}
3764
3765 /**
3766 * Fetch the unparsed text of a template and register a reference to it.
3767 * @param Title $title
3768 * @return array ( string or false, Title )
3769 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetch = $this->mOptions->getTemplateCallback();
	$result = call_user_func( $fetch, $title, $this );

	$revision = $result['revision'] ?? null;

	$text = $result['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text,
		// so any occurrence in fetched text is replaced
		$text = strtr( $text, "\x7f", "?" );
	}

	$finalTitle = $result['finalTitle'] ?? $title;

	// Register every dependency reported by the callback on the output
	$deps = $result['deps'] ?? [];
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		if ( $dep['title']->equals( $this->getTitle() ) && $revision instanceof Revision ) {
			// Self-transclusion; final result may change based on the new page version
			$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
			$this->getOutput()->setRevisionUsedSha1Base36( $revision->getSha1() );
		}
	}

	return [ $text, $finalTitle ];
}
3792
3793 /**
3794 * Fetch the unparsed text of a template and register a reference to it.
3795 * @param Title $title
3796 * @return string|bool
3797 */
public function fetchTemplate( $title ) {
	// Only the text is wanted; the resolved title is discarded
	list( $text ) = $this->fetchTemplateAndTitle( $title );
	return $text;
}
3801
/**
 * Static function to get a template
 * Can be overridden via ParserOptions::setTemplateCallback().
 *
 * The requested title is looked up and, if its content is a redirect, the
 * redirect target is looked up once as well. Every title visited is added
 * to the dependency list.
 *
 * @param Title $title
 * @param bool|Parser $parser
 *
 * @return array Associative array with the keys:
 *   - 'revision': the last revision object fetched, or null
 *   - 'text': the template text, or false
 *   - 'finalTitle': the title the text was taken from
 *   - 'deps': list of [ 'title', 'page_id', 'rev_id' ] dependency records
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];
	$rev = null;

	// At most two passes: the requested title, then the target of one redirect
	$pass = 0;
	while ( $pass < 2 && is_object( $title ) ) {
		$pass++;

		// Give extensions a chance to select the revision instead
		$id = false; // Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			// A hook vetoed the fetch: record the dependency without a revision
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}

		// Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$revId = $rev ? $rev->getId() : 0;

		// If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			MediaWikiServices::getInstance()->getLinkCache()->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revId
		];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			// We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $revId
			];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			if ( $content ) {
				$text = $content->getWikitextForTransclusion();
			} else {
				$text = null;
			}

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ] );

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			// No revision, but the title may still name an interface message
			$contLang = MediaWikiServices::getInstance()->getContentLanguage();
			$message = wfMessage( $contLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		// Redirect? If so the next pass fetches the target.
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		'revision' => $rev,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3899
/**
 * Fetch a file and its title and register a reference to it.
 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
 *
 * The requested title is always registered on the parser output; when the
 * file found carries a different title, that title is registered too and
 * returned in place of the requested one.
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return array ( File or false, Title of file )
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	// Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		// Update fetched file title
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3921
/**
 * Helper function for fetchFileAndTitle.
 *
 * Also useful if you need to fetch a file but not use it yet,
 * for example to get the file's handler.
 *
 * Lookup strategy, in order:
 *  - 'broken' set in $options: no lookup, false is returned
 *  - 'sha1' set in $options: lookup by (sha1,timestamp)
 *  - otherwise: lookup by (name,timestamp)
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	// Both lookups go through the service container's repo group, so the
	// two branches cannot end up talking to different instances.
	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3942
/**
 * Transclude an interwiki link.
 *
 * The remote page is fetched over HTTP and the response is cached in the
 * main WAN object cache. When the feature is disabled, the URL is too long,
 * or the fetch fails, a localized message (content language) is returned
 * in place of the page text.
 *
 * @param Title $title
 * @param string $action Usually one of (raw, render)
 *
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	// Captured here because __METHOD__ would name the closure inside the callback
	$caller = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);

	$fetchRemote = function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$request = MWHttpRequest::factory( $url, [], $caller );

		$status = $request->execute(); // Status object
		$succeeded = $status->isOK();
		if ( !$succeeded ) {
			// Do not cache failed fetches
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $request->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag; shorten the cache lifetime
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $succeeded ? $request->getContent() : null,
			'code' => $request->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( 'TranscludeCacheExpiry' ),
		$fetchRemote,
		$cacheOptions
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
4009
/**
 * Triple brace replacement -- used for template arguments
 * @private
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param PPFrame $frame
 *
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$argName = trim( $nameWithSpaces );

	$error = false;
	$object = false;
	$text = $frame->getArgument( $argName );

	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			// No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	// Account for the size of the argument text
	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$error = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		// No match anywhere: rebuild the original triple-brace construct
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $error !== false ) {
		$text .= $error;
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
4055
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     close      PPNode for the closing tag text; treated as empty when
 *                null or absent
 *     noClose    Original text did not have a close tag (not read by this method)
 * @param PPFrame $frame
 *
 * @throws MWException If a tag hook returns an unknown 'markerType'
 * @return string Either a strip marker, the raw output (marker type 'none'),
 *   or an error string produced while expanding the name, attributes or close tag
 */
public function extensionSubstitution( $params, $frame ) {
	// Prefix that identifies an expansion error; the length is derived so the
	// two can never drift apart.
	$errorStr = '<span class="error">';
	$errorLen = strlen( $errorStr );

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}

	if ( $this->ot['html'] || $isFunctionTag ) {
		// Run the registered hook for the tag
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Not rendering: rebuild the tag as text
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// isset() covers both a null and a missing 'close' entry, matching
			// how 'attr' and 'inner' are read above
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4165
/**
 * Increment an include size counter
 *
 * The counter is only updated when the new total stays within the maximum
 * include size from the parser options.
 *
 * @param string $type The type of expansion
 * @param int $size The size of the text
 * @return bool False if this inclusion would take it over the maximum, true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		// Over the limit: leave the counter untouched
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4181
/**
 * Increment the expensive function count
 *
 * The counter is incremented even when the limit is already exceeded.
 *
 * @return bool False if the limit has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$limit = $this->mOptions->getExpensiveParserFunctionLimit();

	return ++$this->mExpensiveFunctionCount <= $limit;
}
4191
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
 * Fills $this->mDoubleUnderscores, returns the modified text
 *
 * The first __TOC__ is replaced by a placeholder comment and any further
 * ones are removed. Every double underscore found is also stored as a
 * property on the parser output.
 *
 * @param string $text
 *
 * @return string
 */
public function doDoubleUnderscore( $text ) {
	// The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		// Set a placeholder. At the end we'll fill it in with the TOC.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		// Only keep the first one.
		$text = $tocWord->replace( '', $text );
	}

	// Now match and remove the rest of them
	$doubleUnderscoreWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $doubleUnderscoreWords->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}

	// __NOTOC__ loses against an explicit __TOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	// (T10068) Allow control over whether robots index a page.
	// __INDEX__ always overrides __NOINDEX__, see T16899; this is why the
	// 'index' check comes second.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	// Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setProperty( $key, '' );
	}

	return $text;
}
4247
/**
 * Add a tracking category for the title currently being parsed.
 *
 * @see ParserOutput::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 */
public function addTrackingCategory( $msg ) {
	$added = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $added;
}
4256
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Side effects visible in this method: the output type is switched to OT_WIKI while
 * the headlines are processed and restored afterwards; the TOC HTML is stored on the
 * parser output when a TOC is shown; when $isMain is true the collected section data
 * is stored on the parser output; the 'ParserSectionCreate' hook runs once per section.
 *
 * @param string $text
 * @param string $origText Original, untouched wikitext
 * @param bool $isMain
 * @return mixed|string
 * @private
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Anchors already handed out, keyed by lower-cased id
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			# The heading carries a marker: look up where it came from and strip the marker
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback id when there is one. Writing $refers[false]
		# would store key 0 and make a later heading whose anchor is "0" look
		# like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		/**
		 * Send a hook, one per section.
		 * The idea here is to be able to make section-level DIVs, but to do so in a
		 * lower-impact, more correct way than r50769
		 *
		 * $this : caller
		 * $section : the section number
		 * &$sectionContent : ref to the content of the section
		 * $maybeShowEditLinks : boolean describing whether this section has an edit link
		 */
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		// Fill in the placeholder that marks the forced TOC position
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4661
/**
 * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
 * conversion, substituting signatures, {{subst:}} templates, etc.
 *
 * The user is set on the parser for the duration of the transform and
 * reset to null before returning.
 *
 * @param string $text The text to transform
 * @param Title $title The Title object for the current article
 * @param User $user The User object describing the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	// The lock result must stay referenced until this method returns;
	// presumably the lock is released when the variable goes out of scope.
	$lockScope = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174)
	$withoutNulls = str_replace( "\000", '', $text );

	// We still normalize line endings for backwards-compatibility
	// with other code that just calls PST, but this should already
	// be handled in TextContent subclasses
	$text = TextContent::normalizeLineEndings( $withoutNulls );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}
	$transformed = $this->mStripState->unstripBoth( $text );

	// Reset
	$this->setUser( null );

	return $transformed;
}
4699
/**
 * Pre-save transform helper function
 *
 * Performs, in order: variable replacement ({{subst:}}), signature
 * expansion (~~~, ~~~~, ~~~~~) and the "pipe trick" link expansions.
 *
 * @param string $text
 * @param User $user
 *
 * @return string
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Full-width (double-width) punctuation, spelled as UTF-8 bytes so the
	# characters cannot be silently folded to their ASCII look-alikes:
	# U+FF08 FULLWIDTH LEFT PARENTHESIS, U+FF09 FULLWIDTH RIGHT PARENTHESIS,
	# U+FF0C FULLWIDTH COMMA
	$fwOpen = "\xEF\xBC\x88";
	$fwClose = "\xEF\xBC\x89";
	$fwComma = "\xEF\xBC\x8C";

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?{$fwOpen}$tc+{$fwClose})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |{$fwComma})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow the namespace and the "(context)" or
	# ", context" part from the title of the page being saved
	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4769
/**
 * Produce the wikitext signature for a user.
 *
 * The nickname and the "fancy signature" flag are read from the user's
 * options unless the caller supplies them. A fancy signature that validates
 * is cleaned and returned as-is; in every other case the result is the
 * 'signature' / 'signature-anon' message built from the user name and
 * nickname.
 *
 * Do not reuse this parser instance after calling getUserSig(),
 * as it may have changed.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use or false to use user's default nickname
 * @param bool|null $fancySig whether the nickname is the complete signature
 *  or null to use default value
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever the caller did not provide from the user object.
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison: an empty or unset nickname falls back to the user name.
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$tooLong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
	if ( $tooLong ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to validate first.
		if ( $this->validateSig( $nickname ) === false ) {
			# Invalid markup: use the default signature instead.
			$nickname = $username;
			$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
		} else {
			# Valid: clean it up (if needed) and hand it back directly.
			return $this->cleanSig( $nickname, true );
		}
	}

	# Plain nickname from here on; it must not itself contain signature tildes.
	$nickname = self::cleanSigInSig( $nickname );

	# Build the default signature, a link to the user page.
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
		->title( $this->getTitle() )->text();
}
4824
/**
 * Accept a signature only if it is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|bool The text unchanged when it is well-formed, false otherwise.
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4834
/**
 * Clean up signature text
 *
 * 1) Turn every transclusion that is not already a subst into one
 * 2) Strip 3, 4 or 5 tildes out of the signature @see cleanSigInSig
 * 3) Expand the result through the preprocessor
 *
 * @param string $text
 * @param bool $parsing Whether we're cleaning (preferences save) or parsing
 * @return string Signature text
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		# Not called from inside a parse: set up parser state ourselves. The
		# lock handle is kept in a local so it lives until the method returns.
		global $wgTitle;
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches "{{" unless it is already followed by a subst keyword.
	$notSubstPattern = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substPrefix = '{{' . $substWord->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $notSubstPattern, $substPrefix, $text ) );

	$dom = $this->preprocessToDom( $text );
	$frame = $this->getPreprocessor()->newFrame();
	$expanded = $frame->expand( $dom );

	# Strip markers are only resolved here when we own the parser state.
	return $standalone ? $this->mStripState->unstripBoth( $expanded ) : $expanded;
}
4875
/**
 * Remove signature markers (runs of 3, 4 or 5 tildes) from a signature.
 *
 * Matching is greedy, so a run longer than five tildes loses its first five
 * and whatever remains is matched again.
 *
 * @param string $text
 * @return string Signature text with /~{3,5}/ removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4886
/**
 * Prepare the parser the way parse() normally would, so that external code
 * can call individual class members with confidence.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState
 * @param int|null $revId Revision ID to record; left untouched when null
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4905
/**
 * Install the title, options and output type for a parse, optionally
 * resetting the parser state afterwards.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to call clearState() once everything is set
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4922
/**
 * Wrapper for preprocess()
 *
 * A static flag guards against re-entrancy: a call made while another
 * transformMsg() is still running returns $text unchanged.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options
 * @param Title|null $title Title object or null to use $wgTitle
 * @return string
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Always release the guard. Without this an exception thrown by
		# preprocess() would leave the flag set and every later call in the
		# same request would return its input untransformed.
		$executing = false;
	}
}
4950
/**
 * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
 * The callback should have the following form:
 *    function myParserHook( $text, $params, $parser, $frame ) { ... }
 *
 * Transform and return $text. Use $parser for any required context, e.g. use
 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
 *
 * Hooks may return extended information by returning an array, of which the
 * first numbered element (index 0) must be the return string, and all other
 * entries are extracted into local variables within an internal function
 * in the Parser class.
 *
 * This interface (introduced r61913) appears to be undocumented, but
 * 'markerType' is used by some core tag hooks to override which strip
 * array their results are placed in. **Use great caution if attempting
 * this interface, as it is not documented and injudicious use could smash
 * private variables.**
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"; it is lower-cased
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The old value of the mTagHooks array associated with the hook
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: with loose in_array() numeric-looking tag names such
	# as '1e1' and '10' compare equal, so one of them could be left off the list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4988
/**
 * As setHook(), but letting the contents be parsed.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * This is probably obsoleted by things dealing with parser frames?
 * The only extension currently using it is geoserver.
 *
 * @since 1.10
 * @todo better document or deprecate this
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"; it is lower-cased
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The old value of the mTransparentTagHooks array associated with the hook
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Name the method that was actually called, so the message can be grepped.
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5016
/**
 * Drop every registered tag hook and function tag hook, and put the strip
 * list back to its default contents.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5025
/**
 * Create a function, e.g. {{sum:1|2|3}}
 * The callback function should have the form:
 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
 *
 * Or with Parser::SFH_OBJECT_ARGS:
 *    function myParserFunction( $parser, $frame, $args ) { ... }
 *
 * The callback may either return the text result of the function, or an array with the text
 * in element 0, and a number of flags in the other elements. The names of the flags are
 * specified in the keys. Valid flags are:
 *   found     The text returned is valid, stop processing the template. This
 *             is on by default.
 *   nowiki    Wiki markup in the return value should be escaped
 *   isHTML    The returned text is HTML, armour it against wikitext transformation
 *
 * @param string $id The magic word ID
 * @param callable $callback The callback function (and object) to use
 * @param int $flags A combination of the following flags:
 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
 *
 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
 *     the arguments, and to control the way they are expanded.
 *
 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
 *     arguments, for instance:
 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
 *
 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
 *     working if/when this is changed.
 *
 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
 *     expansion.
 *
 *     Please read the documentation in includes/parser/Preprocessor.php for more information
 *     about the methods available in PPFrame and PPNode.
 *
 * @throws MWException
 * @return string|callable The old callback function for this name, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	# Remember the callback being replaced (if any) before overwriting it.
	$previous = null;
	if ( isset( $this->mFunctionHooks[$id] ) ) {
		$previous = $this->mFunctionHooks[$id][0];
	}
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Register every synonym of the magic word in the function cache.
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 0 or 1; doubles as the first-level key of mFunctionSynonyms.
	$caseKey = intval( $magicWord->isCaseSensitive() );

	foreach ( $synonyms as $synonym ) {
		# Case-insensitive words are stored lower-cased.
		$name = $caseKey ? $synonym : $this->contLang->lc( $synonym );

		# Leading hash, unless the caller opted out.
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$name = '#' . $name;
		}

		# A trailing colon is not part of the lookup key.
		if ( substr( $name, -1, 1 ) === ':' ) {
			$name = substr( $name, 0, -1 );
		}

		$this->mFunctionSynonyms[$caseKey][$name] = $id;
	}

	return $previous;
}
5099
/**
 * List the identifiers of all registered function hooks.
 *
 * firstCallInit() is run first.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$this->firstCallInit();

	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
5109
/**
 * Create a tag function, e.g. "<test>some stuff</test>".
 * Unlike tag hooks, tag functions are parsed at preprocessor level.
 * Unlike parser functions, their content is not preprocessed.
 *
 * @param string $tag The tag name; it is lower-cased before being registered
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered for
 *  this tag, or null if there was none
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: with loose in_array() numeric-looking tag names such
	# as '1e1' and '10' compare equal, so one of them could be left off the list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5134
/**
 * Swap the "<!--LINK-->" link placeholders in the buffer for actual links.
 * Placeholders created in Linker::link()
 *
 * The work is delegated to the link holder array, which edits $text in place.
 *
 * @param string &$text Buffer to update
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$this->mLinkHolders->replace( $text );
}
5145
/**
 * Swap the "<!--LINK-->" link placeholders for the plain text of the links
 * (not HTML-formatted).
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$plain = $this->mLinkHolders->replaceText( $text );
	return $plain;
}
5156
/**
 * Renders an image gallery from a text with one line per image.
 * text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and
 * 'A tree'.
 *
 * Recognised keys in $params that are read here: 'mode', 'showfilename',
 * 'caption', 'perrow', 'widths' and 'heights'. The whole array is also
 * handed to the gallery as attributes and additional options.
 *
 * @param string $text
 * @param array $params
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	// File names are hidden by default (see above); only switch them on
	// when the attribute is present.
	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	}
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) === 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe must be escaped: unescaped it is a regex
							// alternation, and the pattern would then match any
							// value that merely starts with "-{R" or ends with "}-".
							if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" (4 bytes) and trailing "}-" (2 bytes).
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter: the map only
							// gains further entries when a handler exists.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5329
/**
 * Get the image parameter map and matching magic word array for a media
 * handler. Results are cached per handler class ('' when there is none).
 *
 * @param MediaHandler $handler Handler of the file, or a falsy value if none
 * @return array Two elements: the map from magic word name to
 *  [ type, parameter name ], and the magic word array built from its keys
 */
public function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	# Already computed for this handler class: serve it from the cache.
	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}

	# Handler-independent parameters, grouped by type.
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	# Built once per request, on first use.
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $group => $names ) {
			foreach ( $names as $name ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$internalParamMap[ str_replace( '-', '_', "img_$name" ) ] = [ $group, $name ];
			}
		}
	}

	# Layer the handler's own parameters on top of the internal ones.
	$paramMap = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = [ 'handler', $paramName ];
		}
	}

	$this->mImageParams[$handlerClass] = $paramMap;
	$this->mImageParamsMagicArray[$handlerClass] =
		$this->magicWordFactory->newArray( array_keys( $paramMap ) );

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5379
/**
 * Parse image options text and use it to make an image
 *
 * @param Title $title
 * @param string $options Pipe-separated option text ("options|alt text")
 * @param LinkHolderArray|bool $holders
 * @return string HTML
 */
public function makeImage( $title, $options, $holders = false ) {
	# The options text has the form "options|alt text". Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline, sub, super, top, text-top, middle, bottom, text-bottom

	# Split on pipes, leaving LanguageConverter markup (-{ ... }-) intact
	$segments = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# From here on $options holds the fetch options; extensions may fill them
	# in, e.g. to select the file revision for us
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	if ( $file ) {
		$handler = $file->getHandler();
	} else {
		$handler = false;
	}
	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Sort every segment into a parameter bucket; the last segment that does
	# not validate as a parameter becomes the caption
	$caption = '';
	$params = [
		'frame' => [],
		'handler' => [],
		'horizAlign' => [],
		'vertAlign' => [],
	];
	$formatSeen = false;
	foreach ( $segments as $segment ) {
		$segment = trim( $segment );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $segment );
		$validated = false;

		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			if ( $type === 'handler' && $paramName === 'width' ) {
				# Special case; width and height come in one variable together.
				# Each dimension that is present is validated and stored on its own.
				$size = self::parseWidthParam( $value );
				foreach ( [ 'width', 'height' ] as $dimension ) {
					if ( isset( $size[$dimension] )
						&& $handler->validateParam( $dimension, $size[$dimension] )
					) {
						$params[$type][$dimension] = $size[$dimension];
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type !== 'handler' ) {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# The parsed link type replaces the parameter name
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( $paramName === 'link-url' ) {
									if ( $this->mOptions->getExternalLinkTarget() ) {
										$params[$type]['link-target'] =
											$this->mOptions->getExternalLinkTarget();
									}
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$formatSeen;
							$formatSeen = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				} else {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}

		if ( !$validated ) {
			$caption = $segment;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$framed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# Historically [[Image:Foo|text...]] set the alt text, later also the
	# caption and the title attribute. Repeating the caption as alt text is
	# worse than useless for screen readers, so with a dedicated alt parameter:
	#  - framed/thumbnail images only get a fallback alt (the file name) when
	#    there is neither a caption nor an explicit alt;
	#  - inline images still use the unnamed parameter for alt (when alt is
	#    not set) and for the tooltip.
	# Open questions for the future: a title= parameter, ignoring the unnamed
	# parameter for images without a caption, an explicit caption= parameter
	# keeping the unnamed one for BC, ...
	$hasAlt = isset( $params['frame']['alt'] );
	if ( $framed ) { # Framed image
		if ( !$hasAlt && $caption === '' ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !$hasAlt ) {
			# No alt text: use the "caption", or failing that the filename
			$params['frame']['alt'] = ( $caption !== '' )
				? $this->stripAltText( $caption, $holders )
				: $title->getText();
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $options['time'] ?? false;
	$html = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $html;
}
5589
/**
 * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
 *
 * Valid external and internal targets are also registered with the parser
 * output (external link / link tables).
 *
 * @since 1.32
 * @param string $value
 * @return array of `[ type, target ]`, where:
 *   - `type` is one of:
 *     - `null`: Given value is not a valid link target, use default
 *     - `'no-link'`: Given value is empty, do not generate a link
 *     - `'link-url'`: Given value is a valid external link
 *     - `'link-title'`: Given value is a valid internal link
 *   - `target` is:
 *     - When `type` is `null` or `'no-link'`: `false`
 *     - When `type` is `'link-url'`: URL string corresponding to given value
 *     - When `type` is `'link-title'`: Title object corresponding to given value
 */
public function parseLinkParameter( $value ) {
	# Empty value: explicitly no link.
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$prots = $this->mUrlProtocols;
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		# Starts with a known protocol, so it can only be an external link;
		# it is accepted if the whole value has the shape of a URL.
		$chars = self::EXT_LINK_URL_CLASS;
		$addr = self::EXT_LINK_ADDR;
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	# Anything else is tried as an internal title.
	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5632
/**
 * Reduce a caption to plain text for use in an alt or tooltip attribute.
 *
 * Link placeholders and strip markers are expanded first, bare ampersands
 * that start a semicolon-less legacy entity are encoded, and finally all
 * tags are stripped.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Holder array to resolve link
 *   placeholders with; when falsy the parser's own replaceLinkHoldersText()
 *   is used instead
 * @return mixed|string
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip). We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$plain = $this->mStripState->unstripBoth( $plain );

	# Compatibility hack! In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$escaped = preg_replace( "/
		&                         # 1. entity prefix
		(?=                       # 2. followed by:
		(?:                       #  a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))               # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $plain );

	return Sanitizer::stripAllTags( $escaped );
}
5690
/**
 * Set a flag in the output object indicating that the content is dynamic and
 * shouldn't be cached.
 *
 * Note that the debug message is logged before the output object is checked.
 *
 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
 * @throws MWException If there is no output object, i.e. nothing is being parsed
 */
public function disableCache() {
	$this->logger->debug( "Parser output marked as uncacheable." );
	if ( $this->mOutput ) {
		// new style, for consistency
		$this->mOutput->updateCacheExpiry( 0 );
		return;
	}
	throw new MWException( __METHOD__ . " can only be called when actually parsing something" );
}
5704
/**
 * Callback from the Sanitizer for expanding items found in HTML attribute
 * values, so they can be safely tested and escaped.
 *
 * Variables are replaced first, then strip markers are expanded. The result
 * is written back through the reference as well as returned.
 *
 * @param string &$text Attribute text; updated in place
 * @param bool|PPFrame $frame
 * @return string The expanded text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	$text = $this->replaceVariables( $text, $frame );
	return $text = $this->mStripState->unstripBoth( $text );
}
5718
/**
 * Get the names of all registered tag hooks: transparent tag hooks,
 * regular tag hooks and function tag hooks, in that order.
 *
 * @return array
 */
public function getTags() {
	$this->firstCallInit();

	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$regularTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $regularTags, $functionTags );
}
5732
/**
 * Get the function synonym table, after making sure first-call
 * initialisation has run.
 *
 * @since 1.32
 * @return array
 */
public function getFunctionSynonyms() {
	$this->firstCallInit();

	return $this->mFunctionSynonyms;
}
5741
/**
 * Accessor for $mUrlProtocols, the protocol string this class embeds
 * in its external-link regular expressions.
 *
 * @since 1.32
 * @return string
 */
public function getUrlProtocols() {
	return $this->mUrlProtocols;
}
5749
/**
 * Replace transparent tags in $text with the values given by the callbacks.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * @param string $text
 *
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	# Map each marker left in the text to the output that should replace it
	$markerToOutput = [];
	foreach ( $extracted as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$hookKey = strtolower( $element );
		# Without a registered hook the original tag text is put back untouched
		$markerToOutput[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$hookKey],
				[ $content, $params, $this ]
			)
			: $tag;
	}

	return strtr( $text, $markerToOutput );
}
5781
/**
 * Break wikitext input into sections, and either pull or replace
 * some particular section's text.
 *
 * External callers should use the getSection and replaceSection methods.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId A section identifier string of the form:
 *   "<flag1> - <flag2> - ... - <section number>"
 *
 * Currently the only recognised flag is "T", which means the target section number
 * was derived during a template inclusion parse, in other words this is a template
 * section edit link. If no flags are given, it was an ordinary section edit link.
 * This flag is required to avoid a section numbering mismatch when a section is
 * enclosed by "<includeonly>" (T8563).
 *
 * The last dash-separated part is coerced to an integer, so a non-numeric
 * identifier is treated as section 0 regardless of the PHP version's
 * string/number comparison rules.
 *
 * The section number 0 pulls the text before the first heading; other numbers will
 * pull the given section along with its lower-level subsections. If the section is
 * not found, $mode=get will return $newText, and $mode=replace will return $text.
 *
 * Section 0 is always considered to exist, even if it only contains the empty
 * string. If $text is the empty string and section 0 is replaced, $newText is
 * returned.
 *
 * @param string $mode One of "get" or "replace"
 * @param string $newText Replacement text for section data.
 * @return string For "get", the extracted section text.
 *   for "replace", the whole page with the section replaced.
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Never read: kept in a local so the parser lock lives until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	# Process section extraction flags
	$flags = 0;
	$sectionParts = explode( '-', $sectionId );
	# Coerce the index to an integer once. Comparing the raw string with
	# "== 0" gives different answers for non-numeric identifiers depending
	# on the PHP version, so the comparisons below would otherwise disagree
	# about whether such an identifier means section 0.
	$sectionIndex = (int)array_pop( $sectionParts );
	foreach ( $sectionParts as $part ) {
		if ( $part === 'T' ) {
			$flags |= self::PTD_FOR_INCLUSION;
		}
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex === 0 ) {
			return $mode === 'get' ? '' : $newText;
		}

		return $mode === 'get' ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex === 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		while ( $node ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				# Loose comparison on purpose: the heading index may be a numeric string
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target section is kept verbatim
			if ( $mode === 'replace' ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
			$node = $node->getNextSibling();
		}
	}

	if ( !$node ) {
		# Not found
		return $mode === 'get' ? $newText : $text;
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			# A different heading at the same or a higher level ends the section
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $mode === 'replace' ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		while ( $node ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			$node = $node->getNextSibling();
		}
	}

	# Re-insert stripped tags; $outText is always a string at this point
	return rtrim( $this->mStripState->unstripBoth( $outText ) );
}
5921
/**
 * This function returns the text of a section, specified by $sectionId.
 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
 * the first section before any such heading (section 0).
 *
 * If a section contains subsections, these are also returned.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier as a number or string
 *   (e.g. 0, 1 or 'T-1').
 * @param string $defaultText Default to return if section is not found
 *
 * @return string Text of the requested section
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	# Thin wrapper: extractSections() in "get" mode does the actual work
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );

	return $sectionText;
}
5939
/**
 * This function returns $oldText after the content of the section
 * specified by $sectionId has been replaced with $newText. If the target
 * section does not exist, $oldText is returned unchanged.
 *
 * @param string $oldText Former text of the article
 * @param string|int $sectionId Section identifier as a number or string
 *   (e.g. 0, 1 or 'T-1').
 * @param string $newText Replacing text
 *
 * @return string Modified text
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	# Thin wrapper: extractSections() in "replace" mode does the actual work
	$pageText = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $pageText;
}
5955
/**
 * Get the ID of the revision we are parsing
 *
 * The return value will be either:
 * - a) Positive, indicating a specific revision ID (current or old)
 * - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
 * - c) Null, meaning the parse is for preview mode and there is no revision
 *
 * @return int|null The stored $mRevisionId, returned as-is
 */
public function getRevisionId() {
	return $this->mRevisionId;
}
5969
/**
 * Get the revision object for $this->mRevisionId
 *
 * A truthy result is cached in $mRevisionObject and returned directly by
 * later calls.
 *
 * @return Revision|null Either a Revision object or null
 * @since 1.23 (public since 1.23)
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject ) {
		return $this->mRevisionObject;
	}

	// NOTE: try to get the RevisionObject even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$revisionCallback = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $revisionCallback, $this->getTitle(), $this );

	if ( $this->mRevisionId === null ) {
		if ( $rev && $rev->getId() ) {
			// We are in preview mode (mRevisionId is null), and the current revision callback
			// returned an existing revision. Ignore it and return null, it's probably the page's
			// current revision, which is not what we want here. Note that we do want to call the
			// callback to allow the unsaved revision to be injected here, e.g. for
			// self-transclusion previews.
			return null;
		}
	} elseif ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
		// If the parse is for a new revision, then the callback should have
		// already been set to force the object and should match mRevisionId.
		// If not, try to fetch by mRevisionId for sanity.
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;

	return $rev;
}
6014
/**
 * Get the timestamp associated with the current revision, adjusted for
 * the default server-local timestamp
 *
 * The adjusted value is cached in $mRevisionTimestamp; the unadjusted one is
 * recorded on the parser output the first time it is computed.
 *
 * @return string TS_MW timestamp
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$rev = $this->getRevisionObject();
		if ( $rev ) {
			$rawTimestamp = $rev->getTimestamp();
		} else {
			$rawTimestamp = $this->mOptions->getTimestamp();
		}
		# Recorded with the unadjusted time zone
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp );

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6039
/**
 * Get the name of the user that edited the last revision
 *
 * The value is computed once and cached in $mRevisionUser. When there is no
 * revision object and neither fallback condition below applies, the cached
 * value is left as it was (null).
 *
 * @return string|null User name
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$rev = $this->getRevisionObject();
	if ( $rev ) {
		$this->mRevisionUser = $rev->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# if this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6059
/**
 * Get the size of the revision
 *
 * The value is computed once and cached in $mRevisionSize.
 *
 * @return int|null Revision size
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$rev = $this->getRevisionObject();
	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the substitution
	# itself will change the size.
	$this->mRevisionSize = $rev ? $rev->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
6080
/**
 * Mutator for $mDefaultSort
 *
 * Besides updating the parser field, the same value is stored on the
 * parser output as the 'defaultsort' property.
 *
 * @param string $sort New value
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
6090
/**
 * Accessor for $mDefaultSort
 * Will use the empty string if none is set (i.e. the field is false).
 *
 * This value is treated as a prefix, so the
 * empty string is equivalent to sorting by
 * page name.
 *
 * @return string
 */
public function getDefaultSort() {
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6108
/**
 * Raw accessor for $mDefaultSort
 * Unlike getDefaultSort(), the stored value is returned unmodified,
 * so this will return false if none is set.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	return $this->mDefaultSort;
}
6118
/**
 * Turn already-stripped heading text into a section name: whitespace is
 * normalized, character references are decoded and the result is passed
 * through normalizeSectionName().
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
6125
/**
 * Build a link anchor from a section name.
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );

	return "#$escapedId";
}
6129
/**
 * Build an anchor from a section name, using the fallback ID encoding
 * when the second entry of the 'FragmentMode' option is 'legacy'.
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	$useLegacy = ( $fragmentMode[1] ?? null ) === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $useLegacy
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $id;
}
6141
/**
 * Try to guess the section anchor name based on a wikitext fragment
 * presumably extracted from a heading, for example "Header" from
 * "== Header ==".
 *
 * @param string $text
 * @return string Anchor (starting with '#')
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6156
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible. For use in redirects, since various versions
 * of Microsoft browsers interpret Location: headers as something other
 * than UTF-8, resulting in breakage.
 *
 * @param string $text The section name
 * @return string Anchor (starting with '#')
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6172
/**
 * Like guessSectionNameFromWikiText(), but takes already-stripped text as input,
 * so no wikitext stripping is performed here.
 *
 * @param string $text Section name (plain text)
 * @return string Anchor (starting with '#')
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6182
/**
 * Apply the same normalization as code making links to this section would
 *
 * The text is parsed as the fragment of a title ("#$text"); if the title
 * parser rejects it, the input is returned unchanged.
 *
 * @param string $text
 * @return string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();

	try {
		$split = $titleParser->splitTitleString( '#' . $text );
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}

	return $split['fragment'];
}
6200
/**
 * Strips a text string of wikitext for use in a section anchor
 *
 * Accepts a text string and then removes all wikitext from the
 * string and leaves only the resultant text (i.e. the result of
 * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
 * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
 * to create valid section anchors by mimicking the output of the
 * parser when headings are parsed.
 *
 * @param string $text Text string to be stripped of wikitext
 *   for use in a Section anchor
 * @return string Filtered text string
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links keep their label,
	# unpiped links keep their target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6233
/**
 * strip/replaceVariables/unstrip for preprocessor regression testing
 *
 * Takes the parser lock and starts a parse, then replaces variables,
 * expands strip markers and removes disallowed HTML tags.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType
 *
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Never read: kept in a local so the parser lock lives until this method returns
	$scopedLock = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6255
/**
 * Run preSaveTransform() on the text, acting as the user taken from $options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6265
/**
 * Run testSrvus() with the output type fixed to OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6275
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback. For example, with input:
 *
 *  aaa<MARKER>bbb
 *
 * This will call the callback function twice, with 'aaa' and 'bbb'. Those
 * two strings will be replaced with the value returned by the callback in
 * each case.
 *
 * Markers themselves are copied to the output untouched. A marker prefix
 * with no matching suffix causes the rest of the string to be copied as-is.
 *
 * @param string $s
 * @param callable $callback
 *
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$result = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	$pos = 0;
	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No further markers: the whole tail is ordinary text
			return $result . call_user_func( $callback, substr( $s, $pos ) );
		}

		# Ordinary text in front of the marker (may be empty)
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: pass the remainder through unchanged
			return $result . substr( $s, $start );
		}

		# Copy the complete marker, suffix included, and continue after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6315
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the strip state's killMarkers().
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6325
/**
 * Save the parser state required to convert the given half-parsed text to
 * HTML. "Half-parsed" in this context means the output of
 * recursiveTagParse() or internalParse(). This output has strip markers
 * from replaceVariables (extensionSubstitution() etc.), and link
 * placeholders from replaceLinkHolders().
 *
 * Returns an array which can be serialized and stored persistently. This
 * array can later be loaded into another parser instance with
 * unserializeHalfParsedText(). The text can then be safely incorporated into
 * the return value of a parser hook.
 *
 * @deprecated since 1.31
 * @param string $text
 *
 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
 */
public function serializeHalfParsedText( $text ) {
	wfDeprecated( __METHOD__, '1.31' );

	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6353
/**
 * Load the parser state given in the $data array, which is assumed to
 * have been generated by serializeHalfParsedText(). The text contents is
 * extracted from the array, and its markers are transformed into markers
 * appropriate for the current Parser instance. This transformed text is
 * returned, and can be safely included in the return value of a parser
 * hook.
 *
 * If the $data array has been stored persistently, the caller should first
 * check whether it is still valid, by calling isValidHalfParsedText().
 *
 * @deprecated since 1.31
 * @param array $data Serialized data
 * @throws MWException If the 'version' entry is missing or does not match
 * @return string
 */
public function unserializeHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	$version = $data['version'] ?? null;
	if ( $version === null || $version != self::HALF_PARSED_VERSION ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# First, extract the strip state.
	$fragments = [ $data['text'] ];
	$fragments = $this->mStripState->merge( $data['stripState'], $fragments );

	# Now renumber links
	$fragments = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $fragments );

	# Should be good to go.
	return $fragments[0];
}
6386
/**
 * Returns true if the given array, presumed to be generated by
 * serializeHalfParsedText(), is compatible with the current version of the
 * parser, i.e. it has a 'version' entry equal to HALF_PARSED_VERSION.
 *
 * @deprecated since 1.31
 * @param array $data
 *
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6401
/**
 * Parse a width param of imagelink like 300px or 200x300px
 *
 * @param string $value
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 *
 * @return array Empty when $value is empty or unrecognised; otherwise it
 *   has an integer 'width' and, for the "<width>x<height>" form, an integer 'height'
 * @since 1.20
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$dims = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
		return [
			'width' => intval( $dims[1] ),
			'height' => intval( $dims[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6430
/**
 * Lock the current instance of the parser.
 *
 * This is meant to stop someone from calling the parser
 * recursively and messing up all the strip state.
 *
 * @throws MWException If parser is in a parse
 * @return ScopedCallback The lock will be released once the return value goes out of scope.
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	// The callback clears the lock marker again
	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6457
/**
 * Strip outer <p></p> tag from the HTML source of a single paragraph.
 *
 * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
 * or if there is more than one <p/> tag in the input HTML.
 *
 * @param string $html
 * @return string
 * @since 1.24
 */
public static function stripOuterParagraph( $html ) {
	$parts = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $parts );

	# A closing tag inside the captured content means more than one paragraph
	if ( !$isWrapped || strpos( $parts[1], '</p>' ) !== false ) {
		return $html;
	}

	return $parts[1];
}
6476
/**
 * Return this parser if it is not doing anything, otherwise
 * get a fresh parser. You can use this method by doing
 * $newParser = $oldParser->getFreshParser(), or more simply
 * $oldParser->getFreshParser()->parse( ... );
 * if you're unsure if $oldParser is safe to use.
 *
 * "Doing anything" is judged by the $mInParse lock marker; a fresh
 * parser is created through the factory.
 *
 * @since 1.24
 * @return Parser A parser object that is not parsing anything
 */
public function getFreshParser() {
	return $this->mInParse ? $this->factory->create() : $this;
}
6494
/**
 * Sets up the PHP implementation of OOUI for use in this request
 * and instructs OutputPage to enable OOUI for itself.
 *
 * The latter is done by setting the enable-OOUI flag on the parser output.
 *
 * @since 1.26
 */
public function enableOOUI() {
	OutputPage::setupOOUI();

	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6505
/**
 * Set a flag on the parser output and write a debug log entry naming
 * the flag, the page being parsed and the reason.
 *
 * @param string $flag
 * @param string $reason
 */
protected function setOutputFlag( $flag, $reason ) {
	$this->mOutput->setFlag( $flag );

	$pageName = $this->mTitle->getPrefixedText();
	$this->logger->debug( __METHOD__ . ": set {$flag} flag on '{$pageName}'; {$reason}" );
}
6515 }