includes/parser/Parser.php

   1 <?php
   2 /**
   3  * PHP parser that converts wiki markup to HTML.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup Parser
  22  */
  23 use MediaWiki\BadFileLookup;
  24 use MediaWiki\Config\ServiceOptions;
  25 use MediaWiki\Linker\LinkRenderer;
  26 use MediaWiki\Linker\LinkRendererFactory;
  27 use MediaWiki\Linker\LinkTarget;
  28 use MediaWiki\MediaWikiServices;
  29 use MediaWiki\Special\SpecialPageFactory;
  30 use Psr\Log\NullLogger;
  31 use Wikimedia\ScopedCallback;
  32 use Psr\Log\LoggerInterface;
  33
  34 /**
  35  * @defgroup Parser Parser
  36  */
  37
  38 /**
  39  * PHP Parser - Processes wiki markup (which uses a more user-friendly
  40  * syntax, such as "[[link]]" for making links), and provides a one-way
  41  * transformation of that wiki markup into (X)HTML output / markup
  42  * (which in turn the browser understands, and can display).
  43  *
  44  * There are seven main entry points into the Parser class:
  45  *
  46  * - Parser::parse()
  47  *     produces HTML output
  48  * - Parser::preSaveTransform()
  49  *     produces altered wiki markup
  50  * - Parser::preprocess()
  51  *     removes HTML comments and expands templates
  52  * - Parser::cleanSig() and Parser::cleanSigInSig()
  53  *     cleans a signature before saving it to preferences
  54  * - Parser::getSection()
  55  *     return the content of a section from an article for section editing
  56  * - Parser::replaceSection()
  57  *     replaces a section by number inside an article
  58  * - Parser::getPreloadText()
  59  *     removes <noinclude> sections and <includeonly> tags
  60  *
  61  * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
  62  *
  63  * @par Settings:
  64  * $wgNamespacesWithSubpages
  65  *
  66  * @par Settings only within ParserOptions:
  67  * $wgAllowExternalImages
  68  * $wgAllowSpecialInclusion
  69  * $wgInterwikiMagic
  70  * $wgMaxArticleSize
  71  *
  72  * @ingroup Parser
  73  */
  74 class Parser {
  75         /**
  76          * Update this version number when the ParserOutput format
  77          * changes in an incompatible way, so the parser cache
  78          * can automatically discard old data.
  79          */
  80         const VERSION = '1.6.4';
  81
  82         /**
  83          * Update this version number when the output of serialiseHalfParsedText()
  84          * changes in an incompatible way
  85          */
  86         const HALF_PARSED_VERSION = 2;
  87
  88         # Flags for Parser::setFunctionHook
  89         const SFH_NO_HASH = 1;
  90         const SFH_OBJECT_ARGS = 2;
  91
  92         # Constants needed for external link processing
  93         # Everything except bracket, space, or control characters
  94         # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
  95         # as well as U+3000 (IDEOGRAPHIC SPACE), for T21052
  96         # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
  97         # uses to replace invalid HTML characters.
  98         const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
  99         # Simplified expression to match an IPv4 or IPv6 address, or
 100         # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
 101         const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
 102         # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
 103         // phpcs:ignore Generic.Files.LineLength
 104         const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
 105                 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
 106
 107         # Regular expression for a non-newline space
 108         const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
 109
 110         # Flags for preprocessToDom
 111         const PTD_FOR_INCLUSION = 1;
 112
 113         # Allowed values for $this->mOutputType
 114         # Parameter to startExternalParse().
 115         const OT_HTML = 1; # like parse()
 116         const OT_WIKI = 2; # like preSaveTransform()
 117         const OT_PREPROCESS = 3; # like preprocess()
 118         const OT_MSG = 3;
 119         const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
 120
 121         /**
 122          * @var string Prefix and suffix for temporary replacement strings
 123          * for the multipass parser.
 124          *
 125          * \x7f should never appear in input as it's disallowed in XML.
 126          * Using it at the front also gives us a little extra robustness
 127          * since it shouldn't match when butted up against identifier-like
 128          * string constructs.
 129          *
 130          * Must not consist of all title characters, or else it will change
 131          * the behavior of <nowiki> in a link.
 132          *
 133          * Must have a character that needs escaping in attributes, otherwise
 134          * someone could put a strip marker in an attribute, to get around
 135          * escaping quote marks, and break out of the attribute. Thus we add
 136          * `'".
 137          */
 138         const MARKER_SUFFIX = "-QINU`\"'\x7f";
 139         const MARKER_PREFIX = "\x7f'\"`UNIQ-";
 140
 141         # Markers used for wrapping the table of contents
 142         const TOC_START = '<mw:toc>';
 143         const TOC_END = '</mw:toc>';
 144
 145         /** @var int Assume that no output will later be saved this many seconds after parsing */
 146         const MAX_TTS = 900;
 147
 148         # Persistent:
 149         public $mTagHooks = [];
 150         public $mTransparentTagHooks = [];
 151         public $mFunctionHooks = [];
 152         public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
 153         public $mFunctionTagHooks = [];
 154         public $mStripList = [];
 155         public $mDefaultStripList = [];
 156         public $mVarCache = [];
 157         public $mImageParams = [];
 158         public $mImageParamsMagicArray = [];
 159         public $mMarkerIndex = 0;
 160         /**
 161          * @var bool Whether firstCallInit still needs to be called
 162          */
 163         public $mFirstCall = true;
 164
 165         # Initialised by initialiseVariables()
 166
 167         /**
 168          * @var MagicWordArray
 169          */
 170         public $mVariables;
 171
 172         /**
 173          * @var MagicWordArray
 174          */
 175         public $mSubstWords;
 176
 177         /**
 178          * @deprecated since 1.34, there should be no need to use this
 179          * @var array
 180          */
 181         public $mConf;
 182
 183         # Initialised in constructor
 184         public $mExtLinkBracketedRegex, $mUrlProtocols;
 185
 186         # Initialized in getPreprocessor()
 187         /** @var Preprocessor */
 188         public $mPreprocessor;
 189
 190         # Cleared with clearState():
 191         /**
 192          * @var ParserOutput
 193          */
 194         public $mOutput;
 195         public $mAutonumber;
 196
 197         /**
 198          * @var StripState
 199          */
 200         public $mStripState;
 201
 202         public $mIncludeCount;
 203         /**
 204          * @var LinkHolderArray
 205          */
 206         public $mLinkHolders;
 207
 208         public $mLinkID;
 209         public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
 210         public $mDefaultSort;
 211         public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
 212         public $mExpensiveFunctionCount; # number of expensive parser function calls
 213         public $mShowToc, $mForceTocPosition;
 214         /** @var array */
 215         public $mTplDomCache;
 216
 217         /**
 218          * @var User
 219          */
 220         public $mUser; # User object; only used when doing pre-save transform
 221
 222         # Temporary
 223         # These are variables reset at least once per parse regardless of $clearState
 224
 225         /**
 226          * @var ParserOptions
 227          */
 228         public $mOptions;
 229
 230         /**
 231          * @var Title
 232          */
 233         public $mTitle;        # Title context, used for self-link rendering and similar things
 234         public $mOutputType;   # Output type, one of the OT_xxx constants
 235         public $ot;            # Shortcut alias, see setOutputType()
 236         public $mRevisionObject; # The revision object of the specified revision ID
 237         public $mRevisionId;   # ID to display in {{REVISIONID}} tags
 238         public $mRevisionTimestamp; # The timestamp of the specified revision ID
 239         public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
 240         public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
 241         public $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
 242         public $mInputSize = false; # For {{PAGESIZE}} on current page.
 243
 244         /**
 245          * @var array Array with the language name of each language link (i.e. the
 246          * interwiki prefix) in the key, value arbitrary. Used to avoid sending
 247          * duplicate language links to the ParserOutput.
 248          */
 249         public $mLangLinkLanguages;
 250
 251         /**
 252          * @var MapCacheLRU|null
 253          * @since 1.24
 254          *
 255          * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
 256          */
 257         public $currentRevisionCache;
 258
 259         /**
 260          * @var bool|string Recursive call protection.
 261          * This variable should be treated as if it were private.
 262          */
 263         public $mInParse = false;
 264
 265         /** @var SectionProfiler */
 266         protected $mProfiler;
 267
 268         /**
 269          * @var LinkRenderer
 270          */
 271         protected $mLinkRenderer;
 272
 273         /** @var MagicWordFactory */
 274         private $magicWordFactory;
 275
 276         /** @var Language */
 277         private $contLang;
 278
 279         /** @var ParserFactory */
 280         private $factory;
 281
 282         /** @var SpecialPageFactory */
 283         private $specialPageFactory;
 284
 285         /**
 286          * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
 287          * $mOptions, which is public and widely used, and also with the local variable $options used
 288          * for ParserOptions throughout this file.
 289          *
 290          * @var ServiceOptions
 291          */
 292         private $svcOptions;
 293
 294         /** @var LinkRendererFactory */
 295         private $linkRendererFactory;
 296
 297         /** @var NamespaceInfo */
 298         private $nsInfo;
 299
 300         /** @var LoggerInterface */
 301         private $logger;
 302
 303         /** @var BadFileLookup */
 304         private $badFileLookup;
 305
 306         /**
 307          * TODO Make this a const when HHVM support is dropped (T192166)
 308          *
 309          * @var array
 310          * @since 1.33
 311          */
 312         public static $constructorOptions = [
 313                 // See $wgParserConf documentation
 314                 'class',
 315                 'preprocessorClass',
 316                 // See documentation for the corresponding config options
 317                 'ArticlePath',
 318                 'EnableScaryTranscluding',
 319                 'ExtraInterlanguageLinkPrefixes',
 320                 'FragmentMode',
 321                 'LanguageCode',
 322                 'MaxSigChars',
 323                 'MaxTocLevel',
 324                 'MiserMode',
 325                 'ScriptPath',
 326                 'Server',
 327                 'ServerName',
 328                 'ShowHostnames',
 329                 'Sitename',
 330                 'StylePath',
 331                 'TranscludeCacheExpiry',
 332         ];
 333
 334         /**
 335          * Constructing parsers directly is deprecated! Use a ParserFactory.
 336          *
 337          * @param ServiceOptions|null $svcOptions
 338          * @param MagicWordFactory|null $magicWordFactory
 339          * @param Language|null $contLang Content language
 340          * @param ParserFactory|null $factory
 341          * @param string|null $urlProtocols As returned from wfUrlProtocols()
 342          * @param SpecialPageFactory|null $spFactory
 343          * @param LinkRendererFactory|null $linkRendererFactory
 344          * @param NamespaceInfo|null $nsInfo
 345          * @param LoggerInterface|null $logger
 346          * @param BadFileLookup|null $badFileLookup
 347          */
 348         public function __construct(
 349                 $svcOptions = null,
 350                 MagicWordFactory $magicWordFactory = null,
 351                 Language $contLang = null,
 352                 ParserFactory $factory = null,
 353                 $urlProtocols = null,
 354                 SpecialPageFactory $spFactory = null,
 355                 $linkRendererFactory = null,
 356                 $nsInfo = null,
 357                 $logger = null,
 358                 BadFileLookup $badFileLookup = null
 359         ) {
 360                 if ( !$svcOptions || is_array( $svcOptions ) ) {
 361                         // Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
 362                         // Config, and the eighth is LinkRendererFactory.
 363                         $this->mConf = (array)$svcOptions;
 364                         if ( empty( $this->mConf['class'] ) ) {
 365                                 $this->mConf['class'] = self::class;
 366                         }
 367                         if ( empty( $this->mConf['preprocessorClass'] ) ) {
 368                                 $this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
 369                         }
 370                         $this->svcOptions = new ServiceOptions( self::$constructorOptions,
 371                                 $this->mConf, func_num_args() > 6
 372                                         ? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
 373                         );
 374                         $linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
 375                         $nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
 376                 } else {
 377                         // New calling convention
 378                         $svcOptions->assertRequiredOptions( self::$constructorOptions );
 379                         // $this->mConf is public, so we'll keep those two options there as well for
 380                         // compatibility until it's removed
 381                         $this->mConf = [
 382                                 'class' => $svcOptions->get( 'class' ),
 383                                 'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
 384                         ];
 385                         $this->svcOptions = $svcOptions;
 386                 }
 387
 388                 $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
 389                 $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
 390                         self::EXT_LINK_ADDR .
 391                         self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
 392
 393                 $this->magicWordFactory = $magicWordFactory ??
 394                         MediaWikiServices::getInstance()->getMagicWordFactory();
 395
 396                 $this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();
 397
 398                 $this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
 399                 $this->specialPageFactory = $spFactory ??
 400                         MediaWikiServices::getInstance()->getSpecialPageFactory();
 401                 $this->linkRendererFactory = $linkRendererFactory ??
 402                         MediaWikiServices::getInstance()->getLinkRendererFactory();
 403                 $this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
 404                 $this->logger = $logger ?: new NullLogger();
 405                 $this->badFileLookup = $badFileLookup ??
 406                         MediaWikiServices::getInstance()->getBadFileLookup();
 407         }
 408
 409         /**
 410          * Reduce memory usage to reduce the impact of circular references
 411          */
 412         public function __destruct() {
 413                 if ( isset( $this->mLinkHolders ) ) {
 414                         unset( $this->mLinkHolders );
 415                 }
 416                 // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
 417                 foreach ( $this as $name => $value ) {
 418                         unset( $this->$name );
 419                 }
 420         }
 421
 422         /**
 423          * Allow extensions to clean up when the parser is cloned
 424          */
 425         public function __clone() {
 426                 $this->mInParse = false;
 427
 428                 // T58226: When you create a reference "to" an object field, that
 429                 // makes the object field itself be a reference too (until the other
 430                 // reference goes out of scope). When cloning, any field that's a
 431                 // reference is copied as a reference in the new object. Both of these
 432                 // are defined PHP5 behaviors, as inconvenient as it is for us when old
 433                 // hooks from PHP4 days are passing fields by reference.
 434                 foreach ( [ 'mStripState', 'mVarCache' ] as $k ) {
 435                         // Make a non-reference copy of the field, then rebind the field to
 436                         // reference the new copy.
 437                         $tmp = $this->$k;
 438                         $this->$k =& $tmp;
 439                         unset( $tmp );
 440                 }
 441
 442                 Hooks::run( 'ParserCloned', [ $this ] );
 443         }
 444
 445         /**
 446          * Which class should we use for the preprocessor if not otherwise specified?
 447          *
 448          * @since 1.34
 449          * @deprecated since 1.34, removing configurability of preprocessor
 450          * @return string
 451          */
 452         public static function getDefaultPreprocessorClass() {
 453                 return Preprocessor_Hash::class;
 454         }
 455
 456         /**
 457          * Do various kinds of initialisation on the first call of the parser
 458          */
 459         public function firstCallInit() {
 460                 if ( !$this->mFirstCall ) {
 461                         return;
 462                 }
 463                 $this->mFirstCall = false;
 464
 465                 CoreParserFunctions::register( $this );
 466                 CoreTagHooks::register( $this );
 467                 $this->initialiseVariables();
 468
 469                 // Avoid PHP 7.1 warning from passing $this by reference
 470                 $parser = $this;
 471                 Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
 472         }
 473
 474         /**
 475          * Clear Parser state
 476          *
 477          * @private
 478          */
 479         public function clearState() {
 480                 $this->firstCallInit();
 481                 $this->resetOutput();
 482                 $this->mAutonumber = 0;
 483                 $this->mIncludeCount = [];
 484                 $this->mLinkHolders = new LinkHolderArray( $this );
 485                 $this->mLinkID = 0;
 486                 $this->mRevisionObject = $this->mRevisionTimestamp =
 487                         $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
 488                 $this->mVarCache = [];
 489                 $this->mUser = null;
 490                 $this->mLangLinkLanguages = [];
 491                 $this->currentRevisionCache = null;
 492
 493                 $this->mStripState = new StripState( $this );
 494
 495                 # Clear these on every parse, T6549
 496                 $this->mTplRedirCache = $this->mTplDomCache = [];
 497
 498                 $this->mShowToc = true;
 499                 $this->mForceTocPosition = false;
 500                 $this->mIncludeSizes = [
 501                         'post-expand' => 0,
 502                         'arg' => 0,
 503                 ];
 504                 $this->mPPNodeCount = 0;
 505                 $this->mGeneratedPPNodeCount = 0;
 506                 $this->mHighestExpansionDepth = 0;
 507                 $this->mDefaultSort = false;
 508                 $this->mHeadings = [];
 509                 $this->mDoubleUnderscores = [];
 510                 $this->mExpensiveFunctionCount = 0;
 511
 512                 # Fix cloning
 513                 if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
 514                         $this->mPreprocessor = null;
 515                 }
 516
 517                 $this->mProfiler = new SectionProfiler();
 518
 519                 // Avoid PHP 7.1 warning from passing $this by reference
 520                 $parser = $this;
 521                 Hooks::run( 'ParserClearState', [ &$parser ] );
 522         }
 523
 524         /**
 525          * Reset the ParserOutput
 526          */
 527         public function resetOutput() {
 528                 $this->mOutput = new ParserOutput;
 529                 $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
 530         }
 531
 532         /**
 533          * Convert wikitext to HTML
 534          * Do not call this function recursively.
 535          *
 536          * @param string $text Text we want to parse
 537          * @param-taint $text escapes_htmlnoent
 538          * @param Title $title
 539          * @param ParserOptions $options
 540          * @param bool $linestart
 541          * @param bool $clearState
 542          * @param int|null $revid ID of the revision being rendered. This is used to render
 543          *  REVISION* magic words. 0 means that any current revision will be used. Null means
 544          *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 545          *  use the current timestamp.
 546          * @return ParserOutput A ParserOutput
 547          * @return-taint escaped
 548          */
 549         public function parse(
 550                 $text, Title $title, ParserOptions $options,
 551                 $linestart = true, $clearState = true, $revid = null
 552         ) {
 553                 if ( $clearState ) {
 554                         // We use U+007F DELETE to construct strip markers, so we have to make
 555                         // sure that this character does not occur in the input text.
 556                         $text = strtr( $text, "\x7f", "?" );
 557                         $magicScopeVariable = $this->lock();
 558                 }
 559                 // Strip U+0000 NULL (T159174)
 560                 $text = str_replace( "\000", '', $text );
 561
 562                 $this->startParse( $title, $options, self::OT_HTML, $clearState );
 563
 564                 $this->currentRevisionCache = null;
 565                 $this->mInputSize = strlen( $text );
 566                 if ( $this->mOptions->getEnableLimitReport() ) {
 567                         $this->mOutput->resetParseStartTime();
 568                 }
 569
 570                 $oldRevisionId = $this->mRevisionId;
 571                 $oldRevisionObject = $this->mRevisionObject;
 572                 $oldRevisionTimestamp = $this->mRevisionTimestamp;
 573                 $oldRevisionUser = $this->mRevisionUser;
 574                 $oldRevisionSize = $this->mRevisionSize;
 575                 if ( $revid !== null ) {
 576                         $this->mRevisionId = $revid;
 577                         $this->mRevisionObject = null;
 578                         $this->mRevisionTimestamp = null;
 579                         $this->mRevisionUser = null;
 580                         $this->mRevisionSize = null;
 581                 }
 582
 583                 // Avoid PHP 7.1 warning from passing $this by reference
 584                 $parser = $this;
 585                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 586                 # No more strip!
 587                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 588                 $text = $this->internalParse( $text );
 589                 Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
 590
 591                 $text = $this->internalParseHalfParsed( $text, true, $linestart );
 592
 593                 /**
 594                  * A converted title will be provided in the output object if title and
 595                  * content conversion are enabled, the article text does not contain
 596                  * a conversion-suppressing double-underscore tag, and no
 597                  * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
 598                  * automatic link conversion.
 599                  */
 600                 if ( !( $options->getDisableTitleConversion()
 601                         || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 602                         || isset( $this->mDoubleUnderscores['notitleconvert'] )
 603                         || $this->mOutput->getDisplayTitle() !== false )
 604                 ) {
 605                         $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
 606                         if ( $convruletitle ) {
 607                                 $this->mOutput->setTitleText( $convruletitle );
 608                         } else {
 609                                 $titleText = $this->getTargetLanguage()->convertTitle( $title );
 610                                 $this->mOutput->setTitleText( $titleText );
 611                         }
 612                 }
 613
 614                 # Compute runtime adaptive expiry if set
 615                 $this->mOutput->finalizeAdaptiveCacheExpiry();
 616
 617                 # Warn if too many heavyweight parser functions were used
 618                 if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
 619                         $this->limitationWarn( 'expensive-parserfunction',
 620                                 $this->mExpensiveFunctionCount,
 621                                 $this->mOptions->getExpensiveParserFunctionLimit()
 622                         );
 623                 }
 624
 625                 # Information on limits, for the benefit of users who try to skirt them
 626                 if ( $this->mOptions->getEnableLimitReport() ) {
 627                         $text .= $this->makeLimitReport();
 628                 }
 629
 630                 # Wrap non-interface parser output in a <div> so it can be targeted
 631                 # with CSS (T37247)
 632                 $class = $this->mOptions->getWrapOutputClass();
 633                 if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
 634                         $this->mOutput->addWrapperDivClass( $class );
 635                 }
 636
 637                 $this->mOutput->setText( $text );
 638
 639                 $this->mRevisionId = $oldRevisionId;
 640                 $this->mRevisionObject = $oldRevisionObject;
 641                 $this->mRevisionTimestamp = $oldRevisionTimestamp;
 642                 $this->mRevisionUser = $oldRevisionUser;
 643                 $this->mRevisionSize = $oldRevisionSize;
 644                 $this->mInputSize = false;
 645                 $this->currentRevisionCache = null;
 646
 647                 return $this->mOutput;
 648         }
 649
 650         /**
 651          * Set the limit report data in the current ParserOutput, and return the
 652          * limit report HTML comment.
 653          *
 654          * @return string
 655          */
 656         protected function makeLimitReport() {
 657                 $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
 658
 659                 $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
 660                 if ( $cpuTime !== null ) {
 661                         $this->mOutput->setLimitReportData( 'limitreport-cputime',
 662                                 sprintf( "%.3f", $cpuTime )
 663                         );
 664                 }
 665
 666                 $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
 667                 $this->mOutput->setLimitReportData( 'limitreport-walltime',
 668                         sprintf( "%.3f", $wallTime )
 669                 );
 670
 671                 $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
 672                         [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
 673                 );
 674                 $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
 675                         [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
 676                 );
 677                 $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
 678                         [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
 679                 );
 680                 $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
 681                         [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
 682                 );
 683                 $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
 684                         [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
 685                 );
 686                 $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
 687                         [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
 688                 );
 689
 690                 foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
 691                         $this->mOutput->setLimitReportData( $key, $value );
 692                 }
 693
 694                 Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
 695
 696                 $limitReport = "NewPP limit report\n";
 697                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 698                         $limitReport .= 'Parsed by ' . wfHostname() . "\n";
 699                 }
 700                 $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
 701                 $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
 702                 $limitReport .= 'Dynamic content: ' .
 703                         ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
 704                         "\n";
 705                 $limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";
 706
 707                 foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
 708                         if ( Hooks::run( 'ParserLimitReportFormat',
 709                                 [ $key, &$value, &$limitReport, false, false ]
 710                         ) ) {
 711                                 $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
 712                                 $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
 713                                         ->inLanguage( 'en' )->useDatabase( false );
 714                                 if ( !$valueMsg->exists() ) {
 715                                         $valueMsg = new RawMessage( '$1' );
 716                                 }
 717                                 if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
 718                                         $valueMsg->params( $value );
 719                                         $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
 720                                 }
 721                         }
 722                 }
 723                 // Since we're not really outputting HTML, decode the entities and
 724                 // then re-encode the things that need hiding inside HTML comments.
 725                 $limitReport = htmlspecialchars_decode( $limitReport );
 726
 727                 // Sanitize for comment. Note '‐' in the replacement is U+2010,
 728                 // which looks much like the problematic '-'.
 729                 $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
 730                 $text = "\n<!-- \n$limitReport-->\n";
 731
 732                 // Add on template profiling data in human/machine readable way
 733                 $dataByFunc = $this->mProfiler->getFunctionStats();
 734                 uasort( $dataByFunc, function ( $a, $b ) {
 735                         return $b['real'] <=> $a['real']; // descending order
 736                 } );
 737                 $profileReport = [];
 738                 foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
 739                         $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
 740                                 $item['%real'], $item['real'], $item['calls'],
 741                                 htmlspecialchars( $item['name'] ) );
 742                 }
 743                 $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
 744                 $text .= implode( "\n", $profileReport ) . "\n-->\n";
 745
 746                 $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
 747
 748                 // Add other cache related metadata
 749                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 750                         $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
 751                 }
 752                 $this->mOutput->setLimitReportData( 'cachereport-timestamp',
 753                         $this->mOutput->getCacheTime() );
 754                 $this->mOutput->setLimitReportData( 'cachereport-ttl',
 755                         $this->mOutput->getCacheExpiry() );
 756                 $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
 757                         $this->mOutput->hasDynamicContent() );
 758
 759                 if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
 760                         wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
 761                                 $this->mTitle->getPrefixedDBkey() );
 762                 }
 763                 return $text;
 764         }
 765
 766         /**
 767          * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 768          * can be called from an extension tag hook.
 769          *
 770          * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 771          * instead, which means that lists and links have not been fully parsed yet,
 772          * and strip markers are still present.
 773          *
 774          * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 775          *
 776          * Use this function if you're a parser tag hook and you want to parse
 777          * wikitext before or after applying additional transformations, and you
 778          * intend to *return the result as hook output*, which will cause it to go
 779          * through the rest of parsing process automatically.
 780          *
 781          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 782          * $text are not expanded
 783          *
 784          * @param string $text Text extension wants to have parsed
 785          * @param-taint $text escapes_htmlnoent
 786          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 787          * @return string UNSAFE half-parsed HTML
 788          * @return-taint escaped
 789          */
 790         public function recursiveTagParse( $text, $frame = false ) {
 791                 // Avoid PHP 7.1 warning from passing $this by reference
 792                 $parser = $this;
 793                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 794                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 795                 $text = $this->internalParse( $text, false, $frame );
 796                 return $text;
 797         }
 798
 799         /**
 800          * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 801          * point can be called from an extension tag hook.
 802          *
 803          * The output of this function is fully-parsed HTML that is safe for output.
 804          * If you're a parser tag hook, you might want to use recursiveTagParse()
 805          * instead.
 806          *
 807          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 808          * $text are not expanded
 809          *
 810          * @since 1.25
 811          *
 812          * @param string $text Text extension wants to have parsed
 813          * @param-taint $text escapes_htmlnoent
 814          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 815          * @return string Fully parsed HTML
 816          * @return-taint escaped
 817          */
 818         public function recursiveTagParseFully( $text, $frame = false ) {
 819                 $text = $this->recursiveTagParse( $text, $frame );
 820                 $text = $this->internalParseHalfParsed( $text, false );
 821                 return $text;
 822         }
 823
 824         /**
 825          * Expand templates and variables in the text, producing valid, static wikitext.
 826          * Also removes comments.
 827          * Do not call this function recursively.
 828          * @param string $text
 829          * @param Title|null $title
 830          * @param ParserOptions $options
 831          * @param int|null $revid
 832          * @param bool|PPFrame $frame
 833          * @return mixed|string
 834          */
 835         public function preprocess( $text, Title $title = null,
 836                 ParserOptions $options, $revid = null, $frame = false
 837         ) {
 838                 $magicScopeVariable = $this->lock();
 839                 $this->startParse( $title, $options, self::OT_PREPROCESS, true );
 840                 if ( $revid !== null ) {
 841                         $this->mRevisionId = $revid;
 842                 }
 843                 // Avoid PHP 7.1 warning from passing $this by reference
 844                 $parser = $this;
 845                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 846                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 847                 $text = $this->replaceVariables( $text, $frame );
 848                 $text = $this->mStripState->unstripBoth( $text );
 849                 return $text;
 850         }
 851
 852         /**
 853          * Recursive parser entry point that can be called from an extension tag
 854          * hook.
 855          *
 856          * @param string $text Text to be expanded
 857          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 858          * @return string
 859          * @since 1.19
 860          */
 861         public function recursivePreprocess( $text, $frame = false ) {
 862                 $text = $this->replaceVariables( $text, $frame );
 863                 $text = $this->mStripState->unstripBoth( $text );
 864                 return $text;
 865         }
 866
 867         /**
 868          * Process the wikitext for the "?preload=" feature. (T7210)
 869          *
 870          * "<noinclude>", "<includeonly>" etc. are parsed as for template
 871          * transclusion, comments, templates, arguments, tags hooks and parser
 872          * functions are untouched.
 873          *
 874          * @param string $text
 875          * @param Title $title
 876          * @param ParserOptions $options
 877          * @param array $params
 878          * @return string
 879          */
 880         public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
 881                 $msg = new RawMessage( $text );
 882                 $text = $msg->params( $params )->plain();
 883
 884                 # Parser (re)initialisation
 885                 $magicScopeVariable = $this->lock();
 886                 $this->startParse( $title, $options, self::OT_PLAIN, true );
 887
 888                 $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
 889                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
 890                 $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
 891                 $text = $this->mStripState->unstripBoth( $text );
 892                 return $text;
 893         }
 894
 895         /**
 896          * Set the current user.
 897          * Should only be used when doing pre-save transform.
 898          *
 899          * @param User|null $user User object or null (to reset)
 900          */
 901         public function setUser( $user ) {
 902                 $this->mUser = $user;
 903         }
 904
 905         /**
 906          * Set the context title
 907          *
 908          * @param Title|null $t Title to use; a falsy value is replaced by a placeholder 'NO TITLE' title
 909          */
 910         public function setTitle( $t ) {
 911                 if ( !$t ) {
 912                         $t = Title::newFromText( 'NO TITLE' );
 913                 }
 914
 915                 if ( $t->hasFragment() ) {
 916                         # Strip the fragment to avoid various odd effects
 917                         $this->mTitle = $t->createFragmentTarget( '' );
 918                 } else {
 919                         $this->mTitle = $t;
 920                 }
 921         }
 922
 923         /**
 924          * Accessor for the Title object
 925          *
 926          * @return Title|null
 927          */
 928         public function getTitle() {
 929                 return $this->mTitle;
 930         }
 931
 932         /**
 933          * Accessor/mutator for the Title object
 934          *
 935          * @param Title|null $x Title object or null to just get the current one
 936          * @return Title
 937          */
 938         public function Title( $x = null ) {
 939                 return wfSetVar( $this->mTitle, $x );
 940         }
 941
 942         /**
 943          * Set the output type
 944          *
 945          * @param int $ot New value
 946          */
 947         public function setOutputType( $ot ) {
 948                 $this->mOutputType = $ot;
 949                 # Shortcut alias
 950                 $this->ot = [
 951                         'html' => $ot == self::OT_HTML,
 952                         'wiki' => $ot == self::OT_WIKI,
 953                         'pre' => $ot == self::OT_PREPROCESS,
 954                         'plain' => $ot == self::OT_PLAIN,
 955                 ];
 956         }
 957
 958         /**
 959          * Accessor/mutator for the output type
 960          *
 961          * @param int|null $x New value or null to just get the current one
 962          * @return int
 963          */
 964         public function OutputType( $x = null ) {
 965                 return wfSetVar( $this->mOutputType, $x );
 966         }
 967
 968         /**
 969          * Get the ParserOutput object
 970          *
 971          * @return ParserOutput
 972          */
 973         public function getOutput() {
 974                 return $this->mOutput;
 975         }
 976
 977         /**
 978          * Get the ParserOptions object
 979          *
 980          * @return ParserOptions
 981          */
 982         public function getOptions() {
 983                 return $this->mOptions;
 984         }
 985
 986         /**
 987          * Accessor/mutator for the ParserOptions object
 988          *
 989          * @param ParserOptions|null $x New value or null to just get the current one
 990          * @return ParserOptions Current ParserOptions object
 991          */
 992         public function Options( $x = null ) {
 993                 return wfSetVar( $this->mOptions, $x );
 994         }
 995
 996         /**
 997          * @return int
 998          */
 999         public function nextLinkID() {
1000                 return $this->mLinkID++;
1001         }
1002
1003         /**
1004          * @param int $id
1005          */
1006         public function setLinkID( $id ) {
1007                 $this->mLinkID = $id;
1008         }
1009
1010         /**
1011          * Get a language object for use in parser functions such as {{FORMATNUM:}}
1012          * @return Language
1013          */
1014         public function getFunctionLang() {
1015                 return $this->getTargetLanguage();
1016         }
1017
1018         /**
1019          * Get the target language for the content being parsed. This is usually the
1020          * language that the content is in.
1021          *
1022          * @since 1.19
1023          *
1024          * @throws MWException
1025          * @return Language
1026          */
1027         public function getTargetLanguage() {
1028                 $target = $this->mOptions->getTargetLanguage();
1029
1030                 if ( $target !== null ) {
1031                         return $target;
1032                 } elseif ( $this->mOptions->getInterfaceMessage() ) {
1033                         return $this->mOptions->getUserLangObj();
1034                 } elseif ( is_null( $this->mTitle ) ) {
1035                         throw new MWException( __METHOD__ . ': $this->mTitle is null' );
1036                 }
1037
1038                 return $this->mTitle->getPageLanguage();
1039         }
1040
1041         /**
1042          * Get the language object for language conversion
1043          * @deprecated since 1.32, just use getTargetLanguage()
1044          * @return Language|null
1045          */
1046         public function getConverterLanguage() {
1047                 return $this->getTargetLanguage();
1048         }
1049
1050         /**
1051          * Get a User object either from $this->mUser, if set, or from the
1052          * ParserOptions object otherwise
1053          *
1054          * @return User
1055          */
1056         public function getUser() {
1057                 if ( !is_null( $this->mUser ) ) {
1058                         return $this->mUser;
1059                 }
1060                 return $this->mOptions->getUser();
1061         }
1062
1063         /**
1064          * Get a preprocessor object
1065          *
1066          * @return Preprocessor
1067          */
1068         public function getPreprocessor() {
1069                 if ( !isset( $this->mPreprocessor ) ) {
1070                         $class = $this->svcOptions->get( 'preprocessorClass' );
1071                         $this->mPreprocessor = new $class( $this );
1072                 }
1073                 return $this->mPreprocessor;
1074         }
1075
1076         /**
1077          * Get a LinkRenderer instance to make links with
1078          *
1079          * @since 1.28
1080          * @return LinkRenderer
1081          */
1082         public function getLinkRenderer() {
1083                 // XXX We make the LinkRenderer with current options and then cache it forever
1084                 if ( !$this->mLinkRenderer ) {
1085                         $this->mLinkRenderer = $this->linkRendererFactory->create();
1086                         $this->mLinkRenderer->setStubThreshold(
1087                                 $this->getOptions()->getStubThreshold()
1088                         );
1089                 }
1090
1091                 return $this->mLinkRenderer;
1092         }
1093
1094         /**
1095          * Get the MagicWordFactory that this Parser is using
1096          *
1097          * @since 1.32
1098          * @return MagicWordFactory
1099          */
1100         public function getMagicWordFactory() {
1101                 return $this->magicWordFactory;
1102         }
1103
1104         /**
1105          * Get the content language that this Parser is using
1106          *
1107          * @since 1.32
1108          * @return Language
1109          */
1110         public function getContentLanguage() {
1111                 return $this->contLang;
1112         }
1113
1114         /**
1115          * Replaces all occurrences of HTML-style comments and the given tags
1116          * in the text with a unique marker and returns the resulting text. The output
1117          * parameter $matches will be an associative array filled with data in
1118          * the form:
1119          *
1120          * @code
1121          *   'UNIQ-xxxxx' => [
1122          *     'element',
1123          *     'tag content',
1124          *     [ 'param' => 'x' ],
1125          *     '<element param="x">tag content</element>' ]
1126          * @endcode
1127          *
1128          * @param array $elements List of element names. Comments are always extracted.
1129          * @param string $text Source text string.
1130          * @param array &$matches Out parameter, Array: extracted tags
1131          * @return string Stripped text
1132          */
1133         public static function extractTagsAndParams( $elements, $text, &$matches ) {
1134                 static $n = 1;
1135                 $stripped = '';
1136                 $matches = [];
1137
1138                 $taglist = implode( '|', $elements );
1139                 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1140
1141                 while ( $text != '' ) {
1142                         $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1143                         $stripped .= $p[0];
1144                         if ( count( $p ) < 5 ) {
1145                                 break;
1146                         }
1147                         if ( count( $p ) > 5 ) {
1148                                 # comment
1149                                 $element = $p[4];
1150                                 $attributes = '';
1151                                 $close = '';
1152                                 $inside = $p[5];
1153                         } else {
1154                                 # tag
1155                                 list( , $element, $attributes, $close, $inside ) = $p;
1156                         }
1157
1158                         $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1159                         $stripped .= $marker;
1160
1161                         if ( $close === '/>' ) {
1162                                 # Empty element tag, <tag />
1163                                 $content = null;
1164                                 $text = $inside;
1165                                 $tail = null;
1166                         } else {
1167                                 if ( $element === '!--' ) {
1168                                         $end = '/(-->)/';
1169                                 } else {
1170                                         $end = "/(<\\/$element\\s*>)/i";
1171                                 }
1172                                 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1173                                 $content = $q[0];
1174                                 if ( count( $q ) < 3 ) {
1175                                         # No end tag -- let it run out to the end of the text.
1176                                         $tail = '';
1177                                         $text = '';
1178                                 } else {
1179                                         list( , $tail, $text ) = $q;
1180                                 }
1181                         }
1182
1183                         $matches[$marker] = [ $element,
1184                                 $content,
1185                                 Sanitizer::decodeTagAttributes( $attributes ),
1186                                 "<$element$attributes$close$content$tail" ];
1187                 }
1188                 return $stripped;
1189         }
1190
1191         /**
1192          * Get a list of strippable XML-like elements
1193          *
1194          * @return array
1195          */
1196         public function getStripList() {
1197                 return $this->mStripList;
1198         }
1199
1200         /**
1201          * Get the StripState
1202          *
1203          * @return StripState
1204          */
1205         public function getStripState() {
1206                 return $this->mStripState;
1207         }
1208
1209         /**
1210          * Add an item to the strip state
1211          * Returns the unique tag which must be inserted into the stripped text
1212          * The tag will be replaced with the original text in unstrip()
1213          *
1214          * @param string $text
1215          *
1216          * @return string
1217          */
1218         public function insertStripItem( $text ) {
1219                 $marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
1220                 $this->mMarkerIndex++;
1221                 $this->mStripState->addGeneral( $marker, $text );
1222                 return $marker;
1223         }
1224
1225         /**
1226          * parse the wiki syntax used to render tables
1227          *
1228          * @private
1229          * @param string $text
1230          * @return string
1231          */
1232         public function doTableStuff( $text ) {
1233                 $lines = StringUtils::explode( "\n", $text );
1234                 $out = '';
1235                 $td_history = []; # Is currently a td tag open?
1236                 $last_tag_history = []; # Save history of last lag activated (td, th or caption)
1237                 $tr_history = []; # Is currently a tr tag open?
1238                 $tr_attributes = []; # history of tr attributes
1239                 $has_opened_tr = []; # Did this table open a <tr> element?
1240                 $indent_level = 0; # indent level of the table
1241
1242                 foreach ( $lines as $outLine ) {
1243                         $line = trim( $outLine );
1244
1245                         if ( $line === '' ) { # empty line, go to next line
1246                                 $out .= $outLine . "\n";
1247                                 continue;
1248                         }
1249
1250                         $first_character = $line[0];
1251                         $first_two = substr( $line, 0, 2 );
1252                         $matches = [];
1253
1254                         if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1255                                 # First check if we are starting a new table
1256                                 $indent_level = strlen( $matches[1] );
1257
1258                                 $attributes = $this->mStripState->unstripBoth( $matches[2] );
1259                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1260
1261                                 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1262                                 array_push( $td_history, false );
1263                                 array_push( $last_tag_history, '' );
1264                                 array_push( $tr_history, false );
1265                                 array_push( $tr_attributes, '' );
1266                                 array_push( $has_opened_tr, false );
1267                         } elseif ( count( $td_history ) == 0 ) {
1268                                 # Don't do any of the following
1269                                 $out .= $outLine . "\n";
1270                                 continue;
1271                         } elseif ( $first_two === '|}' ) {
1272                                 # We are ending a table
1273                                 $line = '</table>' . substr( $line, 2 );
1274                                 $last_tag = array_pop( $last_tag_history );
1275
1276                                 if ( !array_pop( $has_opened_tr ) ) {
1277                                         $line = "<tr><td></td></tr>{$line}";
1278                                 }
1279
1280                                 if ( array_pop( $tr_history ) ) {
1281                                         $line = "</tr>{$line}";
1282                                 }
1283
1284                                 if ( array_pop( $td_history ) ) {
1285                                         $line = "</{$last_tag}>{$line}";
1286                                 }
1287                                 array_pop( $tr_attributes );
1288                                 if ( $indent_level > 0 ) {
1289                                         $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1290                                 } else {
1291                                         $outLine = $line;
1292                                 }
1293                         } elseif ( $first_two === '|-' ) {
1294                                 # Now we have a table row
1295                                 $line = preg_replace( '#^\|-+#', '', $line );
1296
1297                                 # Whats after the tag is now only attributes
1298                                 $attributes = $this->mStripState->unstripBoth( $line );
1299                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1300                                 array_pop( $tr_attributes );
1301                                 array_push( $tr_attributes, $attributes );
1302
1303                                 $line = '';
1304                                 $last_tag = array_pop( $last_tag_history );
1305                                 array_pop( $has_opened_tr );
1306                                 array_push( $has_opened_tr, true );
1307
1308                                 if ( array_pop( $tr_history ) ) {
1309                                         $line = '</tr>';
1310                                 }
1311
1312                                 if ( array_pop( $td_history ) ) {
1313                                         $line = "</{$last_tag}>{$line}";
1314                                 }
1315
1316                                 $outLine = $line;
1317                                 array_push( $tr_history, false );
1318                                 array_push( $td_history, false );
1319                                 array_push( $last_tag_history, '' );
1320                         } elseif ( $first_character === '|'
1321                                 || $first_character === '!'
1322                                 || $first_two === '|+'
1323                         ) {
1324                                 # This might be cell elements, td, th or captions
1325                                 if ( $first_two === '|+' ) {
1326                                         $first_character = '+';
1327                                         $line = substr( $line, 2 );
1328                                 } else {
1329                                         $line = substr( $line, 1 );
1330                                 }
1331
1332                                 // Implies both are valid for table headings.
1333                                 if ( $first_character === '!' ) {
1334                                         $line = StringUtils::replaceMarkup( '!!', '||', $line );
1335                                 }
1336
1337                                 # Split up multiple cells on the same line.
1338                                 # FIXME : This can result in improper nesting of tags processed
1339                                 # by earlier parser steps.
1340                                 $cells = explode( '||', $line );
1341
1342                                 $outLine = '';
1343
1344                                 # Loop through each table cell
1345                                 foreach ( $cells as $cell ) {
1346                                         $previous = '';
1347                                         if ( $first_character !== '+' ) {
1348                                                 $tr_after = array_pop( $tr_attributes );
1349                                                 if ( !array_pop( $tr_history ) ) {
1350                                                         $previous = "<tr{$tr_after}>\n";
1351                                                 }
1352                                                 array_push( $tr_history, true );
1353                                                 array_push( $tr_attributes, '' );
1354                                                 array_pop( $has_opened_tr );
1355                                                 array_push( $has_opened_tr, true );
1356                                         }
1357
1358                                         $last_tag = array_pop( $last_tag_history );
1359
1360                                         if ( array_pop( $td_history ) ) {
1361                                                 $previous = "</{$last_tag}>\n{$previous}";
1362                                         }
1363
1364                                         if ( $first_character === '|' ) {
1365                                                 $last_tag = 'td';
1366                                         } elseif ( $first_character === '!' ) {
1367                                                 $last_tag = 'th';
1368                                         } elseif ( $first_character === '+' ) {
1369                                                 $last_tag = 'caption';
1370                                         } else {
1371                                                 $last_tag = '';
1372                                         }
1373
1374                                         array_push( $last_tag_history, $last_tag );
1375
1376                                         # A cell could contain both parameters and data
1377                                         $cell_data = explode( '|', $cell, 2 );
1378
1379                                         # T2553: Note that a '|' inside an invalid link should not
1380                                         # be mistaken as delimiting cell parameters
1381                                         # Bug T153140: Neither should language converter markup.
1382                                         if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1383                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1384                                         } elseif ( count( $cell_data ) == 1 ) {
1385                                                 // Whitespace in cells is trimmed
1386                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1387                                         } else {
1388                                                 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1389                                                 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1390                                                 // Whitespace in cells is trimmed
1391                                                 $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1392                                         }
1393
1394                                         $outLine .= $cell;
1395                                         array_push( $td_history, true );
1396                                 }
1397                         }
1398                         $out .= $outLine . "\n";
1399                 }
1400
1401                 # Closing open td, tr && table
1402                 while ( count( $td_history ) > 0 ) {
1403                         if ( array_pop( $td_history ) ) {
1404                                 $out .= "</td>\n";
1405                         }
1406                         if ( array_pop( $tr_history ) ) {
1407                                 $out .= "</tr>\n";
1408                         }
1409                         if ( !array_pop( $has_opened_tr ) ) {
1410                                 $out .= "<tr><td></td></tr>\n";
1411                         }
1412
1413                         $out .= "</table>\n";
1414                 }
1415
1416                 # Remove trailing line-ending (b/c)
1417                 if ( substr( $out, -1 ) === "\n" ) {
1418                         $out = substr( $out, 0, -1 );
1419                 }
1420
1421                 # special case: don't return empty table
1422                 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1423                         $out = '';
1424                 }
1425
1426                 return $out;
1427         }
1428
1429         /**
1430          * Helper function for parse() that transforms wiki markup into half-parsed
1431          * HTML. Only called for $mOutputType == self::OT_HTML.
1432          *
1433          * @private
1434          *
1435          * @param string $text The text to parse
1436          * @param-taint $text escapes_html
1437          * @param bool $isMain Whether this is being called from the main parse() function
1438          * @param PPFrame|bool $frame A pre-processor frame
1439          *
1440          * @return string
1441          */
1442         public function internalParse( $text, $isMain = true, $frame = false ) {
1443                 $origText = $text;
1444
1445                 // Avoid PHP 7.1 warning from passing $this by reference
1446                 $parser = $this;
1447
1448                 # Hook to suspend the parser in this state
1449                 if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
1450                         return $text;
1451                 }
1452
1453                 # if $frame is provided, then use $frame for replacing any variables
1454                 if ( $frame ) {
1455                         # use frame depth to infer how include/noinclude tags should be handled
1456                         # depth=0 means this is the top-level document; otherwise it's an included document
1457                         if ( !$frame->depth ) {
1458                                 $flag = 0;
1459                         } else {
1460                                 $flag = self::PTD_FOR_INCLUSION;
1461                         }
1462                         $dom = $this->preprocessToDom( $text, $flag );
1463                         $text = $frame->expand( $dom );
1464                 } else {
1465                         # if $frame is not provided, then use old-style replaceVariables
1466                         $text = $this->replaceVariables( $text );
1467                 }
1468
1469                 Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
1470                 $text = Sanitizer::removeHTMLtags(
1471                         $text,
1472                         [ $this, 'attributeStripCallback' ],
1473                         false,
1474                         array_keys( $this->mTransparentTagHooks ),
1475                         [],
1476                         [ $this, 'addTrackingCategory' ]
1477                 );
1478                 Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );
1479
1480                 # Tables need to come after variable replacement for things to work
1481                 # properly; putting them before other transformations should keep
1482                 # exciting things like link expansions from showing up in surprising
1483                 # places.
1484                 $text = $this->doTableStuff( $text );
1485
1486                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
1487
1488                 $text = $this->doDoubleUnderscore( $text );
1489
1490                 $text = $this->doHeadings( $text );
1491                 $text = $this->replaceInternalLinks( $text );
1492                 $text = $this->doAllQuotes( $text );
1493                 $text = $this->replaceExternalLinks( $text );
1494
1495                 # replaceInternalLinks may sometimes leave behind
1496                 # absolute URLs, which have to be masked to hide them from replaceExternalLinks
1497                 $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );
1498
1499                 $text = $this->doMagicLinks( $text );
1500                 $text = $this->formatHeadings( $text, $origText, $isMain );
1501
1502                 return $text;
1503         }
1504
1505         /**
1506          * Helper function for parse() that transforms half-parsed HTML into fully
1507          * parsed HTML.
1508          *
1509          * @param string $text
1510          * @param bool $isMain
1511          * @param bool $linestart
1512          * @return string
1513          */
1514         private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
1515                 $text = $this->mStripState->unstripGeneral( $text );
1516
1517                 // Avoid PHP 7.1 warning from passing $this by reference
1518                 $parser = $this;
1519
1520                 if ( $isMain ) {
1521                         Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
1522                 }
1523
1524                 # Clean up special characters, only run once, next-to-last before doBlockLevels
1525                 $text = Sanitizer::armorFrenchSpaces( $text );
1526
1527                 $text = $this->doBlockLevels( $text, $linestart );
1528
1529                 $this->replaceLinkHolders( $text );
1530
1531                 /**
1532                  * The input doesn't get language converted if
1533                  * a) It's disabled
1534                  * b) Content isn't converted
1535                  * c) It's a conversion table
1536                  * d) it is an interface message (which is in the user language)
1537                  */
1538                 if ( !( $this->mOptions->getDisableContentConversion()
1539                         || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
1540                         && !$this->mOptions->getInterfaceMessage()
1541                 ) {
1542                         # The position of the convert() call should not be changed. it
1543                         # assumes that the links are all replaced and the only thing left
1544                         # is the <nowiki> mark.
1545                         $text = $this->getTargetLanguage()->convert( $text );
1546                 }
1547
1548                 $text = $this->mStripState->unstripNoWiki( $text );
1549
1550                 if ( $isMain ) {
1551                         Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
1552                 }
1553
1554                 $text = $this->replaceTransparentTags( $text );
1555                 $text = $this->mStripState->unstripGeneral( $text );
1556
1557                 $text = Sanitizer::normalizeCharReferences( $text );
1558
1559                 if ( MWTidy::isEnabled() ) {
1560                         if ( $this->mOptions->getTidy() ) {
1561                                 $text = MWTidy::tidy( $text );
1562                         }
1563                 } else {
1564                         # attempt to sanitize at least some nesting problems
1565                         # (T4702 and quite a few others)
1566                         # This code path is buggy and deprecated!
1567                         wfDeprecated( 'disabling tidy', '1.33' );
1568                         $tidyregs = [
1569                                 # ''Something [http://www.cool.com cool''] -->
1570                                 # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
1571                                 '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
1572                                 '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
1573                                 # fix up an anchor inside another anchor, only
1574                                 # at least for a single single nested link (T5695)
1575                                 '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
1576                                 '\\1\\2</a>\\3</a>\\1\\4</a>',
1577                                 # fix div inside inline elements- doBlockLevels won't wrap a line which
1578                                 # contains a div, so fix it up here; replace
1579                                 # div with escaped text
1580                                 '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
1581                                 '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
1582                                 # remove empty italic or bold tag pairs, some
1583                                 # introduced by rules above
1584                                 '/<([bi])><\/\\1>/' => '',
1585                         ];
1586
1587                         $text = preg_replace(
1588                                 array_keys( $tidyregs ),
1589                                 array_values( $tidyregs ),
1590                                 $text );
1591                 }
1592
1593                 if ( $isMain ) {
1594                         Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
1595                 }
1596
1597                 return $text;
1598         }
1599
1600         /**
1601          * Replace special strings like "ISBN xxx" and "RFC xxx" with
1602          * magic external links.
1603          *
1604          * DML
1605          * @private
1606          *
1607          * @param string $text
1608          *
1609          * @return string
1610          */
1611         public function doMagicLinks( $text ) {
1612                 $prots = wfUrlProtocolsWithoutProtRel();
1613                 $urlChar = self::EXT_LINK_URL_CLASS;
1614                 $addr = self::EXT_LINK_ADDR;
1615                 $space = self::SPACE_NOT_NL; #  non-newline space
1616                 $spdash = "(?:-|$space)"; # a dash or a non-newline space
1617                 $spaces = "$space++"; # possessive match of 1 or more spaces
1618                 $text = preg_replace_callback(
1619                         '!(?:                        # Start cases
1620                                 (<a[ \t\r\n>].*?</a>) |    # m[1]: Skip link text
1621                                 (<.*?>) |                  # m[2]: Skip stuff inside HTML elements' . "
1622                                 (\b                        # m[3]: Free external links
1623                                         (?i:$prots)
1624                                         ($addr$urlChar*)         # m[4]: Post-protocol path
1625                                 ) |
1626                                 \b(?:RFC|PMID) $spaces     # m[5]: RFC or PMID, capture number
1627                                         ([0-9]+)\b |
1628                                 \bISBN $spaces (           # m[6]: ISBN, capture number
1629                                         (?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
1630                                         (?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
1631                                         [0-9Xx]                  #  check digit
1632                                 )\b
1633                         )!xu", [ $this, 'magicLinkCallback' ], $text );
1634                 return $text;
1635         }
1636
1637         /**
1638          * @throws MWException
1639          * @param array $m
1640          * @return string HTML
1641          */
1642         public function magicLinkCallback( $m ) {
1643                 if ( isset( $m[1] ) && $m[1] !== '' ) {
1644                         # Skip anchor
1645                         return $m[0];
1646                 } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
1647                         # Skip HTML element
1648                         return $m[0];
1649                 } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
1650                         # Free external link
1651                         return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
1652                 } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
1653                         # RFC or PMID
1654                         if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
1655                                 if ( !$this->mOptions->getMagicRFCLinks() ) {
1656                                         return $m[0];
1657                                 }
1658                                 $keyword = 'RFC';
1659                                 $urlmsg = 'rfcurl';
1660                                 $cssClass = 'mw-magiclink-rfc';
1661                                 $trackingCat = 'magiclink-tracking-rfc';
1662                                 $id = $m[5];
1663                         } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
1664                                 if ( !$this->mOptions->getMagicPMIDLinks() ) {
1665                                         return $m[0];
1666                                 }
1667                                 $keyword = 'PMID';
1668                                 $urlmsg = 'pubmedurl';
1669                                 $cssClass = 'mw-magiclink-pmid';
1670                                 $trackingCat = 'magiclink-tracking-pmid';
1671                                 $id = $m[5];
1672                         } else {
1673                                 throw new MWException( __METHOD__ . ': unrecognised match type "' .
1674                                         substr( $m[0], 0, 20 ) . '"' );
1675                         }
1676                         $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
1677                         $this->addTrackingCategory( $trackingCat );
1678                         return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
1679                 } elseif ( isset( $m[6] ) && $m[6] !== ''
1680                         && $this->mOptions->getMagicISBNLinks()
1681                 ) {
1682                         # ISBN
1683                         $isbn = $m[6];
1684                         $space = self::SPACE_NOT_NL; #  non-newline space
1685                         $isbn = preg_replace( "/$space/", ' ', $isbn );
1686                         $num = strtr( $isbn, [
1687                                 '-' => '',
1688                                 ' ' => '',
1689                                 'x' => 'X',
1690                         ] );
1691                         $this->addTrackingCategory( 'magiclink-tracking-isbn' );
1692                         return $this->getLinkRenderer()->makeKnownLink(
1693                                 SpecialPage::getTitleFor( 'Booksources', $num ),
1694                                 "ISBN $isbn",
1695                                 [
1696                                         'class' => 'internal mw-magiclink-isbn',
1697                                         'title' => false // suppress title attribute
1698                                 ]
1699                         );
1700                 } else {
1701                         return $m[0];
1702                 }
1703         }
1704
1705         /**
1706          * Make a free external link, given a user-supplied URL
1707          *
1708          * @param string $url
1709          * @param int $numPostProto
1710          *   The number of characters after the protocol.
1711          * @return string HTML
1712          * @private
1713          */
1714         public function makeFreeExternalLink( $url, $numPostProto ) {
1715                 $trail = '';
1716
1717                 # The characters '<' and '>' (which were escaped by
1718                 # removeHTMLtags()) should not be included in
1719                 # URLs, per RFC 2396.
1720                 # Make &nbsp; terminate a URL as well (bug T84937)
1721                 $m2 = [];
1722                 if ( preg_match(
1723                         '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
1724                         $url,
1725                         $m2,
1726                         PREG_OFFSET_CAPTURE
1727                 ) ) {
1728                         $trail = substr( $url, $m2[0][1] ) . $trail;
1729                         $url = substr( $url, 0, $m2[0][1] );
1730                 }
1731
1732                 # Move trailing punctuation to $trail
1733                 $sep = ',;\.:!?';
1734                 # If there is no left bracket, then consider right brackets fair game too
1735                 if ( strpos( $url, '(' ) === false ) {
1736                         $sep .= ')';
1737                 }
1738
1739                 $urlRev = strrev( $url );
1740                 $numSepChars = strspn( $urlRev, $sep );
1741                 # Don't break a trailing HTML entity by moving the ; into $trail
1742                 # This is in hot code, so use substr_compare to avoid having to
1743                 # create a new string object for the comparison
1744                 if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
1745                         # more optimization: instead of running preg_match with a $
1746                         # anchor, which can be slow, do the match on the reversed
1747                         # string starting at the desired offset.
1748                         # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
1749                         if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
1750                                 $numSepChars--;
1751                         }
1752                 }
1753                 if ( $numSepChars ) {
1754                         $trail = substr( $url, -$numSepChars ) . $trail;
1755                         $url = substr( $url, 0, -$numSepChars );
1756                 }
1757
1758                 # Verify that we still have a real URL after trail removal, and
1759                 # not just lone protocol
1760                 if ( strlen( $trail ) >= $numPostProto ) {
1761                         return $url . $trail;
1762                 }
1763
1764                 $url = Sanitizer::cleanUrl( $url );
1765
1766                 # Is this an external image?
1767                 $text = $this->maybeMakeExternalImage( $url );
1768                 if ( $text === false ) {
1769                         # Not an image, make a link
1770                         $text = Linker::makeExternalLink( $url,
1771                                 $this->getTargetLanguage()->getConverter()->markNoConversion( $url ),
1772                                 true, 'free',
1773                                 $this->getExternalLinkAttribs( $url ), $this->mTitle );
1774                         # Register it in the output object...
1775                         $this->mOutput->addExternalLink( $url );
1776                 }
1777                 return $text . $trail;
1778         }
1779
1780         /**
1781          * Parse headers and return html
1782          *
1783          * @private
1784          *
1785          * @param string $text
1786          *
1787          * @return string
1788          */
1789         public function doHeadings( $text ) {
1790                 for ( $i = 6; $i >= 1; --$i ) {
1791                         $h = str_repeat( '=', $i );
1792                         // Trim non-newline whitespace from headings
1793                         // Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
1794                         $text = preg_replace( "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m", "<h$i>\\1</h$i>", $text );
1795                 }
1796                 return $text;
1797         }
1798
1799         /**
1800          * Replace single quotes with HTML markup
1801          * @private
1802          *
1803          * @param string $text
1804          *
1805          * @return string The altered text
1806          */
1807         public function doAllQuotes( $text ) {
1808                 $outtext = '';
1809                 $lines = StringUtils::explode( "\n", $text );
1810                 foreach ( $lines as $line ) {
1811                         $outtext .= $this->doQuotes( $line ) . "\n";
1812                 }
1813                 $outtext = substr( $outtext, 0, -1 );
1814                 return $outtext;
1815         }
1816
1817         /**
1818          * Helper function for doAllQuotes()
1819          *
1820          * @param string $text
1821          *
1822          * @return string
1823          */
1824         public function doQuotes( $text ) {
1825                 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1826                 $countarr = count( $arr );
1827                 if ( $countarr == 1 ) {
1828                         return $text;
1829                 }
1830
1831                 // First, do some preliminary work. This may shift some apostrophes from
1832                 // being mark-up to being text. It also counts the number of occurrences
1833                 // of bold and italics mark-ups.
1834                 $numbold = 0;
1835                 $numitalics = 0;
1836                 for ( $i = 1; $i < $countarr; $i += 2 ) {
1837                         $thislen = strlen( $arr[$i] );
1838                         // If there are ever four apostrophes, assume the first is supposed to
1839                         // be text, and the remaining three constitute mark-up for bold text.
1840                         // (T15227: ''''foo'''' turns into ' ''' foo ' ''')
1841                         if ( $thislen == 4 ) {
1842                                 $arr[$i - 1] .= "'";
1843                                 $arr[$i] = "'''";
1844                                 $thislen = 3;
1845                         } elseif ( $thislen > 5 ) {
1846                                 // If there are more than 5 apostrophes in a row, assume they're all
1847                                 // text except for the last 5.
1848                                 // (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
1849                                 $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
1850                                 $arr[$i] = "'''''";
1851                                 $thislen = 5;
1852                         }
1853                         // Count the number of occurrences of bold and italics mark-ups.
1854                         if ( $thislen == 2 ) {
1855                                 $numitalics++;
1856                         } elseif ( $thislen == 3 ) {
1857                                 $numbold++;
1858                         } elseif ( $thislen == 5 ) {
1859                                 $numitalics++;
1860                                 $numbold++;
1861                         }
1862                 }
1863
1864                 // If there is an odd number of both bold and italics, it is likely
1865                 // that one of the bold ones was meant to be an apostrophe followed
1866                 // by italics. Which one we cannot know for certain, but it is more
1867                 // likely to be one that has a single-letter word before it.
1868                 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
1869                         $firstsingleletterword = -1;
1870                         $firstmultiletterword = -1;
1871                         $firstspace = -1;
1872                         for ( $i = 1; $i < $countarr; $i += 2 ) {
1873                                 if ( strlen( $arr[$i] ) == 3 ) {
1874                                         $x1 = substr( $arr[$i - 1], -1 );
1875                                         $x2 = substr( $arr[$i - 1], -2, 1 );
1876                                         if ( $x1 === ' ' ) {
1877                                                 if ( $firstspace == -1 ) {
1878                                                         $firstspace = $i;
1879                                                 }
1880                                         } elseif ( $x2 === ' ' ) {
1881                                                 $firstsingleletterword = $i;
1882                                                 // if $firstsingleletterword is set, we don't
1883                                                 // look at the other options, so we can bail early.
1884                                                 break;
1885                                         } elseif ( $firstmultiletterword == -1 ) {
1886                                                 $firstmultiletterword = $i;
1887                                         }
1888                                 }
1889                         }
1890
1891                         // If there is a single-letter word, use it!
1892                         if ( $firstsingleletterword > -1 ) {
1893                                 $arr[$firstsingleletterword] = "''";
1894                                 $arr[$firstsingleletterword - 1] .= "'";
1895                         } elseif ( $firstmultiletterword > -1 ) {
1896                                 // If not, but there's a multi-letter word, use that one.
1897                                 $arr[$firstmultiletterword] = "''";
1898                                 $arr[$firstmultiletterword - 1] .= "'";
1899                         } elseif ( $firstspace > -1 ) {
1900                                 // ... otherwise use the first one that has neither.
1901                                 // (notice that it is possible for all three to be -1 if, for example,
1902                                 // there is only one pentuple-apostrophe in the line)
1903                                 $arr[$firstspace] = "''";
1904                                 $arr[$firstspace - 1] .= "'";
1905                         }
1906                 }
1907
1908                 // Now let's actually convert our apostrophic mush to HTML!
1909                 $output = '';
1910                 $buffer = '';
1911                 $state = '';
1912                 $i = 0;
1913                 foreach ( $arr as $r ) {
1914                         if ( ( $i % 2 ) == 0 ) {
1915                                 if ( $state === 'both' ) {
1916                                         $buffer .= $r;
1917                                 } else {
1918                                         $output .= $r;
1919                                 }
1920                         } else {
1921                                 $thislen = strlen( $r );
1922                                 if ( $thislen == 2 ) {
1923                                         if ( $state === 'i' ) {
1924                                                 $output .= '</i>';
1925                                                 $state = '';
1926                                         } elseif ( $state === 'bi' ) {
1927                                                 $output .= '</i>';
1928                                                 $state = 'b';
1929                                         } elseif ( $state === 'ib' ) {
1930                                                 $output .= '</b></i><b>';
1931                                                 $state = 'b';
1932                                         } elseif ( $state === 'both' ) {
1933                                                 $output .= '<b><i>' . $buffer . '</i>';
1934                                                 $state = 'b';
1935                                         } else { // $state can be 'b' or ''
1936                                                 $output .= '<i>';
1937                                                 $state .= 'i';
1938                                         }
1939                                 } elseif ( $thislen == 3 ) {
1940                                         if ( $state === 'b' ) {
1941                                                 $output .= '</b>';
1942                                                 $state = '';
1943                                         } elseif ( $state === 'bi' ) {
1944                                                 $output .= '</i></b><i>';
1945                                                 $state = 'i';
1946                                         } elseif ( $state === 'ib' ) {
1947                                                 $output .= '</b>';
1948                                                 $state = 'i';
1949                                         } elseif ( $state === 'both' ) {
1950                                                 $output .= '<i><b>' . $buffer . '</b>';
1951                                                 $state = 'i';
1952                                         } else { // $state can be 'i' or ''
1953                                                 $output .= '<b>';
1954                                                 $state .= 'b';
1955                                         }
1956                                 } elseif ( $thislen == 5 ) {
1957                                         if ( $state === 'b' ) {
1958                                                 $output .= '</b><i>';
1959                                                 $state = 'i';
1960                                         } elseif ( $state === 'i' ) {
1961                                                 $output .= '</i><b>';
1962                                                 $state = 'b';
1963                                         } elseif ( $state === 'bi' ) {
1964                                                 $output .= '</i></b>';
1965                                                 $state = '';
1966                                         } elseif ( $state === 'ib' ) {
1967                                                 $output .= '</b></i>';
1968                                                 $state = '';
1969                                         } elseif ( $state === 'both' ) {
1970                                                 $output .= '<i><b>' . $buffer . '</b></i>';
1971                                                 $state = '';
1972                                         } else { // ($state == '')
1973                                                 $buffer = '';
1974                                                 $state = 'both';
1975                                         }
1976                                 }
1977                         }
1978                         $i++;
1979                 }
1980                 // Now close all remaining tags.  Notice that the order is important.
1981                 if ( $state === 'b' || $state === 'ib' ) {
1982                         $output .= '</b>';
1983                 }
1984                 if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
1985                         $output .= '</i>';
1986                 }
1987                 if ( $state === 'bi' ) {
1988                         $output .= '</b>';
1989                 }
1990                 // There might be lonely ''''', so make sure we have a buffer
1991                 if ( $state === 'both' && $buffer ) {
1992                         $output .= '<b><i>' . $buffer . '</i></b>';
1993                 }
1994                 return $output;
1995         }
1996
/**
 * Replace bracketed external links (REL), e.g. [http://example.com label]
 *
 * The input is split on $this->mExtLinkBracketedRegex; each match is turned
 * into an external link (or an external image when the link text is an
 * allowed image URL) and registered in the parser output.
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @private
 *
 * @param string $text
 *
 * @throws MWException If the split regex fails to run
 * @return string
 */
public function replaceExternalLinks( $text ) {
        $pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
        if ( $pieces === false ) {
                throw new MWException( "PCRE needs to be compiled with "
                        . "--enable-unicode-properties in order for MediaWiki to function" );
        }

        // Whatever precedes the first bracketed link is passed through untouched.
        $result = array_shift( $pieces );

        // The remaining pieces are consumed four at a time:
        // URL, protocol (not needed here), link text, trailing text.
        $total = count( $pieces );
        for ( $pos = 0; $pos < $total; $pos += 4 ) {
                $url = $pieces[$pos];
                $label = $pieces[$pos + 2];
                $trail = $pieces[$pos + 3];

                # '<' and '>' (already escaped to entities by removeHTMLtags())
                # must not be part of a URL, per RFC 2396: cut the URL at the
                # first such entity and push the remainder into the link text.
                $entityMatch = [];
                if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
                        $cutAt = $entityMatch[0][1];
                        $label = substr( $url, $cutAt ) . ' ' . $label;
                        $url = substr( $url, 0, $cutAt );
                }

                # A link text that is itself an image URL becomes an <img> tag.
                # This happened by accident in the original parser, but some people used it extensively
                $image = $this->maybeMakeExternalImage( $label );
                if ( $image !== false ) {
                        $label = $image;
                }

                if ( $label == '' ) {
                        # No link text, e.g. [http://domain.tld/some.link]: autonumber it
                        $dtrail = '';
                        $linktype = 'autonumber';
                        $label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
                } else {
                        # Have link text, e.g. [http://domain.tld/some.link text]s
                        # so check for a trail; 'text' is the link type used for CSS
                        $linktype = 'text';
                        list( $dtrail, $trail ) = Linker::splitTrail( $trail );
                }

                // Excluding protocol-relative URLs may avoid many false positives.
                $protocolPattern = '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/';
                if ( preg_match( $protocolPattern, $label ) ) {
                        $label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
                }

                $url = Sanitizer::cleanUrl( $url );

                # Use the encoded URL
                # This means that users can paste URLs directly into the text
                # Funny characters like ö aren't valid in URLs anyway
                # This was changed in August 2004
                $attribs = $this->getExternalLinkAttribs( $url );
                $anchor = Linker::makeExternalLink( $url, $label, false, $linktype, $attribs, $this->mTitle );
                $result .= $anchor . $dtrail . $trail;

                # Register link in the output object.
                $this->mOutput->addExternalLink( $url );
        }

        return $result;
}
2078
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled, unless the link's
 * title is in one of the $wgNoFollowNsExceptions namespaces or the URL
 * matches $wgNoFollowDomainExceptions.
 *
 * @since 1.21
 * @param string|bool $url Optional URL, to extract the domain from for rel =>
 *   nofollow if appropriate
 * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
 * @return string|null Rel attribute for $url
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
        global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

        if ( !$wgNoFollowLinks ) {
                return null;
        }

        // Only consult the namespace exceptions when there really is a title.
        // Looking up a placeholder `false` with a loose in_array() would match
        // namespace index 0 (false == 0), so title-less links would wrongly be
        // exempted whenever namespace 0 is listed as an exception.
        if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
                return null;
        }

        if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
                return null;
        }

        return 'nofollow';
}
2098
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link.  This currently may include rel => nofollow
 * (depending on configuration, namespace, and the URL's domain) and/or a
 * target attribute (depending on configuration).
 *
 * The 'rel' key is always set; its value is null when no rel tokens apply.
 *
 * @param string $url URL to extract the domain from for rel =>
 *   nofollow if appropriate
 * @return array Associative array of HTML attributes
 */
public function getExternalLinkAttribs( $url ) {
        $attribs = [];
        $rel = self::getExternalLinkRel( $url, $this->mTitle );

        $target = $this->mOptions->getExternalLinkTarget();
        if ( $target ) {
                $attribs['target'] = $target;
                if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
                        // T133507. New windows can navigate parent cross-origin.
                        // Including noreferrer due to lacking browser
                        // support of noopener. Eventually noreferrer should be removed.
                        // getExternalLinkRel() returns null when there is nothing to add,
                        // so only insert a separator when a rel token is already present;
                        // otherwise the value would start with a stray space.
                        if ( $rel !== null && $rel !== '' ) {
                                $rel .= ' ';
                        }
                        $rel .= 'noreferrer noopener';
                }
        }
        $attribs['rel'] = $rel;
        return $attribs;
}
2129
/**
 * Replace unusual escape codes in a URL with their equivalent characters
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings. The URL is split into a
 * scheme-and-path part, a query part and a fragment part; each part is
 * normalized with its own set of characters that must stay escaped.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
        # Test for RFC 3986 IPv6 syntax
        $scheme = '[a-z][a-z0-9+.-]*:';
        $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
        $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';

        # Holds the decoded address when the host is a valid bracketed IP, false otherwise
        $isIPv6 = false;
        if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) ) {
                $decodedHost = rawurldecode( $m[1] );
                if ( IP::isValid( $decodedHost ) ) {
                        $isIPv6 = $decodedHost;
                }
        }

        # Make sure unsafe characters are encoded
        $url = preg_replace_callback(
                '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
                function ( $hit ) {
                        return rawurlencode( $hit[0] );
                },
                $url
        );

        # $end marks where the not-yet-processed prefix of the URL stops;
        # it moves left as the fragment and the query are peeled off.
        $end = strlen( $url );

        # Fragment part - 'fragment'
        $fragment = '';
        $hashPos = strpos( $url, '#' );
        if ( $hashPos !== false && $hashPos < $end ) {
                $fragment = self::normalizeUrlComponent(
                        substr( $url, $hashPos, $end - $hashPos ), '"#%<>[\]^`{|}' );
                $end = $hashPos;
        }

        # Query part - 'query' minus &=+;
        # (a '?' located inside the fragment does not start a query)
        $query = '';
        $questionPos = strpos( $url, '?' );
        if ( $questionPos !== false && $questionPos < $end ) {
                $query = self::normalizeUrlComponent(
                        substr( $url, $questionPos, $end - $questionPos ), '"#%<>[\]^`{|}&=+;' );
                $end = $questionPos;
        }

        # Scheme and path part - 'pchar'
        # (we assume no userinfo or encoded colons in the host)
        $path = self::normalizeUrlComponent(
                substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' );

        $ret = $path . $query . $fragment;

        # Fix IPv6 syntax: the brackets around the host were percent-encoded
        # by the unsafe-character pass above, so restore them.
        if ( $isIPv6 !== false ) {
                $ipv6Host = "%5B({$isIPv6})%5D";
                $ret = preg_replace(
                        "<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
                        "$1[$2]",
                        $ret
                );
        }

        return $ret;
}
2196
/**
 * Normalize the percent-escapes within one component of a URL.
 *
 * An escape is replaced by the character it encodes when that character is
 * printable ASCII (codes 33 to 126) and is not listed in $unsafe; every
 * other escape is kept, with its hex digits upper-cased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
        return preg_replace_callback(
                '/%[0-9A-Fa-f]{2}/',
                function ( $escape ) use ( $unsafe ) {
                        $decoded = urldecode( $escape[0] );
                        $code = ord( $decoded );
                        $printable = ( $code > 32 && $code < 127 );
                        if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
                                # Leave it escaped, but use uppercase for a-f
                                return strtoupper( $escape[0] );
                        }
                        # Unescape it
                        return $decoded;
                },
                $component
        );
}
2211
/**
 * Make an external image out of a URL if that is allowed, either through
 * the global option, through the list of allowed URL prefixes, or through
 * the on-wiki whitelist.
 *
 * @param string $url
 *
 * @return string|bool Result of Linker::makeExternalImage(), or false when
 *   the URL does not qualify as an allowed external image
 */
private function maybeMakeExternalImage( $url ) {
        $allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

        # The "allowed from" option may be a single prefix or an array of
        # prefixes; the URL qualifies when it starts with any one of them.
        $prefixMatches = false;
        if ( !empty( $allowedFrom ) ) {
                if ( is_array( $allowedFrom ) ) {
                        foreach ( $allowedFrom as $prefix ) {
                                if ( strpos( $url, $prefix ) === 0 ) {
                                        $prefixMatches = true;
                                        break;
                                }
                        }
                } else {
                        $prefixMatches = ( strpos( $url, $allowedFrom ) === 0 );
                }
        }

        $html = false;
        if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatches )
                && preg_match( self::EXT_IMAGE_REGEX, $url )
        ) {
                # Image found
                $html = Linker::makeExternalImage( $url );
        }

        # The whitelist is consulted only when nothing was produced above,
        # the whitelist feature is on, and the URL looks like an image.
        if ( $html
                || !$this->mOptions->getEnableImageWhitelist()
                || !preg_match( self::EXT_IMAGE_REGEX, $url )
        ) {
                return $html;
        }

        $entries = explode(
                "\n",
                wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
        );
        foreach ( $entries as $entry ) {
                # Ignore blank entries and comment lines
                if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
                        continue;
                }
                # Escape the delimiter in the regex fragment and match case-insensitively
                $pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
                if ( preg_match( $pattern, $url ) ) {
                        # Image matches a whitelist entry
                        return Linker::makeExternalImage( $url );
                }
        }

        return $html;
}
2269
/**
 * Process [[ ]] wikilinks
 *
 * Delegates to replaceInternalLinks2() and merges the link holders it
 * returns into $this->mLinkHolders.
 *
 * @param string $s
 *
 * @return string Processed text
 *
 * @private
 */
public function replaceInternalLinks( $s ) {
        $allHolders = $this->mLinkHolders;
        // replaceInternalLinks2() takes $s by reference and rewrites it in place
        $newHolders = $this->replaceInternalLinks2( $s );
        $allHolders->merge( $newHolders );
        return $s;
}
2283
2284         /**
2285          * Process [[ ]] wikilinks (RIL)
2286          * @param string &$s
2287          * @throws MWException
2288          * @return LinkHolderArray
2289          *
2290          * @private
2291          */
2292         public function replaceInternalLinks2( &$s ) {
2293                 static $tc = false, $e1, $e1_img;
2294                 # the % is needed to support urlencoded titles as well
2295                 if ( !$tc ) {
2296                         $tc = Title::legalChars() . '#%';
2297                         # Match a link having the form [[namespace:link|alternate]]trail
2298                         $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2299                         # Match cases where there is no "]]", which might still be images
2300                         $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2301                 }
2302
2303                 $holders = new LinkHolderArray( $this );
2304
2305                 # split the entire text string on occurrences of [[
2306                 $a = StringUtils::explode( '[[', ' ' . $s );
2307                 # get the first element (all text up to first [[), and remove the space we added
2308                 $s = $a->current();
2309                 $a->next();
2310                 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2311                 $s = substr( $s, 1 );
2312
2313                 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2314                 $e2 = null;
2315                 if ( $useLinkPrefixExtension ) {
2316                         # Match the end of a line for a word that's not followed by whitespace,
2317                         # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2318                         $charset = $this->contLang->linkPrefixCharset();
2319                         $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2320                 }
2321
2322                 if ( is_null( $this->mTitle ) ) {
2323                         throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2324                 }
2325                 $nottalk = !$this->mTitle->isTalkPage();
2326
2327                 if ( $useLinkPrefixExtension ) {
2328                         $m = [];
2329                         if ( preg_match( $e2, $s, $m ) ) {
2330                                 $first_prefix = $m[2];
2331                         } else {
2332                                 $first_prefix = false;
2333                         }
2334                 } else {
2335                         $prefix = '';
2336                 }
2337
2338                 $useSubpages = $this->areSubpagesAllowed();
2339
2340                 # Loop for each link
2341                 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2342                         # Check for excessive memory usage
2343                         if ( $holders->isBig() ) {
2344                                 # Too big
2345                                 # Do the existence check, replace the link holders and clear the array
2346                                 $holders->replace( $s );
2347                                 $holders->clear();
2348                         }
2349
2350                         if ( $useLinkPrefixExtension ) {
2351                                 if ( preg_match( $e2, $s, $m ) ) {
2352                                         list( , $s, $prefix ) = $m;
2353                                 } else {
2354                                         $prefix = '';
2355                                 }
2356                                 # first link
2357                                 if ( $first_prefix ) {
2358                                         $prefix = $first_prefix;
2359                                         $first_prefix = false;
2360                                 }
2361                         }
2362
2363                         $might_be_img = false;
2364
2365                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2366                                 $text = $m[2];
2367                                 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2368                                 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2369                                 # the real problem is with the $e1 regex
2370                                 # See T1500.
2371                                 # Still some problems for cases where the ] is meant to be outside punctuation,
2372                                 # and no image is in sight. See T4095.
2373                                 if ( $text !== ''
2374                                         && substr( $m[3], 0, 1 ) === ']'
2375                                         && strpos( $text, '[' ) !== false
2376                                 ) {
2377                                         $text .= ']'; # so that replaceExternalLinks($text) works later
2378                                         $m[3] = substr( $m[3], 1 );
2379                                 }
2380                                 # fix up urlencoded title texts
2381                                 if ( strpos( $m[1], '%' ) !== false ) {
2382                                         # Should anchors '#' also be rejected?
2383                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2384                                 }
2385                                 $trail = $m[3];
2386                         } elseif ( preg_match( $e1_img, $line, $m ) ) {
2387                                 # Invalid, but might be an image with a link in its caption
2388                                 $might_be_img = true;
2389                                 $text = $m[2];
2390                                 if ( strpos( $m[1], '%' ) !== false ) {
2391                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2392                                 }
2393                                 $trail = "";
2394                         } else { # Invalid form; output directly
2395                                 $s .= $prefix . '[[' . $line;
2396                                 continue;
2397                         }
2398
2399                         $origLink = ltrim( $m[1], ' ' );
2400
2401                         # Don't allow internal links to pages containing
2402                         # PROTO: where PROTO is a valid URL protocol; these
2403                         # should be external links.
2404                         if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2405                                 $s .= $prefix . '[[' . $line;
2406                                 continue;
2407                         }
2408
2409                         # Make subpage if necessary
2410                         if ( $useSubpages ) {
2411                                 $link = $this->maybeDoSubpageLink( $origLink, $text );
2412                         } else {
2413                                 $link = $origLink;
2414                         }
2415
2416                         // \x7f isn't a default legal title char, so most likely strip
2417                         // markers will force us into the "invalid form" path above.  But,
2418                         // just in case, let's assert that xmlish tags aren't valid in
2419                         // the title position.
2420                         $unstrip = $this->mStripState->killMarkers( $link );
2421                         $noMarkers = ( $unstrip === $link );
2422
2423                         $nt = $noMarkers ? Title::newFromText( $link ) : null;
2424                         if ( $nt === null ) {
2425                                 $s .= $prefix . '[[' . $line;
2426                                 continue;
2427                         }
2428
2429                         $ns = $nt->getNamespace();
2430                         $iw = $nt->getInterwiki();
2431
2432                         $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2433
2434                         if ( $might_be_img ) { # if this is actually an invalid link
2435                                 if ( $ns == NS_FILE && $noforce ) { # but might be an image
2436                                         $found = false;
2437                                         while ( true ) {
2438                                                 # look at the next 'line' to see if we can close it there
2439                                                 $a->next();
2440                                                 $next_line = $a->current();
2441                                                 if ( $next_line === false || $next_line === null ) {
2442                                                         break;
2443                                                 }
2444                                                 $m = explode( ']]', $next_line, 3 );
2445                                                 if ( count( $m ) == 3 ) {
2446                                                         # the first ]] closes the inner link, the second the image
2447                                                         $found = true;
2448                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2449                                                         $trail = $m[2];
2450                                                         break;
2451                                                 } elseif ( count( $m ) == 2 ) {
2452                                                         # if there's exactly one ]] that's fine, we'll keep looking
2453                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2454                                                 } else {
2455                                                         # if $next_line is invalid too, we need look no further
2456                                                         $text .= '[[' . $next_line;
2457                                                         break;
2458                                                 }
2459                                         }
2460                                         if ( !$found ) {
2461                                                 # we couldn't find the end of this imageLink, so output it raw
2462                                                 # but don't ignore what might be perfectly normal links in the text we've examined
2463                                                 $holders->merge( $this->replaceInternalLinks2( $text ) );
2464                                                 $s .= "{$prefix}[[$link|$text";
2465                                                 # note: no $trail, because without an end, there *is* no trail
2466                                                 continue;
2467                                         }
2468                                 } else { # it's not an image, so output it raw
2469                                         $s .= "{$prefix}[[$link|$text";
2470                                         # note: no $trail, because without an end, there *is* no trail
2471                                         continue;
2472                                 }
2473                         }
2474
2475                         $wasblank = ( $text == '' );
2476                         if ( $wasblank ) {
2477                                 $text = $link;
2478                                 if ( !$noforce ) {
2479                                         # Strip off leading ':'
2480                                         $text = substr( $text, 1 );
2481                                 }
2482                         } else {
2483                                 # T6598 madness. Handle the quotes only if they come from the alternate part
2484                                 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2485                                 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2486                                 #    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2487                                 $text = $this->doQuotes( $text );
2488                         }
2489
2490                         # Link not escaped by : , create the various objects
2491                         if ( $noforce && !$nt->wasLocalInterwiki() ) {
2492                                 # Interwikis
2493                                 if (
2494                                         $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2495                                                 Language::fetchLanguageName( $iw, null, 'mw' ) ||
2496                                                 in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2497                                         )
2498                                 ) {
2499                                         # T26502: filter duplicates
2500                                         if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2501                                                 $this->mLangLinkLanguages[$iw] = true;
2502                                                 $this->mOutput->addLanguageLink( $nt->getFullText() );
2503                                         }
2504
2505                                         /**
2506                                          * Strip the whitespace interwiki links produce, see T10897
2507                                          */
2508                                         $s = rtrim( $s . $prefix ) . $trail; # T175416
2509                                         continue;
2510                                 }
2511
2512                                 if ( $ns == NS_FILE ) {
2513                                         if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->mTitle ) ) {
2514                                                 if ( $wasblank ) {
2515                                                         # if no parameters were passed, $text
2516                                                         # becomes something like "File:Foo.png",
2517                                                         # which we don't want to pass on to the
2518                                                         # image generator
2519                                                         $text = '';
2520                                                 } else {
2521                                                         # recursively parse links inside the image caption
2522                                                         # actually, this will parse them in any other parameters, too,
2523                                                         # but it might be hard to fix that, and it doesn't matter ATM
2524                                                         $text = $this->replaceExternalLinks( $text );
2525                                                         $holders->merge( $this->replaceInternalLinks2( $text ) );
2526                                                 }
2527                                                 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2528                                                 $s .= $prefix . $this->armorLinks(
2529                                                         $this->makeImage( $nt, $text, $holders ) ) . $trail;
2530                                                 continue;
2531                                         }
2532                                 } elseif ( $ns == NS_CATEGORY ) {
2533                                         /**
2534                                          * Strip the whitespace Category links produce, see T2087
2535                                          */
2536                                         $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2537
2538                                         if ( $wasblank ) {
2539                                                 $sortkey = $this->getDefaultSort();
2540                                         } else {
2541                                                 $sortkey = $text;
2542                                         }
2543                                         $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2544                                         $sortkey = str_replace( "\n", '', $sortkey );
2545                                         $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2546                                         $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2547
2548                                         continue;
2549                                 }
2550                         }
2551
2552                         # Self-link checking. For some languages, variants of the title are checked in
2553                         # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2554                         # for linking to a different variant.
2555                         if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2556                                 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2557                                 continue;
2558                         }
2559
2560                         # NS_MEDIA is a pseudo-namespace for linking directly to a file
2561                         # @todo FIXME: Should do batch file existence checks, see comment below
2562                         if ( $ns == NS_MEDIA ) {
2563                                 # Give extensions a chance to select the file revision for us
2564                                 $options = [];
2565                                 $descQuery = false;
2566                                 Hooks::run( 'BeforeParserFetchFileAndTitle',
2567                                         [ $this, $nt, &$options, &$descQuery ] );
2568                                 # Fetch and register the file (file title may be different via hooks)
2569                                 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2570                                 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2571                                 $s .= $prefix . $this->armorLinks(
2572                                         Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2573                                 continue;
2574                         }
2575
2576                         # Some titles, such as valid special pages or files in foreign repos, should
2577                         # be shown as bluelinks even though they're not included in the page table
2578                         # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2579                         # batch file existence checks for NS_FILE and NS_MEDIA
2580                         if ( $iw == '' && $nt->isAlwaysKnown() ) {
2581                                 $this->mOutput->addLink( $nt );
2582                                 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2583                         } else {
2584                                 # Links will be added to the output link list after checking
2585                                 $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2586                         }
2587                 }
2588                 return $holders;
2589         }
2590
2591         /**
2592          * Render a forced-blue link inline; protect against double expansion of
2593          * URLs if we're in a mode that prepends full URL prefixes to internal links.
2594          * Since this little disaster has to split off the trail text to avoid
2595          * breaking URLs in the following text without breaking trails on the
2596          * wiki links, it's been made into a horrible function.
2597          *
2598          * @param Title $nt
2599          * @param string $text
2600          * @param string $trail
2601          * @param string $prefix
2602          * @return string HTML-wikitext mix oh yuck
2603          */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
        // Linker::splitTrail() yields a pair: the piece of the trail that is
        // placed inside the link label, and the remainder that follows the link.
        list( $linkedTrail, $afterLink ) = Linker::splitTrail( $trail );

        // Without explicit link text, show the HTML-escaped prefixed title.
        $label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

        // Prefix, label and in-link trail together form the link contents,
        // wrapped in HtmlArmor before being handed to the link renderer.
        $renderer = $this->getLinkRenderer();
        $anchor = $renderer->makeKnownLink(
                $nt,
                new HtmlArmor( $prefix . $label . $linkedTrail )
        );

        // Only the generated link goes through armorLinks(); the leftover
        // trail is appended untouched.
        return $this->armorLinks( $anchor ) . $afterLink;
}
2617
2618         /**
2619          * Insert a NOPARSE hacky thing into any inline links in a chunk that's
2620          * going to go through further parsing steps before inline URL expansion.
2621          *
2622          * Not needed quite as much as it used to be since free links are a bit
2623          * more sensible these days. But bracketed links are still an issue.
2624          *
2625          * @param string $text More-or-less HTML
2626          * @return string Less-or-more HTML with NOPARSE bits
2627          */
public function armorLinks( $text ) {
        // Match, case-insensitively and at a word boundary, any of the
        // alternatives held in $this->mUrlProtocols; the match is group 1.
        $protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';

        // Re-emit the match ($1) preceded by the marker prefix and "NOPARSE".
        $armoredProtocol = self::MARKER_PREFIX . 'NOPARSE$1';

        return preg_replace( $protocolPattern, $armoredProtocol, $text );
}
2632
2633         /**
2634          * Return true if subpage links should be expanded on this page.
2635          * @return bool
2636          */
public function areSubpagesAllowed() {
        // Subpage support is a per-namespace property, so the answer is
        // looked up for the namespace of the title being parsed.
        $currentNamespace = $this->mTitle->getNamespace();

        return $this->nsInfo->hasSubpages( $currentNamespace );
}
2641
2642         /**
2643          * Handle link to subpage if necessary
2644          *
2645          * @param string $target The source of the link
2646          * @param string &$text The link text, modified as necessary
2647          * @return string The full name of the link
2648          * @private
2649          */
public function maybeDoSubpageLink( $target, &$text ) {
        // Pure delegation: Linker resolves $target against the title being
        // parsed. $text is forwarded as received (presumably adjusted by
        // Linker, which is why it is a by-reference parameter here).
        $fullTarget = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );

        return $fullTarget;
}
2653
2654         /**
2655          * Make lists from lines starting with ':', '*', '#', etc. (DBL)
2656          *
2657          * @param string $text
2658          * @param bool $linestart Whether or not this is at the start of a line.
2659          * @private
2660          * @return string The lists rendered as HTML
2661          */
public function doBlockLevels( $text, $linestart ) {
        // Thin wrapper kept on the parser; the work happens in BlockLevelPass.
        $rendered = BlockLevelPass::doBlockLevels( $text, $linestart );

        return $rendered;
}
2665
2666         /**
2667          * Return value of a magic variable (like PAGENAME)
2668          *
2669          * @private
2670          *
2671          * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2672          * @param bool|PPFrame $frame
2673          *
2674          * @throws MWException
2675          * @return string
2676          */
public function getVariableValue( $index, $frame = false ) {
        // Most variables below are derived from the page title, so refuse to run
        // without one. A title should always be set in the middle of a parse,
        // though unit tests are known to call this in odd states.
        if ( $this->mTitle === null ) {
                throw new MWException( __METHOD__ . ' Should only be '
                        . ' called while parsing (no title set)' );
        }

        // Hook handlers receive the parser by reference; pass a local copy of the
        // handle to avoid the PHP 7.1 warning about passing $this by reference.
        $parserRef = $this;

        // Some values need message or data lookups and are expensive to compute
        // repeatedly, so finished values live in $this->mVarCache. A cached value
        // is only served when the hook returns a truthy result.
        $cacheAllowed = Hooks::run(
                'ParserGetVariableValueVarCache',
                [ &$parserRef, &$this->mVarCache ]
        );
        if ( $cacheAllowed && isset( $this->mVarCache[$index] ) ) {
                return $this->mVarCache[$index];
        }

        // Reference time for the date/time variables: the parser options' timestamp
        // converted to TS_UNIX. Hook handlers get it by reference and may replace it.
        $now = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
        Hooks::run( 'ParserGetVariableValueTs', [ &$parserRef, &$now ] );

        $lang = $this->getFunctionLang();

        // Cases that "break" fall through to the cache write at the bottom;
        // cases that "return" hand back their value without caching it here.
        switch ( $index ) {
                case '!':
                        $result = '|';
                        break;

                // "current*" values use MWTimestamp::getInstance(),
                // "local*" values use MWTimestamp::getLocalInstance().
                case 'currentmonth':
                        $result = $lang->formatNum( MWTimestamp::getInstance( $now )->format( 'm' ), true );
                        break;
                case 'currentmonth1':
                        $result = $lang->formatNum( MWTimestamp::getInstance( $now )->format( 'n' ), true );
                        break;
                case 'currentmonthname':
                        $result = $lang->getMonthName( MWTimestamp::getInstance( $now )->format( 'n' ) );
                        break;
                case 'currentmonthnamegen':
                        $result = $lang->getMonthNameGen( MWTimestamp::getInstance( $now )->format( 'n' ) );
                        break;
                case 'currentmonthabbrev':
                        $result = $lang->getMonthAbbreviation( MWTimestamp::getInstance( $now )->format( 'n' ) );
                        break;
                case 'currentday':
                        $result = $lang->formatNum( MWTimestamp::getInstance( $now )->format( 'j' ), true );
                        break;
                case 'currentday2':
                        $result = $lang->formatNum( MWTimestamp::getInstance( $now )->format( 'd' ), true );
                        break;
                case 'localmonth':
                        $result = $lang->formatNum( MWTimestamp::getLocalInstance( $now )->format( 'm' ), true );
                        break;
                case 'localmonth1':
                        $result = $lang->formatNum( MWTimestamp::getLocalInstance( $now )->format( 'n' ), true );
                        break;
                case 'localmonthname':
                        $result = $lang->getMonthName( MWTimestamp::getLocalInstance( $now )->format( 'n' ) );
                        break;
                case 'localmonthnamegen':
                        $result = $lang->getMonthNameGen( MWTimestamp::getLocalInstance( $now )->format( 'n' ) );
                        break;
                case 'localmonthabbrev':
                        $result = $lang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $now )->format( 'n' ) );
                        break;
                case 'localday':
                        $result = $lang->formatNum( MWTimestamp::getLocalInstance( $now )->format( 'j' ), true );
                        break;
                case 'localday2':
                        $result = $lang->formatNum( MWTimestamp::getLocalInstance( $now )->format( 'd' ), true );
                        break;

                // Title-derived names; every one is passed through wfEscapeWikiText().
                case 'pagename':
                        $result = wfEscapeWikiText( $this->mTitle->getText() );
                        break;
                case 'pagenamee':
                        $result = wfEscapeWikiText( $this->mTitle->getPartialURL() );
                        break;
                case 'fullpagename':
                        $result = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
                        break;
                case 'fullpagenamee':
                        $result = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
                        break;
                case 'subpagename':
                        $result = wfEscapeWikiText( $this->mTitle->getSubpageText() );
                        break;
                case 'subpagenamee':
                        $result = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
                        break;
                case 'rootpagename':
                        $result = wfEscapeWikiText( $this->mTitle->getRootText() );
                        break;
                case 'rootpagenamee':
                        // Spaces become underscores before URL-encoding
                        $rootKey = str_replace( ' ', '_', $this->mTitle->getRootText() );
                        $result = wfEscapeWikiText( wfUrlencode( $rootKey ) );
                        break;
                case 'basepagename':
                        $result = wfEscapeWikiText( $this->mTitle->getBaseText() );
                        break;
                case 'basepagenamee':
                        // Spaces become underscores before URL-encoding
                        $baseKey = str_replace( ' ', '_', $this->mTitle->getBaseText() );
                        $result = wfEscapeWikiText( wfUrlencode( $baseKey ) );
                        break;
                case 'talkpagename':
                        // Empty when the title cannot have a talk page
                        $result = $this->mTitle->canHaveTalkPage()
                                ? wfEscapeWikiText( $this->mTitle->getTalkPage()->getPrefixedText() )
                                : '';
                        break;
                case 'talkpagenamee':
                        // Empty when the title cannot have a talk page
                        $result = $this->mTitle->canHaveTalkPage()
                                ? wfEscapeWikiText( $this->mTitle->getTalkPage()->getPrefixedURL() )
                                : '';
                        break;
                case 'subjectpagename':
                        $result = wfEscapeWikiText( $this->mTitle->getSubjectPage()->getPrefixedText() );
                        break;
                case 'subjectpagenamee':
                        $result = wfEscapeWikiText( $this->mTitle->getSubjectPage()->getPrefixedURL() );
                        break;

                case 'pageid': // requested in T25427
                        # Tell the edit saving system that the canonical output after page
                        # insertion needs a parse that used that exact page ID
                        $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
                        $result = $this->mTitle->getArticleID();
                        if ( !$result ) {
                                // No article ID yet: fall back to the speculative one from
                                // the parser options and record that it was used
                                $result = $this->mOptions->getSpeculativePageId();
                                if ( $result ) {
                                        $this->mOutput->setSpeculativePageIdUsed( $result );
                                }
                        }
                        break;
                case 'revisionid':
                        $useStub = $this->svcOptions->get( 'MiserMode' ) &&
                                !$this->mOptions->getInterfaceMessage() &&
                                // @TODO: disallow this word on all namespaces
                                $this->nsInfo->isContent( $this->mTitle->getNamespace() );
                        if ( $useStub ) {
                                // Use a stub result instead of the actual revision ID in order to avoid
                                // double parses on page save but still allow preview detection (T137900)
                                if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
                                        $result = '-';
                                } else {
                                        $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
                                        $result = '';
                                }
                                break;
                        }

                        # Tell the edit saving system that the canonical output after revision
                        # insertion needs a parse that used that exact revision ID
                        $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
                        $result = $this->getRevisionId();
                        if ( $result === 0 ) {
                                // An ID of exactly 0: try the revision object's own ID instead
                                $revision = $this->getRevisionObject();
                                if ( $revision ) {
                                        $result = $revision->getId();
                                }
                        }
                        if ( !$result ) {
                                // Still nothing: fall back to the speculative revision ID
                                $result = $this->mOptions->getSpeculativeRevId();
                                if ( $result ) {
                                        $this->mOutput->setSpeculativeRevIdUsed( $result );
                                }
                        }
                        break;

                // Slices of the revision timestamp string (presumably laid out as
                // YYYYMMDDHHMMSS, judging by the offsets used per variable).
                case 'revisionday':
                        // The int cast turns the two-character day into a plain number
                        $result = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
                        break;
                case 'revisionday2':
                        $result = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
                        break;
                case 'revisionmonth':
                        $result = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
                        break;
                case 'revisionmonth1':
                        // The int cast turns the two-character month into a plain number
                        $result = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
                        break;
                case 'revisionyear':
                        $result = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
                        break;
                case 'revisiontimestamp':
                        $result = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
                        break;
                case 'revisionuser':
                        # Tell the edit saving system that the canonical output after revision
                        # insertion needs a parse that used the actual user ID
                        $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
                        $result = $this->getRevisionUser();
                        break;
                case 'revisionsize':
                        $result = $this->getRevisionSize();
                        break;

                // Namespace names: the plain variants swap underscores for spaces,
                // the "e" variants are URL-encoded instead.
                case 'namespace':
                        $nsText = $this->contLang->getNsText( $this->mTitle->getNamespace() );
                        $result = str_replace( '_', ' ', $nsText );
                        break;
                case 'namespacee':
                        $result = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
                        break;
                case 'namespacenumber':
                        $result = $this->mTitle->getNamespace();
                        break;
                case 'talkspace':
                        $result = '';
                        if ( $this->mTitle->canHaveTalkPage() ) {
                                $result = str_replace( '_', ' ', $this->mTitle->getTalkNsText() );
                        }
                        break;
                case 'talkspacee':
                        $result = '';
                        if ( $this->mTitle->canHaveTalkPage() ) {
                                $result = wfUrlencode( $this->mTitle->getTalkNsText() );
                        }
                        break;
                case 'subjectspace':
                        $result = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
                        break;
                case 'subjectspacee':
                        $result = wfUrlencode( $this->mTitle->getSubjectNsText() );
                        break;

                case 'currentdayname':
                        // 'w' is the zero-based day of the week; the name lookup is
                        // given that number plus one
                        $result = $lang->getWeekdayName( (int)MWTimestamp::getInstance( $now )->format( 'w' ) + 1 );
                        break;
                case 'currentyear':
                        $result = $lang->formatNum( MWTimestamp::getInstance( $now )->format( 'Y' ), true );
                        break;
                case 'currenttime':
                        $result = $lang->time( wfTimestamp( TS_MW, $now ), false, false );
                        break;
                case 'currenthour':
                        $result = $lang->formatNum( MWTimestamp::getInstance( $now )->format( 'H' ), true );
                        break;
                case 'currentweek':
                        # @bug T6594 PHP5 zero-pads the week number, PHP4 does not;
                        # the int cast removes the padding
                        $result = $lang->formatNum( (int)MWTimestamp::getInstance( $now )->format( 'W' ) );
                        break;
                case 'currentdow':
                        $result = $lang->formatNum( MWTimestamp::getInstance( $now )->format( 'w' ) );
                        break;
                case 'localdayname':
                        // Same zero-based-plus-one weekday lookup as 'currentdayname'
                        $localWeekday = (int)MWTimestamp::getLocalInstance( $now )->format( 'w' ) + 1;
                        $result = $lang->getWeekdayName( $localWeekday );
                        break;
                case 'localyear':
                        $result = $lang->formatNum( MWTimestamp::getLocalInstance( $now )->format( 'Y' ), true );
                        break;
                case 'localtime':
                        $localStamp = MWTimestamp::getLocalInstance( $now )->format( 'YmdHis' );
                        $result = $lang->time( $localStamp, false, false );
                        break;
                case 'localhour':
                        $result = $lang->formatNum( MWTimestamp::getLocalInstance( $now )->format( 'H' ), true );
                        break;
                case 'localweek':
                        # @bug T6594 PHP5 zero-pads the week number, PHP4 does not;
                        # the int cast removes the padding
                        $result = $lang->formatNum( (int)MWTimestamp::getLocalInstance( $now )->format( 'W' ) );
                        break;
                case 'localdow':
                        $result = $lang->formatNum( MWTimestamp::getLocalInstance( $now )->format( 'w' ) );
                        break;

                // Site statistics, formatted as numbers in the function language
                case 'numberofarticles':
                        $result = $lang->formatNum( SiteStats::articles() );
                        break;
                case 'numberoffiles':
                        $result = $lang->formatNum( SiteStats::images() );
                        break;
                case 'numberofusers':
                        $result = $lang->formatNum( SiteStats::users() );
                        break;
                case 'numberofactiveusers':
                        $result = $lang->formatNum( SiteStats::activeUsers() );
                        break;
                case 'numberofpages':
                        $result = $lang->formatNum( SiteStats::pages() );
                        break;
                case 'numberofadmins':
                        $result = $lang->formatNum( SiteStats::numberingroup( 'sysop' ) );
                        break;
                case 'numberofedits':
                        $result = $lang->formatNum( SiteStats::edits() );
                        break;

                case 'currenttimestamp':
                        $result = wfTimestamp( TS_MW, $now );
                        break;
                case 'localtimestamp':
                        $result = MWTimestamp::getLocalInstance( $now )->format( 'YmdHis' );
                        break;
                case 'currentversion':
                        $result = SpecialVersion::getVersion();
                        break;

                // The next group returns straight away, so these values are never
                // written to $this->mVarCache by this method.
                case 'articlepath':
                        return $this->svcOptions->get( 'ArticlePath' );
                case 'sitename':
                        return $this->svcOptions->get( 'Sitename' );
                case 'server':
                        return $this->svcOptions->get( 'Server' );
                case 'servername':
                        return $this->svcOptions->get( 'ServerName' );
                case 'scriptpath':
                        return $this->svcOptions->get( 'ScriptPath' );
                case 'stylepath':
                        return $this->svcOptions->get( 'StylePath' );
                case 'directionmark':
                        return $lang->getDirMark();
                case 'contentlanguage':
                        return $this->svcOptions->get( 'LanguageCode' );

                case 'pagelanguage':
                        $result = $lang->getCode();
                        break;
                case 'cascadingsources':
                        $result = CoreParserFunctions::cascadingsources( $this );
                        break;
                default:
                        // Not a built-in variable: hook handlers may supply the value. It
                        // stays null if none does, and is returned without the cache write
                        // below (handlers are given the cache array by reference).
                        $hookValue = null;
                        Hooks::run(
                                'ParserGetVariableValueSwitch',
                                [ &$parserRef, &$this->mVarCache, &$index, &$hookValue, &$frame ]
                        );

                        return $hookValue;
        }

        // Remember the computed value for later lookups; a falsy index is not cached
        if ( $index ) {
                $this->mVarCache[$index] = $result;
        }

        return $result;
}
3023
3024         /**
3025          * @param int $start
3026          * @param int $len
3027          * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
3028          * @param string $variable Parser variable name
3029          * @return string
3030          */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
        # Slice of the timestamp this revision is rendered with
        $current = substr( $this->getRevisionTimestamp(), $start, $len );

        # With a revision object available there is nothing further to flag
        if ( $this->getRevisionObject() ) {
                return $current;
        }

        # Take the same slice of the timestamp $mtts seconds from now. "Now" is the
        # real current time (time()), not the time held in the parser options, and
        # the value is passed through the content language's userAdjust().
        $futureStamp = wfTimestamp( TS_MW, time() + $mtts );
        $future = substr( $this->contLang->userAdjust( $futureStamp, '' ), $start, $len );

        if ( $current !== $future ) {
                # The slice differs between the two points in time, so tell the edit
                # saving system that canonical output after revision insertion needs
                # a parse that used an actual revision timestamp
                $this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
        }

        return $current;
}
3055
3056         /**
3057          * initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers
3058          *
3059          * @private
3060          */
3061         public function initialiseVariables() {
3062                 $variableIDs = $this->magicWordFactory->getVariableIDs();
3063                 $substIDs = $this->magicWordFactory->getSubstIDs();
3064
3065                 $this->mVariables = $this->magicWordFactory->newArray( $variableIDs );
3066                 $this->mSubstWords = $this->magicWordFactory->newArray( $substIDs );
3067         }
3068
3069         /**
3070          * Preprocess some wikitext and return the document tree.
3071          * This is the ghost of replace_variables().
3072          *
3073          * @param string $text The text to parse
3074          * @param int $flags Bitwise combination of:
3075          *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
3076          *     included. Default is to assume a direct page view.
3077          *
3078          * The generated DOM tree must depend only on the input text and the flags.
3079          * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
3080          *
3081          * Any flag added to the $flags parameter here, or any other parameter liable to cause a
3082          * change in the DOM tree for a given text, must be passed through the section identifier
3083          * in the section edit link and thus back to extractSections().
3084          *
3085          * The output of this function is currently only cached in process memory, but a persistent
3086          * cache may be implemented at a later date which takes further advantage of these strict
3087          * dependency requirements.
3088          *
3089          * @return PPNode
3090          */
3091         public function preprocessToDom( $text, $flags = 0 ) {
3092                 $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
3093                 return $dom;
3094         }
3095
3096         /**
3097          * Return a three-element array: leading whitespace, string contents, trailing whitespace
3098          *
3099          * @param string $s
3100          *
3101          * @return array
3102          */
3103         public static function splitWhitespace( $s ) {
3104                 $ltrimmed = ltrim( $s );
3105                 $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
3106                 $trimmed = rtrim( $ltrimmed );
3107                 $diff = strlen( $ltrimmed ) - strlen( $trimmed );
3108                 if ( $diff > 0 ) {
3109                         $w2 = substr( $ltrimmed, -$diff );
3110                 } else {
3111                         $w2 = '';
3112                 }
3113                 return [ $w1, $trimmed, $w2 ];
3114         }
3115
3116         /**
3117          * Replace magic variables, templates, and template arguments
3118          * with the appropriate text. Templates are substituted recursively,
3119          * taking care to avoid infinite loops.
3120          *
3121          * Note that the substitution depends on value of $mOutputType:
3122          *  self::OT_WIKI: only {{subst:}} templates
3123          *  self::OT_PREPROCESS: templates but not extension tags
3124          *  self::OT_HTML: all templates and extension tags
3125          *
3126          * @param string $text The text to transform
3127          * @param false|PPFrame|array $frame Object describing the arguments passed to the
3128          *   template. Arguments may also be provided as an associative array, as
3129          *   was the usual case before MW1.12. Providing arguments this way may be
3130          *   useful for extensions wishing to perform variable replacement
3131          *   explicitly.
3132          * @param bool $argsOnly Only do argument (triple-brace) expansion, not
3133          *   double-brace expansion.
3134          * @return string
3135          */
3136         public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
3137                 # Is there any text? Also, Prevent too big inclusions!
3138                 $textSize = strlen( $text );
3139                 if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
3140                         return $text;
3141                 }
3142
3143                 if ( $frame === false ) {
3144                         $frame = $this->getPreprocessor()->newFrame();
3145                 } elseif ( !( $frame instanceof PPFrame ) ) {
3146                         $this->logger->debug(
3147                                 __METHOD__ . " called using plain parameters instead of " .
3148                                 "a PPFrame instance. Creating custom frame."
3149                         );
3150                         $frame = $this->getPreprocessor()->newCustomFrame( $frame );
3151                 }
3152
3153                 $dom = $this->preprocessToDom( $text );
3154                 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
3155                 $text = $frame->expand( $dom, $flags );
3156
3157                 return $text;
3158         }
3159
3160         /**
3161          * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
3162          *
3163          * @param array $args
3164          *
3165          * @return array
3166          */
3167         public static function createAssocArgs( $args ) {
3168                 $assocArgs = [];
3169                 $index = 1;
3170                 foreach ( $args as $arg ) {
3171                         $eqpos = strpos( $arg, '=' );
3172                         if ( $eqpos === false ) {
3173                                 $assocArgs[$index++] = $arg;
3174                         } else {
3175                                 $name = trim( substr( $arg, 0, $eqpos ) );
3176                                 $value = trim( substr( $arg, $eqpos + 1 ) );
3177                                 if ( $value === false ) {
3178                                         $value = '';
3179                                 }
3180                                 if ( $name !== false ) {
3181                                         $assocArgs[$name] = $value;
3182                                 }
3183                         }
3184                 }
3185
3186                 return $assocArgs;
3187         }
3188
3189         /**
3190          * Warn the user when a parser limitation is reached
3191          * Will warn at most once the user per limitation type
3192          *
3193          * The results are shown during preview and run through the Parser (See EditPage.php)
3194          *
3195          * @param string $limitationType Should be one of:
3196          *   'expensive-parserfunction' (corresponding messages:
3197          *       'expensive-parserfunction-warning',
3198          *       'expensive-parserfunction-category')
3199          *   'post-expand-template-argument' (corresponding messages:
3200          *       'post-expand-template-argument-warning',
3201          *       'post-expand-template-argument-category')
3202          *   'post-expand-template-inclusion' (corresponding messages:
3203          *       'post-expand-template-inclusion-warning',
3204          *       'post-expand-template-inclusion-category')
3205          *   'node-count-exceeded' (corresponding messages:
3206          *       'node-count-exceeded-warning',
3207          *       'node-count-exceeded-category')
3208          *   'expansion-depth-exceeded' (corresponding messages:
3209          *       'expansion-depth-exceeded-warning',
3210          *       'expansion-depth-exceeded-category')
3211          * @param string|int|null $current Current value
3212          * @param string|int|null $max Maximum allowed, when an explicit limit has been
3213          *       exceeded, provide the values (optional)
3214          */
3215         public function limitationWarn( $limitationType, $current = '', $max = '' ) {
3216                 # does no harm if $current and $max are present but are unnecessary for the message
3217                 # Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
3218                 # only during preview, and that would split the parser cache unnecessarily.
3219                 $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max )
3220                         ->text();
3221                 $this->mOutput->addWarning( $warning );
3222                 $this->addTrackingCategory( "$limitationType-category" );
3223         }
3224
3225         /**
3226          * Return the text of a template, after recursively
3227          * replacing any variables or templates within the template.
3228          *
3229          * @param array $piece The parts of the template
3230          *   $piece['title']: the title, i.e. the part before the |
3231          *   $piece['parts']: the parameter array
3232          *   $piece['lineStart']: whether the brace was at the start of a line
3233          * @param PPFrame $frame The current frame, contains template arguments
3234          * @throws Exception
3235          * @return string|array The text of the template
3236          */
3237         public function braceSubstitution( $piece, $frame ) {
3238                 // Flags
3239
3240                 // $text has been filled
3241                 $found = false;
3242                 // wiki markup in $text should be escaped
3243                 $nowiki = false;
3244                 // $text is HTML, armour it against wikitext transformation
3245                 $isHTML = false;
3246                 // Force interwiki transclusion to be done in raw mode not rendered
3247                 $forceRawInterwiki = false;
3248                 // $text is a DOM node needing expansion in a child frame
3249                 $isChildObj = false;
3250                 // $text is a DOM node needing expansion in the current frame
3251                 $isLocalObj = false;
3252
3253                 # Title object, where $text came from
3254                 $title = false;
3255
3256                 # $part1 is the bit before the first |, and must contain only title characters.
3257                 # Various prefixes will be stripped from it later.
3258                 $titleWithSpaces = $frame->expand( $piece['title'] );
3259                 $part1 = trim( $titleWithSpaces );
3260                 $titleText = false;
3261
3262                 # Original title text preserved for various purposes
3263                 $originalTitle = $part1;
3264
3265                 # $args is a list of argument nodes, starting from index 0, not including $part1
3266                 # @todo FIXME: If piece['parts'] is null then the call to getLength()
3267                 # below won't work b/c this $args isn't an object
3268                 $args = ( $piece['parts'] == null ) ? [] : $piece['parts'];
3269
3270                 $profileSection = null; // profile templates
3271
3272                 # SUBST
3273                 if ( !$found ) {
3274                         $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3275
3276                         # Possibilities for substMatch: "subst", "safesubst" or FALSE
3277                         # Decide whether to expand template or keep wikitext as-is.
3278                         if ( $this->ot['wiki'] ) {
3279                                 if ( $substMatch === false ) {
3280                                         $literal = true;  # literal when in PST with no prefix
3281                                 } else {
3282                                         $literal = false; # expand when in PST with subst: or safesubst:
3283                                 }
3284                         } else {
3285                                 if ( $substMatch == 'subst' ) {
3286                                         $literal = true;  # literal when not in PST with plain subst:
3287                                 } else {
3288                                         $literal = false; # expand when not in PST with safesubst: or no prefix
3289                                 }
3290                         }
3291                         if ( $literal ) {
3292                                 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3293                                 $isLocalObj = true;
3294                                 $found = true;
3295                         }
3296                 }
3297
3298                 # Variables
3299                 if ( !$found && $args->getLength() == 0 ) {
3300                         $id = $this->mVariables->matchStartToEnd( $part1 );
3301                         if ( $id !== false ) {
3302                                 $text = $this->getVariableValue( $id, $frame );
3303                                 if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
3304                                         $this->mOutput->updateCacheExpiry(
3305                                                 $this->magicWordFactory->getCacheTTL( $id ) );
3306                                 }
3307                                 $found = true;
3308                         }
3309                 }
3310
3311                 # MSG, MSGNW and RAW
3312                 if ( !$found ) {
3313                         # Check for MSGNW:
3314                         $mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
3315                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3316                                 $nowiki = true;
3317                         } else {
3318                                 # Remove obsolete MSG:
3319                                 $mwMsg = $this->magicWordFactory->get( 'msg' );
3320                                 $mwMsg->matchStartAndRemove( $part1 );
3321                         }
3322
3323                         # Check for RAW:
3324                         $mwRaw = $this->magicWordFactory->get( 'raw' );
3325                         if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3326                                 $forceRawInterwiki = true;
3327                         }
3328                 }
3329
3330                 # Parser functions
3331                 if ( !$found ) {
3332                         $colonPos = strpos( $part1, ':' );
3333                         if ( $colonPos !== false ) {
3334                                 $func = substr( $part1, 0, $colonPos );
3335                                 $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3336                                 $argsLength = $args->getLength();
3337                                 for ( $i = 0; $i < $argsLength; $i++ ) {
3338                                         $funcArgs[] = $args->item( $i );
3339                                 }
3340
3341                                 $result = $this->callParserFunction( $frame, $func, $funcArgs );
3342
3343                                 // Extract any forwarded flags
3344                                 if ( isset( $result['title'] ) ) {
3345                                         $title = $result['title'];
3346                                 }
3347                                 if ( isset( $result['found'] ) ) {
3348                                         $found = $result['found'];
3349                                 }
3350                                 if ( array_key_exists( 'text', $result ) ) {
3351                                         // a string or null
3352                                         $text = $result['text'];
3353                                 }
3354                                 if ( isset( $result['nowiki'] ) ) {
3355                                         $nowiki = $result['nowiki'];
3356                                 }
3357                                 if ( isset( $result['isHTML'] ) ) {
3358                                         $isHTML = $result['isHTML'];
3359                                 }
3360                                 if ( isset( $result['forceRawInterwiki'] ) ) {
3361                                         $forceRawInterwiki = $result['forceRawInterwiki'];
3362                                 }
3363                                 if ( isset( $result['isChildObj'] ) ) {
3364                                         $isChildObj = $result['isChildObj'];
3365                                 }
3366                                 if ( isset( $result['isLocalObj'] ) ) {
3367                                         $isLocalObj = $result['isLocalObj'];
3368                                 }
3369                         }
3370                 }
3371
3372                 # Finish mangling title and then check for loops.
3373                 # Set $title to a Title object and $titleText to the PDBK
3374                 if ( !$found ) {
3375                         $ns = NS_TEMPLATE;
3376                         # Split the title into page and subpage
3377                         $subpage = '';
3378                         $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3379                         if ( $part1 !== $relative ) {
3380                                 $part1 = $relative;
3381                                 $ns = $this->mTitle->getNamespace();
3382                         }
3383                         $title = Title::newFromText( $part1, $ns );
3384                         if ( $title ) {
3385                                 $titleText = $title->getPrefixedText();
3386                                 # Check for language variants if the template is not found
3387                                 if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3388                                         $this->getTargetLanguage()->findVariantLink( $part1, $title, true );
3389                                 }
3390                                 # Do recursion depth check
3391                                 $limit = $this->mOptions->getMaxTemplateDepth();
3392                                 if ( $frame->depth >= $limit ) {
3393                                         $found = true;
3394                                         $text = '<span class="error">'
3395                                                 . wfMessage( 'parser-template-recursion-depth-warning' )
3396                                                         ->numParams( $limit )->inContentLanguage()->text()
3397                                                 . '</span>';
3398                                 }
3399                         }
3400                 }
3401
3402                 # Load from database
3403                 if ( !$found && $title ) {
3404                         $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3405                         if ( !$title->isExternal() ) {
3406                                 if ( $title->isSpecialPage()
3407                                         && $this->mOptions->getAllowSpecialInclusion()
3408                                         && $this->ot['html']
3409                                 ) {
3410                                         $specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
3411                                         // Pass the template arguments as URL parameters.
3412                                         // "uselang" will have no effect since the Language object
3413                                         // is forced to the one defined in ParserOptions.
3414                                         $pageArgs = [];
3415                                         $argsLength = $args->getLength();
3416                                         for ( $i = 0; $i < $argsLength; $i++ ) {
3417                                                 $bits = $args->item( $i )->splitArg();
3418                                                 if ( strval( $bits['index'] ) === '' ) {
3419                                                         $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3420                                                         $value = trim( $frame->expand( $bits['value'] ) );
3421                                                         $pageArgs[$name] = $value;
3422                                                 }
3423                                         }
3424
3425                                         // Create a new context to execute the special page
3426                                         $context = new RequestContext;
3427                                         $context->setTitle( $title );
3428                                         $context->setRequest( new FauxRequest( $pageArgs ) );
3429                                         if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
3430                                                 $context->setUser( $this->getUser() );
3431                                         } else {
3432                                                 // If this page is cached, then we better not be per user.
3433                                                 $context->setUser( User::newFromName( '127.0.0.1', false ) );
3434                                         }
3435                                         $context->setLanguage( $this->mOptions->getUserLangObj() );
3436                                         $ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
3437                                         if ( $ret ) {
3438                                                 $text = $context->getOutput()->getHTML();
3439                                                 $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3440                                                 $found = true;
3441                                                 $isHTML = true;
3442                                                 if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
3443                                                         $this->mOutput->updateRuntimeAdaptiveExpiry(
3444                                                                 $specialPage->maxIncludeCacheTime()
3445                                                         );
3446                                                 }
3447                                         }
3448                                 } elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
3449                                         $found = false; # access denied
3450                                         $this->logger->debug(
3451                                                 __METHOD__ .
3452                                                 ": template inclusion denied for " . $title->getPrefixedDBkey()
3453                                         );
3454                                 } else {
3455                                         list( $text, $title ) = $this->getTemplateDom( $title );
3456                                         if ( $text !== false ) {
3457                                                 $found = true;
3458                                                 $isChildObj = true;
3459                                         }
3460                                 }
3461
3462                                 # If the title is valid but undisplayable, make a link to it
3463                                 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3464                                         $text = "[[:$titleText]]";
3465                                         $found = true;
3466                                 }
3467                         } elseif ( $title->isTrans() ) {
3468                                 # Interwiki transclusion
3469                                 if ( $this->ot['html'] && !$forceRawInterwiki ) {
3470                                         $text = $this->interwikiTransclude( $title, 'render' );
3471                                         $isHTML = true;
3472                                 } else {
3473                                         $text = $this->interwikiTransclude( $title, 'raw' );
3474                                         # Preprocess it like a template
3475                                         $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3476                                         $isChildObj = true;
3477                                 }
3478                                 $found = true;
3479                         }
3480
3481                         # Do infinite loop check
3482                         # This has to be done after redirect resolution to avoid infinite loops via redirects
3483                         if ( !$frame->loopCheck( $title ) ) {
3484                                 $found = true;
3485                                 $text = '<span class="error">'
3486                                         . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3487                                         . '</span>';
3488                                 $this->addTrackingCategory( 'template-loop-category' );
3489                                 $this->mOutput->addWarning( wfMessage( 'template-loop-warning',
3490                                         wfEscapeWikiText( $titleText ) )->text() );
3491                                 $this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
3492                         }
3493                 }
3494
3495                 # If we haven't found text to substitute by now, we're done
3496                 # Recover the source wikitext and return it
3497                 if ( !$found ) {
3498                         $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3499                         if ( $profileSection ) {
3500                                 $this->mProfiler->scopedProfileOut( $profileSection );
3501                         }
3502                         return [ 'object' => $text ];
3503                 }
3504
3505                 # Expand DOM-style return values in a child frame
3506                 if ( $isChildObj ) {
3507                         # Clean up argument array
3508                         $newFrame = $frame->newChild( $args, $title );
3509
3510                         if ( $nowiki ) {
3511                                 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3512                         } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3513                                 # Expansion is eligible for the empty-frame cache
3514                                 $text = $newFrame->cachedExpand( $titleText, $text );
3515                         } else {
3516                                 # Uncached expansion
3517                                 $text = $newFrame->expand( $text );
3518                         }
3519                 }
3520                 if ( $isLocalObj && $nowiki ) {
3521                         $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3522                         $isLocalObj = false;
3523                 }
3524
3525                 if ( $profileSection ) {
3526                         $this->mProfiler->scopedProfileOut( $profileSection );
3527                 }
3528
3529                 # Replace raw HTML by a placeholder
3530                 if ( $isHTML ) {
3531                         $text = $this->insertStripItem( $text );
3532                 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3533                         # Escape nowiki-style return values
3534                         $text = wfEscapeWikiText( $text );
3535                 } elseif ( is_string( $text )
3536                         && !$piece['lineStart']
3537                         && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3538                 ) {
3539                         # T2529: if the template begins with a table or block-level
3540                         # element, it should be treated as beginning a new line.
3541                         # This behavior is somewhat controversial.
3542                         $text = "\n" . $text;
3543                 }
3544
3545                 if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3546                         # Error, oversize inclusion
3547                         if ( $titleText !== false ) {
3548                                 # Make a working, properly escaped link if possible (T25588)
3549                                 $text = "[[:$titleText]]";
3550                         } else {
3551                                 # This will probably not be a working link, but at least it may
3552                                 # provide some hint of where the problem is
3553                                 preg_replace( '/^:/', '', $originalTitle );
3554                                 $text = "[[:$originalTitle]]";
3555                         }
3556                         $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3557                                 . 'post-expand include size too large -->' );
3558                         $this->limitationWarn( 'post-expand-template-inclusion' );
3559                 }
3560
3561                 if ( $isLocalObj ) {
3562                         $ret = [ 'object' => $text ];
3563                 } else {
3564                         $ret = [ 'text' => $text ];
3565                 }
3566
3567                 return $ret;
3568         }
3569
3570         /**
3571          * Call a parser function and return an array with text and flags.
3572          *
3573          * The returned array will always contain a boolean 'found', indicating
3574          * whether the parser function was found or not. It may also contain the
3575          * following:
3576          *  text: string|object, resulting wikitext or PP DOM object
3577          *  isHTML: bool, $text is HTML, armour it against wikitext transformation
3578          *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
3579          *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3580          *  nowiki: bool, wiki markup in $text should be escaped
3581          *
3582          * @since 1.21
3583          * @param PPFrame $frame The current frame, contains template arguments
3584          * @param string $function Function name
3585          * @param array $args Arguments to the function
3586          * @throws MWException
3587          * @return array
3588          */
3589         public function callParserFunction( $frame, $function, array $args = [] ) {
3590                 # Case sensitive functions
3591                 if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
3592                         $function = $this->mFunctionSynonyms[1][$function];
3593                 } else {
3594                         # Case insensitive functions
3595                         $function = $this->contLang->lc( $function );
3596                         if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
3597                                 $function = $this->mFunctionSynonyms[0][$function];
3598                         } else {
3599                                 return [ 'found' => false ];
3600                         }
3601                 }
3602
3603                 list( $callback, $flags ) = $this->mFunctionHooks[$function];
3604
3605                 // Avoid PHP 7.1 warning from passing $this by reference
3606                 $parser = $this;
3607
3608                 $allArgs = [ &$parser ];
3609                 if ( $flags & self::SFH_OBJECT_ARGS ) {
3610                         # Convert arguments to PPNodes and collect for appending to $allArgs
3611                         $funcArgs = [];
3612                         foreach ( $args as $k => $v ) {
3613                                 if ( $v instanceof PPNode || $k === 0 ) {
3614                                         $funcArgs[] = $v;
3615                                 } else {
3616                                         $funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
3617                                 }
3618                         }
3619
3620                         # Add a frame parameter, and pass the arguments as an array
3621                         $allArgs[] = $frame;
3622                         $allArgs[] = $funcArgs;
3623                 } else {
3624                         # Convert arguments to plain text and append to $allArgs
3625                         foreach ( $args as $k => $v ) {
3626                                 if ( $v instanceof PPNode ) {
3627                                         $allArgs[] = trim( $frame->expand( $v ) );
3628                                 } elseif ( is_int( $k ) && $k >= 0 ) {
3629                                         $allArgs[] = trim( $v );
3630                                 } else {
3631                                         $allArgs[] = trim( "$k=$v" );
3632                                 }
3633                         }
3634                 }
3635
3636                 $result = $callback( ...$allArgs );
3637
3638                 # The interface for function hooks allows them to return a wikitext
3639                 # string or an array containing the string and any flags. This mungs
3640                 # things around to match what this method should return.
3641                 if ( !is_array( $result ) ) {
3642                         $result = [
3643                                 'found' => true,
3644                                 'text' => $result,
3645                         ];
3646                 } else {
3647                         if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
3648                                 $result['text'] = $result[0];
3649                         }
3650                         unset( $result[0] );
3651                         $result += [
3652                                 'found' => true,
3653                         ];
3654                 }
3655
3656                 $noparse = true;
3657                 $preprocessFlags = 0;
3658                 if ( isset( $result['noparse'] ) ) {
3659                         $noparse = $result['noparse'];
3660                 }
3661                 if ( isset( $result['preprocessFlags'] ) ) {
3662                         $preprocessFlags = $result['preprocessFlags'];
3663                 }
3664
3665                 if ( !$noparse ) {
3666                         $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
3667                         $result['isChildObj'] = true;
3668                 }
3669
3670                 return $result;
3671         }
3672
3673         /**
3674          * Get the semi-parsed DOM representation of a template with a given title,
3675          * and its redirect destination title. Cached.
3676          *
3677          * @param Title $title
3678          *
3679          * @return array
3680          */
3681         public function getTemplateDom( $title ) {
3682                 $cacheTitle = $title;
3683                 $titleText = $title->getPrefixedDBkey();
3684
3685                 if ( isset( $this->mTplRedirCache[$titleText] ) ) {
3686                         list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
3687                         $title = Title::makeTitle( $ns, $dbk );
3688                         $titleText = $title->getPrefixedDBkey();
3689                 }
3690                 if ( isset( $this->mTplDomCache[$titleText] ) ) {
3691                         return [ $this->mTplDomCache[$titleText], $title ];
3692                 }
3693
3694                 # Cache miss, go to the database
3695                 list( $text, $title ) = $this->fetchTemplateAndTitle( $title );
3696
3697                 if ( $text === false ) {
3698                         $this->mTplDomCache[$titleText] = false;
3699                         return [ false, $title ];
3700                 }
3701
3702                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3703                 $this->mTplDomCache[$titleText] = $dom;
3704
3705                 if ( !$title->equals( $cacheTitle ) ) {
3706                         $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
3707                                 [ $title->getNamespace(), $title->getDBkey() ];
3708                 }
3709
3710                 return [ $dom, $title ];
3711         }
3712
3713         /**
3714          * Fetch the current revision of a given title. Note that the revision
3715          * (and even the title) may not exist in the database, so everything
3716          * contributing to the output of the parser should use this method
3717          * where possible, rather than getting the revisions themselves. This
3718          * method also caches its results, so using it benefits performance.
3719          *
3720          * @since 1.24
3721          * @param Title $title
3722          * @return Revision
3723          */
3724         public function fetchCurrentRevisionOfTitle( $title ) {
3725                 $cacheKey = $title->getPrefixedDBkey();
3726                 if ( !$this->currentRevisionCache ) {
3727                         $this->currentRevisionCache = new MapCacheLRU( 100 );
3728                 }
3729                 if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3730                         $this->currentRevisionCache->set( $cacheKey,
3731                                 // Defaults to Parser::statelessFetchRevision()
3732                                 call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3733                         );
3734                 }
3735                 return $this->currentRevisionCache->get( $cacheKey );
3736         }
3737
3738         /**
3739          * @param Title $title
3740          * @return bool
3741          * @since 1.34
3742          */
3743         public function isCurrentRevisionOfTitleCached( $title ) {
3744                 return (
3745                         $this->currentRevisionCache &&
3746                         $this->currentRevisionCache->has( $title->getPrefixedText() )
3747                 );
3748         }
3749
3750         /**
3751          * Wrapper around Revision::newFromTitle to allow passing additional parameters
3752          * without passing them on to it.
3753          *
3754          * @since 1.24
3755          * @param Title $title
3756          * @param Parser|bool $parser
3757          * @return Revision|bool False if missing
3758          */
3759         public static function statelessFetchRevision( Title $title, $parser = false ) {
3760                 $rev = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
3761
3762                 return $rev;
3763         }
3764
3765         /**
3766          * Fetch the unparsed text of a template and register a reference to it.
3767          * @param Title $title
3768          * @return array ( string or false, Title )
3769          */
3770         public function fetchTemplateAndTitle( $title ) {
3771                 // Defaults to Parser::statelessFetchTemplate()
3772                 $templateCb = $this->mOptions->getTemplateCallback();
3773                 $stuff = call_user_func( $templateCb, $title, $this );
3774                 $rev = $stuff['revision'] ?? null;
3775                 $text = $stuff['text'];
3776                 if ( is_string( $stuff['text'] ) ) {
3777                         // We use U+007F DELETE to distinguish strip markers from regular text
3778                         $text = strtr( $text, "\x7f", "?" );
3779                 }
3780                 $finalTitle = $stuff['finalTitle'] ?? $title;
3781                 foreach ( ( $stuff['deps'] ?? [] ) as $dep ) {
3782                         $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
3783                         if ( $dep['title']->equals( $this->getTitle() ) && $rev instanceof Revision ) {
3784                                 // Self-transclusion; final result may change based on the new page version
3785                                 $this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
3786                                 $this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
3787                         }
3788                 }
3789
3790                 return [ $text, $finalTitle ];
3791         }
3792
3793         /**
3794          * Fetch the unparsed text of a template and register a reference to it.
3795          * @param Title $title
3796          * @return string|bool
3797          */
3798         public function fetchTemplate( $title ) {
3799                 return $this->fetchTemplateAndTitle( $title )[0];
3800         }
3801
3802         /**
3803          * Static function to get a template
3804          * Can be overridden via ParserOptions::setTemplateCallback().
3805          *
3806          * @param Title $title
3807          * @param bool|Parser $parser
3808          *
3809          * @return array
3810          */
3811         public static function statelessFetchTemplate( $title, $parser = false ) {
3812                 $text = $skip = false;
3813                 $finalTitle = $title;
3814                 $deps = [];
3815                 $rev = null;
3816
3817                 # Loop to fetch the article, with up to 1 redirect
3818                 for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
3819                         # Give extensions a chance to select the revision instead
3820                         $id = false; # Assume current
3821                         Hooks::run( 'BeforeParserFetchTemplateAndtitle',
3822                                 [ $parser, $title, &$skip, &$id ] );
3823
3824                         if ( $skip ) {
3825                                 $text = false;
3826                                 $deps[] = [
3827                                         'title' => $title,
3828                                         'page_id' => $title->getArticleID(),
3829                                         'rev_id' => null
3830                                 ];
3831                                 break;
3832                         }
3833                         # Get the revision
3834                         if ( $id ) {
3835                                 $rev = Revision::newFromId( $id );
3836                         } elseif ( $parser ) {
3837                                 $rev = $parser->fetchCurrentRevisionOfTitle( $title );
3838                         } else {
3839                                 $rev = Revision::newFromTitle( $title );
3840                         }
3841                         $rev_id = $rev ? $rev->getId() : 0;
3842                         # If there is no current revision, there is no page
3843                         if ( $id === false && !$rev ) {
3844                                 $linkCache = MediaWikiServices::getInstance()->getLinkCache();
3845                                 $linkCache->addBadLinkObj( $title );
3846                         }
3847
3848                         $deps[] = [
3849                                 'title' => $title,
3850                                 'page_id' => $title->getArticleID(),
3851                                 'rev_id' => $rev_id
3852                         ];
3853                         if ( $rev && !$title->equals( $rev->getTitle() ) ) {
3854                                 # We fetched a rev from a different title; register it too...
3855                                 $deps[] = [
3856                                         'title' => $rev->getTitle(),
3857                                         'page_id' => $rev->getPage(),
3858                                         'rev_id' => $rev_id
3859                                 ];
3860                         }
3861
3862                         if ( $rev ) {
3863                                 $content = $rev->getContent();
3864                                 $text = $content ? $content->getWikitextForTransclusion() : null;
3865
3866                                 Hooks::run( 'ParserFetchTemplate',
3867                                         [ $parser, $title, $rev, &$text, &$deps ] );
3868
3869                                 if ( $text === false || $text === null ) {
3870                                         $text = false;
3871                                         break;
3872                                 }
3873                         } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
3874                                 $message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
3875                                         lcfirst( $title->getText() ) )->inContentLanguage();
3876                                 if ( !$message->exists() ) {
3877                                         $text = false;
3878                                         break;
3879                                 }
3880                                 $content = $message->content();
3881                                 $text = $message->plain();
3882                         } else {
3883                                 break;
3884                         }
3885                         if ( !$content ) {
3886                                 break;
3887                         }
3888                         # Redirect?
3889                         $finalTitle = $title;
3890                         $title = $content->getRedirectTarget();
3891                 }
3892                 return [
3893                         'revision' => $rev,
3894                         'text' => $text,
3895                         'finalTitle' => $finalTitle,
3896                         'deps' => $deps
3897                 ];
3898         }
3899
3900         /**
3901          * Fetch a file and its title and register a reference to it.
3902          * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
3903          * @param Title $title
3904          * @param array $options Array of options to RepoGroup::findFile
3905          * @return array ( File or false, Title of file )
3906          */
3907         public function fetchFileAndTitle( $title, $options = [] ) {
3908                 $file = $this->fetchFileNoRegister( $title, $options );
3909
3910                 $time = $file ? $file->getTimestamp() : false;
3911                 $sha1 = $file ? $file->getSha1() : false;
3912                 # Register the file as a dependency...
3913                 $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3914                 if ( $file && !$title->equals( $file->getTitle() ) ) {
3915                         # Update fetched file title
3916                         $title = $file->getTitle();
3917                         $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3918                 }
3919                 return [ $file, $title ];
3920         }
3921
3922         /**
3923          * Helper function for fetchFileAndTitle.
3924          *
3925          * Also useful if you need to fetch a file but not use it yet,
3926          * for example to get the file's handler.
3927          *
3928          * @param Title $title
3929          * @param array $options Array of options to RepoGroup::findFile
3930          * @return File|bool
3931          */
3932         protected function fetchFileNoRegister( $title, $options = [] ) {
3933                 if ( isset( $options['broken'] ) ) {
3934                         $file = false; // broken thumbnail forced by hook
3935                 } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
3936                         $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
3937                 } else { // get by (name,timestamp)
3938                         $file = MediaWikiServices::getInstance()->getRepoGroup()->findFile( $title, $options );
3939                 }
3940                 return $file;
3941         }
3942
3943         /**
3944          * Transclude an interwiki link.
3945          *
3946          * @param Title $title
3947          * @param string $action Usually one of (raw, render)
3948          *
3949          * @return string
3950          */
3951         public function interwikiTransclude( $title, $action ) {
3952                 if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
3953                         return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
3954                 }
3955
3956                 $url = $title->getFullURL( [ 'action' => $action ] );
3957                 if ( strlen( $url ) > 1024 ) {
3958                         return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
3959                 }
3960
3961                 $wikiId = $title->getTransWikiID(); // remote wiki ID or false
3962
3963                 $fname = __METHOD__;
3964                 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
3965
3966                 $data = $cache->getWithSetCallback(
3967                         $cache->makeGlobalKey(
3968                                 'interwiki-transclude',
3969                                 ( $wikiId !== false ) ? $wikiId : 'external',
3970                                 sha1( $url )
3971                         ),
3972                         $this->svcOptions->get( 'TranscludeCacheExpiry' ),
3973                         function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
3974                                 $req = MWHttpRequest::factory( $url, [], $fname );
3975
3976                                 $status = $req->execute(); // Status object
3977                                 if ( !$status->isOK() ) {
3978                                         $ttl = $cache::TTL_UNCACHEABLE;
3979                                 } elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
3980                                         $ttl = min( $cache::TTL_LAGGED, $ttl );
3981                                 }
3982
3983                                 return [
3984                                         'text' => $status->isOK() ? $req->getContent() : null,
3985                                         'code' => $req->getStatus()
3986                                 ];
3987                         },
3988                         [
3989                                 'checkKeys' => ( $wikiId !== false )
3990                                         ? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
3991                                         : [],
3992                                 'pcGroup' => 'interwiki-transclude:5',
3993                                 'pcTTL' => $cache::TTL_PROC_LONG
3994                         ]
3995                 );
3996
3997                 if ( is_string( $data['text'] ) ) {
3998                         $text = $data['text'];
3999                 } elseif ( $data['code'] != 200 ) {
4000                         // Though we failed to fetch the content, this status is useless.
4001                         $text = wfMessage( 'scarytranscludefailed-httpstatus' )
4002                                 ->params( $url, $data['code'] )->inContentLanguage()->text();
4003                 } else {
4004                         $text = wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
4005                 }
4006
4007                 return $text;
4008         }
4009
4010         /**
4011          * Triple brace replacement -- used for template arguments
4012          * @private
4013          *
4014          * @param array $piece
4015          * @param PPFrame $frame
4016          *
4017          * @return array
4018          */
4019         public function argSubstitution( $piece, $frame ) {
4020                 $error = false;
4021                 $parts = $piece['parts'];
4022                 $nameWithSpaces = $frame->expand( $piece['title'] );
4023                 $argName = trim( $nameWithSpaces );
4024                 $object = false;
4025                 $text = $frame->getArgument( $argName );
4026                 if ( $text === false && $parts->getLength() > 0
4027                         && ( $this->ot['html']
4028                                 || $this->ot['pre']
4029                                 || ( $this->ot['wiki'] && $frame->isTemplate() )
4030                         )
4031                 ) {
4032                         # No match in frame, use the supplied default
4033                         $object = $parts->item( 0 )->getChildren();
4034                 }
4035                 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
4036                         $error = '<!-- WARNING: argument omitted, expansion size too large -->';
4037                         $this->limitationWarn( 'post-expand-template-argument' );
4038                 }
4039
4040                 if ( $text === false && $object === false ) {
4041                         # No match anywhere
4042                         $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
4043                 }
4044                 if ( $error !== false ) {
4045                         $text .= $error;
4046                 }
4047                 if ( $object !== false ) {
4048                         $ret = [ 'object' => $object ];
4049                 } else {
4050                         $ret = [ 'text' => $text ];
4051                 }
4052
4053                 return $ret;
4054         }
4055
4056         /**
4057          * Return the text to be used for a given extension tag.
4058          * This is the ghost of strip().
4059          *
4060          * @param array $params Associative array of parameters:
4061          *     name       PPNode for the tag name
4062          *     attr       PPNode for unparsed text where tag attributes are thought to be
4063          *     attributes Optional associative array of parsed attributes
4064          *     inner      Contents of extension element
4065          *     noClose    Original text did not have a close tag
4066          * @param PPFrame $frame
4067          *
4068          * @throws MWException
4069          * @return string
4070          */
4071         public function extensionSubstitution( $params, $frame ) {
4072                 static $errorStr = '<span class="error">';
4073                 static $errorLen = 20;
4074
4075                 $name = $frame->expand( $params['name'] );
4076                 if ( substr( $name, 0, $errorLen ) === $errorStr ) {
4077                         // Probably expansion depth or node count exceeded. Just punt the
4078                         // error up.
4079                         return $name;
4080                 }
4081
4082                 $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
4083                 if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
4084                         // See above
4085                         return $attrText;
4086                 }
4087
4088                 // We can't safely check if the expansion for $content resulted in an
4089                 // error, because the content could happen to be the error string
4090                 // (T149622).
4091                 $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
4092
4093                 $marker = self::MARKER_PREFIX . "-$name-"
4094                         . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
4095
4096                 $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
4097                         ( $this->ot['html'] || $this->ot['pre'] );
4098                 if ( $isFunctionTag ) {
4099                         $markerType = 'none';
4100                 } else {
4101                         $markerType = 'general';
4102                 }
4103                 if ( $this->ot['html'] || $isFunctionTag ) {
4104                         $name = strtolower( $name );
4105                         $attributes = Sanitizer::decodeTagAttributes( $attrText );
4106                         if ( isset( $params['attributes'] ) ) {
4107                                 $attributes += $params['attributes'];
4108                         }
4109
4110                         if ( isset( $this->mTagHooks[$name] ) ) {
4111                                 $output = call_user_func_array( $this->mTagHooks[$name],
4112                                         [ $content, $attributes, $this, $frame ] );
4113                         } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
4114                                 list( $callback, ) = $this->mFunctionTagHooks[$name];
4115
4116                                 // Avoid PHP 7.1 warning from passing $this by reference
4117                                 $parser = $this;
4118                                 $output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
4119                         } else {
4120                                 $output = '<span class="error">Invalid tag extension name: ' .
4121                                         htmlspecialchars( $name ) . '</span>';
4122                         }
4123
4124                         if ( is_array( $output ) ) {
4125                                 // Extract flags
4126                                 $flags = $output;
4127                                 $output = $flags[0];
4128                                 if ( isset( $flags['markerType'] ) ) {
4129                                         $markerType = $flags['markerType'];
4130                                 }
4131                         }
4132                 } else {
4133                         if ( is_null( $attrText ) ) {
4134                                 $attrText = '';
4135                         }
4136                         if ( isset( $params['attributes'] ) ) {
4137                                 foreach ( $params['attributes'] as $attrName => $attrValue ) {
4138                                         $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
4139                                                 htmlspecialchars( $attrValue ) . '"';
4140                                 }
4141                         }
4142                         if ( $content === null ) {
4143                                 $output = "<$name$attrText/>";
4144                         } else {
4145                                 $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
4146                                 if ( substr( $close, 0, $errorLen ) === $errorStr ) {
4147                                         // See above
4148                                         return $close;
4149                                 }
4150                                 $output = "<$name$attrText>$content$close";
4151                         }
4152                 }
4153
4154                 if ( $markerType === 'none' ) {
4155                         return $output;
4156                 } elseif ( $markerType === 'nowiki' ) {
4157                         $this->mStripState->addNoWiki( $marker, $output );
4158                 } elseif ( $markerType === 'general' ) {
4159                         $this->mStripState->addGeneral( $marker, $output );
4160                 } else {
4161                         throw new MWException( __METHOD__ . ': invalid marker type' );
4162                 }
4163                 return $marker;
4164         }
4165
4166         /**
4167          * Increment an include size counter
4168          *
4169          * @param string $type The type of expansion
4170          * @param int $size The size of the text
4171          * @return bool False if this inclusion would take it over the maximum, true otherwise
4172          */
4173         public function incrementIncludeSize( $type, $size ) {
4174                 if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
4175                         return false;
4176                 } else {
4177                         $this->mIncludeSizes[$type] += $size;
4178                         return true;
4179                 }
4180         }
4181
4182         /**
4183          * Increment the expensive function count
4184          *
4185          * @return bool False if the limit has been exceeded
4186          */
4187         public function incrementExpensiveFunctionCount() {
4188                 $this->mExpensiveFunctionCount++;
4189                 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4190         }
4191
4192         /**
4193          * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
4194          * Fills $this->mDoubleUnderscores, returns the modified text
4195          *
4196          * @param string $text
4197          *
4198          * @return string
4199          */
4200         public function doDoubleUnderscore( $text ) {
4201                 # The position of __TOC__ needs to be recorded
4202                 $mw = $this->magicWordFactory->get( 'toc' );
4203                 if ( $mw->match( $text ) ) {
4204                         $this->mShowToc = true;
4205                         $this->mForceTocPosition = true;
4206
4207                         # Set a placeholder. At the end we'll fill it in with the TOC.
4208                         $text = $mw->replace( '<!--MWTOC\'"-->', $text, 1 );
4209
4210                         # Only keep the first one.
4211                         $text = $mw->replace( '', $text );
4212                 }
4213
4214                 # Now match and remove the rest of them
4215                 $mwa = $this->magicWordFactory->getDoubleUnderscoreArray();
4216                 $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
4217
4218                 if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
4219                         $this->mOutput->mNoGallery = true;
4220                 }
4221                 if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
4222                         $this->mShowToc = false;
4223                 }
4224                 if ( isset( $this->mDoubleUnderscores['hiddencat'] )
4225                         && $this->mTitle->getNamespace() == NS_CATEGORY
4226                 ) {
4227                         $this->addTrackingCategory( 'hidden-category-category' );
4228                 }
4229                 # (T10068) Allow control over whether robots index a page.
4230                 # __INDEX__ always overrides __NOINDEX__, see T16899
4231                 if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
4232                         $this->mOutput->setIndexPolicy( 'noindex' );
4233                         $this->addTrackingCategory( 'noindex-category' );
4234                 }
4235                 if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
4236                         $this->mOutput->setIndexPolicy( 'index' );
4237                         $this->addTrackingCategory( 'index-category' );
4238                 }
4239
4240                 # Cache all double underscores in the database
4241                 foreach ( $this->mDoubleUnderscores as $key => $val ) {
4242                         $this->mOutput->setProperty( $key, '' );
4243                 }
4244
4245                 return $text;
4246         }
4247
4248         /**
4249          * @see ParserOutput::addTrackingCategory()
4250          * @param string $msg Message key
4251          * @return bool Whether the addition was successful
4252          */
4253         public function addTrackingCategory( $msg ) {
4254                 return $this->mOutput->addTrackingCategory( $msg, $this->mTitle );
4255         }
4256
4257         /**
4258          * This function accomplishes several tasks:
4259          * 1) Auto-number headings if that option is enabled
4260          * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
4261          * 3) Add a Table of contents on the top for users who have enabled the option
4262          * 4) Auto-anchor headings
4263          *
4264          * It loops through all headlines, collects the necessary data, then splits up the
4265          * string and re-inserts the newly formatted headlines.
4266          *
4267          * @param string $text
4268          * @param string $origText Original, untouched wikitext
4269          * @param bool $isMain
4270          * @return mixed|string
4271          * @private
4272          */
4273         public function formatHeadings( $text, $origText, $isMain = true ) {
4274                 # Inhibit editsection links if requested in the page
4275                 if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4276                         $maybeShowEditLink = false;
4277                 } else {
4278                         $maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
4279                 }
4280
4281                 # Get all headlines for numbering them and adding funky stuff like [edit]
4282                 # links - this is for later, but we need the number of headlines right now
4283                 # NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
4284                 # be trimmed here since whitespace in HTML headings is significant.
4285                 $matches = [];
4286                 $numMatches = preg_match_all(
4287                         '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
4288                         $text,
4289                         $matches
4290                 );
4291
4292                 # if there are fewer than 4 headlines in the article, do not show TOC
4293                 # unless it's been explicitly enabled.
4294                 $enoughToc = $this->mShowToc &&
4295                         ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4296
4297                 # Allow user to stipulate that a page should have a "new section"
4298                 # link added via __NEWSECTIONLINK__
4299                 if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4300                         $this->mOutput->setNewSection( true );
4301                 }
4302
4303                 # Allow user to remove the "new section"
4304                 # link via __NONEWSECTIONLINK__
4305                 if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4306                         $this->mOutput->hideNewSection( true );
4307                 }
4308
4309                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4310                 # override above conditions and always show TOC above first header
4311                 if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4312                         $this->mShowToc = true;
4313                         $enoughToc = true;
4314                 }
4315
4316                 # headline counter
4317                 $headlineCount = 0;
4318                 $numVisible = 0;
4319
4320                 # Ugh .. the TOC should have neat indentation levels which can be
4321                 # passed to the skin functions. These are determined here
4322                 $toc = '';
4323                 $full = '';
4324                 $head = [];
4325                 $sublevelCount = [];
4326                 $levelCount = [];
4327                 $level = 0;
4328                 $prevlevel = 0;
4329                 $toclevel = 0;
4330                 $prevtoclevel = 0;
4331                 $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4332                 $baseTitleText = $this->mTitle->getPrefixedDBkey();
4333                 $oldType = $this->mOutputType;
4334                 $this->setOutputType( self::OT_WIKI );
4335                 $frame = $this->getPreprocessor()->newFrame();
4336                 $root = $this->preprocessToDom( $origText );
4337                 $node = $root->getFirstChild();
4338                 $byteOffset = 0;
4339                 $tocraw = [];
4340                 $refers = [];
4341
4342                 $headlines = $numMatches !== false ? $matches[3] : [];
4343
4344                 $maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
4345                 foreach ( $headlines as $headline ) {
4346                         $isTemplate = false;
4347                         $titleText = false;
4348                         $sectionIndex = false;
4349                         $numbering = '';
4350                         $markerMatches = [];
4351                         if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4352                                 $serial = $markerMatches[1];
4353                                 list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4354                                 $isTemplate = ( $titleText != $baseTitleText );
4355                                 $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4356                         }
4357
4358                         if ( $toclevel ) {
4359                                 $prevlevel = $level;
4360                         }
4361                         $level = $matches[1][$headlineCount];
4362
4363                         if ( $level > $prevlevel ) {
4364                                 # Increase TOC level
4365                                 $toclevel++;
4366                                 $sublevelCount[$toclevel] = 0;
4367                                 if ( $toclevel < $maxTocLevel ) {
4368                                         $prevtoclevel = $toclevel;
4369                                         $toc .= Linker::tocIndent();
4370                                         $numVisible++;
4371                                 }
4372                         } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4373                                 # Decrease TOC level, find level to jump to
4374
4375                                 for ( $i = $toclevel; $i > 0; $i-- ) {
4376                                         if ( $levelCount[$i] == $level ) {
4377                                                 # Found last matching level
4378                                                 $toclevel = $i;
4379                                                 break;
4380                                         } elseif ( $levelCount[$i] < $level ) {
4381                                                 # Found first matching level below current level
4382                                                 $toclevel = $i + 1;
4383                                                 break;
4384                                         }
4385                                 }
4386                                 if ( $i == 0 ) {
4387                                         $toclevel = 1;
4388                                 }
4389                                 if ( $toclevel < $maxTocLevel ) {
4390                                         if ( $prevtoclevel < $maxTocLevel ) {
4391                                                 # Unindent only if the previous toc level was shown :p
4392                                                 $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4393                                                 $prevtoclevel = $toclevel;
4394                                         } else {
4395                                                 $toc .= Linker::tocLineEnd();
4396                                         }
4397                                 }
4398                         } else {
4399                                 # No change in level, end TOC line
4400                                 if ( $toclevel < $maxTocLevel ) {
4401                                         $toc .= Linker::tocLineEnd();
4402                                 }
4403                         }
4404
4405                         $levelCount[$toclevel] = $level;
4406
4407                         # count number of headlines for each level
4408                         $sublevelCount[$toclevel]++;
4409                         $dot = 0;
4410                         for ( $i = 1; $i <= $toclevel; $i++ ) {
4411                                 if ( !empty( $sublevelCount[$i] ) ) {
4412                                         if ( $dot ) {
4413                                                 $numbering .= '.';
4414                                         }
4415                                         $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4416                                         $dot = 1;
4417                                 }
4418                         }
4419
4420                         # The safe header is a version of the header text safe to use for links
4421
4422                         # Remove link placeholders by the link text.
4423                         #     <!--LINK number-->
4424                         # turns into
4425                         #     link text with suffix
4426                         # Do this before unstrip since link text can contain strip markers
4427                         $safeHeadline = $this->replaceLinkHoldersText( $headline );
4428
4429                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4430                         $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4431
4432                         # Remove any <style> or <script> tags (T198618)
4433                         $safeHeadline = preg_replace(
4434                                 '#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
4435                                 '',
4436                                 $safeHeadline
4437                         );
4438
4439                         # Strip out HTML (first regex removes any tag not allowed)
4440                         # Allowed tags are:
4441                         # * <sup> and <sub> (T10393)
4442                         # * <i> (T28375)
4443                         # * <b> (r105284)
4444                         # * <bdi> (T74884)
4445                         # * <span dir="rtl"> and <span dir="ltr"> (T37167)
4446                         # * <s> and <strike> (T35715)
4447                         # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4448                         # to allow setting directionality in toc items.
4449                         $tocline = preg_replace(
4450                                 [
4451                                         '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
4452                                         '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
4453                                 ],
4454                                 [ '', '<$1>' ],
4455                                 $safeHeadline
4456                         );
4457
4458                         # Strip '<span></span>', which is the result from the above if
4459                         # <span id="foo"></span> is used to produce an additional anchor
4460                         # for a section.
4461                         $tocline = str_replace( '<span></span>', '', $tocline );
4462
4463                         $tocline = trim( $tocline );
4464
4465                         # For the anchor, strip out HTML-y stuff period
4466                         $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4467                         $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4468
4469                         # Save headline for section edit hint before it's escaped
4470                         $headlineHint = $safeHeadline;
4471
4472                         # Decode HTML entities
4473                         $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
4474
4475                         $safeHeadline = self::normalizeSectionName( $safeHeadline );
4476
4477                         $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
4478                         $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
4479                         $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
4480                         if ( $fallbackHeadline === $safeHeadline ) {
4481                                 # No reason to have both (in fact, we can't)
4482                                 $fallbackHeadline = false;
4483                         }
4484
4485                         # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
4486                         # @todo FIXME: We may be changing them depending on the current locale.
4487                         $arrayKey = strtolower( $safeHeadline );
4488                         if ( $fallbackHeadline === false ) {
4489                                 $fallbackArrayKey = false;
4490                         } else {
4491                                 $fallbackArrayKey = strtolower( $fallbackHeadline );
4492                         }
4493
4494                         # Create the anchor for linking from the TOC to the section
4495                         $anchor = $safeHeadline;
4496                         $fallbackAnchor = $fallbackHeadline;
4497                         if ( isset( $refers[$arrayKey] ) ) {
4498                                 // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4499                                 for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4500                                 $anchor .= "_$i";
4501                                 $linkAnchor .= "_$i";
4502                                 $refers["${arrayKey}_$i"] = true;
4503                         } else {
4504                                 $refers[$arrayKey] = true;
4505                         }
4506                         if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
4507                                 // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4508                                 for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
4509                                 $fallbackAnchor .= "_$i";
4510                                 $refers["${fallbackArrayKey}_$i"] = true;
4511                         } else {
4512                                 $refers[$fallbackArrayKey] = true;
4513                         }
4514
4515                         # Don't number the heading if it is the only one (looks silly)
4516                         if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4517                                 # the two are different if the line contains a link
4518                                 $headline = Html::element(
4519                                         'span',
4520                                         [ 'class' => 'mw-headline-number' ],
4521                                         $numbering
4522                                 ) . ' ' . $headline;
4523                         }
4524
4525                         if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
4526                                 $toc .= Linker::tocLine( $linkAnchor, $tocline,
4527                                         $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4528                         }
4529
4530                         # Add the section to the section tree
4531                         # Find the DOM node for this header
4532                         $noOffset = ( $isTemplate || $sectionIndex === false );
4533                         while ( $node && !$noOffset ) {
4534                                 if ( $node->getName() === 'h' ) {
4535                                         $bits = $node->splitHeading();
4536                                         if ( $bits['i'] == $sectionIndex ) {
4537                                                 break;
4538                                         }
4539                                 }
4540                                 $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4541                                         $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4542                                 $node = $node->getNextSibling();
4543                         }
4544                         $tocraw[] = [
4545                                 'toclevel' => $toclevel,
4546                                 'level' => $level,
4547                                 'line' => $tocline,
4548                                 'number' => $numbering,
4549                                 'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4550                                 'fromtitle' => $titleText,
4551                                 'byteoffset' => ( $noOffset ? null : $byteOffset ),
4552                                 'anchor' => $anchor,
4553                         ];
4554
4555                         # give headline the correct <h#> tag
4556                         if ( $maybeShowEditLink && $sectionIndex !== false ) {
4557                                 // Output edit section links as markers with styles that can be customized by skins
4558                                 if ( $isTemplate ) {
4559                                         # Put a T flag in the section identifier, to indicate to extractSections()
4560                                         # that sections inside <includeonly> should be counted.
4561                                         $editsectionPage = $titleText;
4562                                         $editsectionSection = "T-$sectionIndex";
4563                                         $editsectionContent = null;
4564                                 } else {
4565                                         $editsectionPage = $this->mTitle->getPrefixedText();
4566                                         $editsectionSection = $sectionIndex;
4567                                         $editsectionContent = $headlineHint;
4568                                 }
4569                                 // We use a bit of pesudo-xml for editsection markers. The
4570                                 // language converter is run later on. Using a UNIQ style marker
4571                                 // leads to the converter screwing up the tokens when it
4572                                 // converts stuff. And trying to insert strip tags fails too. At
4573                                 // this point all real inputted tags have already been escaped,
4574                                 // so we don't have to worry about a user trying to input one of
4575                                 // these markers directly. We use a page and section attribute
4576                                 // to stop the language converter from converting these
4577                                 // important bits of data, but put the headline hint inside a
4578                                 // content block because the language converter is supposed to
4579                                 // be able to convert that piece of data.
4580                                 // Gets replaced with html in ParserOutput::getText
4581                                 $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4582                                 $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4583                                 if ( $editsectionContent !== null ) {
4584                                         $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4585                                 } else {
4586                                         $editlink .= '/>';
4587                                 }
4588                         } else {
4589                                 $editlink = '';
4590                         }
4591                         $head[$headlineCount] = Linker::makeHeadline( $level,
4592                                 $matches['attrib'][$headlineCount], $anchor, $headline,
4593                                 $editlink, $fallbackAnchor );
4594
4595                         $headlineCount++;
4596                 }
4597
4598                 $this->setOutputType( $oldType );
4599
4600                 # Never ever show TOC if no headers
4601                 if ( $numVisible < 1 ) {
4602                         $enoughToc = false;
4603                 }
4604
4605                 if ( $enoughToc ) {
4606                         if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
4607                                 $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4608                         }
4609                         $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4610                         $this->mOutput->setTOCHTML( $toc );
4611                         $toc = self::TOC_START . $toc . self::TOC_END;
4612                 }
4613
4614                 if ( $isMain ) {
4615                         $this->mOutput->setSections( $tocraw );
4616                 }
4617
4618                 # split up and insert constructed headlines
4619                 $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4620                 $i = 0;
4621
4622                 // build an array of document sections
4623                 $sections = [];
4624                 foreach ( $blocks as $block ) {
4625                         // $head is zero-based, sections aren't.
4626                         if ( empty( $head[$i - 1] ) ) {
4627                                 $sections[$i] = $block;
4628                         } else {
4629                                 $sections[$i] = $head[$i - 1] . $block;
4630                         }
4631
4632                         /**
4633                          * Send a hook, one per section.
4634                          * The idea here is to be able to make section-level DIVs, but to do so in a
4635                          * lower-impact, more correct way than r50769
4636                          *
4637                          * $this : caller
4638                          * $section : the section number
4639                          * &$sectionContent : ref to the content of the section
4640                          * $maybeShowEditLinks : boolean describing whether this section has an edit link
4641                          */
4642                         Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );
4643
4644                         $i++;
4645                 }
4646
4647                 if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4648                         // append the TOC at the beginning
4649                         // Top anchor now in skin
4650                         $sections[0] .= $toc . "\n";
4651                 }
4652
4653                 $full .= implode( '', $sections );
4654
4655                 if ( $this->mForceTocPosition ) {
4656                         return str_replace( '<!--MWTOC\'"-->', $toc, $full );
4657                 } else {
4658                         return $full;
4659                 }
4660         }
4661
4662         /**
4663          * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
4664          * conversion, substituting signatures, {{subst:}} templates, etc.
4665          *
4666          * @param string $text The text to transform
4667          * @param Title $title The Title object for the current article
4668          * @param User $user The User object describing the current user
4669          * @param ParserOptions $options Parsing options
4670          * @param bool $clearState Whether to clear the parser state first
4671          * @return string The altered wiki markup
4672          */
4673         public function preSaveTransform( $text, Title $title, User $user,
4674                 ParserOptions $options, $clearState = true
4675         ) {
4676                 if ( $clearState ) {
4677                         $magicScopeVariable = $this->lock();
4678                 }
4679                 $this->startParse( $title, $options, self::OT_WIKI, $clearState );
4680                 $this->setUser( $user );
4681
4682                 // Strip U+0000 NULL (T159174)
4683                 $text = str_replace( "\000", '', $text );
4684
4685                 // We still normalize line endings for backwards-compatibility
4686                 // with other code that just calls PST, but this should already
4687                 // be handled in TextContent subclasses
4688                 $text = TextContent::normalizeLineEndings( $text );
4689
4690                 if ( $options->getPreSaveTransform() ) {
4691                         $text = $this->pstPass2( $text, $user );
4692                 }
4693                 $text = $this->mStripState->unstripBoth( $text );
4694
4695                 $this->setUser( null ); # Reset
4696
4697                 return $text;
4698         }
4699
4700         /**
4701          * Pre-save transform helper function
4702          *
4703          * @param string $text
4704          * @param User $user
4705          *
4706          * @return string
4707          */
4708         private function pstPass2( $text, $user ) {
4709                 # Note: This is the timestamp saved as hardcoded wikitext to the database, we use
4710                 # $this->contLang here in order to give everyone the same signature and use the default one
4711                 # rather than the one selected in each user's preferences.  (see also T14815)
4712                 $ts = $this->mOptions->getTimestamp();
4713                 $timestamp = MWTimestamp::getLocalInstance( $ts );
4714                 $ts = $timestamp->format( 'YmdHis' );
4715                 $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();
4716
4717                 $d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";
4718
4719                 # Variable replacement
4720                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
4721                 $text = $this->replaceVariables( $text );
4722
4723                 # This works almost by chance, as the replaceVariables are done before the getUserSig(),
4724                 # which may corrupt this parser instance via its wfMessage()->text() call-
4725
4726                 # Signatures
4727                 if ( strpos( $text, '~~~' ) !== false ) {
4728                         $sigText = $this->getUserSig( $user );
4729                         $text = strtr( $text, [
4730                                 '~~~~~' => $d,
4731                                 '~~~~' => "$sigText $d",
4732                                 '~~~' => $sigText
4733                         ] );
4734                         # The main two signature forms used above are time-sensitive
4735                         $this->setOutputFlag( 'user-signature', 'User signature detected' );
4736                 }
4737
4738                 # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
4739                 $tc = '[' . Title::legalChars() . ']';
4740                 $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
4741
4742                 // [[ns:page (context)|]]
4743                 $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
4744                 // [[ns:page（context）|]] (double-width brackets, added in r40257)
4745                 $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
4746                 // [[ns:page (context), context|]] (using either single or double-width comma)
4747                 $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
4748                 // [[|page]] (reverse pipe trick: add context from page title)
4749                 $p2 = "/\[\[\\|($tc+)]]/";
4750
4751                 # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
4752                 $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
4753                 $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
4754                 $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
4755
4756                 $t = $this->mTitle->getText();
4757                 $m = [];
4758                 if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
4759                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4760                 } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
4761                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4762                 } else {
4763                         # if there's no context, don't bother duplicating the title
4764                         $text = preg_replace( $p2, '[[\\1]]', $text );
4765                 }
4766
4767                 return $text;
4768         }
4769
4770         /**
4771          * Fetch the user's signature text, if any, and normalize to
4772          * validated, ready-to-insert wikitext.
4773          * If you have pre-fetched the nickname or the fancySig option, you can
4774          * specify them here to save a database query.
4775          * Do not reuse this parser instance after calling getUserSig(),
4776          * as it may have changed.
4777          *
4778          * @param User &$user
4779          * @param string|bool $nickname Nickname to use or false to use user's default nickname
4780          * @param bool|null $fancySig whether the nicknname is the complete signature
4781          *    or null to use default value
4782          * @return string
4783          */
4784         public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
4785                 $username = $user->getName();
4786
4787                 # If not given, retrieve from the user object.
4788                 if ( $nickname === false ) {
4789                         $nickname = $user->getOption( 'nickname' );
4790                 }
4791
4792                 if ( is_null( $fancySig ) ) {
4793                         $fancySig = $user->getBoolOption( 'fancysig' );
4794                 }
4795
4796                 $nickname = $nickname == null ? $username : $nickname;
4797
4798                 if ( mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' ) ) {
4799                         $nickname = $username;
4800                         $this->logger->debug( __METHOD__ . ": $username has overlong signature." );
4801                 } elseif ( $fancySig !== false ) {
4802                         # Sig. might contain markup; validate this
4803                         if ( $this->validateSig( $nickname ) !== false ) {
4804                                 # Validated; clean up (if needed) and return it
4805                                 return $this->cleanSig( $nickname, true );
4806                         } else {
4807                                 # Failed to validate; fall back to the default
4808                                 $nickname = $username;
4809                                 $this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
4810                         }
4811                 }
4812
4813                 # Make sure nickname doesnt get a sig in a sig
4814                 $nickname = self::cleanSigInSig( $nickname );
4815
4816                 # If we're still here, make it a link to the user page
4817                 $userText = wfEscapeWikiText( $username );
4818                 $nickText = wfEscapeWikiText( $nickname );
4819                 $msgName = $user->isAnon() ? 'signature-anon' : 'signature';
4820
4821                 return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
4822                         ->title( $this->getTitle() )->text();
4823         }
4824
4825         /**
4826          * Check that the user's signature contains no bad XML
4827          *
4828          * @param string $text
4829          * @return string|bool An expanded string, or false if invalid.
4830          */
4831         public function validateSig( $text ) {
4832                 return Xml::isWellFormedXmlFragment( $text ) ? $text : false;
4833         }
4834
4835         /**
4836          * Clean up signature text
4837          *
4838          * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
4839          * 2) Substitute all transclusions
4840          *
4841          * @param string $text
4842          * @param bool $parsing Whether we're cleaning (preferences save) or parsing
4843          * @return string Signature text
4844          */
4845         public function cleanSig( $text, $parsing = false ) {
4846                 if ( !$parsing ) {
4847                         global $wgTitle;
4848                         $magicScopeVariable = $this->lock();
4849                         $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
4850                 }
4851
4852                 # Option to disable this feature
4853                 if ( !$this->mOptions->getCleanSignatures() ) {
4854                         return $text;
4855                 }
4856
4857                 # @todo FIXME: Regex doesn't respect extension tags or nowiki
4858                 #  => Move this logic to braceSubstitution()
4859                 $substWord = $this->magicWordFactory->get( 'subst' );
4860                 $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
4861                 $substText = '{{' . $substWord->getSynonym( 0 );
4862
4863                 $text = preg_replace( $substRegex, $substText, $text );
4864                 $text = self::cleanSigInSig( $text );
4865                 $dom = $this->preprocessToDom( $text );
4866                 $frame = $this->getPreprocessor()->newFrame();
4867                 $text = $frame->expand( $dom );
4868
4869                 if ( !$parsing ) {
4870                         $text = $this->mStripState->unstripBoth( $text );
4871                 }
4872
4873                 return $text;
4874         }
4875
4876         /**
4877          * Strip 3, 4 or 5 tildes out of signatures.
4878          *
4879          * @param string $text
4880          * @return string Signature text with /~{3,5}/ removed
4881          */
4882         public static function cleanSigInSig( $text ) {
4883                 $text = preg_replace( '/~{3,5}/', '', $text );
4884                 return $text;
4885         }
4886
4887         /**
4888          * Set up some variables which are usually set up in parse()
4889          * so that an external function can call some class members with confidence
4890          *
4891          * @param Title|null $title
4892          * @param ParserOptions $options
4893          * @param int $outputType
4894          * @param bool $clearState
4895          * @param int|null $revId
4896          */
4897         public function startExternalParse( Title $title = null, ParserOptions $options,
4898                 $outputType, $clearState = true, $revId = null
4899         ) {
4900                 $this->startParse( $title, $options, $outputType, $clearState );
4901                 if ( $revId !== null ) {
4902                         $this->mRevisionId = $revId;
4903                 }
4904         }
4905
4906         /**
4907          * @param Title|null $title
4908          * @param ParserOptions $options
4909          * @param int $outputType
4910          * @param bool $clearState
4911          */
4912         private function startParse( Title $title = null, ParserOptions $options,
4913                 $outputType, $clearState = true
4914         ) {
4915                 $this->setTitle( $title );
4916                 $this->mOptions = $options;
4917                 $this->setOutputType( $outputType );
4918                 if ( $clearState ) {
4919                         $this->clearState();
4920                 }
4921         }
4922
4923         /**
4924          * Wrapper for preprocess()
4925          *
4926          * @param string $text The text to preprocess
4927          * @param ParserOptions $options
4928          * @param Title|null $title Title object or null to use $wgTitle
4929          * @return string
4930          */
4931         public function transformMsg( $text, $options, $title = null ) {
4932                 static $executing = false;
4933
4934                 # Guard against infinite recursion
4935                 if ( $executing ) {
4936                         return $text;
4937                 }
4938                 $executing = true;
4939
4940                 if ( !$title ) {
4941                         global $wgTitle;
4942                         $title = $wgTitle;
4943                 }
4944
4945                 $text = $this->preprocess( $text, $title, $options );
4946
4947                 $executing = false;
4948                 return $text;
4949         }
4950
4951         /**
4952          * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
4953          * The callback should have the following form:
4954          *    function myParserHook( $text, $params, $parser, $frame ) { ... }
4955          *
4956          * Transform and return $text. Use $parser for any required context, e.g. use
4957          * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
4958          *
4959          * Hooks may return extended information by returning an array, of which the
4960          * first numbered element (index 0) must be the return string, and all other
4961          * entries are extracted into local variables within an internal function
4962          * in the Parser class.
4963          *
4964          * This interface (introduced r61913) appears to be undocumented, but
4965          * 'markerType' is used by some core tag hooks to override which strip
4966          * array their results are placed in. **Use great caution if attempting
4967          * this interface, as it is not documented and injudicious use could smash
4968          * private variables.**
4969          *
4970          * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
4971          * @param callable $callback The callback function (and object) to use for the tag
4972          * @throws MWException
4973          * @return callable|null The old value of the mTagHooks array associated with the hook
4974          */
4975         public function setHook( $tag, callable $callback ) {
4976                 $tag = strtolower( $tag );
4977                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
4978                         throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
4979                 }
4980                 $oldVal = $this->mTagHooks[$tag] ?? null;
4981                 $this->mTagHooks[$tag] = $callback;
4982                 if ( !in_array( $tag, $this->mStripList ) ) {
4983                         $this->mStripList[] = $tag;
4984                 }
4985
4986                 return $oldVal;
4987         }
4988
4989         /**
4990          * As setHook(), but letting the contents be parsed.
4991          *
4992          * Transparent tag hooks are like regular XML-style tag hooks, except they
4993          * operate late in the transformation sequence, on HTML instead of wikitext.
4994          *
4995          * This is probably obsoleted by things dealing with parser frames?
4996          * The only extension currently using it is geoserver.
4997          *
4998          * @since 1.10
4999          * @todo better document or deprecate this
5000          *
5001          * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
5002          * @param callable $callback The callback function (and object) to use for the tag
5003          * @throws MWException
5004          * @return callable|null The old value of the mTagHooks array associated with the hook
5005          */
5006         public function setTransparentTagHook( $tag, callable $callback ) {
5007                 $tag = strtolower( $tag );
5008                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
5009                         throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
5010                 }
5011                 $oldVal = $this->mTransparentTagHooks[$tag] ?? null;
5012                 $this->mTransparentTagHooks[$tag] = $callback;
5013
5014                 return $oldVal;
5015         }
5016
5017         /**
5018          * Remove all tag hooks
5019          */
5020         public function clearTagHooks() {
5021                 $this->mTagHooks = [];
5022                 $this->mFunctionTagHooks = [];
5023                 $this->mStripList = $this->mDefaultStripList;
5024         }
5025
5026         /**
5027          * Create a function, e.g. {{sum:1|2|3}}
5028          * The callback function should have the form:
5029          *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
5030          *
5031          * Or with Parser::SFH_OBJECT_ARGS:
5032          *    function myParserFunction( $parser, $frame, $args ) { ... }
5033          *
5034          * The callback may either return the text result of the function, or an array with the text
5035          * in element 0, and a number of flags in the other elements. The names of the flags are
5036          * specified in the keys. Valid flags are:
5037          *   found                     The text returned is valid, stop processing the template. This
5038          *                             is on by default.
5039          *   nowiki                    Wiki markup in the return value should be escaped
5040          *   isHTML                    The returned text is HTML, armour it against wikitext transformation
5041          *
5042          * @param string $id The magic word ID
5043          * @param callable $callback The callback function (and object) to use
5044          * @param int $flags A combination of the following flags:
5045          *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
5046          *
5047          *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
5048          *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
5049          *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
5050          *     the arguments, and to control the way they are expanded.
5051          *
5052          *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
5053          *     arguments, for instance:
5054          *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
5055          *
5056          *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
5057          *     future versions. Please call $frame->expand() on it anyway so that your code keeps
5058          *     working if/when this is changed.
5059          *
5060          *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
5061          *     expansion.
5062          *
5063          *     Please read the documentation in includes/parser/Preprocessor.php for more information
5064          *     about the methods available in PPFrame and PPNode.
5065          *
5066          * @throws MWException
5067          * @return string|callable The old callback function for this name, if any
5068          */
5069         public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
5070                 $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
5071                 $this->mFunctionHooks[$id] = [ $callback, $flags ];
5072
5073                 # Add to function cache
5074                 $mw = $this->magicWordFactory->get( $id );
5075                 if ( !$mw ) {
5076                         throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
5077                 }
5078
5079                 $synonyms = $mw->getSynonyms();
5080                 $sensitive = intval( $mw->isCaseSensitive() );
5081
5082                 foreach ( $synonyms as $syn ) {
5083                         # Case
5084                         if ( !$sensitive ) {
5085                                 $syn = $this->contLang->lc( $syn );
5086                         }
5087                         # Add leading hash
5088                         if ( !( $flags & self::SFH_NO_HASH ) ) {
5089                                 $syn = '#' . $syn;
5090                         }
5091                         # Remove trailing colon
5092                         if ( substr( $syn, -1, 1 ) === ':' ) {
5093                                 $syn = substr( $syn, 0, -1 );
5094                         }
5095                         $this->mFunctionSynonyms[$sensitive][$syn] = $id;
5096                 }
5097                 return $oldVal;
5098         }
5099
5100         /**
5101          * Get all registered function hook identifiers
5102          *
5103          * @return array
5104          */
5105         public function getFunctionHooks() {
5106                 $this->firstCallInit();
5107                 return array_keys( $this->mFunctionHooks );
5108         }
5109
5110         /**
5111          * Create a tag function, e.g. "<test>some stuff</test>".
5112          * Unlike tag hooks, tag functions are parsed at preprocessor level.
5113          * Unlike parser functions, their content is not preprocessed.
5114          * @param string $tag
5115          * @param callable $callback
5116          * @param int $flags
5117          * @throws MWException
5118          * @return null
5119          */
5120         public function setFunctionTagHook( $tag, callable $callback, $flags ) {
5121                 $tag = strtolower( $tag );
5122                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
5123                         throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
5124                 }
5125                 $old = $this->mFunctionTagHooks[$tag] ?? null;
5126                 $this->mFunctionTagHooks[$tag] = [ $callback, $flags ];
5127
5128                 if ( !in_array( $tag, $this->mStripList ) ) {
5129                         $this->mStripList[] = $tag;
5130                 }
5131
5132                 return $old;
5133         }
5134
5135         /**
5136          * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
5137          * Placeholders created in Linker::link()
5138          *
5139          * @param string &$text
5140          * @param int $options
5141          */
5142         public function replaceLinkHolders( &$text, $options = 0 ) {
5143                 $this->mLinkHolders->replace( $text );
5144         }
5145
5146         /**
5147          * Replace "<!--LINK-->" link placeholders with plain text of links
5148          * (not HTML-formatted).
5149          *
5150          * @param string $text
5151          * @return string
5152          */
5153         public function replaceLinkHoldersText( $text ) {
5154                 return $this->mLinkHolders->replaceText( $text );
5155         }
5156
5157         /**
5158          * Renders an image gallery from a text with one line per image.
5159          * text labels may be given by using |-style alternative text. E.g.
5160          *   Image:one.jpg|The number "1"
5161          *   Image:tree.jpg|A tree
5162          * given as text will return the HTML of a gallery with two images,
5163          * labeled 'The number "1"' and
5164          * 'A tree'.
5165          *
5166          * @param string $text
5167          * @param array $params
5168          * @return string HTML
5169          */
5170         public function renderImageGallery( $text, $params ) {
5171                 $mode = false;
5172                 if ( isset( $params['mode'] ) ) {
5173                         $mode = $params['mode'];
5174                 }
5175
5176                 try {
5177                         $ig = ImageGalleryBase::factory( $mode );
5178                 } catch ( Exception $e ) {
5179                         // If invalid type set, fallback to default.
5180                         $ig = ImageGalleryBase::factory( false );
5181                 }
5182
5183                 $ig->setContextTitle( $this->mTitle );
5184                 $ig->setShowBytes( false );
5185                 $ig->setShowDimensions( false );
5186                 $ig->setShowFilename( false );
5187                 $ig->setParser( $this );
5188                 $ig->setHideBadImages();
5189                 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
5190
5191                 if ( isset( $params['showfilename'] ) ) {
5192                         $ig->setShowFilename( true );
5193                 } else {
5194                         $ig->setShowFilename( false );
5195                 }
5196                 if ( isset( $params['caption'] ) ) {
5197                         // NOTE: We aren't passing a frame here or below.  Frame info
5198                         // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
5199                         // See T107332#4030581
5200                         $caption = $this->recursiveTagParse( $params['caption'] );
5201                         $ig->setCaptionHtml( $caption );
5202                 }
5203                 if ( isset( $params['perrow'] ) ) {
5204                         $ig->setPerRow( $params['perrow'] );
5205                 }
5206                 if ( isset( $params['widths'] ) ) {
5207                         $ig->setWidths( $params['widths'] );
5208                 }
5209                 if ( isset( $params['heights'] ) ) {
5210                         $ig->setHeights( $params['heights'] );
5211                 }
5212                 $ig->setAdditionalOptions( $params );
5213
5214                 // Avoid PHP 7.1 warning from passing $this by reference
5215                 $parser = $this;
5216                 Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );
5217
5218                 $lines = StringUtils::explode( "\n", $text );
5219                 foreach ( $lines as $line ) {
5220                         # match lines like these:
5221                         # Image:someimage.jpg|This is some image
5222                         $matches = [];
5223                         preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5224                         # Skip empty lines
5225                         if ( count( $matches ) == 0 ) {
5226                                 continue;
5227                         }
5228
5229                         if ( strpos( $matches[0], '%' ) !== false ) {
5230                                 $matches[1] = rawurldecode( $matches[1] );
5231                         }
5232                         $title = Title::newFromText( $matches[1], NS_FILE );
5233                         if ( is_null( $title ) ) {
5234                                 # Bogus title. Ignore these so we don't bomb out later.
5235                                 continue;
5236                         }
5237
5238                         # We need to get what handler the file uses, to figure out parameters.
5239                         # Note, a hook can overide the file name, and chose an entirely different
5240                         # file (which potentially could be of a different type and have different handler).
5241                         $options = [];
5242                         $descQuery = false;
5243                         Hooks::run( 'BeforeParserFetchFileAndTitle',
5244                                 [ $this, $title, &$options, &$descQuery ] );
5245                         # Don't register it now, as TraditionalImageGallery does that later.
5246                         $file = $this->fetchFileNoRegister( $title, $options );
5247                         $handler = $file ? $file->getHandler() : false;
5248
5249                         $paramMap = [
5250                                 'img_alt' => 'gallery-internal-alt',
5251                                 'img_link' => 'gallery-internal-link',
5252                         ];
5253                         if ( $handler ) {
5254                                 $paramMap += $handler->getParamMap();
5255                                 // We don't want people to specify per-image widths.
5256                                 // Additionally the width parameter would need special casing anyhow.
5257                                 unset( $paramMap['img_width'] );
5258                         }
5259
5260                         $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5261
5262                         $label = '';
5263                         $alt = '';
5264                         $link = '';
5265                         $handlerOptions = [];
5266                         if ( isset( $matches[3] ) ) {
5267                                 // look for an |alt= definition while trying not to break existing
5268                                 // captions with multiple pipes (|) in it, until a more sensible grammar
5269                                 // is defined for images in galleries
5270
5271                                 // FIXME: Doing recursiveTagParse at this stage, and the trim before
5272                                 // splitting on '|' is a bit odd, and different from makeImage.
5273                                 $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
5274                                 // Protect LanguageConverter markup
5275                                 $parameterMatches = StringUtils::delimiterExplode(
5276                                         '-{', '}-', '|', $matches[3], true /* nested */
5277                                 );
5278
5279                                 foreach ( $parameterMatches as $parameterMatch ) {
5280                                         list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
5281                                         if ( $magicName ) {
5282                                                 $paramName = $paramMap[$magicName];
5283
5284                                                 switch ( $paramName ) {
5285                                                         case 'gallery-internal-alt':
5286                                                                 $alt = $this->stripAltText( $match, false );
5287                                                                 break;
5288                                                         case 'gallery-internal-link':
5289                                                                 $linkValue = $this->stripAltText( $match, false );
5290                                                                 if ( preg_match( '/^-{R|(.*)}-$/', $linkValue ) ) {
5291                                                                         // Result of LanguageConverter::markNoConversion
5292                                                                         // invoked on an external link.
5293                                                                         $linkValue = substr( $linkValue, 4, -2 );
5294                                                                 }
5295                                                                 list( $type, $target ) = $this->parseLinkParameter( $linkValue );
5296                                                                 if ( $type === 'link-url' ) {
5297                                                                         $link = $target;
5298                                                                         $this->mOutput->addExternalLink( $target );
5299                                                                 } elseif ( $type === 'link-title' ) {
5300                                                                         $link = $target->getLinkURL();
5301                                                                         $this->mOutput->addLink( $target );
5302                                                                 }
5303                                                                 break;
5304                                                         default:
5305                                                                 // Must be a handler specific parameter.
5306                                                                 if ( $handler->validateParam( $paramName, $match ) ) {
5307                                                                         $handlerOptions[$paramName] = $match;
5308                                                                 } else {
5309                                                                         // Guess not, consider it as caption.
5310                                                                         $this->logger->debug(
5311                                                                                 "$parameterMatch failed parameter validation" );
5312                                                                         $label = $parameterMatch;
5313                                                                 }
5314                                                 }
5315
5316                                         } else {
5317                                                 // Last pipe wins.
5318                                                 $label = $parameterMatch;
5319                                         }
5320                                 }
5321                         }
5322
5323                         $ig->add( $title, $label, $alt, $link, $handlerOptions );
5324                 }
5325                 $html = $ig->toHTML();
5326                 Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
5327                 return $html;
5328         }
5329
5330         /**
5331          * @param MediaHandler $handler
5332          * @return array
5333          */
5334         public function getImageParams( $handler ) {
5335                 if ( $handler ) {
5336                         $handlerClass = get_class( $handler );
5337                 } else {
5338                         $handlerClass = '';
5339                 }
5340                 if ( !isset( $this->mImageParams[$handlerClass] ) ) {
5341                         # Initialise static lists
5342                         static $internalParamNames = [
5343                                 'horizAlign' => [ 'left', 'right', 'center', 'none' ],
5344                                 'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
5345                                         'bottom', 'text-bottom' ],
5346                                 'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
5347                                         'upright', 'border', 'link', 'alt', 'class' ],
5348                         ];
5349                         static $internalParamMap;
5350                         if ( !$internalParamMap ) {
5351                                 $internalParamMap = [];
5352                                 foreach ( $internalParamNames as $type => $names ) {
5353                                         foreach ( $names as $name ) {
5354                                                 // For grep: img_left, img_right, img_center, img_none,
5355                                                 // img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
5356                                                 // img_bottom, img_text_bottom,
5357                                                 // img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
5358                                                 // img_border, img_link, img_alt, img_class
5359                                                 $magicName = str_replace( '-', '_', "img_$name" );
5360                                                 $internalParamMap[$magicName] = [ $type, $name ];
5361                                         }
5362                                 }
5363                         }
5364
5365                         # Add handler params
5366                         $paramMap = $internalParamMap;
5367                         if ( $handler ) {
5368                                 $handlerParamMap = $handler->getParamMap();
5369                                 foreach ( $handlerParamMap as $magic => $paramName ) {
5370                                         $paramMap[$magic] = [ 'handler', $paramName ];
5371                                 }
5372                         }
5373                         $this->mImageParams[$handlerClass] = $paramMap;
5374                         $this->mImageParamsMagicArray[$handlerClass] =
5375                                 $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5376                 }
5377                 return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
5378         }
5379
5380         /**
5381          * Parse image options text and use it to make an image
5382          *
5383          * @param Title $title
5384          * @param string $options
5385          * @param LinkHolderArray|bool $holders
5386          * @return string HTML
5387          */
5388         public function makeImage( $title, $options, $holders = false ) {
5389                 # Check if the options text is of the form "options|alt text"
5390                 # Options are:
5391                 #  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
5392                 #  * left       no resizing, just left align. label is used for alt= only
5393                 #  * right      same, but right aligned
5394                 #  * none       same, but not aligned
5395                 #  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
5396                 #  * center     center the image
5397                 #  * frame      Keep original image size, no magnify-button.
5398                 #  * framed     Same as "frame"
5399                 #  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
5400                 #  * upright    reduce width for upright images, rounded to full __0 px
5401                 #  * border     draw a 1px border around the image
5402                 #  * alt        Text for HTML alt attribute (defaults to empty)
5403                 #  * class      Set a class for img node
5404                 #  * link       Set the target of the image link. Can be external, interwiki, or local
5405                 # vertical-align values (no % or length right now):
5406                 #  * baseline
5407                 #  * sub
5408                 #  * super
5409                 #  * top
5410                 #  * text-top
5411                 #  * middle
5412                 #  * bottom
5413                 #  * text-bottom
5414
5415                 # Protect LanguageConverter markup when splitting into parts
5416                 $parts = StringUtils::delimiterExplode(
5417                         '-{', '}-', '|', $options, true /* allow nesting */
5418                 );
5419
5420                 # Give extensions a chance to select the file revision for us
5421                 $options = [];
5422                 $descQuery = false;
5423                 Hooks::run( 'BeforeParserFetchFileAndTitle',
5424                         [ $this, $title, &$options, &$descQuery ] );
5425                 # Fetch and register the file (file title may be different via hooks)
5426                 list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5427
5428                 # Get parameter map
5429                 $handler = $file ? $file->getHandler() : false;
5430
5431                 list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5432
5433                 if ( !$file ) {
5434                         $this->addTrackingCategory( 'broken-file-category' );
5435                 }
5436
5437                 # Process the input parameters
5438                 $caption = '';
5439                 $params = [ 'frame' => [], 'handler' => [],
5440                         'horizAlign' => [], 'vertAlign' => [] ];
5441                 $seenformat = false;
5442                 foreach ( $parts as $part ) {
5443                         $part = trim( $part );
5444                         list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5445                         $validated = false;
5446                         if ( isset( $paramMap[$magicName] ) ) {
5447                                 list( $type, $paramName ) = $paramMap[$magicName];
5448
5449                                 # Special case; width and height come in one variable together
5450                                 if ( $type === 'handler' && $paramName === 'width' ) {
5451                                         $parsedWidthParam = self::parseWidthParam( $value );
5452                                         if ( isset( $parsedWidthParam['width'] ) ) {
5453                                                 $width = $parsedWidthParam['width'];
5454                                                 if ( $handler->validateParam( 'width', $width ) ) {
5455                                                         $params[$type]['width'] = $width;
5456                                                         $validated = true;
5457                                                 }
5458                                         }
5459                                         if ( isset( $parsedWidthParam['height'] ) ) {
5460                                                 $height = $parsedWidthParam['height'];
5461                                                 if ( $handler->validateParam( 'height', $height ) ) {
5462                                                         $params[$type]['height'] = $height;
5463                                                         $validated = true;
5464                                                 }
5465                                         }
5466                                         # else no validation -- T15436
5467                                 } else {
5468                                         if ( $type === 'handler' ) {
5469                                                 # Validate handler parameter
5470                                                 $validated = $handler->validateParam( $paramName, $value );
5471                                         } else {
5472                                                 # Validate internal parameters
5473                                                 switch ( $paramName ) {
5474                                                         case 'manualthumb':
5475                                                         case 'alt':
5476                                                         case 'class':
5477                                                                 # @todo FIXME: Possibly check validity here for
5478                                                                 # manualthumb? downstream behavior seems odd with
5479                                                                 # missing manual thumbs.
5480                                                                 $validated = true;
5481                                                                 $value = $this->stripAltText( $value, $holders );
5482                                                                 break;
5483                                                         case 'link':
5484                                                                 list( $paramName, $value ) =
5485                                                                         $this->parseLinkParameter(
5486                                                                                 $this->stripAltText( $value, $holders )
5487                                                                         );
5488                                                                 if ( $paramName ) {
5489                                                                         $validated = true;
5490                                                                         if ( $paramName === 'no-link' ) {
5491                                                                                 $value = true;
5492                                                                         }
5493                                                                         if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
5494                                                                                 $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5495                                                                         }
5496                                                                 }
5497                                                                 break;
5498                                                         case 'frameless':
5499                                                         case 'framed':
5500                                                         case 'thumbnail':
5501                                                                 // use first appearing option, discard others.
5502                                                                 $validated = !$seenformat;
5503                                                                 $seenformat = true;
5504                                                                 break;
5505                                                         default:
5506                                                                 # Most other things appear to be empty or numeric...
5507                                                                 $validated = ( $value === false || is_numeric( trim( $value ) ) );
5508                                                 }
5509                                         }
5510
5511                                         if ( $validated ) {
5512                                                 $params[$type][$paramName] = $value;
5513                                         }
5514                                 }
5515                         }
5516                         if ( !$validated ) {
5517                                 $caption = $part;
5518                         }
5519                 }
5520
5521                 # Process alignment parameters
5522                 if ( $params['horizAlign'] ) {
5523                         $params['frame']['align'] = key( $params['horizAlign'] );
5524                 }
5525                 if ( $params['vertAlign'] ) {
5526                         $params['frame']['valign'] = key( $params['vertAlign'] );
5527                 }
5528
5529                 $params['frame']['caption'] = $caption;
5530
5531                 # Will the image be presented in a frame, with the caption below?
5532                 $imageIsFramed = isset( $params['frame']['frame'] )
5533                         || isset( $params['frame']['framed'] )
5534                         || isset( $params['frame']['thumbnail'] )
5535                         || isset( $params['frame']['manualthumb'] );
5536
5537                 # In the old days, [[Image:Foo|text...]] would set alt text.  Later it
5538                 # came to also set the caption, ordinary text after the image -- which
5539                 # makes no sense, because that just repeats the text multiple times in
5540                 # screen readers.  It *also* came to set the title attribute.
5541                 # Now that we have an alt attribute, we should not set the alt text to
5542                 # equal the caption: that's worse than useless, it just repeats the
5543                 # text.  This is the framed/thumbnail case.  If there's no caption, we
5544                 # use the unnamed parameter for alt text as well, just for the time be-
5545                 # ing, if the unnamed param is set and the alt param is not.
5546                 # For the future, we need to figure out if we want to tweak this more,
5547                 # e.g., introducing a title= parameter for the title; ignoring the un-
5548                 # named parameter entirely for images without a caption; adding an ex-
5549                 # plicit caption= parameter and preserving the old magic unnamed para-
5550                 # meter for BC; ...
5551                 if ( $imageIsFramed ) { # Framed image
5552                         if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5553                                 # No caption or alt text, add the filename as the alt text so
5554                                 # that screen readers at least get some description of the image
5555                                 $params['frame']['alt'] = $title->getText();
5556                         }
5557                         # Do not set $params['frame']['title'] because tooltips don't make sense
5558                         # for framed images
5559                 } else { # Inline image
5560                         if ( !isset( $params['frame']['alt'] ) ) {
5561                                 # No alt text, use the "caption" for the alt text
5562                                 if ( $caption !== '' ) {
5563                                         $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5564                                 } else {
5565                                         # No caption, fall back to using the filename for the
5566                                         # alt text
5567                                         $params['frame']['alt'] = $title->getText();
5568                                 }
5569                         }
5570                         # Use the "caption" for the tooltip text
5571                         $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5572                 }
5573                 $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
5574
5575                 Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
5576
5577                 # Linker does the rest
5578                 $time = $options['time'] ?? false;
5579                 $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5580                         $time, $descQuery, $this->mOptions->getThumbSize() );
5581
5582                 # Give the handler a chance to modify the parser object
5583                 if ( $handler ) {
5584                         $handler->parserTransformHook( $this, $file );
5585                 }
5586
5587                 return $ret;
5588         }
5589
5590         /**
5591          * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
5592          *
5593          * Adds an entry to appropriate link tables.
5594          *
5595          * @since 1.32
5596          * @param string $value
5597          * @return array of `[ type, target ]`, where:
5598          *   - `type` is one of:
5599          *     - `null`: Given value is not a valid link target, use default
5600          *     - `'no-link'`: Given value is empty, do not generate a link
5601          *     - `'link-url'`: Given value is a valid external link
5602          *     - `'link-title'`: Given value is a valid internal link
5603          *   - `target` is:
5604          *     - When `type` is `null` or `'no-link'`: `false`
5605          *     - When `type` is `'link-url'`: URL string corresponding to given value
5606          *     - When `type` is `'link-title'`: Title object corresponding to given value
5607          */
5608         public function parseLinkParameter( $value ) {
5609                 $chars = self::EXT_LINK_URL_CLASS;
5610                 $addr = self::EXT_LINK_ADDR;
5611                 $prots = $this->mUrlProtocols;
5612                 $type = null;
5613                 $target = false;
5614                 if ( $value === '' ) {
5615                         $type = 'no-link';
5616                 } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5617                         if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5618                                 $this->mOutput->addExternalLink( $value );
5619                                 $type = 'link-url';
5620                                 $target = $value;
5621                         }
5622                 } else {
5623                         $linkTitle = Title::newFromText( $value );
5624                         if ( $linkTitle ) {
5625                                 $this->mOutput->addLink( $linkTitle );
5626                                 $type = 'link-title';
5627                                 $target = $linkTitle;
5628                         }
5629                 }
5630                 return [ $type, $target ];
5631         }
5632
5633         /**
5634          * @param string $caption
5635          * @param LinkHolderArray|bool $holders
5636          * @return mixed|string
5637          */
5638         protected function stripAltText( $caption, $holders ) {
5639                 # Strip bad stuff out of the title (tooltip).  We can't just use
5640                 # replaceLinkHoldersText() here, because if this function is called
5641                 # from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
5642                 if ( $holders ) {
5643                         $tooltip = $holders->replaceText( $caption );
5644                 } else {
5645                         $tooltip = $this->replaceLinkHoldersText( $caption );
5646                 }
5647
5648                 # make sure there are no placeholders in thumbnail attributes
5649                 # that are later expanded to html- so expand them now and
5650                 # remove the tags
5651                 $tooltip = $this->mStripState->unstripBoth( $tooltip );
5652                 # Compatibility hack!  In HTML certain entity references not terminated
5653                 # by a semicolon are decoded (but not if we're in an attribute; that's
5654                 # how link URLs get away without properly escaping & in queries).
5655                 # But wikitext has always required semicolon-termination of entities,
5656                 # so encode & where needed to avoid decode of semicolon-less entities.
5657                 # See T209236 and
5658                 # https://www.w3.org/TR/html5/syntax.html#named-character-references
5659                 # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
5660                 $tooltip = preg_replace( "/
5661                         &                       # 1. entity prefix
5662                         (?=                     # 2. followed by:
5663                         (?:                     #  a. one of the legacy semicolon-less named entities
5664                                 A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5665                                 C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5666                                 GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5667                                 O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5668                                 U(?:acute|circ|grave|uml)|Yacute|
5669                                 a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5670                                 c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5671                                 divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5672                                 frac(?:1(?:2|4)|34)|
5673                                 gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5674                                 i(?:acute|circ|excl|grave|quest|uml)|laquo|
5675                                 lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5676                                 m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5677                                 not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5678                                 o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5679                                 p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5680                                 s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5681                                 u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5682                         )
5683                         (?:[^;]|$))     #  b. and not followed by a semicolon
5684                         # S = study, for efficiency
5685                         /Sx", '&amp;', $tooltip );
5686                 $tooltip = Sanitizer::stripAllTags( $tooltip );
5687
5688                 return $tooltip;
5689         }
5690
5691         /**
5692          * Set a flag in the output object indicating that the content is dynamic and
5693          * shouldn't be cached.
5694          * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
5695          */
5696         public function disableCache() {
5697                 $this->logger->debug( "Parser output marked as uncacheable." );
5698                 if ( !$this->mOutput ) {
5699                         throw new MWException( __METHOD__ .
5700                                 " can only be called when actually parsing something" );
5701                 }
5702                 $this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
5703         }
5704
5705         /**
5706          * Callback from the Sanitizer for expanding items found in HTML attribute
5707          * values, so they can be safely tested and escaped.
5708          *
5709          * @param string &$text
5710          * @param bool|PPFrame $frame
5711          * @return string
5712          */
5713         public function attributeStripCallback( &$text, $frame = false ) {
5714                 $text = $this->replaceVariables( $text, $frame );
5715                 $text = $this->mStripState->unstripBoth( $text );
5716                 return $text;
5717         }
5718
5719         /**
5720          * Accessor
5721          *
5722          * @return array
5723          */
5724         public function getTags() {
5725                 $this->firstCallInit();
5726                 return array_merge(
5727                         array_keys( $this->mTransparentTagHooks ),
5728                         array_keys( $this->mTagHooks ),
5729                         array_keys( $this->mFunctionTagHooks )
5730                 );
5731         }
5732
5733         /**
5734          * @since 1.32
5735          * @return array
5736          */
5737         public function getFunctionSynonyms() {
5738                 $this->firstCallInit();
5739                 return $this->mFunctionSynonyms;
5740         }
5741
5742         /**
5743          * @since 1.32
5744          * @return string
5745          */
5746         public function getUrlProtocols() {
5747                 return $this->mUrlProtocols;
5748         }
5749
5750         /**
5751          * Replace transparent tags in $text with the values given by the callbacks.
5752          *
5753          * Transparent tag hooks are like regular XML-style tag hooks, except they
5754          * operate late in the transformation sequence, on HTML instead of wikitext.
5755          *
5756          * @param string $text
5757          *
5758          * @return string
5759          */
5760         public function replaceTransparentTags( $text ) {
5761                 $matches = [];
5762                 $elements = array_keys( $this->mTransparentTagHooks );
5763                 $text = self::extractTagsAndParams( $elements, $text, $matches );
5764                 $replacements = [];
5765
5766                 foreach ( $matches as $marker => $data ) {
5767                         list( $element, $content, $params, $tag ) = $data;
5768                         $tagName = strtolower( $element );
5769                         if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
5770                                 $output = call_user_func_array(
5771                                         $this->mTransparentTagHooks[$tagName],
5772                                         [ $content, $params, $this ]
5773                                 );
5774                         } else {
5775                                 $output = $tag;
5776                         }
5777                         $replacements[$marker] = $output;
5778                 }
5779                 return strtr( $text, $replacements );
5780         }
5781
5782         /**
5783          * Break wikitext input into sections, and either pull or replace
5784          * some particular section's text.
5785          *
5786          * External callers should use the getSection and replaceSection methods.
5787          *
5788          * @param string $text Page wikitext
5789          * @param string|int $sectionId A section identifier string of the form:
5790          *   "<flag1> - <flag2> - ... - <section number>"
5791          *
5792          * Currently the only recognised flag is "T", which means the target section number
5793          * was derived during a template inclusion parse, in other words this is a template
5794          * section edit link. If no flags are given, it was an ordinary section edit link.
5795          * This flag is required to avoid a section numbering mismatch when a section is
5796          * enclosed by "<includeonly>" (T8563).
5797          *
5798          * The section number 0 pulls the text before the first heading; other numbers will
5799          * pull the given section along with its lower-level subsections. If the section is
5800          * not found, $mode=get will return $newtext, and $mode=replace will return $text.
5801          *
5802          * Section 0 is always considered to exist, even if it only contains the empty
5803          * string. If $text is the empty string and section 0 is replaced, $newText is
5804          * returned.
5805          *
5806          * @param string $mode One of "get" or "replace"
5807          * @param string $newText Replacement text for section data.
5808          * @return string For "get", the extracted section text.
5809          *   for "replace", the whole page with the section replaced.
5810          */
5811         private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
5812                 global $wgTitle; # not generally used but removes an ugly failure mode
5813
5814                 $magicScopeVariable = $this->lock();
5815                 $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
5816                 $outText = '';
5817                 $frame = $this->getPreprocessor()->newFrame();
5818
5819                 # Process section extraction flags
5820                 $flags = 0;
5821                 $sectionParts = explode( '-', $sectionId );
5822                 $sectionIndex = array_pop( $sectionParts );
5823                 foreach ( $sectionParts as $part ) {
5824                         if ( $part === 'T' ) {
5825                                 $flags |= self::PTD_FOR_INCLUSION;
5826                         }
5827                 }
5828
5829                 # Check for empty input
5830                 if ( strval( $text ) === '' ) {
5831                         # Only sections 0 and T-0 exist in an empty document
5832                         if ( $sectionIndex == 0 ) {
5833                                 if ( $mode === 'get' ) {
5834                                         return '';
5835                                 }
5836
5837                                 return $newText;
5838                         } else {
5839                                 if ( $mode === 'get' ) {
5840                                         return $newText;
5841                                 }
5842
5843                                 return $text;
5844                         }
5845                 }
5846
5847                 # Preprocess the text
5848                 $root = $this->preprocessToDom( $text, $flags );
5849
5850                 # <h> nodes indicate section breaks
5851                 # They can only occur at the top level, so we can find them by iterating the root's children
5852                 $node = $root->getFirstChild();
5853
5854                 # Find the target section
5855                 if ( $sectionIndex == 0 ) {
5856                         # Section zero doesn't nest, level=big
5857                         $targetLevel = 1000;
5858                 } else {
5859                         while ( $node ) {
5860                                 if ( $node->getName() === 'h' ) {
5861                                         $bits = $node->splitHeading();
5862                                         if ( $bits['i'] == $sectionIndex ) {
5863                                                 $targetLevel = $bits['level'];
5864                                                 break;
5865                                         }
5866                                 }
5867                                 if ( $mode === 'replace' ) {
5868                                         $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5869                                 }
5870                                 $node = $node->getNextSibling();
5871                         }
5872                 }
5873
5874                 if ( !$node ) {
5875                         # Not found
5876                         if ( $mode === 'get' ) {
5877                                 return $newText;
5878                         } else {
5879                                 return $text;
5880                         }
5881                 }
5882
5883                 # Find the end of the section, including nested sections
5884                 do {
5885                         if ( $node->getName() === 'h' ) {
5886                                 $bits = $node->splitHeading();
5887                                 $curLevel = $bits['level'];
5888                                 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5889                                         break;
5890                                 }
5891                         }
5892                         if ( $mode === 'get' ) {
5893                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5894                         }
5895                         $node = $node->getNextSibling();
5896                 } while ( $node );
5897
5898                 # Write out the remainder (in replace mode only)
5899                 if ( $mode === 'replace' ) {
5900                         # Output the replacement text
5901                         # Add two newlines on -- trailing whitespace in $newText is conventionally
5902                         # stripped by the editor, so we need both newlines to restore the paragraph gap
5903                         # Only add trailing whitespace if there is newText
5904                         if ( $newText != "" ) {
5905                                 $outText .= $newText . "\n\n";
5906                         }
5907
5908                         while ( $node ) {
5909                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5910                                 $node = $node->getNextSibling();
5911                         }
5912                 }
5913
5914                 if ( is_string( $outText ) ) {
5915                         # Re-insert stripped tags
5916                         $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5917                 }
5918
5919                 return $outText;
5920         }
5921
5922         /**
5923          * This function returns the text of a section, specified by a number ($section).
5924          * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
5925          * the first section before any such heading (section 0).
5926          *
5927          * If a section contains subsections, these are also returned.
5928          *
5929          * @param string $text Text to look in
5930          * @param string|int $sectionId Section identifier as a number or string
5931          * (e.g. 0, 1 or 'T-1').
5932          * @param string $defaultText Default to return if section is not found
5933          *
5934          * @return string Text of the requested section
5935          */
5936         public function getSection( $text, $sectionId, $defaultText = '' ) {
5937                 return $this->extractSections( $text, $sectionId, 'get', $defaultText );
5938         }
5939
5940         /**
5941          * This function returns $oldtext after the content of the section
5942          * specified by $section has been replaced with $text. If the target
5943          * section does not exist, $oldtext is returned unchanged.
5944          *
5945          * @param string $oldText Former text of the article
5946          * @param string|int $sectionId Section identifier as a number or string
5947          * (e.g. 0, 1 or 'T-1').
5948          * @param string $newText Replacing text
5949          *
5950          * @return string Modified text
5951          */
5952         public function replaceSection( $oldText, $sectionId, $newText ) {
5953                 return $this->extractSections( $oldText, $sectionId, 'replace', $newText );
5954         }
5955
5956         /**
5957          * Get the ID of the revision we are parsing
5958          *
5959          * The return value will be either:
5960          *   - a) Positive, indicating a specific revision ID (current or old)
5961          *   - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
5962          *   - c) Null, meaning the parse is for preview mode and there is no revision
5963          *
5964          * @return int|null
5965          */
5966         public function getRevisionId() {
5967                 return $this->mRevisionId;
5968         }
5969
5970         /**
5971          * Get the revision object for $this->mRevisionId
5972          *
5973          * @return Revision|null Either a Revision object or null
5974          * @since 1.23 (public since 1.23)
5975          */
5976         public function getRevisionObject() {
5977                 if ( $this->mRevisionObject ) {
5978                         return $this->mRevisionObject;
5979                 }
5980
5981                 // NOTE: try to get the RevisionObject even if mRevisionId is null.
5982                 // This is useful when parsing a revision that has not yet been saved.
5983                 // However, if we get back a saved revision even though we are in
5984                 // preview mode, we'll have to ignore it, see below.
5985                 // NOTE: This callback may be used to inject an OLD revision that was
5986                 // already loaded, so "current" is a bit of a misnomer. We can't just
5987                 // skip it if mRevisionId is set.
5988                 $rev = call_user_func(
5989                         $this->mOptions->getCurrentRevisionCallback(),
5990                         $this->getTitle(),
5991                         $this
5992                 );
5993
5994                 if ( $this->mRevisionId === null && $rev && $rev->getId() ) {
5995                         // We are in preview mode (mRevisionId is null), and the current revision callback
5996                         // returned an existing revision. Ignore it and return null, it's probably the page's
5997                         // current revision, which is not what we want here. Note that we do want to call the
5998                         // callback to allow the unsaved revision to be injected here, e.g. for
5999                         // self-transclusion previews.
6000                         return null;
6001                 }
6002
6003                 // If the parse is for a new revision, then the callback should have
6004                 // already been set to force the object and should match mRevisionId.
6005                 // If not, try to fetch by mRevisionId for sanity.
6006                 if ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
6007                         $rev = Revision::newFromId( $this->mRevisionId );
6008                 }
6009
6010                 $this->mRevisionObject = $rev;
6011
6012                 return $this->mRevisionObject;
6013         }
6014
6015         /**
6016          * Get the timestamp associated with the current revision, adjusted for
6017          * the default server-local timestamp
6018          * @return string TS_MW timestamp
6019          */
6020         public function getRevisionTimestamp() {
6021                 if ( $this->mRevisionTimestamp !== null ) {
6022                         return $this->mRevisionTimestamp;
6023                 }
6024
6025                 # Use specified revision timestamp, falling back to the current timestamp
6026                 $revObject = $this->getRevisionObject();
6027                 $timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
6028                 $this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone
6029
6030                 # The cryptic '' timezone parameter tells to use the site-default
6031                 # timezone offset instead of the user settings.
6032                 # Since this value will be saved into the parser cache, served
6033                 # to other users, and potentially even used inside links and such,
6034                 # it needs to be consistent for all visitors.
6035                 $this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
6036
6037                 return $this->mRevisionTimestamp;
6038         }
6039
6040         /**
6041          * Get the name of the user that edited the last revision
6042          *
6043          * @return string User name
6044          */
6045         public function getRevisionUser() {
6046                 if ( is_null( $this->mRevisionUser ) ) {
6047                         $revObject = $this->getRevisionObject();
6048
6049                         # if this template is subst: the revision id will be blank,
6050                         # so just use the current user's name
6051                         if ( $revObject ) {
6052                                 $this->mRevisionUser = $revObject->getUserText();
6053                         } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
6054                                 $this->mRevisionUser = $this->getUser()->getName();
6055                         }
6056                 }
6057                 return $this->mRevisionUser;
6058         }
6059
6060         /**
6061          * Get the size of the revision
6062          *
6063          * @return int|null Revision size
6064          */
6065         public function getRevisionSize() {
6066                 if ( is_null( $this->mRevisionSize ) ) {
6067                         $revObject = $this->getRevisionObject();
6068
6069                         # if this variable is subst: the revision id will be blank,
6070                         # so just use the parser input size, because the own substituation
6071                         # will change the size.
6072                         if ( $revObject ) {
6073                                 $this->mRevisionSize = $revObject->getSize();
6074                         } else {
6075                                 $this->mRevisionSize = $this->mInputSize;
6076                         }
6077                 }
6078                 return $this->mRevisionSize;
6079         }
6080
6081         /**
6082          * Mutator for $mDefaultSort
6083          *
6084          * @param string $sort New value
6085          */
6086         public function setDefaultSort( $sort ) {
6087                 $this->mDefaultSort = $sort;
6088                 $this->mOutput->setProperty( 'defaultsort', $sort );
6089         }
6090
6091         /**
6092          * Accessor for $mDefaultSort
6093          * Will use the empty string if none is set.
6094          *
6095          * This value is treated as a prefix, so the
6096          * empty string is equivalent to sorting by
6097          * page name.
6098          *
6099          * @return string
6100          */
6101         public function getDefaultSort() {
6102                 if ( $this->mDefaultSort !== false ) {
6103                         return $this->mDefaultSort;
6104                 } else {
6105                         return '';
6106                 }
6107         }
6108
6109         /**
6110          * Accessor for $mDefaultSort
6111          * Unlike getDefaultSort(), will return false if none is set
6112          *
6113          * @return string|bool
6114          */
6115         public function getCustomDefaultSort() {
6116                 return $this->mDefaultSort;
6117         }
6118
6119         private static function getSectionNameFromStrippedText( $text ) {
6120                 $text = Sanitizer::normalizeSectionNameWhitespace( $text );
6121                 $text = Sanitizer::decodeCharReferences( $text );
6122                 $text = self::normalizeSectionName( $text );
6123                 return $text;
6124         }
6125
6126         private static function makeAnchor( $sectionName ) {
6127                 return '#' . Sanitizer::escapeIdForLink( $sectionName );
6128         }
6129
6130         private function makeLegacyAnchor( $sectionName ) {
6131                 $fragmentMode = $this->svcOptions->get( 'FragmentMode' );
6132                 if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
6133                         // ForAttribute() and ForLink() are the same for legacy encoding
6134                         $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
6135                 } else {
6136                         $id = Sanitizer::escapeIdForLink( $sectionName );
6137                 }
6138
6139                 return "#$id";
6140         }
6141
6142         /**
6143          * Try to guess the section anchor name based on a wikitext fragment
6144          * presumably extracted from a heading, for example "Header" from
6145          * "== Header ==".
6146          *
6147          * @param string $text
6148          * @return string Anchor (starting with '#')
6149          */
6150         public function guessSectionNameFromWikiText( $text ) {
6151                 # Strip out wikitext links(they break the anchor)
6152                 $text = $this->stripSectionName( $text );
6153                 $sectionName = self::getSectionNameFromStrippedText( $text );
6154                 return self::makeAnchor( $sectionName );
6155         }
6156
6157         /**
6158          * Same as guessSectionNameFromWikiText(), but produces legacy anchors
6159          * instead, if possible. For use in redirects, since various versions
6160          * of Microsoft browsers interpret Location: headers as something other
6161          * than UTF-8, resulting in breakage.
6162          *
6163          * @param string $text The section name
6164          * @return string Anchor (starting with '#')
6165          */
6166         public function guessLegacySectionNameFromWikiText( $text ) {
6167                 # Strip out wikitext links(they break the anchor)
6168                 $text = $this->stripSectionName( $text );
6169                 $sectionName = self::getSectionNameFromStrippedText( $text );
6170                 return $this->makeLegacyAnchor( $sectionName );
6171         }
6172
6173         /**
6174          * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
6175          * @param string $text Section name (plain text)
6176          * @return string Anchor (starting with '#')
6177          */
6178         public static function guessSectionNameFromStrippedText( $text ) {
6179                 $sectionName = self::getSectionNameFromStrippedText( $text );
6180                 return self::makeAnchor( $sectionName );
6181         }
6182
6183         /**
6184          * Apply the same normalization as code making links to this section would
6185          *
6186          * @param string $text
6187          * @return string
6188          */
6189         private static function normalizeSectionName( $text ) {
6190                 # T90902: ensure the same normalization is applied for IDs as to links
6191                 $titleParser = MediaWikiServices::getInstance()->getTitleParser();
6192                 try {
6193
6194                         $parts = $titleParser->splitTitleString( "#$text" );
6195                 } catch ( MalformedTitleException $ex ) {
6196                         return $text;
6197                 }
6198                 return $parts['fragment'];
6199         }
6200
6201         /**
6202          * Strips a text string of wikitext for use in a section anchor
6203          *
6204          * Accepts a text string and then removes all wikitext from the
6205          * string and leaves only the resultant text (i.e. the result of
6206          * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
6207          * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
6208          * to create valid section anchors by mimicing the output of the
6209          * parser when headings are parsed.
6210          *
6211          * @param string $text Text string to be stripped of wikitext
6212          * for use in a Section anchor
6213          * @return string Filtered text string
6214          */
6215         public function stripSectionName( $text ) {
6216                 # Strip internal link markup
6217                 $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
6218                 $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text );
6219
6220                 # Strip external link markup
6221                 # @todo FIXME: Not tolerant to blank link text
6222                 # I.E. [https://www.mediawiki.org] will render as [1] or something depending
6223                 # on how many empty links there are on the page - need to figure that out.
6224                 $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
6225
6226                 # Parse wikitext quotes (italics & bold)
6227                 $text = $this->doQuotes( $text );
6228
6229                 # Strip HTML tags
6230                 $text = StringUtils::delimiterReplace( '<', '>', '', $text );
6231                 return $text;
6232         }
6233
6234         /**
6235          * strip/replaceVariables/unstrip for preprocessor regression testing
6236          *
6237          * @param string $text
6238          * @param Title $title
6239          * @param ParserOptions $options
6240          * @param int $outputType
6241          *
6242          * @return string
6243          */
6244         public function testSrvus( $text, Title $title, ParserOptions $options,
6245                 $outputType = self::OT_HTML
6246         ) {
6247                 $magicScopeVariable = $this->lock();
6248                 $this->startParse( $title, $options, $outputType, true );
6249
6250                 $text = $this->replaceVariables( $text );
6251                 $text = $this->mStripState->unstripBoth( $text );
6252                 $text = Sanitizer::removeHTMLtags( $text );
6253                 return $text;
6254         }
6255
6256         /**
6257          * @param string $text
6258          * @param Title $title
6259          * @param ParserOptions $options
6260          * @return string
6261          */
6262         public function testPst( $text, Title $title, ParserOptions $options ) {
6263                 return $this->preSaveTransform( $text, $title, $options->getUser(), $options );
6264         }
6265
6266         /**
6267          * @param string $text
6268          * @param Title $title
6269          * @param ParserOptions $options
6270          * @return string
6271          */
6272         public function testPreprocess( $text, Title $title, ParserOptions $options ) {
6273                 return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
6274         }
6275
6276         /**
6277          * Call a callback function on all regions of the given text that are not
6278          * inside strip markers, and replace those regions with the return value
6279          * of the callback. For example, with input:
6280          *
6281          *  aaa<MARKER>bbb
6282          *
6283          * This will call the callback function twice, with 'aaa' and 'bbb'. Those
6284          * two strings will be replaced with the value returned by the callback in
6285          * each case.
6286          *
6287          * @param string $s
6288          * @param callable $callback
6289          *
6290          * @return string
6291          */
6292         public function markerSkipCallback( $s, $callback ) {
6293                 $i = 0;
6294                 $out = '';
6295                 while ( $i < strlen( $s ) ) {
6296                         $markerStart = strpos( $s, self::MARKER_PREFIX, $i );
6297                         if ( $markerStart === false ) {
6298                                 $out .= call_user_func( $callback, substr( $s, $i ) );
6299                                 break;
6300                         } else {
6301                                 $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );
6302                                 $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
6303                                 if ( $markerEnd === false ) {
6304                                         $out .= substr( $s, $markerStart );
6305                                         break;
6306                                 } else {
6307                                         $markerEnd += strlen( self::MARKER_SUFFIX );
6308                                         $out .= substr( $s, $markerStart, $markerEnd - $markerStart );
6309                                         $i = $markerEnd;
6310                                 }
6311                         }
6312                 }
6313                 return $out;
6314         }
6315
6316         /**
6317          * Remove any strip markers found in the given text.
6318          *
6319          * @param string $text
6320          * @return string
6321          */
6322         public function killMarkers( $text ) {
6323                 return $this->mStripState->killMarkers( $text );
6324         }
6325
6326         /**
6327          * Save the parser state required to convert the given half-parsed text to
6328          * HTML. "Half-parsed" in this context means the output of
6329          * recursiveTagParse() or internalParse(). This output has strip markers
6330          * from replaceVariables (extensionSubstitution() etc.), and link
6331          * placeholders from replaceLinkHolders().
6332          *
6333          * Returns an array which can be serialized and stored persistently. This
6334          * array can later be loaded into another parser instance with
6335          * unserializeHalfParsedText(). The text can then be safely incorporated into
6336          * the return value of a parser hook.
6337          *
6338          * @deprecated since 1.31
6339          * @param string $text
6340          *
6341          * @return array
6342          */
6343         public function serializeHalfParsedText( $text ) {
6344                 wfDeprecated( __METHOD__, '1.31' );
6345                 $data = [
6346                         'text' => $text,
6347                         'version' => self::HALF_PARSED_VERSION,
6348                         'stripState' => $this->mStripState->getSubState( $text ),
6349                         'linkHolders' => $this->mLinkHolders->getSubArray( $text )
6350                 ];
6351                 return $data;
6352         }
6353
6354         /**
6355          * Load the parser state given in the $data array, which is assumed to
6356          * have been generated by serializeHalfParsedText(). The text contents is
6357          * extracted from the array, and its markers are transformed into markers
6358          * appropriate for the current Parser instance. This transformed text is
6359          * returned, and can be safely included in the return value of a parser
6360          * hook.
6361          *
6362          * If the $data array has been stored persistently, the caller should first
6363          * check whether it is still valid, by calling isValidHalfParsedText().
6364          *
6365          * @deprecated since 1.31
6366          * @param array $data Serialized data
6367          * @throws MWException
6368          * @return string
6369          */
6370         public function unserializeHalfParsedText( $data ) {
6371                 wfDeprecated( __METHOD__, '1.31' );
6372                 if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
6373                         throw new MWException( __METHOD__ . ': invalid version' );
6374                 }
6375
6376                 # First, extract the strip state.
6377                 $texts = [ $data['text'] ];
6378                 $texts = $this->mStripState->merge( $data['stripState'], $texts );
6379
6380                 # Now renumber links
6381                 $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
6382
6383                 # Should be good to go.
6384                 return $texts[0];
6385         }
6386
6387         /**
6388          * Returns true if the given array, presumed to be generated by
6389          * serializeHalfParsedText(), is compatible with the current version of the
6390          * parser.
6391          *
6392          * @deprecated since 1.31
6393          * @param array $data
6394          *
6395          * @return bool
6396          */
6397         public function isValidHalfParsedText( $data ) {
6398                 wfDeprecated( __METHOD__, '1.31' );
6399                 return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
6400         }
6401
6402         /**
6403          * Parsed a width param of imagelink like 300px or 200x300px
6404          *
6405          * @param string $value
6406          * @param bool $parseHeight
6407          *
6408          * @return array
6409          * @since 1.20
6410          */
6411         public static function parseWidthParam( $value, $parseHeight = true ) {
6412                 $parsedWidthParam = [];
6413                 if ( $value === '' ) {
6414                         return $parsedWidthParam;
6415                 }
6416                 $m = [];
6417                 # (T15500) In both cases (width/height and width only),
6418                 # permit trailing "px" for backward compatibility.
6419                 if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
6420                         $width = intval( $m[1] );
6421                         $height = intval( $m[2] );
6422                         $parsedWidthParam['width'] = $width;
6423                         $parsedWidthParam['height'] = $height;
6424                 } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
6425                         $width = intval( $value );
6426                         $parsedWidthParam['width'] = $width;
6427                 }
6428                 return $parsedWidthParam;
6429         }
6430
6431         /**
6432          * Lock the current instance of the parser.
6433          *
6434          * This is meant to stop someone from calling the parser
6435          * recursively and messing up all the strip state.
6436          *
6437          * @throws MWException If parser is in a parse
6438          * @return ScopedCallback The lock will be released once the return value goes out of scope.
6439          */
6440         protected function lock() {
6441                 if ( $this->mInParse ) {
6442                         throw new MWException( "Parser state cleared while parsing. "
6443                                 . "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
6444                 }
6445
6446                 // Save the backtrace when locking, so that if some code tries locking again,
6447                 // we can print the lock owner's backtrace for easier debugging
6448                 $e = new Exception;
6449                 $this->mInParse = $e->getTraceAsString();
6450
6451                 $recursiveCheck = new ScopedCallback( function () {
6452                         $this->mInParse = false;
6453                 } );
6454
6455                 return $recursiveCheck;
6456         }
6457
6458         /**
6459          * Strip outer <p></p> tag from the HTML source of a single paragraph.
6460          *
6461          * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
6462          * or if there is more than one <p/> tag in the input HTML.
6463          *
6464          * @param string $html
6465          * @return string
6466          * @since 1.24
6467          */
6468         public static function stripOuterParagraph( $html ) {
6469                 $m = [];
6470                 if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) && strpos( $m[1], '</p>' ) === false ) {
6471                         $html = $m[1];
6472                 }
6473
6474                 return $html;
6475         }
6476
6477         /**
6478          * Return this parser if it is not doing anything, otherwise
6479          * get a fresh parser. You can use this method by doing
6480          * $newParser = $oldParser->getFreshParser(), or more simply
6481          * $oldParser->getFreshParser()->parse( ... );
6482          * if you're unsure if $oldParser is safe to use.
6483          *
6484          * @since 1.24
6485          * @return Parser A parser object that is not parsing anything
6486          */
6487         public function getFreshParser() {
6488                 if ( $this->mInParse ) {
6489                         return $this->factory->create();
6490                 } else {
6491                         return $this;
6492                 }
6493         }
6494
6495         /**
6496          * Set's up the PHP implementation of OOUI for use in this request
6497          * and instructs OutputPage to enable OOUI for itself.
6498          *
6499          * @since 1.26
6500          */
6501         public function enableOOUI() {
6502                 OutputPage::setupOOUI();
6503                 $this->mOutput->setEnableOOUI( true );
6504         }
6505
6506         /**
6507          * @param string $flag
6508          * @param string $reason
6509          */
6510         protected function setOutputFlag( $flag, $reason ) {
6511                 $this->mOutput->setFlag( $flag );
6512                 $name = $this->mTitle->getPrefixedText();
6513                 $this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
6514         }
6515 }