From 2caa7829fcc6a0ab45f91c4346c0d5a9100ef4dc Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sat, 15 Sep 2012 14:51:58 -0700 Subject: [PATCH] Generated node count limit To prevent large template DOM caches from sending servers into swap, throw an exception when more than a configurable number of DOM elements ($wgMaxGeneratedPPNodeCount) is generated. Unfortunately, it wasn't possible to return a normal error message, because doing so broke the pre-save transform (PST) and extractSections, and corrupted the article text. It's safer to refuse to save the edit, and we don't have decent ways to do that short of throwing an exception. Ideally we would like to have an upstream patch that hooks libxml to allocate memory from PHP's request pool; then a fatal error would be raised instead of swapping. Change-Id: I4cb4f6fd313e1e0940b56cc5e586afd1bea9267a --- includes/DefaultSettings.php | 10 +++++++++- includes/parser/Parser.php | 8 ++++++-- includes/parser/ParserOptions.php | 11 ++++++++++- includes/parser/Preprocessor_DOM.php | 9 +++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index ac4ee04132..2fa0c57a54 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -3218,10 +3218,18 @@ $wgParserConf = array( $wgMaxTocLevel = 999; /** - * A complexity limit on template expansion + * A complexity limit on template expansion: the maximum number of nodes visited + * by PPFrame::expand() */ $wgMaxPPNodeCount = 1000000; +/** + * A complexity limit on template expansion: the maximum number of nodes + * generated by Preprocessor::preprocessToObj() + */ +$wgMaxGeneratedPPNodeCount = 1000000; + + /** * Maximum recursion depth for templates within templates. 
* The current parser adds two levels to the PHP call stack for each template, diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index de55de0c0f..4b6af7f381 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -163,7 +163,8 @@ class Parser { var $mLinkHolders; var $mLinkID; - var $mIncludeSizes, $mPPNodeCount, $mHighestExpansionDepth, $mDefaultSort; + var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth; + var $mDefaultSort; var $mTplExpandCache; # empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; var $mExpensiveFunctionCount; # number of expensive parser function calls @@ -306,6 +307,7 @@ class Parser { 'arg' => 0, ); $this->mPPNodeCount = 0; + $this->mGeneratedPPNodeCount = 0; $this->mHighestExpansionDepth = 0; $this->mDefaultSort = false; $this->mHeadings = array(); @@ -482,7 +484,9 @@ class Parser { $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n"; $limitReport = "NewPP limit report\n" . - "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" . + "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" . + "Preprocessor generated node count: " . + "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" . "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" . "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n". "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n". 
diff --git a/includes/parser/ParserOptions.php b/includes/parser/ParserOptions.php index 211fcd63cf..ff9f7efa6c 100644 --- a/includes/parser/ParserOptions.php +++ b/includes/parser/ParserOptions.php @@ -94,6 +94,11 @@ class ParserOptions { * Maximum number of nodes touched by PPFrame::expand() */ var $mMaxPPNodeCount; + + /** + * Maximum number of nodes generated by Preprocessor::preprocessToObj() + */ + var $mMaxGeneratedPPNodeCount; /** * Maximum recursion depth in PPFrame::expand() @@ -219,6 +224,7 @@ class ParserOptions { function getTargetLanguage() { return $this->mTargetLanguage; } function getMaxIncludeSize() { return $this->mMaxIncludeSize; } function getMaxPPNodeCount() { return $this->mMaxPPNodeCount; } + function getMaxGeneratedPPNodeCount() { return $this->mMaxGeneratedPPNodeCount; } function getMaxPPExpandDepth() { return $this->mMaxPPExpandDepth; } function getMaxTemplateDepth() { return $this->mMaxTemplateDepth; } /* @since 1.20 */ @@ -307,6 +313,7 @@ class ParserOptions { function setTargetLanguage( $x ) { return wfSetVar( $this->mTargetLanguage, $x, true ); } function setMaxIncludeSize( $x ) { return wfSetVar( $this->mMaxIncludeSize, $x ); } function setMaxPPNodeCount( $x ) { return wfSetVar( $this->mMaxPPNodeCount, $x ); } + function setMaxGeneratedPPNodeCount( $x ) { return wfSetVar( $this->mMaxGeneratedPPNodeCount, $x ); } function setMaxTemplateDepth( $x ) { return wfSetVar( $this->mMaxTemplateDepth, $x ); } /* @since 1.20 */ function setExpensiveParserFunctionLimit( $x ) { return wfSetVar( $this->mExpensiveParserFunctionLimit, $x ); } @@ -404,7 +411,8 @@ class ParserOptions { global $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages, $wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion, $wgMaxArticleSize, $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth, - $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit; + $wgCleanSignatures, $wgExternalLinkTarget, 
$wgExpensiveParserFunctionLimit, + $wgMaxGeneratedPPNodeCount; wfProfileIn( __METHOD__ ); @@ -416,6 +424,7 @@ class ParserOptions { $this->mAllowSpecialInclusion = $wgAllowSpecialInclusion; $this->mMaxIncludeSize = $wgMaxArticleSize * 1024; $this->mMaxPPNodeCount = $wgMaxPPNodeCount; + $this->mMaxGeneratedPPNodeCount = $wgMaxGeneratedPPNodeCount; $this->mMaxPPExpandDepth = $wgMaxPPExpandDepth; $this->mMaxTemplateDepth = $wgMaxTemplateDepth; $this->mExpensiveParserFunctionLimit = $wgExpensiveParserFunctionLimit; diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index e75237b41b..34de0ba541 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -162,6 +162,15 @@ class Preprocessor_DOM implements Preprocessor { } } + + // Fail if the number of elements exceeds acceptable limits + // Do not attempt to generate the DOM + $this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' ); + $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount(); + if ( $this->parser->mGeneratedPPNodeCount > $max ) { + throw new MWException( __METHOD__.': generated node count limit exceeded' ); + } + wfProfileIn( __METHOD__.'-loadXML' ); $dom = new DOMDocument; wfSuppressWarnings(); -- 2.20.1