Generated node count limit
author: Tim Starling <tstarling@wikimedia.org>
Sat, 15 Sep 2012 21:51:58 +0000 (14:51 -0700)
committer: Tim Starling <tstarling@wikimedia.org>
Sat, 15 Sep 2012 21:51:58 +0000 (14:51 -0700)
To prevent large template DOM caches from sending servers into swap,
throw an exception when more than some number of DOM elements are
parsed. Unfortunately, it wasn't possible to return a normal error
message instead, because doing so broke PST (the pre-save transform)
and extractSections and corrupted the article text. It's safer to
refuse to save the edit, and we don't have decent ways to do that
short of throwing an exception.

Ideally we would like to have an upstream patch that hooks libxml to
allocate memory from PHP's request pool, so that a fatal error would be
raised instead of swapping.

Change-Id: I4cb4f6fd313e1e0940b56cc5e586afd1bea9267a

includes/DefaultSettings.php
includes/parser/Parser.php
includes/parser/ParserOptions.php
includes/parser/Preprocessor_DOM.php

index ac4ee04..2fa0c57 100644 (file)
@@ -3218,10 +3218,18 @@ $wgParserConf = array(
 $wgMaxTocLevel = 999;
 
 /**
- * A complexity limit on template expansion
+ * A complexity limit on template expansion: the maximum number of nodes visited
+ * by PPFrame::expand()
  */
 $wgMaxPPNodeCount = 1000000;
 
+/**
+ * A complexity limit on template expansion: the maximum number of nodes 
+ * generated by Preprocessor::preprocessToObj()
+ */
+$wgMaxGeneratedPPNodeCount = 1000000;
+
+
 /**
  * Maximum recursion depth for templates within templates.
  * The current parser adds two levels to the PHP call stack for each template,
index de55de0..4b6af7f 100644 (file)
@@ -163,7 +163,8 @@ class Parser {
        var $mLinkHolders;
 
        var $mLinkID;
-       var $mIncludeSizes, $mPPNodeCount, $mHighestExpansionDepth, $mDefaultSort;
+       var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
+       var $mDefaultSort;
        var $mTplExpandCache; # empty-frame expansion cache
        var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
        var $mExpensiveFunctionCount; # number of expensive parser function calls
@@ -306,6 +307,7 @@ class Parser {
                        'arg' => 0,
                );
                $this->mPPNodeCount = 0;
+               $this->mGeneratedPPNodeCount = 0;
                $this->mHighestExpansionDepth = 0;
                $this->mDefaultSort = false;
                $this->mHeadings = array();
@@ -482,7 +484,9 @@ class Parser {
                        $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
                        $limitReport =
                                "NewPP limit report\n" .
-                               "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
+                               "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
+                               "Preprocessor generated node count: " .
+                                       "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
                                "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
                                "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
                                "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n".
index 211fcd6..ff9f7ef 100644 (file)
@@ -94,6 +94,11 @@ class ParserOptions {
         * Maximum number of nodes touched by PPFrame::expand()
         */
        var $mMaxPPNodeCount;
+
+       /**
+        * Maximum number of nodes generated by Preprocessor::preprocessToObj()
+        */
+       var $mMaxGeneratedPPNodeCount;
        
        /**
         * Maximum recursion depth in PPFrame::expand()
@@ -219,6 +224,7 @@ class ParserOptions {
        function getTargetLanguage()                { return $this->mTargetLanguage; }
        function getMaxIncludeSize()                { return $this->mMaxIncludeSize; }
        function getMaxPPNodeCount()                { return $this->mMaxPPNodeCount; }
+       function getMaxGeneratedPPNodeCount()       { return $this->mMaxGeneratedPPNodeCount; }
        function getMaxPPExpandDepth()              { return $this->mMaxPPExpandDepth; }
        function getMaxTemplateDepth()              { return $this->mMaxTemplateDepth; }
        /* @since 1.20 */
@@ -307,6 +313,7 @@ class ParserOptions {
        function setTargetLanguage( $x )            { return wfSetVar( $this->mTargetLanguage, $x, true ); }
        function setMaxIncludeSize( $x )            { return wfSetVar( $this->mMaxIncludeSize, $x ); }
        function setMaxPPNodeCount( $x )            { return wfSetVar( $this->mMaxPPNodeCount, $x ); }
+       function setMaxGeneratedPPNodeCount( $x )   { return wfSetVar( $this->mMaxGeneratedPPNodeCount, $x ); }
        function setMaxTemplateDepth( $x )          { return wfSetVar( $this->mMaxTemplateDepth, $x ); }
        /* @since 1.20 */
        function setExpensiveParserFunctionLimit( $x ) { return wfSetVar( $this->mExpensiveParserFunctionLimit, $x ); }
@@ -404,7 +411,8 @@ class ParserOptions {
                global $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages,
                        $wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion,
                        $wgMaxArticleSize, $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth,
-                       $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit;
+                       $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit,
+                       $wgMaxGeneratedPPNodeCount;
 
                wfProfileIn( __METHOD__ );
 
@@ -416,6 +424,7 @@ class ParserOptions {
                $this->mAllowSpecialInclusion = $wgAllowSpecialInclusion;
                $this->mMaxIncludeSize = $wgMaxArticleSize * 1024;
                $this->mMaxPPNodeCount = $wgMaxPPNodeCount;
+               $this->mMaxGeneratedPPNodeCount = $wgMaxGeneratedPPNodeCount;
                $this->mMaxPPExpandDepth = $wgMaxPPExpandDepth;
                $this->mMaxTemplateDepth = $wgMaxTemplateDepth;
                $this->mExpensiveParserFunctionLimit = $wgExpensiveParserFunctionLimit;
index e75237b..34de0ba 100644 (file)
@@ -162,6 +162,15 @@ class Preprocessor_DOM implements Preprocessor {
                        }
 
                }
+
+               // Fail if the number of elements exceeds acceptable limits
+               // Do not attempt to generate the DOM 
+               $this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<' );
+               $max = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
+               if ( $this->parser->mGeneratedPPNodeCount > $max ) {
+                       throw new MWException( __METHOD__.': generated node count limit exceeded' );
+               }
+
                wfProfileIn( __METHOD__.'-loadXML' );
                $dom = new DOMDocument;
                wfSuppressWarnings();