(bug 35315) Detect circular references in strip tags
authorTim Starling <tstarling@wikimedia.org>
Mon, 23 Apr 2012 02:03:02 +0000 (12:03 +1000)
committerTim Starling <tstarling@wikimedia.org>
Tue, 8 May 2012 04:36:32 +0000 (14:36 +1000)
Explicitly detect circular references in strip tags and break the loop,
similar to how we deal with circular references in templates. This is
necessary to support Scribunto since we imagine we will provide an API
that allows strip markers to be forged.

The recursion depth limit is a consequence of changing the algorithm
from iterative to recursive, it's required to protect the stack against
deeply nested #tag invocations.

Change-Id: Icc8dc4aedbced55ad75b3b5a5429a376d06d9b31

includes/parser/StripState.php
languages/messages/MessagesEn.php

index 321fcd8..b08aa14 100644 (file)
@@ -31,6 +31,10 @@ class StripState {
        protected $regex;
 
        protected $tempType, $tempMergePrefix;
+       protected $circularRefGuard;
+       protected $recursionLevel = 0;
+
+       const UNSTRIP_RECURSION_LIMIT = 20;
 
        /**
         * @param $prefix string
@@ -42,6 +46,7 @@ class StripState {
                        'general' => array()
                );
                $this->regex = "/{$this->prefix}([^\x7f]+)" . Parser::MARKER_SUFFIX . '/';
+               $this->circularRefGuard = array();
        }
 
        /**
@@ -113,12 +118,10 @@ class StripState {
                }
 
                wfProfileIn( __METHOD__ );
+               $oldType = $this->tempType;
                $this->tempType = $type;
-               do {
-                       $oldText = $text;
-                       $text = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text );
-               } while ( $text !== $oldText );
-               $this->tempType = null;
+               $text = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text );
+               $this->tempType = $oldType;
                wfProfileOut( __METHOD__ );
                return $text;
        }
@@ -128,8 +131,22 @@ class StripState {
         * @return array
         */
        protected function unstripCallback( $m ) {
-               if ( isset( $this->data[$this->tempType][$m[1]] ) ) {
-                       return $this->data[$this->tempType][$m[1]];
+               $marker = $m[1];
+               if ( isset( $this->data[$this->tempType][$marker] ) ) {
+                       if ( isset( $this->circularRefGuard[$marker] ) ) {
+                               return '<span class="error">' . wfMsgForContent( 'parser-unstrip-loop-warning' ) . '</span>';
+                       }
+                       if ( $this->recursionLevel >= self::UNSTRIP_RECURSION_LIMIT ) {
+                               return '<span class="error">' . 
+                                       wfMsgForContent( 'parser-unstrip-recursion-limit', self::UNSTRIP_RECURSION_LIMIT ) . 
+                                       '</span>';
+                       }
+                       $this->circularRefGuard[$marker] = true;
+                       $this->recursionLevel++;
+                       $ret = $this->unstripType( $this->tempType, $this->data[$this->tempType][$marker] );
+                       $this->recursionLevel--;
+                       unset( $this->circularRefGuard[$marker] );
+                       return $ret;
                } else {
                        return $m[0];
                }
index 34cb9f1..3066e6c 100644 (file)
@@ -1482,6 +1482,8 @@ These arguments have been omitted.",
 'node-count-exceeded-warning'             => 'Page exceeded the node-count',
 'expansion-depth-exceeded-category'       => 'Pages where expansion depth is exceeded',
 'expansion-depth-exceeded-warning'        => 'Page exceeded the expansion depth',
+'parser-unstrip-loop-warning'             => 'Unstrip loop detected',
+'parser-unstrip-recursion-limit'          => 'Unstrip recursion limit exceeded ($1)',
 
 # "Undo" feature
 'undo-success' => 'The edit can be undone.