From f0247e05bda76a0c16755c3f11f2c05a88003ce3 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Wed, 28 Feb 2018 20:42:40 +1100 Subject: [PATCH] StripState testing and cleanup * Added StripState unit tests * Deprecated unmaintained "half-parsed" serialization experiment * Renamed some variables for brevity and removed unused "prefix" Change-Id: I838d7ac7f9a2189e13d39c6939dba5d70e74a6b7 --- RELEASE-NOTES-1.31 | 6 + includes/parser/Parser.php | 6 + includes/parser/StripState.php | 50 ++++--- .../includes/parser/StripStateTest.php | 136 ++++++++++++++++++ 4 files changed, 178 insertions(+), 20 deletions(-) create mode 100644 tests/phpunit/includes/parser/StripStateTest.php diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31 index 5e9ce8b26d..83669e6fdc 100644 --- a/RELEASE-NOTES-1.31 +++ b/RELEASE-NOTES-1.31 @@ -279,6 +279,12 @@ changes to languages because of Phabricator reports. * ::setExtendedLoginCookie() Note that User::setCookies() remains, and is not deprecated. * The global functions wfProfileIn and wfProfileOut, deprecated in 1.25, have been removed. +* The following methods related to caching of half-parsed HTML were deprecated: + * Parser::serializeHalfParsedText() + * Parser::unserializeHalfParsedText() + * Parser::isValidHalfParsedText() + * StripState::getSubState() + * StripState::merge() == Compatibility == MediaWiki 1.31 requires PHP 5.5.9 or later. Although HHVM 3.18.5 or later is supported, diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index c775cd9af7..8e5dcbdab1 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -5998,11 +5998,13 @@ class Parser { * unserializeHalfParsedText(). The text can then be safely incorporated into * the return value of a parser hook. * + * @deprecated since 1.31 * @param string $text * * @return array */ public function serializeHalfParsedText( $text ) { + wfDeprecated( __METHOD__, '1.31' ); $data = [ 'text' => $text, 'version' => self::HALF_PARSED_VERSION, @@ -6023,11 +6025,13 @@ class Parser { * If the $data array has been stored persistently, the caller should first * check whether it is still valid, by calling isValidHalfParsedText(). * + * @deprecated since 1.31 * @param array $data Serialized data * @throws MWException * @return string */ public function unserializeHalfParsedText( $data ) { + wfDeprecated( __METHOD__, '1.31' ); if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { throw new MWException( __METHOD__ . ': invalid version' ); } @@ -6048,11 +6052,13 @@ class Parser { * serializeHalfParsedText(), is compatible with the current version of the * parser. * + * @deprecated since 1.31 * @param array $data * * @return bool */ public function isValidHalfParsedText( $data ) { + wfDeprecated( __METHOD__, '1.31' ); return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION; } diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php index 9f064dfabf..d329f693fe 100644 --- a/includes/parser/StripState.php +++ b/includes/parser/StripState.php @@ -26,24 +26,24 @@ * @ingroup Parser */ class StripState { - protected $prefix; protected $data; protected $regex; protected $parser; protected $circularRefGuard; - protected $recursionLevel = 0; - protected $highestRecursionLevel = 0; + protected $depth = 0; + protected $highestDepth = 0; protected $expandSize = 0; - const UNSTRIP_RECURSION_LIMIT = 20; - const UNSTRIP_SIZE_LIMIT = 5000000; + protected $depthLimit = 20; + protected $sizeLimit = 5000000; /** * @param Parser|null $parser + * @param array $options */ - public function __construct( Parser $parser = null ) { + public function __construct( Parser $parser = null, $options = [] ) { $this->data = [ 'nowiki' => [], 'general' => [] @@ -51,6 +51,13 @@ class StripState { $this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/'; $this->circularRefGuard = []; $this->parser = $parser; + + if ( isset( $options['depthLimit'] ) ) { + $this->depthLimit = $options['depthLimit']; + } + if ( isset( $options['sizeLimit'] ) ) { + $this->sizeLimit = $options['sizeLimit']; + } } /** @@ -128,12 +135,11 @@ class StripState { return $this->getWarning( 'parser-unstrip-loop-warning' ); } - if ( $this->recursionLevel > $this->highestRecursionLevel ) { - $this->highestRecursionLevel = $this->recursionLevel; + if ( $this->depth > $this->highestDepth ) { + $this->highestDepth = $this->depth; } - if ( $this->recursionLevel >= self::UNSTRIP_RECURSION_LIMIT ) { - return $this->getLimitationWarning( 'unstrip-depth', - self::UNSTRIP_RECURSION_LIMIT ); + if ( $this->depth >= $this->depthLimit ) { + return $this->getLimitationWarning( 'unstrip-depth', $this->depthLimit ); } $value = $this->data[$type][$marker]; @@ -142,15 +148,14 @@ class StripState { } $this->expandSize += strlen( $value ); - if ( $this->expandSize > self::UNSTRIP_SIZE_LIMIT ) { - return $this->getLimitationWarning( 'unstrip-size', - self::UNSTRIP_SIZE_LIMIT ); + if ( $this->expandSize > $this->sizeLimit ) { + return $this->getLimitationWarning( 'unstrip-size', $this->sizeLimit ); } $this->circularRefGuard[$marker] = true; - $this->recursionLevel++; + $this->depth++; $ret = $this->unstripType( $type, $value ); - $this->recursionLevel--; + $this->depth--; unset( $this->circularRefGuard[$marker] ); return $ret; @@ -201,14 +206,14 @@ class StripState { return [ [ 'limitreport-unstrip-depth', [ - $this->highestRecursionLevel, - self::UNSTRIP_RECURSION_LIMIT + $this->highestDepth, + $this->depthLimit ], ], [ 'limitreport-unstrip-size', [ $this->expandSize, - self::UNSTRIP_SIZE_LIMIT + $this->sizeLimit ], ] ]; @@ -218,11 +223,13 @@ class StripState { * Get a StripState object which is sufficient to unstrip the given text. * It will contain the minimum subset of strip items necessary. * + * @deprecated since 1.31 * @param string $text - * * @return StripState */ public function getSubState( $text ) { + wfDeprecated( __METHOD__, '1.31' ); + $subState = new StripState; $pos = 0; while ( true ) { @@ -254,11 +261,14 @@ class StripState { * will not be preserved. The strings in the $texts array will have their * strip markers rewritten, the resulting array of strings will be returned. * + * @deprecated since 1.31 * @param StripState $otherState * @param array $texts * @return array */ public function merge( $otherState, $texts ) { + wfDeprecated( __METHOD__, '1.31' ); + $mergePrefix = wfRandomString( 16 ); foreach ( $otherState->data as $type => $items ) { diff --git a/tests/phpunit/includes/parser/StripStateTest.php b/tests/phpunit/includes/parser/StripStateTest.php new file mode 100644 index 0000000000..0f4f6e0fae --- /dev/null +++ b/tests/phpunit/includes/parser/StripStateTest.php @@ -0,0 +1,136 @@ +setContentLang( 'qqx' ); + } + + private function getMarker() { + static $i; + return Parser::MARKER_PREFIX . '-blah-' . sprintf( '%08X', $i++ ) . Parser::MARKER_SUFFIX; + } + + private static function getWarning( $message, $max = '' ) { + return "($message: $max)"; + } + + public function testAddNoWiki() { + $ss = new StripState; + $marker = $this->getMarker(); + $ss->addNoWiki( $marker, '<>' ); + $text = "x{$marker}y"; + $text = $ss->unstripGeneral( $text ); + $text = str_replace( '<', '', $text ); + $text = $ss->unstripNoWiki( $text ); + $this->assertSame( 'x<>y', $text ); + } + + public function testAddGeneral() { + $ss = new StripState; + $marker = $this->getMarker(); + $ss->addGeneral( $marker, '<>' ); + $text = "x{$marker}y"; + $text = $ss->unstripNoWiki( $text ); + $text = str_replace( '<', '', $text ); + $text = $ss->unstripGeneral( $text ); + $this->assertSame( 'x<>y', $text ); + } + + public function testUnstripBoth() { + $ss = new StripState; + $mk1 = $this->getMarker(); + $mk2 = $this->getMarker(); + $ss->addNoWiki( $mk1, '<1>' ); + $ss->addGeneral( $mk2, '<2>' ); + $text = "x{$mk1}{$mk2}y"; + $text = str_replace( '<', '', $text ); + $text = $ss->unstripBoth( $text ); + $this->assertSame( 'x<1><2>y', $text ); + } + + public static function provideUnstripRecursive() { + return [ + [ 0, 'text' ], + [ 1, '=text=' ], + [ 2, '==text==' ], + [ 3, '==' . self::getWarning( 'unstrip-depth-warning', 2 ) . '==' ], + ]; + } + + /** @dataProvider provideUnstripRecursive */ + public function testUnstripRecursive( $depth, $expected ) { + $ss = new StripState( null, [ 'depthLimit' => 2 ] ); + $text = 'text'; + for ( $i = 0; $i < $depth; $i++ ) { + $mk = $this->getMarker(); + $ss->addNoWiki( $mk, "={$text}=" ); + $text = $mk; + } + $text = $ss->unstripNoWiki( $text ); + $this->assertSame( $expected, $text ); + } + + public function testUnstripLoop() { + $ss = new StripState( null, [ 'depthLimit' => 2 ] ); + $mk = $this->getMarker(); + $ss->addNoWiki( $mk, $mk ); + $text = $ss->unstripNoWiki( $mk ); + $this->assertSame( self::getWarning( 'parser-unstrip-loop-warning' ), $text ); + } + + public static function provideUnstripSize() { + return [ + [ 0, 'x' ], + [ 1, 'xx' ], + [ 2, str_repeat( self::getWarning( 'unstrip-size-warning', 5 ), 2 ) ] + ]; + } + + /** @dataProvider provideUnstripSize */ + public function testUnstripSize( $depth, $expected ) { + $ss = new StripState( null, [ 'sizeLimit' => 5 ] ); + $text = 'x'; + for ( $i = 0; $i < $depth; $i++ ) { + $mk = $this->getMarker(); + $ss->addNoWiki( $mk, $text ); + $text = "$mk$mk"; + } + $text = $ss->unstripNoWiki( $text ); + $this->assertSame( $expected, $text ); + } + + public function provideGetLimitReport() { + for ( $i = 1; $i < 4; $i++ ) { + yield [ $i ]; + } + } + + /** @dataProvider provideGetLimitReport */ + public function testGetLimitReport( $depth ) { + $sizeLimit = 100000; + $ss = new StripState( null, [ 'depthLimit' => 5, 'sizeLimit' => $sizeLimit ] ); + $text = 'x'; + for ( $i = 0; $i < $depth; $i++ ) { + $mk = $this->getMarker(); + $ss->addNoWiki( $mk, $text ); + $text = "$mk$mk"; + } + $text = $ss->unstripNoWiki( $text ); + $report = $ss->getLimitReport(); + $messages = []; + foreach ( $report as list( $msg, $params ) ) { + $messages[$msg] = $params; + } + $this->assertSame( [ $depth - 1, 5 ], $messages['limitreport-unstrip-depth'] ); + $this->assertSame( + [ + strlen( $this->getMarker() ) * 2 * ( pow( 2, $depth ) - 2 ) + pow( 2, $depth ), + $sizeLimit + ], + $messages['limitreport-unstrip-size' ] ); + } +} -- 2.20.1