From ce8e466e44e773ca1b6dc72edd88a52ef17761e2 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Mon, 20 Oct 2014 21:42:17 +0000 Subject: [PATCH] Revert "Use a fixed regex for StripState" Breaks extensions, doesn't entirely fix the problem it was meant to fix. This reverts commit 6da3f169ac55ae87837a4ba3cf3e30f83fbf9d7d. Change-Id: Ic193abcff8c72b0c8b434fcac514f88603a45beb --- includes/parser/Parser.php | 29 ++++++++++------------ includes/parser/StripState.php | 44 +++++++++++----------------------- 2 files changed, 27 insertions(+), 46 deletions(-) diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 95cef5d1e9..e6478a4ed9 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -112,20 +112,8 @@ class Parser { const OT_MSG = 3; const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged. - /** - * Prefix for temporary replacement strings generated by the preprocessor - * ("strip markers"). Using \x7f at the front gives us a little extra - * robustness since it shouldn't match when butted up against - * identifier-like string constructs. - * - * Must not consist of all title characters, or else it will change - * the behavior of <nowiki> in a link. - */ - const MARKER_PREFIX = "\x7fUNIQ"; - /** Suffix for strip markers */ + # Marker Suffix needs to be accessible statically. const MARKER_SUFFIX = "-QINU\x7f"; - /** Regex which matches the state ID part of strip markers */ - const MARKER_STATE_ID_REGEX = '[0-9a-f]{16}'; # Markers used for wrapping the table of contents const TOC_START = ''; @@ -342,9 +330,18 @@ class Parser { $this->mLangLinkLanguages = array(); $this->currentRevisionCache = null; - $stripId = self::getRandomString(); - $this->mUniqPrefix = self::MARKER_PREFIX . $stripId; - $this->mStripState = new StripState( $stripId ); + /** + * Prefix for temporary replacement strings for the multipass parser. + * \x07 should never appear in input as it's disallowed in XML. 
+ * Using it at the front also gives us a little extra robustness + * since it shouldn't match when butted up against identifier-like + * string constructs. + * + * Must not consist of all title characters, or else it will change + * the behavior of <nowiki> in a link. + */ + $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString(); + $this->mStripState = new StripState( $this->mUniqPrefix ); # Clear these on every parse, bug 4549 $this->mTplRedirCache = $this->mTplDomCache = array(); diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php index a1d362b113..5d1743e61c 100644 --- a/includes/parser/StripState.php +++ b/includes/parser/StripState.php @@ -26,7 +26,6 @@ * @ingroup Parser */ class StripState { - protected $id; protected $prefix; protected $data; protected $regex; @@ -38,17 +37,15 @@ class StripState { const UNSTRIP_RECURSION_LIMIT = 20; /** - * @param string $id + * @param string $prefix */ - public function __construct( $id ) { - $this->id = $id; - $this->prefix = Parser::MARKER_PREFIX . $id; + public function __construct( $prefix ) { + $this->prefix = $prefix; $this->data = array( 'nowiki' => array(), 'general' => array() ); - $this->regex = "/" . Parser::MARKER_PREFIX . - '(' . Parser::MARKER_STATE_ID_REGEX . ")([^\x7f]+)" . Parser::MARKER_SUFFIX . '/'; + $this->regex = "/{$this->prefix}([^\x7f]+)" . Parser::MARKER_SUFFIX . 
'/'; $this->circularRefGuard = array(); } @@ -76,11 +73,11 @@ class StripState { * @param string $value */ protected function addItem( $type, $marker, $value ) { - if ( !preg_match( $this->regex, $marker, $m ) || $m[1] !== $this->id ) { + if ( !preg_match( $this->regex, $marker, $m ) ) { throw new MWException( "Invalid marker: $marker" ); } - $this->data[$type][$m[2]] = $value; + $this->data[$type][$m[1]] = $value; } /** @@ -134,8 +131,8 @@ class StripState { * @return array */ protected function unstripCallback( $m ) { - $marker = $m[2]; - if ( $m[1] === $this->id && isset( $this->data[$this->tempType][$marker] ) ) { + $marker = $m[1]; + if ( isset( $this->data[$this->tempType][$marker] ) ) { if ( isset( $this->circularRefGuard[$marker] ) ) { return '' . wfMessage( 'parser-unstrip-loop-warning' )->inContentLanguage()->text() @@ -167,7 +164,7 @@ class StripState { * @return StripState */ public function getSubState( $text ) { - $subState = new StripState( $this->id ); + $subState = new StripState( $this->prefix ); $pos = 0; while ( true ) { $startPos = strpos( $text, $this->prefix, $pos ); @@ -178,11 +175,11 @@ class StripState { $endPos += strlen( Parser::MARKER_SUFFIX ); $marker = substr( $text, $startPos, $endPos - $startPos ); - if ( !preg_match( $this->regex, $marker, $m ) || $m[1] !== $this->id ) { + if ( !preg_match( $this->regex, $marker, $m ) ) { continue; } - $key = $m[2]; + $key = $m[1]; if ( isset( $this->data['nowiki'][$key] ) ) { $subState->data['nowiki'][$key] = $this->data['nowiki'][$key]; } elseif ( isset( $this->data['general'][$key] ) ) { @@ -222,12 +219,8 @@ class StripState { * @return string */ protected function mergeCallback( $m ) { - if ( $m[1] === $this->id ) { - $key = $m[2]; - return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX; - } else { - return $m[0]; - } + $key = $m[1]; + return "{$this->prefix}{$this->tempMergePrefix}-$key" . 
Parser::MARKER_SUFFIX; } /** @@ -237,15 +230,6 @@ class StripState { * @return string */ public function killMarkers( $text ) { - $id = $this->id; // PHP 5.3 hack - return preg_replace_callback( $this->regex, - function ( $m ) use ( $id ) { - if ( $m[1] === $id ) { - return ''; - } else { - return $m[0]; - } - }, - $text ); + return preg_replace( $this->regex, '', $text ); } } -- 2.20.1