Revert "Use a fixed regex for StripState"
author Tim Starling <tstarling@wikimedia.org>
Mon, 20 Oct 2014 21:42:17 +0000 (21:42 +0000)
committer Tim Starling <tstarling@wikimedia.org>
Mon, 20 Oct 2014 21:42:53 +0000 (21:42 +0000)
Breaks extensions, doesn't entirely fix the problem it was meant to fix.

This reverts commit 6da3f169ac55ae87837a4ba3cf3e30f83fbf9d7d.

Change-Id: Ic193abcff8c72b0c8b434fcac514f88603a45beb

includes/parser/Parser.php
includes/parser/StripState.php

index 95cef5d..e6478a4 100644 (file)
@@ -112,20 +112,8 @@ class Parser {
        const OT_MSG = 3;
        const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
 
-       /**
-        * Prefix for temporary replacement strings generated by the preprocessor
-        * ("strip markers"). Using \x7f at the front gives us a little extra
-        * robustness since it shouldn't match when butted up against
-        * identifier-like string constructs.
-        *
-        * Must not consist of all title characters, or else it will change
-        * the behavior of <nowiki> in a link.
-        */
-       const MARKER_PREFIX = "\x7fUNIQ";
-       /** Suffix for strip markers */
+       # Marker Suffix needs to be accessible statically.
        const MARKER_SUFFIX = "-QINU\x7f";
-       /** Regex which matches the state ID part of strip markers */
-       const MARKER_STATE_ID_REGEX = '[0-9a-f]{16}';
 
        # Markers used for wrapping the table of contents
        const TOC_START = '<mw:toc>';
@@ -342,9 +330,18 @@ class Parser {
                $this->mLangLinkLanguages = array();
                $this->currentRevisionCache = null;
 
-               $stripId = self::getRandomString();
-               $this->mUniqPrefix = self::MARKER_PREFIX . $stripId;
-               $this->mStripState = new StripState( $stripId );
+               /**
+                * Prefix for temporary replacement strings for the multipass parser.
+                * \x07 should never appear in input as it's disallowed in XML.
+                * Using it at the front also gives us a little extra robustness
+                * since it shouldn't match when butted up against identifier-like
+                * string constructs.
+                *
+                * Must not consist of all title characters, or else it will change
+                * the behavior of <nowiki> in a link.
+                */
+               $this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
+               $this->mStripState = new StripState( $this->mUniqPrefix );
 
                # Clear these on every parse, bug 4549
                $this->mTplRedirCache = $this->mTplDomCache = array();
index a1d362b..5d1743e 100644 (file)
@@ -26,7 +26,6 @@
  * @ingroup Parser
  */
 class StripState {
-       protected $id;
        protected $prefix;
        protected $data;
        protected $regex;
@@ -38,17 +37,15 @@ class StripState {
        const UNSTRIP_RECURSION_LIMIT = 20;
 
        /**
-        * @param string $id
+        * @param string $prefix
         */
-       public function __construct( $id ) {
-               $this->id = $id;
-               $this->prefix = Parser::MARKER_PREFIX . $id;
+       public function __construct( $prefix ) {
+               $this->prefix = $prefix;
                $this->data = array(
                        'nowiki' => array(),
                        'general' => array()
                );
-               $this->regex = "/" . Parser::MARKER_PREFIX .
-                       '(' . Parser::MARKER_STATE_ID_REGEX . ")([^\x7f]+)" . Parser::MARKER_SUFFIX . '/';
+               $this->regex = "/{$this->prefix}([^\x7f]+)" . Parser::MARKER_SUFFIX . '/';
                $this->circularRefGuard = array();
        }
 
@@ -76,11 +73,11 @@ class StripState {
         * @param string $value
         */
        protected function addItem( $type, $marker, $value ) {
-               if ( !preg_match( $this->regex, $marker, $m ) || $m[1] !== $this->id ) {
+               if ( !preg_match( $this->regex, $marker, $m ) ) {
                        throw new MWException( "Invalid marker: $marker" );
                }
 
-               $this->data[$type][$m[2]] = $value;
+               $this->data[$type][$m[1]] = $value;
        }
 
        /**
@@ -134,8 +131,8 @@ class StripState {
         * @return array
         */
        protected function unstripCallback( $m ) {
-               $marker = $m[2];
-               if ( $m[1] === $this->id && isset( $this->data[$this->tempType][$marker] ) ) {
+               $marker = $m[1];
+               if ( isset( $this->data[$this->tempType][$marker] ) ) {
                        if ( isset( $this->circularRefGuard[$marker] ) ) {
                                return '<span class="error">'
                                        . wfMessage( 'parser-unstrip-loop-warning' )->inContentLanguage()->text()
@@ -167,7 +164,7 @@ class StripState {
         * @return StripState
         */
        public function getSubState( $text ) {
-               $subState = new StripState( $this->id );
+               $subState = new StripState( $this->prefix );
                $pos = 0;
                while ( true ) {
                        $startPos = strpos( $text, $this->prefix, $pos );
@@ -178,11 +175,11 @@ class StripState {
 
                        $endPos += strlen( Parser::MARKER_SUFFIX );
                        $marker = substr( $text, $startPos, $endPos - $startPos );
-                       if ( !preg_match( $this->regex, $marker, $m ) || $m[1] !== $this->id ) {
+                       if ( !preg_match( $this->regex, $marker, $m ) ) {
                                continue;
                        }
 
-                       $key = $m[2];
+                       $key = $m[1];
                        if ( isset( $this->data['nowiki'][$key] ) ) {
                                $subState->data['nowiki'][$key] = $this->data['nowiki'][$key];
                        } elseif ( isset( $this->data['general'][$key] ) ) {
@@ -222,12 +219,8 @@ class StripState {
         * @return string
         */
        protected function mergeCallback( $m ) {
-               if ( $m[1] === $this->id ) {
-                       $key = $m[2];
-                       return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
-               } else {
-                       return $m[0];
-               }
+               $key = $m[1];
+               return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
        }
 
        /**
@@ -237,15 +230,6 @@ class StripState {
         * @return string
         */
        public function killMarkers( $text ) {
-               $id = $this->id; // PHP 5.3 hack
-               return preg_replace_callback( $this->regex,
-                       function ( $m ) use ( $id ) {
-                               if ( $m[1] === $id ) {
-                                       return '';
-                               } else {
-                                       return $m[0];
-                               }
-                       },
-                       $text );
+               return preg_replace( $this->regex, '', $text );
        }
 }