*/
public $mLangLinkLanguages;
+ /**
+ * @var MapCacheLRU|null
+ * @since 1.24
+ *
+ * A cache of the current revisions of titles. Keys are $title->getPrefixedDBkey()
+ */
+ public $currentRevisionCache;
+
/**
* @var bool Recursive call protection.
* This variable should be treated as if it were private.
*/
+ /**
+ * Fix up the copy produced by "clone": reset mInParse to false, give the
+ * copy its own (non-shared) mStripState and mVarCache values, then run
+ * the 'ParserCloned' hook with the new object.
+ */
public function __clone() {
$this->mInParse = false;
+
+ // Bug 56226: When you create a reference "to" an object field, that
+ // makes the object field itself be a reference too (until the other
+ // reference goes out of scope). When cloning, any field that's a
+ // reference is copied as a reference in the new object. Both of these
+ // are defined PHP5 behaviors, as inconvenient as it is for us when old
+ // hooks from PHP4 days are passing fields by reference.
+ foreach ( array( 'mStripState', 'mVarCache' ) as $k ) {
+ // Make a non-reference copy of the field, then rebind the field to
+ // reference the new copy.
+ $tmp = $this->$k;
+ $this->$k =& $tmp;
+ // Drop the local name before the next iteration; otherwise assigning
+ // to $tmp again would write through to the field just rebound.
+ unset( $tmp );
+ }
+
wfRunHooks( 'ParserCloned', array( $this ) );
}
$this->mVarCache = array();
$this->mUser = null;
$this->mLangLinkLanguages = array();
+ $this->currentRevisionCache = null;
/**
* Prefix for temporary replacement strings for the multipass parser.
$this->startParse( $title, $options, self::OT_HTML, $clearState );
+ $this->currentRevisionCache = null;
$this->mInputSize = strlen( $text );
if ( $this->mOptions->getEnableLimitReport() ) {
$this->mOutput->resetParseStartTime();
$this->mRevisionUser = $oldRevisionUser;
$this->mRevisionSize = $oldRevisionSize;
$this->mInputSize = false;
+ $this->currentRevisionCache = null;
wfProfileOut( $fname );
wfProfileOut( __METHOD__ );
$this->getExternalLinkAttribs( $url ) );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
- $pasteurized = self::replaceUnusualEscapes( $url );
+ $pasteurized = self::normalizeLinkUrl( $url );
$this->mOutput->addExternalLink( $pasteurized );
}
wfProfileOut( __METHOD__ );
# Register link in the output object.
# Replace unnecessary URL escape codes with the referenced character
# This prevents spammers from hiding links from the filters
- $pasteurized = self::replaceUnusualEscapes( $url );
+ $pasteurized = self::normalizeLinkUrl( $url );
$this->mOutput->addExternalLink( $pasteurized );
}
}
/**
- * Replace unusual URL escape codes with their equivalent characters
+ * Replace unusual escape codes in a URL with their equivalent characters
+ *
+ * Kept only as an alias for older callers: it calls wfDeprecated() and
+ * then returns the result of self::normalizeLinkUrl( $url ) unchanged.
*
+ * @deprecated since 1.24, use self::normalizeLinkUrl() instead
* @param string $url
* @return string
- *
- * @todo This can merge genuinely required bits in the path or query string,
- * breaking legit URLs. A proper fix would treat the various parts of
- * the URL differently; as a workaround, just use the output for
- * statistical records, not for actual linking/output.
*/
public static function replaceUnusualEscapes( $url ) {
- return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
- array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+ wfDeprecated( __METHOD__, '1.24' );
+ return self::normalizeLinkUrl( $url );
}
/**
- * Callback function used in replaceUnusualEscapes().
- * Replaces unusual URL escape codes with their equivalent character
+ * Replace unusual escape codes in a URL with their equivalent characters
*
- * @param array $matches
+ * This generally follows the syntax defined in RFC 3986, with special
+ * consideration for HTTP query strings.
+ *
+ * The URL is handled as three slices, taken from the end: the fragment
+ * (from the first '#'), the query (from the first '?', if that comes
+ * before the fragment) and everything in front of those. Each slice is
+ * passed to self::normalizeUrlComponent() together with the list of
+ * characters that must stay percent-encoded in that slice.
+ *
+ * NOTE(review): '[' and ']' are percent-encoded by the first step and are
+ * in every "keep encoded" list below, so a bracketed IPv6 host literal
+ * would not come out unchanged - confirm whether such URLs reach this
+ * method.
*
+ * @param string $url
* @return string
*/
- private static function replaceUnusualEscapesCallback( $matches ) {
- $char = urldecode( $matches[0] );
- $ord = ord( $char );
- # Is it an unsafe or HTTP reserved character according to RFC 1738?
- if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
- # No, shouldn't be escaped
- return $char;
- } else {
- # Yes, leave it escaped
- return $matches[0];
+ public static function normalizeLinkUrl( $url ) {
+ # First, make sure unsafe characters are encoded
+ # (control characters and space, "<>[\]^`{|}, DEL and all bytes >= 0x80)
+ $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+ function ( $m ) {
+ return rawurlencode( $m[0] );
+ },
+ $url
+ );
+
+ # $ret is built right-to-left: each normalized slice is prepended to it,
+ # and $end marks where the not-yet-processed part of $url stops.
+ $ret = '';
+ $end = strlen( $url );
+
+ # Fragment part - 'fragment'
+ $start = strpos( $url, '#' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+ $end = $start;
}
+
+ # Query part - 'query' minus &=+;
+ # (a '?' located inside the fragment does not start a query, hence
+ # the $start < $end test)
+ $start = strpos( $url, '?' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+ $end = $start;
+ }
+
+ # Scheme and path part - 'pchar'
+ # (we assume no userinfo or encoded colons in the host)
+ $ret = self::normalizeUrlComponent(
+ substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+
+ return $ret;
+ }
+
+ /**
+ * Normalize the percent-escapes found in one slice of a URL.
+ *
+ * Every %XX sequence is examined. If it decodes to a byte in the range
+ * 33..126 that is not listed in $unsafe, the sequence is replaced by that
+ * character; otherwise the sequence is kept, with its hex digits
+ * uppercased.
+ *
+ * @param string $component Slice of a URL to process
+ * @param string $unsafe Characters that must stay percent-encoded
+ * @return string
+ */
+ private static function normalizeUrlComponent( $component, $unsafe ) {
+ return preg_replace_callback(
+ '/%[0-9A-Fa-f]{2}/',
+ function ( $escape ) use ( $unsafe ) {
+ $decoded = urldecode( $escape[0] );
+ $code = ord( $decoded );
+ $printable = $code > 32 && $code < 127;
+ if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
+ # Must stay escaped; only normalize a-f to uppercase
+ return strtoupper( $escape[0] );
+ }
+ # Safe to show as the literal character
+ return $decoded;
+ },
+ $component
+ );
}
/**
return array( $dom, $title );
}
+ /**
+ * Fetch the current revision of a given title. Note that the revision
+ * (and even the title) may not exist in the database, so everything
+ * contributing to the output of the parser should use this method
+ * where possible, rather than getting the revisions themselves. This
+ * method also caches its results, so using it benefits performance.
+ *
+ * The lookup itself is delegated to the callback returned by
+ * $this->mOptions->getCurrentRevisionCallback(), which is called with
+ * the title and this parser. Results are stored in
+ * $this->currentRevisionCache, keyed by $title->getPrefixedDBkey(); the
+ * cache object is created on first use.
+ *
+ * @since 1.24
+ * @param Title $title
+ * @return Revision|null|bool Whatever the callback produced; presumably a
+ *  falsy value when there is no current revision - TODO confirm exact value
+ */
+ public function fetchCurrentRevisionOfTitle( $title ) {
+ $cacheKey = $title->getPrefixedDBkey();
+ // Create the cache lazily (100 is presumably the maximum number of
+ // entries kept - confirm against MapCacheLRU).
+ if ( !$this->currentRevisionCache ) {
+ $this->currentRevisionCache = new MapCacheLRU( 100 );
+ }
+ // Only invoke the callback when the key is not cached yet.
+ if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
+ $this->currentRevisionCache->set( $cacheKey,
+ // Defaults to Parser::statelessFetchRevision()
+ call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
+ );
+ }
+ return $this->currentRevisionCache->get( $cacheKey );
+ }
+
+ /**
+ * Wrapper around Revision::newFromTitle to allow passing additional parameters
+ * without passing them on to it.
+ *
+ * Only $title is forwarded; $parser is accepted so the method matches the
+ * ( $title, $parser ) argument list used when the current-revision callback
+ * is invoked, but it is not used here.
+ *
+ * @since 1.24
+ * @param Title $title
+ * @param Parser|bool $parser Ignored
+ * @return Revision|null|bool Result of Revision::newFromTitle( $title );
+ *  presumably falsy when no revision is found - TODO confirm
+ */
+ public static function statelessFetchRevision( $title, $parser = false ) {
+ return Revision::newFromTitle( $title );
+ }
+
/**
* Fetch the unparsed text of a template and register a reference to it.
* @param Title $title
break;
}
# Get the revision
- $rev = $id
- ? Revision::newFromId( $id )
- : Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
+ if ( $id ) {
+ $rev = Revision::newFromId( $id );
+ } elseif ( $parser ) {
+ $rev = $parser->fetchCurrentRevisionOfTitle( $title );
+ } else {
+ $rev = Revision::newFromTitle( $title );
+ }
$rev_id = $rev ? $rev->getId() : 0;
# If there is no current revision, there is no page
if ( $id === false && !$rev ) {
*/
public $mRemoveComments = true;
+ /**
+ * Callback for current revision fetching. Used as first argument to call_user_func().
+ */
+ public $mCurrentRevisionCallback =
+ array( 'Parser', 'statelessFetchRevision' );
+
/**
* Callback for template fetching. Used as first argument to call_user_func().
*/
*/
protected $onAccessCallback = null;
+ /**
+ * If the page being parsed is a redirect, this should hold the redirect
+ * target.
+ * @var Title|null
+ */
+ private $redirectTarget = null;
+
+ /**
+ * Return the value currently stored in $this->mInterwikiMagic.
+ *
+ * @return mixed
+ */
public function getInterwikiMagic() {
return $this->mInterwikiMagic;
}
return $this->mRemoveComments;
}
+ /**
+ * Get the callback used for current revision fetching.
+ *
+ * @since 1.24
+ * @return callable Value of $this->mCurrentRevisionCallback, usable as the
+ *  first argument to call_user_func()
+ */
+ public function getCurrentRevisionCallback() {
+ return $this->mCurrentRevisionCallback;
+ }
+
+ /**
+ * Return the value currently stored in $this->mTemplateCallback.
+ *
+ * @return mixed Presumably a callable usable with call_user_func() - confirm
+ */
public function getTemplateCallback() {
return $this->mTemplateCallback;
}
return wfSetVar( $this->mRemoveComments, $x );
}
+ /**
+ * Set the callback used for current revision fetching.
+ *
+ * @since 1.24
+ * @param callable $x New value for $this->mCurrentRevisionCallback
+ * @return mixed Whatever wfSetVar() returns (presumably the previous
+ *  callback - confirm)
+ */
+ public function setCurrentRevisionCallback( $x ) {
+ return wfSetVar( $this->mCurrentRevisionCallback, $x );
+ }
+
+ /**
+ * Update $this->mTemplateCallback through wfSetVar().
+ *
+ * @param mixed $x New value; presumably a callable - confirm
+ * @return mixed Whatever wfSetVar() returns (presumably the previous
+ *  value - confirm)
+ */
public function setTemplateCallback( $x ) {
return wfSetVar( $this->mTemplateCallback, $x );
}
return wfSetVar( $this->mIsPrintable, $x );
}
+ /**
+ * Set the redirect target.
+ *
+ * Note that setting or changing this does not *make* the page a redirect
+ * or change its target, it merely records the information for reference
+ * during the parse.
+ *
+ * @since 1.24
+ * @param Title|null $title Target to record, or null to clear it
+ */
+ public function setRedirectTarget( $title ) {
+ $this->redirectTarget = $title;
+ }
+
+ /**
+ * Get the previously-set redirect target.
+ *
+ * @since 1.24
+ * @return Title|null The value last passed to setRedirectTarget(), or null
+ *  if none has been set
+ */
+ public function getRedirectTarget() {
+ return $this->redirectTarget;
+ }
+
/**
* Extra key that should be present in the parser cache key.
* @param string $key