*/
public $mLangLinkLanguages;
+ /**
+ * @var MapCacheLRU|null
+ * @since 1.24
+ *
+ * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
+ */
+ public $currentRevisionCache;
+
/**
* @var bool Recursive call protection.
* This variable should be treated as if it were private.
*/
public function __clone() {
$this->mInParse = false;
+
+ // Bug 56226: When you create a reference "to" an object field, that
+ // makes the object field itself be a reference too (until the other
+ // reference goes out of scope). When cloning, any field that's a
+ // reference is copied as a reference in the new object. Both of these
+ // are defined PHP5 behaviors, as inconvenient as it is for us when old
+ // hooks from PHP4 days are passing fields by reference.
+ foreach ( array( 'mStripState', 'mVarCache' ) as $k ) {
+ // Make a non-reference copy of the field, then rebind the field to
+ // reference the new copy.
+ $tmp = $this->$k;
+ $this->$k =& $tmp;
+ unset( $tmp );
+ }
+
wfRunHooks( 'ParserCloned', array( $this ) );
}
$this->mVarCache = array();
$this->mUser = null;
$this->mLangLinkLanguages = array();
+ $this->currentRevisionCache = null;
/**
* Prefix for temporary replacement strings for the multipass parser.
$this->startParse( $title, $options, self::OT_HTML, $clearState );
+ $this->currentRevisionCache = null;
$this->mInputSize = strlen( $text );
if ( $this->mOptions->getEnableLimitReport() ) {
$this->mOutput->resetParseStartTime();
$this->mRevisionUser = $oldRevisionUser;
$this->mRevisionSize = $oldRevisionSize;
$this->mInputSize = false;
+ $this->currentRevisionCache = null;
wfProfileOut( $fname );
wfProfileOut( __METHOD__ );
$this->getExternalLinkAttribs( $url ) );
# Register it in the output object...
# Replace unnecessary URL escape codes with their equivalent characters
- $pasteurized = self::replaceUnusualEscapes( $url );
+ $pasteurized = self::normalizeLinkUrl( $url );
$this->mOutput->addExternalLink( $pasteurized );
}
wfProfileOut( __METHOD__ );
# Register link in the output object.
# Replace unnecessary URL escape codes with the referenced character
# This prevents spammers from hiding links from the filters
- $pasteurized = self::replaceUnusualEscapes( $url );
+ $pasteurized = self::normalizeLinkUrl( $url );
$this->mOutput->addExternalLink( $pasteurized );
}
}
/**
- * Replace unusual URL escape codes with their equivalent characters
+ * Replace unusual escape codes in a URL with their equivalent characters
*
+ * @deprecated since 1.24, use normalizeLinkUrl
* @param string $url
* @return string
- *
- * @todo This can merge genuinely required bits in the path or query string,
- * breaking legit URLs. A proper fix would treat the various parts of
- * the URL differently; as a workaround, just use the output for
- * statistical records, not for actual linking/output.
*/
public static function replaceUnusualEscapes( $url ) {
- return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
- array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+ wfDeprecated( __METHOD__, '1.24' );
+ return self::normalizeLinkUrl( $url );
}
/**
- * Callback function used in replaceUnusualEscapes().
- * Replaces unusual URL escape codes with their equivalent character
+ * Replace unusual escape codes in a URL with their equivalent characters
*
- * @param array $matches
+ * This generally follows the syntax defined in RFC 3986, with special
+ * consideration for HTTP query strings.
*
+ * @param string $url
* @return string
*/
- private static function replaceUnusualEscapesCallback( $matches ) {
- $char = urldecode( $matches[0] );
- $ord = ord( $char );
- # Is it an unsafe or HTTP reserved character according to RFC 1738?
- if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
- # No, shouldn't be escaped
- return $char;
- } else {
- # Yes, leave it escaped
- return $matches[0];
+ public static function normalizeLinkUrl( $url ) {
+ # First, make sure unsafe characters are encoded
+ $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
+ function ( $m ) {
+ return rawurlencode( $m[0] );
+ },
+ $url
+ );
+
+ $ret = '';
+ $end = strlen( $url );
+
+ # Fragment part - 'fragment'
+ $start = strpos( $url, '#' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
+ $end = $start;
+ }
+
+ # Query part - 'query' minus &=+;
+ $start = strpos( $url, '?' );
+ if ( $start !== false && $start < $end ) {
+ $ret = self::normalizeUrlComponent(
+ substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
+ $end = $start;
}
+
+ # Scheme and path part - 'pchar'
+ # (we assume no userinfo or encoded colons in the host)
+ $ret = self::normalizeUrlComponent(
+ substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+
+ return $ret;
+ }
+
+ private static function normalizeUrlComponent( $component, $unsafe ) {
+ $callback = function ( $matches ) use ( $unsafe ) {
+ $char = urldecode( $matches[0] );
+ $ord = ord( $char );
+ if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
+ # Unescape it
+ return $char;
+ } else {
+ # Leave it escaped, but use uppercase for a-f
+ return strtoupper( $matches[0] );
+ }
+ };
+ return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
}
/**
return array( $dom, $title );
}
+ /**
+ * Fetch the current revision of a given title. Note that the revision
+ * (and even the title) may not exist in the database, so everything
+ * contributing to the output of the parser should use this method
+ * where possible, rather than getting the revisions themselves. This
+ * method also caches its results, so using it benefits performance.
+ *
+ * @since 1.24
+ * @param Title $title
+ * @return Revision
+ */
+ public function fetchCurrentRevisionOfTitle( $title ) {
+ $cacheKey = $title->getPrefixedDBkey();
+ if ( !$this->currentRevisionCache ) {
+ $this->currentRevisionCache = new MapCacheLRU( 100 );
+ }
+ if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
+ $this->currentRevisionCache->set( $cacheKey,
+ // Defaults to Parser::statelessFetchRevision()
+ call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
+ );
+ }
+ return $this->currentRevisionCache->get( $cacheKey );
+ }
+
+ /**
+ * Wrapper around Revision::newFromTitle to allow passing additional parameters
+ * without passing them on to it.
+ *
+ * @since 1.24
+ * @param Title $title
+ * @param Parser|bool $parser
+ * @return Revision
+ */
+ public static function statelessFetchRevision( $title, $parser = false ) {
+ return Revision::newFromTitle( $title );
+ }
+
/**
* Fetch the unparsed text of a template and register a reference to it.
* @param Title $title
break;
}
# Get the revision
- $rev = $id
- ? Revision::newFromId( $id )
- : Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
+ if ( $id ) {
+ $rev = Revision::newFromId( $id );
+ } elseif ( $parser ) {
+ $rev = $parser->fetchCurrentRevisionOfTitle( $title );
+ } else {
+ $rev = Revision::newFromTitle( $title );
+ }
$rev_id = $rev ? $rev->getId() : 0;
# If there is no current revision, there is no page
if ( $id === false && !$rev ) {
if ( $isTemplate ) {
# Put a T flag in the section identifier, to indicate to extractSections()
# that sections inside <includeonly> should be counted.
- $editlinkArgs = array( $titleText, "T-$sectionIndex"/*, null */ );
+ $editsectionPage = $titleText;
+ $editsectionSection = "T-$sectionIndex";
+ $editsectionContent = null;
} else {
- $editlinkArgs = array(
- $this->mTitle->getPrefixedText(),
- $sectionIndex,
- $headlineHint
- );
+ $editsectionPage = $this->mTitle->getPrefixedText();
+ $editsectionSection = $sectionIndex;
+ $editsectionContent = $headlineHint;
}
// We use a bit of pesudo-xml for editsection markers. The
// language converter is run later on. Using a UNIQ style marker
// important bits of data, but put the headline hint inside a
// content block because the language converter is supposed to
// be able to convert that piece of data.
- $editlink = '<mw:editsection page="' . htmlspecialchars( $editlinkArgs[0] );
- $editlink .= '" section="' . htmlspecialchars( $editlinkArgs[1] ) . '"';
- if ( isset( $editlinkArgs[2] ) ) {
- $editlink .= '>' . $editlinkArgs[2] . '</mw:editsection>';
+ // Gets replaced with html in ParserOutput::getText
+ $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
+ $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
+ if ( $editsectionContent !== null ) {
+ $editlink .= '>' . $editsectionContent . '</mw:editsection>';
} else {
$editlink .= '/>';
}
$paramName = 'no-link';
$value = true;
$validated = true;
- } elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
+ } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
$paramName = 'link-url';
$this->mOutput->addExternalLink( $value );