--- /dev/null
+<?php
+/**
+ * Parser with LinkHooks experiment
+ * @ingroup Parser
+ */
+class Parser_LinkHooks extends Parser
+{
+ /**
+ * Update this version number when the ParserOutput format
+ * changes in an incompatible way, so the parser cache
+ * can automatically discard old data.
+ */
+ const VERSION = '1.6.4';
+
+ # Flags for Parser::setFunctionHook
+ # Also available as global constants from Defines.php
+ const SFH_NO_HASH = 1;
+ const SFH_OBJECT_ARGS = 2;
+
+ # Constants needed for external link processing
+ # Everything except bracket, space, or control characters
+ const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
+ const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
+ \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
+
+ // State constants for the definition list colon extraction
+ const COLON_STATE_TEXT = 0;
+ const COLON_STATE_TAG = 1;
+ const COLON_STATE_TAGSTART = 2;
+ const COLON_STATE_CLOSETAG = 3;
+ const COLON_STATE_TAGSLASH = 4;
+ const COLON_STATE_COMMENT = 5;
+ const COLON_STATE_COMMENTDASH = 6;
+ const COLON_STATE_COMMENTDASHDASH = 7;
+
+ // Flags for preprocessToDom
+ const PTD_FOR_INCLUSION = 1;
+
+ // Allowed values for $this->mOutputType
+ // Parameter to startExternalParse().
+ const OT_HTML = 1;
+ const OT_WIKI = 2;
+ const OT_PREPROCESS = 3;
+ const OT_MSG = 3;
+
+ // Marker Suffix needs to be accessible staticly.
+ const MARKER_SUFFIX = "-QINU\x7f";
+
+ /**
+ * Replace unusual URL escape codes with their equivalent characters
+ * @param string
+ * @return string
+ * @static
+ * @todo This can merge genuinely required bits in the path or query string,
+ * breaking legit URLs. A proper fix would treat the various parts of
+ * the URL differently; as a workaround, just use the output for
+ * statistical records, not for actual linking/output.
+ */
+ static function replaceUnusualEscapes( $url ) {
+ return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
+ array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+ }
+
+ /**
+ * Callback function used in replaceUnusualEscapes().
+ * Replaces unusual URL escape codes with their equivalent character
+ * @static
+ * @private
+ */
+ private static function replaceUnusualEscapesCallback( $matches ) {
+ $char = urldecode( $matches[0] );
+ $ord = ord( $char );
+ // Is it an unsafe or HTTP reserved character according to RFC 1738?
+ if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
+ // No, shouldn't be escaped
+ return $char;
+ } else {
+ // Yes, leave it escaped
+ return $matches[0];
+ }
+ }
+
+ /*
+ * Return a three-element array: leading whitespace, string contents, trailing whitespace
+ */
+ public static function splitWhitespace( $s ) {
+ $ltrimmed = ltrim( $s );
+ $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
+ $trimmed = rtrim( $ltrimmed );
+ $diff = strlen( $ltrimmed ) - strlen( $trimmed );
+ if ( $diff > 0 ) {
+ $w2 = substr( $ltrimmed, -$diff );
+ } else {
+ $w2 = '';
+ }
+ return array( $w1, $trimmed, $w2 );
+ }
+
+ /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
+ static function createAssocArgs( $args ) {
+ $assocArgs = array();
+ $index = 1;
+ foreach( $args as $arg ) {
+ $eqpos = strpos( $arg, '=' );
+ if ( $eqpos === false ) {
+ $assocArgs[$index++] = $arg;
+ } else {
+ $name = trim( substr( $arg, 0, $eqpos ) );
+ $value = trim( substr( $arg, $eqpos+1 ) );
+ if ( $value === false ) {
+ $value = '';
+ }
+ if ( $name !== false ) {
+ $assocArgs[$name] = $value;
+ }
+ }
+ }
+
+ return $assocArgs;
+ }
+
+ /**
+ * Static function to get a template
+ * Can be overridden via ParserOptions::setTemplateCallback().
+ */
+ static function statelessFetchTemplate( $title, $parser=false ) {
+ $text = $skip = false;
+ $finalTitle = $title;
+ $deps = array();
+
+ // Loop to fetch the article, with up to 1 redirect
+ for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
+ # Give extensions a chance to select the revision instead
+ $id = false; // Assume current
+ wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );
+
+ if( $skip ) {
+ $text = false;
+ $deps[] = array(
+ 'title' => $title,
+ 'page_id' => $title->getArticleID(),
+ 'rev_id' => null );
+ break;
+ }
+ $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
+ $rev_id = $rev ? $rev->getId() : 0;
+ // If there is no current revision, there is no page
+ if( $id === false && !$rev ) {
+ $linkCache = LinkCache::singleton();
+ $linkCache->addBadLinkObj( $title );
+ }
+
+ $deps[] = array(
+ 'title' => $title,
+ 'page_id' => $title->getArticleID(),
+ 'rev_id' => $rev_id );
+
+ if( $rev ) {
+ $text = $rev->getText();
+ } elseif( $title->getNamespace() == NS_MEDIAWIKI ) {
+ global $wgLang;
+ $message = $wgLang->lcfirst( $title->getText() );
+ $text = wfMsgForContentNoTrans( $message );
+ if( wfEmptyMsg( $message, $text ) ) {
+ $text = false;
+ break;
+ }
+ } else {
+ break;
+ }
+ if ( $text === false ) {
+ break;
+ }
+ // Redirect?
+ $finalTitle = $title;
+ $title = Title::newFromRedirect( $text );
+ }
+ return array(
+ 'text' => $text,
+ 'finalTitle' => $finalTitle,
+ 'deps' => $deps );
+ }
+
+}