Remove <a> tag hook for now, pending resolution of implementation issues as discussed...
[lhc/web/wiklou.git] / includes / parser / Parser_LinkHooks.php
index 5f8ed7f..2b30693 100644 (file)
@@ -12,10 +12,9 @@ class Parser_LinkHooks extends Parser
         */
        const VERSION = '1.6.4';
        
-       # Flags for Parser::setFunctionHook
+       # Flags for Parser::setLinkHook
        # Also available as global constants from Defines.php
-       const SFH_NO_HASH = 1;
-       const SFH_OBJECT_ARGS = 2;
+       const SLH_PATTERN = 1;
 
        # Constants needed for external link processing
        # Everything except bracket, space, or control characters
@@ -23,162 +22,294 @@ class Parser_LinkHooks extends Parser
        const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
                \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
 
-       // State constants for the definition list colon extraction
-       const COLON_STATE_TEXT = 0;
-       const COLON_STATE_TAG = 1;
-       const COLON_STATE_TAGSTART = 2;
-       const COLON_STATE_CLOSETAG = 3;
-       const COLON_STATE_TAGSLASH = 4;
-       const COLON_STATE_COMMENT = 5;
-       const COLON_STATE_COMMENTDASH = 6;
-       const COLON_STATE_COMMENTDASHDASH = 7;
-
-       // Flags for preprocessToDom
-       const PTD_FOR_INCLUSION = 1;
+       /**#@+
+        * @private
+        */
+       # Persistent:
+       var $mLinkHooks;
 
-       // Allowed values for $this->mOutputType
-       // Parameter to startExternalParse().
-       const OT_HTML = 1;
-       const OT_WIKI = 2;
-       const OT_PREPROCESS = 3;
-       const OT_MSG = 3;
+       /**#@-*/
 
-       // Marker Suffix needs to be accessible staticly.
-       const MARKER_SUFFIX = "-QINU\x7f";
-       
        /**
-        * Replace unusual URL escape codes with their equivalent characters
-        * @param string
-        * @return string
-        * @static
-        * @todo  This can merge genuinely required bits in the path or query string,
-        *        breaking legit URLs. A proper fix would treat the various parts of
-        *        the URL differently; as a workaround, just use the output for
-        *        statistical records, not for actual linking/output.
+        * Constructor
+        *
+        * @public
         */
-       static function replaceUnusualEscapes( $url ) {
-               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
-                       array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+       function __construct( $conf = array() ) {
+               # Hand the configuration array to the parent constructor unchanged
+               parent::__construct( $conf );
+               # Start with an empty link hook registry; setLinkHook() adds entries to it
+               $this->mLinkHooks = array();
        }
-       
+
        /**
-        * Callback function used in replaceUnusualEscapes().
-        * Replaces unusual URL escape codes with their equivalent character
-        * @static
-        * @private
+        * Do various kinds of initialisation on the first call of the parser
         */
-       private static function replaceUnusualEscapesCallback( $matches ) {
-               $char = urldecode( $matches[0] );
-               $ord = ord( $char );
-               // Is it an unsafe or HTTP reserved character according to RFC 1738?
-               if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
-                       // No, shouldn't be escaped
-                       return $char;
-               } else {
-                       // Yes, leave it escaped
-                       return $matches[0];
+       function firstCallInit() {
+               # NOTE(review): this re-runs the parent constructor (without $conf) on
+               # every call, before the first-call guard below. If the parent
+               # constructor resets $this->mFirstCall, the guard can never trigger;
+               # confirm whether this call is intended.
+               parent::__construct();
+               # Nothing to do when initialisation has already happened
+               if ( !$this->mFirstCall ) {
+                       return;
                }
+               $this->mFirstCall = false;
+
+               wfProfileIn( __METHOD__ );
+
+               # Register the built-in <pre> tag hook, the core parser functions
+               # and the core link functions on this parser instance
+               $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
+               CoreParserFunctions::register( $this );
+               CoreLinkFunctions::register( $this );
+               $this->initialiseVariables();
+
+               # Run the ParserFirstCallInit hook with this parser as its argument
+               wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
+               wfProfileOut( __METHOD__ );
        }
-       
-       /*
-        * Return a three-element array: leading whitespace, string contents, trailing whitespace
+
+       /**
+        * Create a link hook, e.g. [[Namespace:...|display]]
+        * The callback function should have the form:
+        *    function myLinkCallback( $parser, $holders, $markers,
+        *      Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { ... }
+        *
+        * Or with SLH_PATTERN:
+        *    function myLinkCallback( $parser, $holders, $markers,
+        *      &$titleText, &$sortText = null, &$leadingColon = false ) { ... }
+        *
+        * The callback may return one of the following values:
+        * String) Text result of the link
+        * True) (Treat as link) Parse the link according to normal link rules
+        * False) (Bad link) Just output the raw wikitext (You may modify the text first)
+        *
+        * @public
+        *
+        * @param integer|string $ns The Namespace ID or regex pattern if SLH_PATTERN is set
+        * @param mixed $callback The callback function (and object) to use
+        * @param integer $flags a combination of the following flags:
+        *     SLH_PATTERN   Use a regex link pattern rather than a namespace
+        *
+        * @return mixed The old callback function for this namespace or pattern, if any
         */
-       public static function splitWhitespace( $s ) {
-               $ltrimmed = ltrim( $s );
-               $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
-               $trimmed = rtrim( $ltrimmed );
-               $diff = strlen( $ltrimmed ) - strlen( $trimmed );
-               if ( $diff > 0 ) {
-                       $w2 = substr( $ltrimmed, -$diff );
-               } else {
-                       $w2 = '';
-               }
-               return array( $w1, $trimmed, $w2 );
+       function setLinkHook( $ns, $callback, $flags = 0 ) {
+               # Validate $ns against the kind of hook being registered: pattern
+               # hooks are keyed by a regex string, all other hooks by a namespace
+               # index. The two checks are mutually exclusive on the SLH_PATTERN bit
+               # (the former "$flags | ~SLH_PATTERN" test was always true and
+               # rejected every valid pattern hook).
+               if( $flags & SLH_PATTERN ) {
+                       if( !is_string($ns) ) {
+                               throw new MWException( __METHOD__.'() expecting a regex string pattern.' );
+                       }
+               } elseif( !is_int($ns) ) {
+                       throw new MWException( __METHOD__.'() expecting a namespace index.' );
+               }
+               # Remember the callback being replaced (null if none) so it can be returned
+               $oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null;
+               # Each registry entry is array( callback, flags )
+               $this->mLinkHooks[$ns] = array( $callback, $flags );
+               return $oldVal;
        }
        
-       /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
-       static function createAssocArgs( $args ) {
-               $assocArgs = array();
-               $index = 1;
-               foreach( $args as $arg ) {
-                       $eqpos = strpos( $arg, '=' );
-                       if ( $eqpos === false ) {
-                               $assocArgs[$index++] = $arg;
-                       } else {
-                               $name = trim( substr( $arg, 0, $eqpos ) );
-                               $value = trim( substr( $arg, $eqpos+1 ) );
-                               if ( $value === false ) {
-                                       $value = '';
-                               }
-                               if ( $name !== false ) {
-                                       $assocArgs[$name] = $value;
-                               }
-                       }
-               }
-
-               return $assocArgs;
+       /**
+        * List the keys under which link hooks are currently registered,
+        * i.e. the $ns values that were passed to setLinkHook()
+        *
+        * @return array
+        */
+       function getLinkHooks() {
+               $hookIds = array_keys( $this->mLinkHooks );
+               return $hookIds;
        }
        
        /**
-        * Static function to get a template
-        * Can be overridden via ParserOptions::setTemplateCallback().
+        * Process [[ ]] wikilinks
+        * @return LinkHolderArray
+        *
+        * @private
         */
-       static function statelessFetchTemplate( $title, $parser=false ) {
-               $text = $skip = false;
-               $finalTitle = $title;
-               $deps = array();
+       function replaceInternalLinks2( &$s ) {
+               global $wgContLang;
+               # Scans $s for [[ and ]] pairs, matching each ]] with the most recent
+               # unmatched [[. Every pair whose title part passes $titleRegex is
+               # replaced by a LINKMARKER placeholder; afterwards all placeholders are
+               # expanded through replaceInternalLinksCallback(). $s is modified in
+               # place and the LinkHolderArray created here is returned.
+               # Throws MWException when $this->mTitle is null.
 
-               // Loop to fetch the article, with up to 1 redirect
-               for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
-                       # Give extensions a chance to select the revision instead
-                       $id = false; // Assume current
-                       wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );
-
-                       if( $skip ) {
-                               $text = false;
-                               $deps[] = array(
-                                       'title' => $title,
-                                       'page_id' => $title->getArticleID(),
-                                       'rev_id' => null );
-                               break;
-                       }
-                       $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
-                       $rev_id = $rev ? $rev->getId() : 0;
-                       // If there is no current revision, there is no page
-                       if( $id === false && !$rev ) {
-                               $linkCache = LinkCache::singleton();
-                               $linkCache->addBadLinkObj( $title );
-                       }
+               wfProfileIn( __METHOD__ );
 
-                       $deps[] = array(
-                               'title' => $title,
-                               'page_id' => $title->getArticleID(),
-                               'rev_id' => $rev_id );
+               wfProfileIn( __METHOD__.'-setup' );
+               # Built once and cached in statics for all later calls
+               static $tc = FALSE, $titleRegex;//$e1, $e1_img;
+               if( !$tc ) {
+                       # the % is needed to support urlencoded titles as well
+                       $tc = Title::legalChars() . '#%';
+                       # Match a link having the form [[namespace:link|alternate]]trail
+                       //$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
+                       # Match cases where there is no "]]", which might still be images
+                       //$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
+                       # Match a valid plain title
+                       $titleRegex = "/^([{$tc}]+)$/sD";
+               }
 
-                       if( $rev ) {
-                               $text = $rev->getText();
-                       } elseif( $title->getNamespace() == NS_MEDIAWIKI ) {
-                               global $wgLang;
-                               $message = $wgLang->lcfirst( $title->getText() );
-                               $text = wfMsgForContentNoTrans( $message );
-                               if( wfEmptyMsg( $message, $text ) ) {
-                                       $text = false;
-                                       break;
+               # NOTE(review): $sk, $nottalk and $selflink are assigned below but not
+               # read anywhere else in this method
+               $sk = $this->mOptions->getSkin();
+               $holders = new LinkHolderArray( $this );
+               
+               if( is_null( $this->mTitle ) ) {
+                       # Close the open profiling sections innermost-first before bailing out
+                       wfProfileOut( __METHOD__.'-setup' );
+                       wfProfileOut( __METHOD__ );
+                       throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
+               }
+               $nottalk = !$this->mTitle->isTalkPage();
+               
+               if($wgContLang->hasVariants()) {
+                       $selflink = $wgContLang->convertLinkToAllVariants($this->mTitle->getPrefixedText());
+               } else {
+                       $selflink = array($this->mTitle->getPrefixedText());
+               }
+               wfProfileOut( __METHOD__.'-setup' );
+               
+               # $offset is where the next bracket search starts; $offsetStack holds
+               # the positions of the [[ that have not been closed yet
+               $offset = 0;
+               $offsetStack = array();
+               $markers = new LinkMarkerReplacer( $this, $holders, array( &$this, 'replaceInternalLinksCallback' ) );
+               while( true ) {
+                       $startBracketOffset = strpos( $s, '[[', $offset );
+                       $endBracketOffset   = strpos( $s, ']]', $offset );
+                       # Finish when there are no more brackets
+                       if( $startBracketOffset === false && $endBracketOffset === false ) {
+                               break;
+                       }
+                       # Determine if the bracket is a starting or ending bracket
+                       if( $startBracketOffset !== false && $endBracketOffset !== false ) {
+                               # When we find both, use the first one
+                               $isStart = $startBracketOffset <= $endBracketOffset;
+                       } else {
+                               # When we only found one, check which it is
+                               $isStart = $startBracketOffset !== false;
+                       }
+                       $bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset;
+                       if( $isStart ) {
+                               /** Opening bracket **/
+                               # Just push our current offset in the string onto the stack
+                               $offsetStack[] = $startBracketOffset;
+                       } else {
+                               /** Closing bracket **/
+                               # Pop the start pos for our current link zone off the stack
+                               # (array_pop() yields null when the stack is empty)
+                               $startBracketOffset = array_pop($offsetStack);
+                               # Just to clean up the code, let's place offsets on the outer ends
+                               $endBracketOffset += 2;
+                               
+                               # Only do logic if we actually have an opening bracket for this
+                               if( isset($startBracketOffset) ) {
+                                       # Extract text inside the link; $paramText stays unset
+                                       # (null) when there is no pipe, hence the @
+                                       @list( $titleText, $paramText ) = explode('|',
+                                               substr($s, $startBracketOffset+2, $endBracketOffset-$startBracketOffset-4), 2);
+                                       # Create markers only for valid links
+                                       if( preg_match( $titleRegex, $titleText ) ) {
+                                               # Store the text for the marker
+                                               $marker = $markers->addMarker($titleText, $paramText);
+                                               # Replace the current link with the marker
+                                               $s = substr($s,0,$startBracketOffset).
+                                                       $marker.
+                                                       substr($s, $endBracketOffset);
+                                               # We have modified $s, because of this we need to set the
+                                               # offset manually since the end position is different now
+                                               $offset = $startBracketOffset+strlen($marker);
+                                               continue;
+                                       }
+                                       # ToDo: Some LinkHooks may allow recursive links inside of
+                                       # the link text, create a regex that also matches our
+                                       # <!-- LINKMARKER ### --> sequence in titles
+                                       # ToDo: Some LinkHooks use patterns rather than namespaces
+                                       # these need to be tested at this point here
                                }
+                               
+                       }
+                       # Bump our offset to after our current bracket
+                       $offset = $bracketOffset+2;
+               }
+               
+               
+               # Now expand our tree
+               wfProfileIn( __METHOD__.'-expand' );
+               $s = $markers->expand( $s );
+               wfProfileOut( __METHOD__.'-expand' );
+               
+               wfProfileOut( __METHOD__ );
+               return $holders;
+       }
+       
+       function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) {
+               # Turns one stored link (title part plus optional parameter part) into
+               # its replacement text. $parser, $holders and $markers are passed on
+               # unchanged to the link hook. Returns the string produced by the hook
+               # or by CoreLinkFunctions::defaultLinkHook, or the raw [[...]] wikitext
+               # when the link is rejected. Throws MWException when the hook
+               # registered for the namespace is not callable.
+               wfProfileIn( __METHOD__ );
+               # Raw wikitext to fall back on when the link is rejected early
+               $wt = isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
+               wfProfileIn( __METHOD__."-misc" );
+               # Don't allow internal links to pages containing
+               # PROTO: where PROTO is a valid URL protocol; these
+               # should be external links.
+               if( preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $titleText) ) {
+                       # Close both open profiling sections, innermost first
+                       wfProfileOut( __METHOD__."-misc" );
+                       wfProfileOut( __METHOD__ );
+                       return $wt;
+               }
+               
+               # Make subpage if necessary
+               if( $this->areSubpagesAllowed() ) {
+                       $titleText = $this->maybeDoSubpageLink( $titleText, $paramText );
+               }
+               
+               # Check for a leading colon and strip it if it is there
+               $leadingColon = $titleText[0] == ':';
+               if( $leadingColon ) $titleText = substr( $titleText, 1 );
+               
+               wfProfileOut( __METHOD__."-misc" );
+               # Make title object
+               wfProfileIn( __METHOD__."-title" );
+               $title = Title::newFromText( $this->mStripState->unstripNoWiki($titleText) );
+               if( !$title ) {
+                       wfProfileOut( __METHOD__."-title" );
+                       wfProfileOut( __METHOD__ );
+                       return $wt;
+               }
+               $ns = $title->getNamespace();
+               wfProfileOut( __METHOD__."-title" );
+               
+               # Default for Namespaces is a default link
+               # ToDo: Default for patterns is plain wikitext
+               $return = true;
+               if( isset($this->mLinkHooks[$ns]) ) {
+                       list( $callback, $flags ) = $this->mLinkHooks[$ns];
+                       # Pattern hooks are not given the Title object; $paramText and
+                       # $leadingColon go by reference so the hook may change them
+                       if( $flags & SLH_PATTERN ) {
+                               $args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon );
                        } else {
-                               break;
+                               $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon );
                        }
-                       if ( $text === false ) {
-                               break;
+                       # Workaround for PHP bug 35229 and similar
+                       if ( !is_callable( $callback ) ) {
+                               throw new MWException( "Link hook for namespace $ns is not callable\n" );
                        }
-                       // Redirect?
-                       $finalTitle = $title;
-                       $title = Title::newFromRedirect( $text );
+                       $return = call_user_func_array( $callback, $args );
+               }
+               if( $return === true ) {
+                       # True (treat as plain link) was returned, call the defaultLinkHook
+                       $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon );
+                       $return = call_user_func_array( array( 'CoreLinkFunctions', 'defaultLinkHook' ), $args );
+               }
+               if( $return === false ) {
+                       # False (no link) was returned, output plain wikitext
+                       # Build it again as the hook is allowed to modify $paramText
+                       $return = isset($paramText) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
                }
-               return array(
-                       'text' => $text,
-                       'finalTitle' => $finalTitle,
-                       'deps' => $deps );
+               # Close the section opened at the top on every normal exit
+               wfProfileOut( __METHOD__ );
+               # Hook content or rebuilt wikitext
+               return $return;
+       }
+       
+}
+
+/**
+ * Hands out "<!-- LINKMARKER n -->" placeholders for link texts and later
+ * swaps each placeholder in a string for the result of a callback.
+ */
+class LinkMarkerReplacer {
+       
+       # Stored array( $titleText, $paramText ) pairs, keyed by marker id
+       protected $markers = array();
+       # Id that the next addMarker() call will hand out
+       protected $nextId = 0;
+       # Passed to the callback, together with this object, ahead of the stored pair
+       protected $parser, $holders;
+       # Callable invoked for every known marker during expand()
+       protected $callback;
+       
+       /**
+        * @param mixed $parser   First argument handed to the callback
+        * @param mixed $holders  Second argument handed to the callback
+        * @param mixed $callback Callable used to expand each marker
+        */
+       function __construct( $parser, $holders, $callback ) {
+               $this->callback = $callback;
+               $this->holders  = $holders;
+               $this->parser   = $parser;
+       }
+       
+       /**
+        * Store a title/parameter pair under a fresh id.
+        *
+        * @return string The placeholder text carrying that id
+        */
+       function addMarker($titleText, $paramText) {
+               $id = $this->nextId;
+               $this->nextId = $id + 1;
+               $this->markers[$id] = array( $titleText, $paramText );
+               return "<!-- LINKMARKER $id -->";
+       }
+       
+       /**
+        * Tell whether the string contains at least one placeholder.
+        *
+        * @return bool
+        */
+       function findMarker( $string ) {
+               $hits = preg_match('/<!-- LINKMARKER [0-9]+ -->/', $string );
+               return (bool) $hits;
+       }
+       
+       /**
+        * Run callback() over every "<!-- LINKMARKER " ... " -->" span of the
+        * string via StringUtils::delimiterReplaceCallback and return the result.
+        */
+       function expand( $string ) {
+               $replacer = array( $this, 'callback' );
+               return StringUtils::delimiterReplaceCallback( "<!-- LINKMARKER ", " -->", $replacer, $string );
+       }
+       
+       /**
+        * Replacement callback used by expand(): $m[1] is read as the marker id.
+        * Known ids are expanded by calling the stored callback with
+        * ( parser, holders, this object, titleText, paramText ).
+        */
+       function callback( $m ) {
+               $id = intval($m[1]);
+               if( array_key_exists($id, $this->markers) ) {
+                       $args = array_merge(
+                               array( $this->parser, $this->holders, $this ),
+                               $this->markers[$id] );
+                       return call_user_func_array( $this->callback, $args );
+               }
+               # Unknown id: hand back $m[0] untouched
+               return $m[0];
        }
        
 }