Create a placeholder for the LinkHook experimentation.
authorDaniel Friesen <dantman@users.mediawiki.org>
Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)
committerDaniel Friesen <dantman@users.mediawiki.org>
Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)
This will allow us to develop a new method of parsing links (non-hacky), without breaking current syntax, and also allow us to make sure new methods don't break syntax.

Currently this class merely inherits from the Parser class. Constants and static functions are coppied so that use of self:: won't break when we modify things.

includes/AutoLoader.php
includes/parser/Parser_LinkHooks.php [new file with mode: 0644]

index c8be601..0c16bb6 100644 (file)
@@ -383,6 +383,7 @@ $wgAutoloadLocalClasses = array(
        'ParserOptions' => 'includes/parser/ParserOptions.php',
        'ParserOutput' => 'includes/parser/ParserOutput.php',
        'Parser_DiffTest' => 'includes/parser/Parser_DiffTest.php',
+       'Parser_LinkHooks' => 'includes/parser/Parser_LinkHooks.php',
        'Parser_OldPP' => 'includes/parser/Parser_OldPP.php',
        'Preprocessor' => 'includes/parser/Preprocessor.php',
        'Preprocessor_DOM' => 'includes/parser/Preprocessor_DOM.php',
diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php
new file mode 100644 (file)
index 0000000..5f8ed7f
--- /dev/null
@@ -0,0 +1,184 @@
+<?php
+/**
+ * Parser with LinkHooks experiment
+ * @ingroup Parser
+ */
+class Parser_LinkHooks extends Parser
+{
+       /**
+        * Update this version number when the ParserOutput format
+        * changes in an incompatible way, so the parser cache
+        * can automatically discard old data.
+        */
+       const VERSION = '1.6.4';
+       
+       # Flags for Parser::setFunctionHook
+       # Also available as global constants from Defines.php
+       const SFH_NO_HASH = 1;
+       const SFH_OBJECT_ARGS = 2;
+
+       # Constants needed for external link processing
+       # Everything except bracket, space, or control characters
+       const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
+       const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
+               \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
+
+       // State constants for the definition list colon extraction
+       const COLON_STATE_TEXT = 0;
+       const COLON_STATE_TAG = 1;
+       const COLON_STATE_TAGSTART = 2;
+       const COLON_STATE_CLOSETAG = 3;
+       const COLON_STATE_TAGSLASH = 4;
+       const COLON_STATE_COMMENT = 5;
+       const COLON_STATE_COMMENTDASH = 6;
+       const COLON_STATE_COMMENTDASHDASH = 7;
+
+       // Flags for preprocessToDom
+       const PTD_FOR_INCLUSION = 1;
+
+       // Allowed values for $this->mOutputType
+       // Parameter to startExternalParse().
+       const OT_HTML = 1;
+       const OT_WIKI = 2;
+       const OT_PREPROCESS = 3;
+       const OT_MSG = 3;
+
+       // Marker Suffix needs to be accessible staticly.
+       const MARKER_SUFFIX = "-QINU\x7f";
+       
+       /**
+        * Replace unusual URL escape codes with their equivalent characters
+        * @param string
+        * @return string
+        * @static
+        * @todo  This can merge genuinely required bits in the path or query string,
+        *        breaking legit URLs. A proper fix would treat the various parts of
+        *        the URL differently; as a workaround, just use the output for
+        *        statistical records, not for actual linking/output.
+        */
+       static function replaceUnusualEscapes( $url ) {
+               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
+                       array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+       }
+       
+       /**
+        * Callback function used in replaceUnusualEscapes().
+        * Replaces unusual URL escape codes with their equivalent character
+        * @static
+        * @private
+        */
+       private static function replaceUnusualEscapesCallback( $matches ) {
+               $char = urldecode( $matches[0] );
+               $ord = ord( $char );
+               // Is it an unsafe or HTTP reserved character according to RFC 1738?
+               if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
+                       // No, shouldn't be escaped
+                       return $char;
+               } else {
+                       // Yes, leave it escaped
+                       return $matches[0];
+               }
+       }
+       
+       /*
+        * Return a three-element array: leading whitespace, string contents, trailing whitespace
+        */
+       public static function splitWhitespace( $s ) {
+               $ltrimmed = ltrim( $s );
+               $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
+               $trimmed = rtrim( $ltrimmed );
+               $diff = strlen( $ltrimmed ) - strlen( $trimmed );
+               if ( $diff > 0 ) {
+                       $w2 = substr( $ltrimmed, -$diff );
+               } else {
+                       $w2 = '';
+               }
+               return array( $w1, $trimmed, $w2 );
+       }
+       
+       /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
+       static function createAssocArgs( $args ) {
+               $assocArgs = array();
+               $index = 1;
+               foreach( $args as $arg ) {
+                       $eqpos = strpos( $arg, '=' );
+                       if ( $eqpos === false ) {
+                               $assocArgs[$index++] = $arg;
+                       } else {
+                               $name = trim( substr( $arg, 0, $eqpos ) );
+                               $value = trim( substr( $arg, $eqpos+1 ) );
+                               if ( $value === false ) {
+                                       $value = '';
+                               }
+                               if ( $name !== false ) {
+                                       $assocArgs[$name] = $value;
+                               }
+                       }
+               }
+
+               return $assocArgs;
+       }
+       
+       /**
+        * Static function to get a template
+        * Can be overridden via ParserOptions::setTemplateCallback().
+        */
+       static function statelessFetchTemplate( $title, $parser=false ) {
+               $text = $skip = false;
+               $finalTitle = $title;
+               $deps = array();
+
+               // Loop to fetch the article, with up to 1 redirect
+               for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
+                       # Give extensions a chance to select the revision instead
+                       $id = false; // Assume current
+                       wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );
+
+                       if( $skip ) {
+                               $text = false;
+                               $deps[] = array(
+                                       'title' => $title,
+                                       'page_id' => $title->getArticleID(),
+                                       'rev_id' => null );
+                               break;
+                       }
+                       $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
+                       $rev_id = $rev ? $rev->getId() : 0;
+                       // If there is no current revision, there is no page
+                       if( $id === false && !$rev ) {
+                               $linkCache = LinkCache::singleton();
+                               $linkCache->addBadLinkObj( $title );
+                       }
+
+                       $deps[] = array(
+                               'title' => $title,
+                               'page_id' => $title->getArticleID(),
+                               'rev_id' => $rev_id );
+
+                       if( $rev ) {
+                               $text = $rev->getText();
+                       } elseif( $title->getNamespace() == NS_MEDIAWIKI ) {
+                               global $wgLang;
+                               $message = $wgLang->lcfirst( $title->getText() );
+                               $text = wfMsgForContentNoTrans( $message );
+                               if( wfEmptyMsg( $message, $text ) ) {
+                                       $text = false;
+                                       break;
+                               }
+                       } else {
+                               break;
+                       }
+                       if ( $text === false ) {
+                               break;
+                       }
+                       // Redirect?
+                       $finalTitle = $title;
+                       $title = Title::newFromRedirect( $text );
+               }
+               return array(
+                       'text' => $text,
+                       'finalTitle' => $finalTitle,
+                       'deps' => $deps );
+       }
+       
+}