Create a placeholder for the LinkHook experimentation.

author Daniel Friesen <dantman@users.mediawiki.org>

Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)

committer Daniel Friesen <dantman@users.mediawiki.org>

Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)
author Daniel Friesen <dantman@users.mediawiki.org>
Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)
committer Daniel Friesen <dantman@users.mediawiki.org>
Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)
diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php

index c8be601..0c16bb6 100644 (file)
--- a/includes/AutoLoader.php
+++ b/includes/AutoLoader.php
@@ -383,6 +383,7 @@ $wgAutoloadLocalClasses = array(
         'ParserOptions' => 'includes/parser/ParserOptions.php',
         'ParserOutput' => 'includes/parser/ParserOutput.php',
         'Parser_DiffTest' => 'includes/parser/Parser_DiffTest.php',
+       'Parser_LinkHooks' => 'includes/parser/Parser_LinkHooks.php',
         'Parser_OldPP' => 'includes/parser/Parser_OldPP.php',
         'Preprocessor' => 'includes/parser/Preprocessor.php',
         'Preprocessor_DOM' => 'includes/parser/Preprocessor_DOM.php',
diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php

new file mode 100644 (file)

index 0000000..5f8ed7f
--- /dev/null
+++ b/includes/parser/Parser_LinkHooks.php
@@ -0,0 +1,184 @@
+<?php
+/**
+ * Parser with LinkHooks experiment
+ * @ingroup Parser
+ */
+class Parser_LinkHooks extends Parser
+{
+       /**
+        * Update this version number when the ParserOutput format
+        * changes in an incompatible way, so the parser cache
+        * can automatically discard old data.
+        */
+       const VERSION = '1.6.4';
+       
+       # Flags for Parser::setFunctionHook
+       # Also available as global constants from Defines.php
+       const SFH_NO_HASH = 1;
+       const SFH_OBJECT_ARGS = 2;
+
+       # Constants needed for external link processing
+       # Everything except bracket, space, or control characters
+       const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
+       const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
+               \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
+
+       // State constants for the definition list colon extraction
+       const COLON_STATE_TEXT = 0;
+       const COLON_STATE_TAG = 1;
+       const COLON_STATE_TAGSTART = 2;
+       const COLON_STATE_CLOSETAG = 3;
+       const COLON_STATE_TAGSLASH = 4;
+       const COLON_STATE_COMMENT = 5;
+       const COLON_STATE_COMMENTDASH = 6;
+       const COLON_STATE_COMMENTDASHDASH = 7;
+
+       // Flags for preprocessToDom
+       const PTD_FOR_INCLUSION = 1;
+
+       // Allowed values for $this->mOutputType
+       // Parameter to startExternalParse().
+       const OT_HTML = 1;
+       const OT_WIKI = 2;
+       const OT_PREPROCESS = 3;
+       const OT_MSG = 3;
+
+       // Marker Suffix needs to be accessible staticly.
+       const MARKER_SUFFIX = "-QINU\x7f";
+       
+       /**
+        * Replace unusual URL escape codes with their equivalent characters
+        * @param string
+        * @return string
+        * @static
+        * @todo  This can merge genuinely required bits in the path or query string,
+        *        breaking legit URLs. A proper fix would treat the various parts of
+        *        the URL differently; as a workaround, just use the output for
+        *        statistical records, not for actual linking/output.
+        */
+       static function replaceUnusualEscapes( $url ) {
+               return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
+                       array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url );
+       }
+       
+       /**
+        * Callback function used in replaceUnusualEscapes().
+        * Replaces unusual URL escape codes with their equivalent character
+        * @static
+        * @private
+        */
+       private static function replaceUnusualEscapesCallback( $matches ) {
+               $char = urldecode( $matches[0] );
+               $ord = ord( $char );
+               // Is it an unsafe or HTTP reserved character according to RFC 1738?
+               if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
+                       // No, shouldn't be escaped
+                       return $char;
+               } else {
+                       // Yes, leave it escaped
+                       return $matches[0];
+               }
+       }
+       
+       /*
+        * Return a three-element array: leading whitespace, string contents, trailing whitespace
+        */
+       public static function splitWhitespace( $s ) {
+               $ltrimmed = ltrim( $s );
+               $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
+               $trimmed = rtrim( $ltrimmed );
+               $diff = strlen( $ltrimmed ) - strlen( $trimmed );
+               if ( $diff > 0 ) {
+                       $w2 = substr( $ltrimmed, -$diff );
+               } else {
+                       $w2 = '';
+               }
+               return array( $w1, $trimmed, $w2 );
+       }
+       
+       /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
+       static function createAssocArgs( $args ) {
+               $assocArgs = array();
+               $index = 1;
+               foreach( $args as $arg ) {
+                       $eqpos = strpos( $arg, '=' );
+                       if ( $eqpos === false ) {
+                               $assocArgs[$index++] = $arg;
+                       } else {
+                               $name = trim( substr( $arg, 0, $eqpos ) );
+                               $value = trim( substr( $arg, $eqpos+1 ) );
+                               if ( $value === false ) {
+                                       $value = '';
+                               }
+                               if ( $name !== false ) {
+                                       $assocArgs[$name] = $value;
+                               }
+                       }
+               }
+
+               return $assocArgs;
+       }
+       
+       /**
+        * Static function to get a template
+        * Can be overridden via ParserOptions::setTemplateCallback().
+        */
+       static function statelessFetchTemplate( $title, $parser=false ) {
+               $text = $skip = false;
+               $finalTitle = $title;
+               $deps = array();
+
+               // Loop to fetch the article, with up to 1 redirect
+               for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
+                       # Give extensions a chance to select the revision instead
+                       $id = false; // Assume current
+                       wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );
+
+                       if( $skip ) {
+                               $text = false;
+                               $deps[] = array(
+                                       'title' => $title,
+                                       'page_id' => $title->getArticleID(),
+                                       'rev_id' => null );
+                               break;
+                       }
+                       $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
+                       $rev_id = $rev ? $rev->getId() : 0;
+                       // If there is no current revision, there is no page
+                       if( $id === false && !$rev ) {
+                               $linkCache = LinkCache::singleton();
+                               $linkCache->addBadLinkObj( $title );
+                       }
+
+                       $deps[] = array(
+                               'title' => $title,
+                               'page_id' => $title->getArticleID(),
+                               'rev_id' => $rev_id );
+
+                       if( $rev ) {
+                               $text = $rev->getText();
+                       } elseif( $title->getNamespace() == NS_MEDIAWIKI ) {
+                               global $wgLang;
+                               $message = $wgLang->lcfirst( $title->getText() );
+                               $text = wfMsgForContentNoTrans( $message );
+                               if( wfEmptyMsg( $message, $text ) ) {
+                                       $text = false;
+                                       break;
+                               }
+                       } else {
+                               break;
+                       }
+                       if ( $text === false ) {
+                               break;
+                       }
+                       // Redirect?
+                       $finalTitle = $title;
+                       $title = Title::newFromRedirect( $text );
+               }
+               return array(
+                       'text' => $text,
+                       'finalTitle' => $finalTitle,
+                       'deps' => $deps );
+       }
+       
+}
author	Daniel Friesen <dantman@users.mediawiki.org>
	Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)
committer	Daniel Friesen <dantman@users.mediawiki.org>
	Mon, 18 Aug 2008 03:56:38 +0000 (03:56 +0000)
includes/AutoLoader.php		patch \| blob \| history
includes/parser/Parser_LinkHooks.php	[new file with mode: 0644]	patch \| blob