From: Daniel Friesen Date: Mon, 18 Aug 2008 03:56:38 +0000 (+0000) Subject: Create a placeholder for the LinkHook experimentation. X-Git-Tag: 1.31.0-rc.0~45845 X-Git-Url: https://git.cyclocoop.org//%22?a=commitdiff_plain;h=22c1e38461d483b505c96af3a44fc8217cf55b87;p=lhc%2Fweb%2Fwiklou.git Create a placeholder for the LinkHook experimentation. This will allow us to develop a new method of parsing links (non-hacky), without breaking current syntax, and also allow us to make sure new methods don't break syntax. Currently this class merely inherits from the Parser class. Constants and static functions are coppied so that use of self:: won't break when we modify things. --- diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index c8be601472..0c16bb6606 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -383,6 +383,7 @@ $wgAutoloadLocalClasses = array( 'ParserOptions' => 'includes/parser/ParserOptions.php', 'ParserOutput' => 'includes/parser/ParserOutput.php', 'Parser_DiffTest' => 'includes/parser/Parser_DiffTest.php', + 'Parser_LinkHooks' => 'includes/parser/Parser_LinkHooks.php', 'Parser_OldPP' => 'includes/parser/Parser_OldPP.php', 'Preprocessor' => 'includes/parser/Preprocessor.php', 'Preprocessor_DOM' => 'includes/parser/Preprocessor_DOM.php', diff --git a/includes/parser/Parser_LinkHooks.php b/includes/parser/Parser_LinkHooks.php new file mode 100644 index 0000000000..5f8ed7f7ec --- /dev/null +++ b/includes/parser/Parser_LinkHooks.php @@ -0,0 +1,184 @@ +"\\x00-\\x20\\x7F]'; + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) + \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; + + // State constants for the definition list colon extraction + const COLON_STATE_TEXT = 0; + const COLON_STATE_TAG = 1; + const COLON_STATE_TAGSTART = 2; + const COLON_STATE_CLOSETAG = 3; + const COLON_STATE_TAGSLASH = 4; + const COLON_STATE_COMMENT = 5; + const COLON_STATE_COMMENTDASH = 6; + const COLON_STATE_COMMENTDASHDASH = 7; + + // Flags for preprocessToDom + const PTD_FOR_INCLUSION = 1; + + // Allowed values for $this->mOutputType + // Parameter to startExternalParse(). + const OT_HTML = 1; + const OT_WIKI = 2; + const OT_PREPROCESS = 3; + const OT_MSG = 3; + + // Marker Suffix needs to be accessible staticly. + const MARKER_SUFFIX = "-QINU\x7f"; + + /** + * Replace unusual URL escape codes with their equivalent characters + * @param string + * @return string + * @static + * @todo This can merge genuinely required bits in the path or query string, + * breaking legit URLs. A proper fix would treat the various parts of + * the URL differently; as a workaround, just use the output for + * statistical records, not for actual linking/output. + */ + static function replaceUnusualEscapes( $url ) { + return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', + array( __CLASS__, 'replaceUnusualEscapesCallback' ), $url ); + } + + /** + * Callback function used in replaceUnusualEscapes(). + * Replaces unusual URL escape codes with their equivalent character + * @static + * @private + */ + private static function replaceUnusualEscapesCallback( $matches ) { + $char = urldecode( $matches[0] ); + $ord = ord( $char ); + // Is it an unsafe or HTTP reserved character according to RFC 1738? + if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) { + // No, shouldn't be escaped + return $char; + } else { + // Yes, leave it escaped + return $matches[0]; + } + } + + /* + * Return a three-element array: leading whitespace, string contents, trailing whitespace + */ + public static function splitWhitespace( $s ) { + $ltrimmed = ltrim( $s ); + $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) ); + $trimmed = rtrim( $ltrimmed ); + $diff = strlen( $ltrimmed ) - strlen( $trimmed ); + if ( $diff > 0 ) { + $w2 = substr( $ltrimmed, -$diff ); + } else { + $w2 = ''; + } + return array( $w1, $trimmed, $w2 ); + } + + /// Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. + static function createAssocArgs( $args ) { + $assocArgs = array(); + $index = 1; + foreach( $args as $arg ) { + $eqpos = strpos( $arg, '=' ); + if ( $eqpos === false ) { + $assocArgs[$index++] = $arg; + } else { + $name = trim( substr( $arg, 0, $eqpos ) ); + $value = trim( substr( $arg, $eqpos+1 ) ); + if ( $value === false ) { + $value = ''; + } + if ( $name !== false ) { + $assocArgs[$name] = $value; + } + } + } + + return $assocArgs; + } + + /** + * Static function to get a template + * Can be overridden via ParserOptions::setTemplateCallback(). + */ + static function statelessFetchTemplate( $title, $parser=false ) { + $text = $skip = false; + $finalTitle = $title; + $deps = array(); + + // Loop to fetch the article, with up to 1 redirect + for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { + # Give extensions a chance to select the revision instead + $id = false; // Assume current + wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) ); + + if( $skip ) { + $text = false; + $deps[] = array( + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => null ); + break; + } + $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); + $rev_id = $rev ? $rev->getId() : 0; + // If there is no current revision, there is no page + if( $id === false && !$rev ) { + $linkCache = LinkCache::singleton(); + $linkCache->addBadLinkObj( $title ); + } + + $deps[] = array( + 'title' => $title, + 'page_id' => $title->getArticleID(), + 'rev_id' => $rev_id ); + + if( $rev ) { + $text = $rev->getText(); + } elseif( $title->getNamespace() == NS_MEDIAWIKI ) { + global $wgLang; + $message = $wgLang->lcfirst( $title->getText() ); + $text = wfMsgForContentNoTrans( $message ); + if( wfEmptyMsg( $message, $text ) ) { + $text = false; + break; + } + } else { + break; + } + if ( $text === false ) { + break; + } + // Redirect? + $finalTitle = $title; + $title = Title::newFromRedirect( $text ); + } + return array( + 'text' => $text, + 'finalTitle' => $finalTitle, + 'deps' => $deps ); + } + +}