// Parser_LinkHooks: a subclass of Parser ("Parser with LinkHooks experiment").
// NOTE(review): this listing carries the original source line number at the
// start of each line and skips some source lines (e.g. 40 -> 43 -> 46 below),
// so declarations may be missing from this view - confirm against the full file.
3 * Parser with LinkHooks experiment
6 class Parser_LinkHooks
extends Parser
9 * Update this version number when the ParserOutput format
10 * changes in an incompatible way, so the parser cache
11 * can automatically discard old data.
13 const VERSION
= '1.6.4';
15 # Flags for Parser::setFunctionHook
16 # Also available as global constants from Defines.php
17 const SFH_NO_HASH
= 1;
18 const SFH_OBJECT_ARGS
= 2;
20 # Constants needed for external link processing
21 # Everything except bracket, space, or control characters
22 const EXT_LINK_URL_CLASS
= '[^][<>"\\x00-\\x20\\x7F]';
// Matches an http:// or https:// URL whose last path segment is a file name
// ending in .gif, .png, .jpg or .jpeg. The "x" modifier makes the line break
// inside the pattern insignificant; the inline (?i) makes the extension
// alternatives case-insensitive.
23 const EXT_IMAGE_REGEX
= '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
24 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
26 // State constants for the definition list colon extraction
27 const COLON_STATE_TEXT
= 0;
28 const COLON_STATE_TAG
= 1;
29 const COLON_STATE_TAGSTART
= 2;
30 const COLON_STATE_CLOSETAG
= 3;
31 const COLON_STATE_TAGSLASH
= 4;
32 const COLON_STATE_COMMENT
= 5;
33 const COLON_STATE_COMMENTDASH
= 6;
34 const COLON_STATE_COMMENTDASHDASH
= 7;
36 // Flags for preprocessToDom
37 const PTD_FOR_INCLUSION
= 1;
39 // Allowed values for $this->mOutputType
40 // Parameter to startExternalParse().
// NOTE(review): only OT_PREPROCESS is visible here; source lines 41-42 and
// 44-45 are absent from this listing, so sibling OT_* values are presumably
// declared there - confirm.
43 const OT_PREPROCESS
= 3;
46 // Marker Suffix needs to be accessible statically.
// Double-quoted on purpose: "\x7f" is interpreted as the DEL control character.
47 const MARKER_SUFFIX
= "-QINU\x7f";
/**
 * Replace unusual URL escape codes with their equivalent characters
 *
 * Every %XX sequence in $url is handed to replaceUnusualEscapesCallback(),
 * which decodes it only when the decoded character is safe to leave bare.
 *
 * @todo This can merge genuinely required bits in the path or query string,
 *       breaking legit URLs. A proper fix would treat the various parts of
 *       the URL differently; as a workaround, just use the output for
 *       statistical records, not for actual linking/output.
 *
 * @param string $url
 * @return string
 */
static function replaceUnusualEscapes( $url ) {
    return preg_replace_callback(
        '/%[0-9A-Fa-f]{2}/',
        array( __CLASS__, 'replaceUnusualEscapesCallback' ),
        $url
    );
}

/**
 * Callback function used in replaceUnusualEscapes().
 * Replaces unusual URL escape codes with their equivalent character
 *
 * @param array $matches Match array from preg_replace_callback();
 *                       $matches[0] is the "%XX" sequence
 * @return string The decoded character, or the untouched "%XX" sequence
 */
private static function replaceUnusualEscapesCallback( $matches ) {
    $char = urldecode( $matches[0] );
    $ord = ord( $char );

    // Is it an unsafe or HTTP reserved character according to RFC 1738?
    // Control characters, space, DEL and anything outside ASCII also stay
    // escaped because of the range check.
    if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
        // No, shouldn't be escaped
        return $char;
    }

    // Yes, leave it escaped
    return $matches[0];
}
/**
 * Return a three-element array: leading whitespace, string contents, trailing whitespace
 *
 * "Whitespace" is whatever ltrim()/rtrim() strip by default. Pieces that are
 * absent are returned as empty strings.
 *
 * @param string $s
 * @return array array( leading, contents, trailing )
 */
public static function splitWhitespace( $s ) {
    $ltrimmed = ltrim( $s );
    $leadLen = strlen( $s ) - strlen( $ltrimmed );
    $w1 = $leadLen > 0 ? substr( $s, 0, $leadLen ) : '';

    $trimmed = rtrim( $ltrimmed );
    $diff = strlen( $ltrimmed ) - strlen( $trimmed );
    // Guard needed: substr( $x, -0 ) is substr( $x, 0 ) and would hand back
    // the whole string as "trailing whitespace".
    $w2 = $diff > 0 ? substr( $ltrimmed, -$diff ) : '';

    return array( $w1, $trimmed, $w2 );
}
/**
 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
 *
 * Arguments without "=" are stored under consecutive integer keys starting
 * at 1. Arguments of the form "name=value" are stored under the trimmed
 * name with the trimmed value (everything after the first "=").
 *
 * @param array $args List of raw argument strings
 * @return array Associative argument array
 */
static function createAssocArgs( $args ) {
    $assocArgs = array();
    $index = 1;
    foreach ( $args as $arg ) {
        $eqpos = strpos( $arg, '=' );
        if ( $eqpos === false ) {
            $assocArgs[$index++] = $arg;
            continue;
        }
        // trim() always yields a string, so name and value need no
        // further false-checks before being stored.
        $name = trim( substr( $arg, 0, $eqpos ) );
        $assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
    }

    return $assocArgs;
}
// NOTE(review): several source lines are absent from this listing (the
// embedded numbers jump, e.g. 135 -> 141, 150 -> 155 and 164 -> 171), so only
// part of the control flow and of the returned array is visible here -
// confirm details against the full file.
123 * Static function to get a template
124 * Can be overridden via ParserOptions::setTemplateCallback().
// $title is used as an object with getArticleID(), getNamespace() and
// getText() (presumably a Title - confirm). $parser is only forwarded to the
// hook below and defaults to false.
126 static function statelessFetchTemplate( $title, $parser=false ) {
127 $text = $skip = false;
128 $finalTitle = $title;
131 // Loop to fetch the article, with up to 1 redirect
// Runs at most twice and stops early once $title is no longer an object.
132 for ( $i = 0; $i < 2 && is_object( $title ); $i++
) {
133 # Give extensions a chance to select the revision instead
134 $id = false; // Assume current
// $title, $skip and $id are handed to the hook by reference, so handlers can
// change them.
135 wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );
141 'page_id' => $title->getArticleID(),
// A truthy $id selects that revision; otherwise the revision is looked up
// from the title.
145 $rev = $id ? Revision
::newFromId( $id ) : Revision
::newFromTitle( $title );
// 0 stands in for "no revision found".
146 $rev_id = $rev ?
$rev->getId() : 0;
147 // If there is no current revision, there is no page
148 if( $id === false && !$rev ) {
149 $linkCache = LinkCache
::singleton();
150 $linkCache->addBadLinkObj( $title );
155 'page_id' => $title->getArticleID(),
156 'rev_id' => $rev_id );
159 $text = $rev->getText();
// Fallback for titles in the MediaWiki namespace: use the message whose key
// is the title text with its first letter lower-cased.
// NOTE(review): $wgLang is used below but no "global" declaration is visible
// in this listing (source line 161 is absent) - confirm.
160 } elseif( $title->getNamespace() == NS_MEDIAWIKI
) {
162 $message = $wgLang->lcfirst( $title->getText() );
163 $text = wfMsgForContentNoTrans( $message );
164 if( wfEmptyMsg( $message, $text ) ) {
171 if ( $text === false ) {
// Remember the title that was just fetched, then replace $title with the
// result of parsing $text as a redirect. The loop condition ends the loop
// when that result is not an object (presumably meaning "not a redirect" -
// confirm against Title::newFromRedirect).
175 $finalTitle = $title;
176 $title = Title
::newFromRedirect( $text );
180 'finalTitle' => $finalTitle,
/**
 * Process [[ ]] wikilinks
 *
 * Scans $s for "[[" and "]]". Opening offsets are kept on a stack, so a
 * closing bracket always pairs with the most recent unmatched opening one.
 * Every pair whose target text passes the plain-title regex is swapped for a
 * "<!-- LINKMARKER n -->" placeholder. Once the scan is finished the
 * LinkMarkerReplacer expands the placeholders through
 * replaceInternalLinksCallback().
 *
 * @param string &$s Wikitext; rewritten in place
 * @return LinkHolderArray The holder array created for this pass
 * @throws MWException If $this->mTitle is null
 */
function replaceInternalLinks2( &$s ) {
    global $wgContLang;

    wfProfileIn( __METHOD__ );

    wfProfileIn( __METHOD__.'-setup' );
    static $tc = false, $titleRegex;
    if ( !$tc ) {
        # the % is needed to support urlencoded titles as well
        $tc = Title::legalChars() . '#%';
        # Match a valid plain title
        $titleRegex = "/^([{$tc}]+)$/sD";
    }

    # NOTE(review): $sk, $nottalk and $selflink are never read in this
    # method; the calls are kept in case they matter - candidates for removal.
    $sk = $this->mOptions->getSkin();
    $holders = new LinkHolderArray( $this );

    if ( is_null( $this->mTitle ) ) {
        # Close the profiling sections innermost-first before bailing out
        wfProfileOut( __METHOD__.'-setup' );
        wfProfileOut( __METHOD__ );
        throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
    }
    $nottalk = !$this->mTitle->isTalkPage();

    if ( $wgContLang->hasVariants() ) {
        $selflink = $wgContLang->convertLinkToAllVariants( $this->mTitle->getPrefixedText() );
    } else {
        $selflink = array( $this->mTitle->getPrefixedText() );
    }
    wfProfileOut( __METHOD__.'-setup' );

    $offset = 0;
    $offsetStack = array();
    # Objects are handles, so the callable needs no reference to $this
    $markerReplacer = new LinkMarkerReplacer( array( $this, 'replaceInternalLinksCallback' ) );
    $markerReplacer->holders( $holders );

    while ( true ) {
        $startBracketOffset = strpos( $s, '[[', $offset );
        $endBracketOffset = strpos( $s, ']]', $offset );

        # Finish when there are no more brackets
        if ( $startBracketOffset === false && $endBracketOffset === false ) {
            break;
        }

        # Determine if the bracket is a starting or ending bracket
        if ( $startBracketOffset !== false && $endBracketOffset !== false ) {
            # When we find both, use the first one
            $isStart = $startBracketOffset <= $endBracketOffset;
        } else {
            # When we only found one, check which it is
            $isStart = $startBracketOffset !== false;
        }
        $bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset;

        if ( $isStart ) {
            /** Opening bracket **/
            # Just push our current offset in the string onto the stack
            $offsetStack[] = $startBracketOffset;
        } else {
            /** Closing bracket **/
            # Pop the start pos for our current link zone off the stack
            # (null when there is no unmatched opening bracket)
            $startBracketOffset = array_pop( $offsetStack );
            # Just to clean up the code, lets place offsets on the outer ends
            $endBracketOffset += 2;

            # Only do logic if we actually have a opening bracket for this
            if ( isset( $startBracketOffset ) ) {
                # Extract text inside the link
                $inner = substr( $s, $startBracketOffset + 2,
                    $endBracketOffset - $startBracketOffset - 4 );
                $parts = explode( '|', $inner, 2 );
                $titleText = $parts[0];
                # null (not '') when the link has no "|", because the
                # callback distinguishes the two with isset()
                $paramText = isset( $parts[1] ) ? $parts[1] : null;

                # Create markers only for valid links
                if ( preg_match( $titleRegex, $titleText ) ) {
                    # Store the text for the marker
                    $marker = $markerReplacer->addMarker( $titleText, $paramText );
                    # Replace the current link with the marker
                    $s = substr( $s, 0, $startBracketOffset ) .
                        $marker .
                        substr( $s, $endBracketOffset );
                    # We have modified $s, because of this we need to set the
                    # offset manually since the end position is different now
                    $offset = $startBracketOffset + strlen( $marker );
                    continue;
                }
                # ToDo: Some LinkHooks may allow recursive links inside of
                # the link text, create a regex that also matches our
                # <!-- LINKMARKER ### --> sequence in titles
                # ToDO: Some LinkHooks use patterns rather than namespaces
                # these need to be tested at this point here
            }
        }

        # Bump our offset to after our current bracket
        $offset = $bracketOffset + 2;
    }

    # Now expand our tree
    wfProfileIn( __METHOD__.'-expand' );
    $s = $markerReplacer->expand( $s );
    wfProfileOut( __METHOD__.'-expand' );

    wfProfileOut( __METHOD__ );
    return $holders;
}
/**
 * Expand one link marker into its final text.
 *
 * Called through LinkMarkerReplacer with the title and parameter text that
 * were stored for the marker. Targets that start with a URL protocol, or
 * that do not yield a Title, are returned as their original wikitext.
 * Everything else is handed to CoreLinkFunctions::defaultLinkHook.
 *
 * @param LinkMarkerReplacer $markerReplacer Replacer that owns the marker
 * @param string $titleText Link target text
 * @param string|null $paramText Text after the first "|", or null if none
 * @return mixed Plain wikitext, or whatever the link hook produced
 */
function replaceInternalLinksCallback( $markerReplacer, $titleText, $paramText ) {
    wfProfileIn( __METHOD__ );
    $wt = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";

    wfProfileIn( __METHOD__."-misc" );
    # Don't allow internal links to pages containing
    # PROTO: where PROTO is a valid URL protocol; these
    # should be external links.
    if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $titleText ) ) {
        # Close the inner section too, so profiling stays balanced
        wfProfileOut( __METHOD__."-misc" );
        wfProfileOut( __METHOD__ );
        return $wt;
    }

    # Make subpage if necessary
    if ( $this->areSubpagesAllowed() ) {
        $titleText = $this->maybeDoSubpageLink( $titleText, $paramText );
    }

    # Check for a leading colon and strip it if it is there
    # (substr() instead of $titleText[0]: no warning on an empty string)
    $leadingColon = substr( $titleText, 0, 1 ) === ':';
    if ( $leadingColon ) {
        $titleText = substr( $titleText, 1 );
    }
    wfProfileOut( __METHOD__."-misc" );

    # Make title object
    wfProfileIn( __METHOD__."-title" );
    $title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) );
    if ( !$title ) {
        wfProfileOut( __METHOD__."-title" );
        wfProfileOut( __METHOD__ );
        return $wt;
    }
    # NOTE(review): $ns is not read anywhere below; kept pending confirmation
    $ns = $title->getNamespace();
    wfProfileOut( __METHOD__."-title" );

    # $paramText and $leadingColon go in by reference so the hook can modify them
    $callback = array( 'CoreLinkFunctions', 'defaultLinkHook' );
    $args = array( $markerReplacer, $title, $titleText, &$paramText, &$leadingColon );
    $return = call_user_func_array( $callback, $args );

    if ( $return === false ) {
        # False (no link) was returned, output plain wikitext
        # Build it again as the hook is allowed to modify $paramText
        $return = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
    } elseif ( $return === true ) {
        # True (treat as plain link) was returned, call the defaultLinkHook
        # Objects are handles, so the callable needs no reference to $this
        $args = array( $markerReplacer, $title, $titleText, &$paramText, &$leadingColon );
        $return = call_user_func_array( array( $this, 'defaultLinkHook' ), $args );
    }

    # Content was returned, return it
    wfProfileOut( __METHOD__ );
    return $return;
}
/**
 * Hands out "<!-- LINKMARKER n -->" placeholders for link texts and later
 * expands them again by calling a user-supplied callback.
 */
class LinkMarkerReplacer
{
    protected $markers, $nextId, $holders;

    /**
     * Callable invoked for every known marker as
     * callback( $this, $titleText, $paramText ).
     * Declared public because it was previously created as a dynamic
     * (and therefore public) property.
     */
    public $callback;

    /**
     * @param callable $callback Receives ( LinkMarkerReplacer, $titleText, $paramText )
     */
    function __construct( $callback ) {
        # Ids must be numeric: findMarker() and callback() only accept digits
        $this->nextId = 0;
        $this->markers = array();
        $this->callback = $callback;
        $this->holders = null;
    }

    # Note: This is a bit of an ugly way to do this. It works for now, but before
    # this feature becomes usable we should come up with a better arg list.
    # $parser, $holders, and $linkMarkers appear to be 3 needed ones
    /**
     * Accessor for the holders value; delegates to wfSetVar().
     */
    function holders( $holders = null ) {
        return wfSetVar( $this->holders, $holders );
    }

    /**
     * Remember a link and return the placeholder that stands in for it.
     *
     * @param string $titleText
     * @param string|null $paramText
     * @return string "<!-- LINKMARKER n -->"
     */
    function addMarker( $titleText, $paramText ) {
        $id = $this->nextId++;
        $this->markers[$id] = array( $titleText, $paramText );
        return "<!-- LINKMARKER $id -->";
    }

    /**
     * @param string $string
     * @return bool Whether $string contains at least one placeholder
     */
    function findMarker( $string ) {
        return (bool) preg_match( '/<!-- LINKMARKER [0-9]+ -->/', $string );
    }

    /**
     * Replace every placeholder in $string with the result of callback().
     *
     * @param string $string
     * @return string
     */
    function expand( $string ) {
        # Objects are handles, so the callable needs no reference to $this
        return StringUtils::delimiterReplaceCallback(
            "<!-- LINKMARKER ", " -->", array( $this, 'callback' ), $string );
    }

    /**
     * Expand a single placeholder match.
     *
     * @param array $m $m[0] is the whole placeholder; $m[1] is assumed to be
     *                 the text between the delimiters (the id) - confirm
     *                 against StringUtils::delimiterReplaceCallback
     * @return mixed Callback result, or $m[0] unchanged for unknown markers
     */
    function callback( $m ) {
        $rawId = isset( $m[1] ) ? $m[1] : '';
        # Only pure digit ids were ever handed out; leave anything else alone
        if ( !preg_match( '/^[0-9]+$/D', $rawId ) ) {
            return $m[0];
        }
        $id = (int)$rawId;
        if ( !array_key_exists( $id, $this->markers ) ) {
            return $m[0];
        }
        $args = $this->markers[$id];
        array_unshift( $args, $this );
        return call_user_func_array( $this->callback, $args );
    }
}