mark static
[lhc/web/wiklou.git] / includes / parser / Parser_LinkHooks.php
1 <?php
/**
 * Parser with LinkHooks experiment.
 *
 * Handles [[ ]] wikilinks in two passes: every syntactically valid link is
 * first replaced by a "<!-- LINKMARKER n -->" placeholder (see
 * replaceInternalLinks2()), then the placeholders are expanded through
 * replaceInternalLinksCallback().
 *
 * @ingroup Parser
 */
class Parser_LinkHooks extends Parser
{
	/**
	 * Update this version number when the ParserOutput format
	 * changes in an incompatible way, so the parser cache
	 * can automatically discard old data.
	 */
	const VERSION = '1.6.4';

	# Flags for Parser::setFunctionHook
	# Also available as global constants from Defines.php
	const SFH_NO_HASH = 1;
	const SFH_OBJECT_ARGS = 2;

	# Constants needed for external link processing
	# Everything except bracket, space, or control characters
	const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
	# The pattern uses the "x" modifier, so the line break inside it is ignored
	const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
		\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';

	// State constants for the definition list colon extraction
	const COLON_STATE_TEXT = 0;
	const COLON_STATE_TAG = 1;
	const COLON_STATE_TAGSTART = 2;
	const COLON_STATE_CLOSETAG = 3;
	const COLON_STATE_TAGSLASH = 4;
	const COLON_STATE_COMMENT = 5;
	const COLON_STATE_COMMENTDASH = 6;
	const COLON_STATE_COMMENTDASHDASH = 7;

	// Flags for preprocessToDom
	const PTD_FOR_INCLUSION = 1;

	// Allowed values for $this->mOutputType
	// Parameter to startExternalParse().
	const OT_HTML = 1;
	const OT_WIKI = 2;
	const OT_PREPROCESS = 3;
	// NOTE(review): same value as OT_PREPROCESS; kept as-is, confirm it is intentional
	const OT_MSG = 3;

	// Marker suffix needs to be accessible statically.
	const MARKER_SUFFIX = "-QINU\x7f";

	/**
	 * Replace unusual URL escape codes with their equivalent characters
	 *
	 * @param $url string URL possibly containing %XX escapes
	 * @return string URL with the "safe" escapes decoded
	 * @static
	 * @todo This can merge genuinely required bits in the path or query string,
	 *       breaking legit URLs. A proper fix would treat the various parts of
	 *       the URL differently; as a workaround, just use the output for
	 *       statistical records, not for actual linking/output.
	 */
	public static function replaceUnusualEscapes( $url ) {
		return preg_replace_callback(
			'/%[0-9A-Fa-f]{2}/',
			array( __CLASS__, 'replaceUnusualEscapesCallback' ),
			$url
		);
	}

	/**
	 * Callback function used in replaceUnusualEscapes().
	 * Replaces unusual URL escape codes with their equivalent character
	 *
	 * @param $matches array preg match; $matches[0] is the "%XX" sequence
	 * @return string the decoded character, or the untouched escape sequence
	 * @static
	 * @private
	 */
	private static function replaceUnusualEscapesCallback( $matches ) {
		$char = urldecode( $matches[0] );
		$ord = ord( $char );
		// Is it an unsafe or HTTP reserved character according to RFC 1738?
		if ( $ord > 32 && $ord < 127 && strpos( '<>"#{}|\^~[]`;/?', $char ) === false ) {
			// No, shouldn't be escaped
			return $char;
		}
		// Yes, leave it escaped
		return $matches[0];
	}

	/**
	 * Return a three-element array: leading whitespace, string contents,
	 * trailing whitespace
	 *
	 * @param $s string
	 * @return array ( leading whitespace, trimmed contents, trailing whitespace )
	 */
	public static function splitWhitespace( $s ) {
		$ltrimmed = ltrim( $s );
		// Whatever ltrim() removed is the leading whitespace
		$w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
		$trimmed = rtrim( $ltrimmed );
		$diff = strlen( $ltrimmed ) - strlen( $trimmed );
		// substr( ..., -0 ) would return the whole string, so guard the empty case
		$w2 = $diff > 0 ? substr( $ltrimmed, -$diff ) : '';
		return array( $w1, $trimmed, $w2 );
	}

	/**
	 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
	 *
	 * Arguments without "=" get consecutive numeric keys starting at 1;
	 * "name=value" arguments are stored under the trimmed name with the
	 * trimmed value.
	 *
	 * @param $args array list of raw argument strings
	 * @return array associative argument array
	 */
	public static function createAssocArgs( $args ) {
		$assocArgs = array();
		$index = 1;
		foreach ( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
				continue;
			}
			// trim() always yields a string, so the former "=== false" /
			// "!== false" checks on these values could never change the outcome.
			$name = trim( substr( $arg, 0, $eqpos ) );
			$value = trim( substr( $arg, $eqpos + 1 ) );
			$assocArgs[$name] = $value;
		}

		return $assocArgs;
	}

	/**
	 * Static function to get a template
	 * Can be overridden via ParserOptions::setTemplateCallback().
	 *
	 * @param $title Title of the template to fetch
	 * @param $parser Parser|false passed through to the
	 *        BeforeParserFetchTemplateAndtitle hook
	 * @return array with keys 'text' (string or false), 'finalTitle' and
	 *         'deps' (list of arrays with 'title', 'page_id', 'rev_id')
	 */
	public static function statelessFetchTemplate( $title, $parser=false ) {
		$text = $skip = false;
		$finalTitle = $title;
		$deps = array();

		// Loop to fetch the article, with up to 1 redirect
		for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
			# Give extensions a chance to select the revision instead
			$id = false; // Assume current
			wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) );

			if ( $skip ) {
				$text = false;
				$deps[] = array(
					'title' => $title,
					'page_id' => $title->getArticleID(),
					'rev_id' => null );
				break;
			}
			$rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title );
			$rev_id = $rev ? $rev->getId() : 0;
			// If there is no current revision, there is no page
			if ( $id === false && !$rev ) {
				$linkCache = LinkCache::singleton();
				$linkCache->addBadLinkObj( $title );
			}

			$deps[] = array(
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => $rev_id );

			if ( $rev ) {
				$text = $rev->getText();
			} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
				global $wgLang;
				$message = $wgLang->lcfirst( $title->getText() );
				$text = wfMsgForContentNoTrans( $message );
				if ( wfEmptyMsg( $message, $text ) ) {
					$text = false;
					break;
				}
			} else {
				break;
			}
			if ( $text === false ) {
				break;
			}
			// Redirect?
			$finalTitle = $title;
			$title = Title::newFromRedirect( $text );
		}
		return array(
			'text' => $text,
			'finalTitle' => $finalTitle,
			'deps' => $deps );
	}

	/**
	 * Process [[ ]] wikilinks
	 *
	 * Scans $s for "[[" / "]]" pairs, replaces every link whose title part
	 * consists only of legal title characters with a marker, then expands all
	 * markers through replaceInternalLinksCallback().
	 *
	 * @param $s string wikitext, modified in place
	 * @return LinkHolderArray
	 * @throws MWException when $this->mTitle is null
	 *
	 * @private
	 */
	public function replaceInternalLinks2( &$s ) {
		global $wgContLang;

		wfProfileIn( __METHOD__ );

		wfProfileIn( __METHOD__.'-setup' );
		static $tc = false, $titleRegex;
		if ( !$tc ) {
			# the % is needed to support urlencoded titles as well
			$tc = Title::legalChars() . '#%';
			# Match a valid plain title
			$titleRegex = "/^([{$tc}]+)$/sD";
		}

		// NOTE(review): $sk, $nottalk and $selflink are not read anywhere else in
		// this method; they are kept because their initialisers call into code
		// that is not visible from here.
		$sk = $this->mOptions->getSkin();
		$holders = new LinkHolderArray( $this );

		if ( is_null( $this->mTitle ) ) {
			// Close the profiling sections innermost-first before bailing out
			wfProfileOut( __METHOD__.'-setup' );
			wfProfileOut( __METHOD__ );
			throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
		}
		$nottalk = !$this->mTitle->isTalkPage();

		if ( $wgContLang->hasVariants() ) {
			$selflink = $wgContLang->convertLinkToAllVariants( $this->mTitle->getPrefixedText() );
		} else {
			$selflink = array( $this->mTitle->getPrefixedText() );
		}
		wfProfileOut( __METHOD__.'-setup' );

		$offset = 0;
		// Stack of "[[" positions that have not been closed yet
		$offsetStack = array();
		$markerReplacer = new LinkMarkerReplacer( array( $this, 'replaceInternalLinksCallback' ) );
		$markerReplacer->holders( $holders );
		while ( true ) {
			$startBracketOffset = strpos( $s, '[[', $offset );
			$endBracketOffset = strpos( $s, ']]', $offset );
			if ( $startBracketOffset === false && $endBracketOffset === false ) {
				# Finish when there are no more brackets
				break;
			} elseif ( $startBracketOffset !== false && $endBracketOffset !== false ) {
				# When we find both, use the first one
				$isStart = $startBracketOffset <= $endBracketOffset;
			} else {
				# When we only found one, check which it is
				$isStart = $startBracketOffset !== false;
			}
			$bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset;

			if ( $isStart ) {
				/** Opening bracket **/
				# Just push our current offset in the string onto the stack
				$offsetStack[] = $startBracketOffset;
			} else {
				/** Closing bracket **/
				# Pop the start pos for our current link zone off the stack
				# (null when there is no matching opening bracket)
				$startBracketOffset = array_pop( $offsetStack );
				# Just to clean up the code, lets place offsets on the outer ends
				$endBracketOffset += 2;

				# Only do logic if we actually have a opening bracket for this
				if ( isset( $startBracketOffset ) ) {
					# Extract text inside the link; $paramText is null when
					# the link contains no pipe
					$inner = substr(
						$s,
						$startBracketOffset + 2,
						$endBracketOffset - $startBracketOffset - 4
					);
					list( $titleText, $paramText ) = array_pad( explode( '|', $inner, 2 ), 2, null );
					# Create markers only for valid links
					if ( preg_match( $titleRegex, $titleText ) ) {
						# Store the text for the marker
						$marker = $markerReplacer->addMarker( $titleText, $paramText );
						# Replace the current link with the marker
						$s = substr( $s, 0, $startBracketOffset ) .
							$marker .
							substr( $s, $endBracketOffset );
						# We have modified $s, because of this we need to set the
						# offset manually since the end position is different now
						$offset = $startBracketOffset + strlen( $marker );
						continue;
					}
					# ToDo: Some LinkHooks may allow recursive links inside of
					#       the link text, create a regex that also matches our
					#       <!-- LINKMARKER ### --> sequence in titles
					# ToDo: Some LinkHooks use patterns rather than namespaces
					#       these need to be tested at this point here
				}
			}
			# Bump our offset to after our current bracket
			$offset = $bracketOffset + 2;
		}

		# Now expand our tree
		wfProfileIn( __METHOD__.'-expand' );
		$s = $markerReplacer->expand( $s );
		wfProfileOut( __METHOD__.'-expand' );

		wfProfileOut( __METHOD__ );
		return $holders;
	}

	/**
	 * Expand a single link marker created by replaceInternalLinks2().
	 *
	 * @param $markerReplacer LinkMarkerReplacer the replacer expanding the marker
	 * @param $titleText string text before the first pipe of the link
	 * @param $paramText string|null text after the first pipe, null if none
	 * @return mixed the original wikitext when the link is rejected, otherwise
	 *         whatever the link hook returned
	 */
	public function replaceInternalLinksCallback( $markerReplacer, $titleText, $paramText ) {
		wfProfileIn( __METHOD__ );
		$wt = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
		wfProfileIn( __METHOD__."-misc" );
		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $titleText ) ) {
			// Close the nested section too, otherwise the profiler stack is unbalanced
			wfProfileOut( __METHOD__."-misc" );
			wfProfileOut( __METHOD__ );
			return $wt;
		}

		# Make subpage if necessary
		if ( $this->areSubpagesAllowed() ) {
			$titleText = $this->maybeDoSubpageLink( $titleText, $paramText );
		}

		# Check for a leading colon and strip it if it is there
		# (strncmp() avoids an invalid offset access on an empty string)
		$leadingColon = strncmp( $titleText, ':', 1 ) === 0;
		if ( $leadingColon ) {
			$titleText = substr( $titleText, 1 );
		}

		wfProfileOut( __METHOD__."-misc" );
		# Make title object
		wfProfileIn( __METHOD__."-title" );
		$title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) );
		if ( !$title ) {
			wfProfileOut( __METHOD__."-title" );
			wfProfileOut( __METHOD__ );
			return $wt;
		}
		// NOTE(review): $ns is not read anywhere else in this method
		$ns = $title->getNamespace();
		wfProfileOut( __METHOD__."-title" );

		$callback = array( 'CoreLinkFunctions', 'defaultLinkHook' );
		// $paramText and $leadingColon are passed by reference so the hook can change them
		$args = array( $markerReplacer, $title, $titleText, &$paramText, &$leadingColon );
		$return = call_user_func_array( $callback, $args );
		if ( $return === false ) {
			# False (no link) was returned, output plain wikitext
			# Build it again as the hook is allowed to modify $paramText
			$return = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
		} elseif ( $return === true ) {
			# True (treat as plain link) was returned, call the defaultLinkHook
			# NOTE(review): no defaultLinkHook() method is defined in this class;
			# presumably it is expected on a parent class - confirm.
			$args = array( $markerReplacer, $title, $titleText, &$paramText, &$leadingColon );
			$return = call_user_func_array( array( $this, 'defaultLinkHook' ), $args );
		}
		# Content was returned, return it
		wfProfileOut( __METHOD__ );
		return $return;
	}

}
342
/**
 * Keeps link texts behind numbered "<!-- LINKMARKER n -->" placeholders and
 * later replaces each placeholder with the result of a callback.
 */
class LinkMarkerReplacer {

	protected $markers, $nextId, $holders;

	/**
	 * Callable invoked by callback() for every known marker, as
	 * ( $this, $titleText, $paramText ).
	 *
	 * Declared explicitly so that the constructor no longer creates a dynamic
	 * property; kept public because the implicitly created property was public.
	 */
	public $callback;

	/**
	 * @param $callback callable receives ( LinkMarkerReplacer, $titleText, $paramText )
	 *        and returns the replacement for one marker
	 */
	function __construct( $callback ) {
		$this->nextId = 0;
		$this->markers = array();
		$this->callback = $callback;
		$this->holders = null;
	}

	# Note: This is a bit of an ugly way to do this. It works for now, but before
	# this feature becomes usable we should come up with a better arg list.
	# $parser, $holders, and $linkMarkers appear to be 3 needed ones
	/**
	 * Accessor for the holders member; delegates to wfSetVar().
	 *
	 * @param $holders mixed new value (default null)
	 * @return mixed whatever wfSetVar() returns
	 */
	function holders( $holders = null ) {
		return wfSetVar( $this->holders, $holders );
	}

	/**
	 * Remember a link and hand out the placeholder that stands for it.
	 *
	 * @param $titleText string
	 * @param $paramText string|null
	 * @return string marker of the form "<!-- LINKMARKER n -->"
	 */
	function addMarker( $titleText, $paramText ) {
		$id = $this->nextId++;
		$this->markers[$id] = array( $titleText, $paramText );
		return "<!-- LINKMARKER $id -->";
	}

	/**
	 * Tell whether a string contains at least one link marker.
	 *
	 * @param $string string
	 * @return bool
	 */
	static function findMarker( $string ) {
		return (bool) preg_match( '/<!-- LINKMARKER [0-9]+ -->/', $string );
	}

	/**
	 * Replace every marker in $string via callback().
	 *
	 * @param $string string
	 * @return string
	 */
	function expand( $string ) {
		// Objects are passed by handle, so no reference to $this is needed
		return StringUtils::delimiterReplaceCallback(
			"<!-- LINKMARKER ",
			" -->",
			array( $this, 'callback' ),
			$string
		);
	}

	/**
	 * Replacement callback used by expand().
	 *
	 * @param $m array $m[0] is the complete marker, $m[1] the text between
	 *        the delimiters (the marker id)
	 * @return mixed result of the user callback, or $m[0] unchanged when the
	 *         id is unknown
	 */
	function callback( $m ) {
		$id = intval( $m[1] );
		if ( !array_key_exists( $id, $this->markers ) ) {
			return $m[0];
		}
		// Call as ( $this, $titleText, $paramText )
		$args = $this->markers[$id];
		array_unshift( $args, $this );
		return call_user_func_array( $this->callback, $args );
	}

}