Fixed $wgCacheEpoch handling
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
7 }
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
10 }
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Inclusion limit referred to by the O(N^2) note above.
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for Parser::$mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for Parser::extractTags(): when passed as $tag it
# makes that function extract <!-- HTML comments --> instead of a
# regular <XML>-style tag. Must never collide with real wiki syntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Common prefix of the unique markers that stand in for stripped text.
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor (PHP 4 style: same name as the class).
# A new parser starts from the blank state set up by clearState().
function Parser()
{
	$this->clearState();
}
69
# Resets every per-parse member to its initial value: a fresh
# ParserOutput, autonumber counter at zero, no open section or
# definition term, no cached variables, and empty include-count,
# strip-state and argument stacks.
function clearState()
{
	$this->mOutput = new ParserOutput;
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	# $mInPre is declared among the members "cleared with clearState()"
	# but was never reset here, so it could leak from one parse into the
	# next (or stay undefined on a new object).
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();
}
81
82 # First pass--just handle <nowiki> sections, pass the rest off
83 # to internalParse() which does all the real work.
84 #
85 # Returns a ParserOutput
86 #
# Main HTML entry point.
#
# $text       wiki markup to convert
# $title      title object of the page being parsed (kept by reference)
# $options    ParserOptions-style object consulted by the later passes
# $linestart  passed on to internalParse() and doBlockLevels()
# $clearState when true, clearState() is called before parsing
#
# Pipeline: strip() -> internalParse() -> unstrip() -> character/tag
# clean-up -> doBlockLevels() -> tidy() (only if $wgUseTidy).
# Returns $this->mOutput with the generated text stored in it.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$frenchSpaces = array(
		# french spaces, last one Guillemet-left
		"/ (\\?|:|!|\\302\\273)/i" => "&nbsp;\\1",
		# french spaces, Guillemet-right
		"/(\\302\\253) /i" => "\\1&nbsp;"
	);
	$centerTags = array(
		"/<center *>/i" => '<div class="center">',
		"/<\\/center *>/i" => '</div>'
	);
	if ( $wgUseTidy ) {
		# With tidy enabled only these fix-ups are applied here.
		$fixtags = $frenchSpaces + $centerTags;
	} else {
		# The "+" array union keeps insertion order, so the patterns run
		# in the same sequence as before: spaces, hr/br, center, ampersands.
		$fixtags = $frenchSpaces
			+ array(
				"/<hr *>/i" => '<hr/>',
				"/<br *>/i" => '<br/>'
			)
			+ $centerTags
			+ array(
				# Clean up spare ampersands; note that we probably ought to be
				# more careful about named entities.
				# Fixed: the lookahead used to start with a stray ":" and the
				# hex class read "A-f", which also accepted G-Z and punctuation.
				'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
			);
	}
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	if ( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
141
# Returns two random numbers in the range 0..0x7fffffff, each written
# in lower-case hex and glued together. Used to make strip markers
# unlikely to occur in page text.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
146
147 # Replaces all occurrences of <$tag>content</$tag> in the text
148 # with a random marker and returns the new text. the output parameter
149 # $content will be an associative array filled with data on the form
150 # $unique_marker => content.
151
152 # If $content is already set, the additional entries will be appended
153
154 # If $tag is set to STRIP_COMMENTS, the function will extract
155 # <!-- HTML comments -->
156
# $tag          tag name, or STRIP_COMMENTS to extract HTML comments
# $text         text to scan
# $content      in/out map of marker => extracted inner text; created
#               when empty, appended to otherwise
# $uniq_prefix  string put in front of every generated marker
#
# Returns $text with each tagged section replaced by its marker.
# A section whose closing tag is missing extends to the end of the text.
# An opening tag with nothing after it is dropped (original behaviour).
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}

	# The delimiters do not change while scanning, so build them once.
	if ( $tag == STRIP_COMMENTS ) {
		$openRegex = "/<!--/i";
		$closeRegex = "/-->/i";
	} else {
		$openRegex = "/<\\s*$tag\\s*>/i";
		$closeRegex = "/<\\/\\s*$tag\\s*>/i";
	}

	$n = 1;
	$stripped = "";

	while ( "" != $text ) {
		$p = preg_split( $openRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No (further) opening tag, or nothing follows it: done.
			$text = "";
		} else {
			$q = preg_split( $closeRegex, $p[1], 2 );
			$marker = $rnd . sprintf( "%08X", $n++ );
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag preg_split() yields a single element;
			# reading $q[1] then was an undefined-offset access.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
188
189 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
190 # If $render is set, performs necessary rendering operations on plugins
191 # Returns the text, and fills an array with data needed in unstrip()
192 # If the $state is already a valid strip state, it adds to the state
193
194 # When $stripcomments is set, HTML comments <!-- like this -->
195 # will be stripped in addition to other tags. This is important
196 # for section editing, where these comments cause confusion when
197 # counting the sections in the wikisource
# $text           text to process
# $state          in/out strip state: map of bucket name ('nowiki',
#                 'hiero', 'timeline', 'math', 'pre', 'comment') to
#                 marker => replacement arrays; merged into if non-empty
# $stripcomments  also extract <!-- HTML comments -->
#
# Rendering (as opposed to re-wrapping the content in its original tag)
# happens only when $this->mOutputType is OT_HTML.
# Returns the text with all extracted sections replaced by markers.
function strip( $text, &$state, $stripcomments = false )
{
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# NOTE(review): when the hiero/timeline extensions are disabled the
	# raw content is put back unescaped even in render mode - confirm
	# that this is intended.
	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseWikiHiero']){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseTimeline']){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source literally. Fixed: the
				# closing tag lacked its slash, and the content is now
				# escaped like <nowiki> because it is re-inserted by
				# unstrip() after removeHTMLtags() has already run.
				$math_content[$marker] = "&lt;math&gt;" .
					wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	$extracted = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $extracted as $type => $items ) {
			# A state first created by insertStripItem() may lack some
			# buckets; "missing + array" is an unsupported-operand error.
			if ( isset( $state[$type] ) && is_array( $state[$type] ) ) {
				$state[$type] = $state[$type] + $items;
			} else {
				$state[$type] = $items;
			}
		}
		# unstrip() expands buckets in reverse order and inserted items
		# may contain markers of the other buckets, so 'item' must stay
		# the last bucket even if buckets were appended above.
		if ( isset( $state['item'] ) ) {
			$items = $state['item'];
			unset( $state['item'] );
			$state['item'] = $items;
		}
	} else {
		$state = $extracted;
	}
	return $text;
}
287
# Puts the stripped sections back into $text.
#
# $text   text containing markers
# $state  strip state as built by strip() / insertStripItem():
#         bucket name => ( marker => replacement )
#
# Returns the text with every known marker replaced. A state that is
# not an array leaves the text unchanged.
function unstrip( $text, &$state )
{
	if ( !is_array( $state ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# foreach replaces the former end()/prev() walk, which called end()
	# twice and would have stopped early on a stored value of false.
	foreach ( array_reverse( $state, true ) as $contentDict ) {
		if ( !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
300
301 # Add an item to the strip state
302 # Returns the unique tag which must be inserted into the stripped text
303 # The tag will be replaced with the original text in unstrip()
304
# $text   replacement text to store
# $state  in/out strip state; initialised with empty buckets if empty
#
# Stores $text in the 'item' bucket under a new random marker and
# returns that marker.
function insertStripItem( $text, &$state )
{
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Create every bucket strip() merges into. 'timeline' and
		# 'comment' used to be missing, which broke a later strip() call.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'item' => array()
		);
	}
	if ( !isset( $state['item'] ) ) {
		$state['item'] = array();
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
319
320 # This method generates the list of subcategories and pages for a category
# Returns an HTML fragment listing the subcategories and pages linked
# to the current title through the categorylinks table, or "" when
# category magic is disabled or the current title is not in the
# category namespace.
function categoryMagic ()
{
	global $wgLang ;
	# Always return a string (this branch used to return nothing).
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	# Take the skin from the parser options like the other methods do;
	# the file header asks to keep $wgUser out of the parser.
	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	# Rows are buffered first and turned into links afterwards.
	while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			$children[] = $sk->makeLink ( $t ) ; # Subcategory
		} else {
			$articles[] = $sk->makeLink ( $t ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
377
# Returns the list of lower-case attribute names that
# fixTagAttributes() lets through; anything else is removed.
# The list contains no scripting attributes.
function getHTMLattrs ()
{
	$htmlattrs = array(
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", /* BR */ "noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
		/* FONT */ "type", "start", "value", "compact",
		/* For various lists, mostly deprecated but safe */
		# "width" was listed a second time here; the duplicate is gone.
		"summary", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan", /* Tables */
		"id", "class", "name", "style" /* For CSS */
	);
	return $htmlattrs ;
}
394
# $t  raw attribute text of an HTML/table tag
#
# Returns the attribute text with every attribute that is not in
# getHTMLattrs() removed, trimmed. Returns "" when the input is blank
# or when a style attribute looks like it carries script or a URL.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used the /e modifier, which evaluated text built from the
	# user-supplied attributes as PHP code; a callback does the same
	# filtering without evaluating anything.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (if present) its value including
# any surrounding double quotes. Returns name, name=value, or "" for a
# name that is not whitelisted.
/* private */ function fixTagAttributesCallback( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	if ( isset( $m[3] ) && $m[3] != "" ) {
		return $m[1] . "=" . $m[3];
	}
	return $m[1];
}
417
418 /* interface with html tidy, used if $wgUseTidy = true */
# Interface with html tidy, used if $wgUseTidy = true.
#
# $text  HTML fragment to clean
#
# Wraps the fragment in a minimal XHTML document, pipes it through the
# $wgTidyBin executable and returns whatever tidy prints. If tidy
# prints nothing for a non-empty fragment, the original fragment is
# returned followed by an HTML comment noting the failure.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	# Work on a copy: appending to the global added another encoding
	# flag on every call.
	$opts = $wgTidyOpts;
	$cleansource = '';
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	# Keep $text untouched so the failure path can return the fragment
	# itself rather than the wrapper document.
	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),
		1 => array("pipe", "w"),
		2 => array("file", "/dev/null", "a")
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
465
# Converts wiki table markup to HTML tables, line by line:
#   {| attrs    opens a table
#   |}          closes the innermost table
#   |- attrs    starts a new row (any number of dashes)
#   |+ text     caption
#   | or !      data / header cells; "||" (or "!!" on header lines)
#               separates cells on one line, "attrs|text" sets attributes
# Lines outside any table are left untouched. Tables still open at the
# end of the text are closed. Four parallel stacks track the state of
# each nesting level.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Attributes start right after the two-character "{|";
			# offset 3 swallowed the first letter of "{|attr=...".
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is emitted lazily by the first cell; only
			# its attributes are remembered here.
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Cells need an open row; captions do not.
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell of this table, if any.
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that opened it (td, th or
		# caption); this used to be a hard-coded </td>.
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
565
566 # Parses the text and adds the result to the strip state
567 # Returns the strip tag
# $text       wiki markup to parse
# $linestart  passed to internalParse(); when true the parsed text is
#             stored with a leading newline
# $args       passed to internalParse()
#
# Runs strip() and a non-main internalParse() on $text, stores the
# result as a strip item in $this->mStripState and returns its marker.
function stripParse( $text, $linestart, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, $linestart, $args, false );
	$stored = $linestart ? "\n" . $parsed : $parsed;
	return $this->insertStripItem( $stored, $this->mStripState );
}
577
# Runs the inline transformation passes over already stripped text.
#
# $text       stripped wiki markup
# $linestart  accepted for the callers' sake; not used in this method
# $args       handed to replaceVariables()
# $isMain     handed to formatHeadings()
#
# The order of the passes is significant. Returns the transformed text.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$fname = "Parser::internalParse";
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a rule.
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	# Internal links are deliberately processed in two passes.
	# NOTE(review): presumably so that links nested in another link's
	# text get resolved - confirm before collapsing into one call.
	for ( $pass = 0; $pass < 2; ++$pass ) {
		$text = $this->replaceInternalLinks ( $text );
	}
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# The category listing is appended by the first call only.
	# NOTE(review): the flag is not reset anywhere in the visible code,
	# so a reused Parser object would skip it on later pages - verify.
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
613
614
# Turns "== text ==" style lines into <hN> elements, N being the number
# of equals signs (1 to 6). The deepest level is tried first so that
# "===" is not mistaken for "==" plus content.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
624
# Applies doQuotes() to every line of $text separately, so quote
# markup never spans a line break, and joins the lines again.
/* private */ function doAllQuotes( $text )
{
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( "", $line, "" );
	}
	return implode( "\n", $converted );
}
634
# Recursive worker for '' (emphasis) and ''' (strong) markup.
#
# $pre   text collected so far that belongs inside the open element
# $text  remaining text of the line
# $mode  open markup: "" (none), "em", "strong", "emstrong",
#        "strongem" or "both" (pairs name the outer element first;
#        "both" means the order is not decided yet)
#
# Returns the HTML for $pre and $text.
/* private */ function doQuotes( $pre, $text, $mode )
{
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No more quote markup: close whatever is still open.
		$text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
		$text_em = ($text == "") ? "" : "<em>{$text}</em>";
		switch ( $mode ) {
			case "":
				return $pre . $text;
			case "em":
				return $pre . $text_em;
			case "strong":
				return $pre . $text_strong;
			case "strongem":
				return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
			default:
				return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
		}
	}

	$before = $m[1];
	$before_strong = ($before == "") ? "" : "<strong>{$before}</strong>";
	$before_em = ($before == "") ? "" : "<em>{$before}</em>";

	if ( substr ($m[2], 0, 1) == "'" ) {
		# Three apostrophes: strong toggle.
		$rest = substr ($m[2], 1);
		switch ( $mode ) {
			case "em":
				return $this->doQuotes ( $before, $rest, ($before == "") ? "both" : "emstrong" );
			case "strong":
				return $before_strong . $this->doQuotes ( "", $rest, "" );
			case "emstrong":
			case "both":
				return $this->doQuotes ( "", $pre.$before_strong.$rest, "em" );
			case "strongem":
				return "<strong>{$pre}{$before_em}</strong>" . $this->doQuotes ( "", $rest, "em" );
			default:
				return $before . $this->doQuotes ( "", $rest, "strong" );
		}
	}

	# Two apostrophes: emphasis toggle.
	$rest = $m[2];
	switch ( $mode ) {
		case "strong":
			return $this->doQuotes ( $before, $rest, ($before == "") ? "both" : "strongem" );
		case "em":
			return $before_em . $this->doQuotes ( "", $rest, "" );
		case "emstrong":
			return "<em>{$pre}{$before_strong}</em>" . $this->doQuotes ( "", $rest, "strong" );
		case "strongem":
		case "both":
			return $this->doQuotes ( "", $pre.$before_em.$rest, "strong" );
		default:
			return $before . $this->doQuotes ( "", $rest, "em" );
	}
}
682
683 # Note: we have to do external links before the internal ones,
684 # and otherwise take great care in the order of things here, so
685 # that we don't end up interpreting some URLs twice.
686
# Runs subReplaceExternalLinks() once per supported URL scheme, in a
# fixed order. Only http and https are processed with the autonumber
# flag set.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	# scheme => autonumber flag
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
701
# Replaces external links of one URL scheme.
#
# $s           text to process
# $protocol    scheme without the colon, e.g. "http"
# $autonumber  when true, bracketed links without a description get a
#              running number as link text, and (if the options allow
#              external images) bare image URLs become inline images
#
# First pass: bare URLs not preceded by "[". Second pass: bracketed
# "[url]" and "[url description]" forms. Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the scheme, so URLs written into the output by the
	# first pass are not matched again while that pass is running.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$barePattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
	}

	# The link HTML is built once with backreferences in place of the URL.
	$placeholderUrl = "{$unique}:\\3";
	$escapedUrl = wfEscapeHTML( $placeholderUrl );
	$linkAttributes = $sk->getExternalLinkAttributes( $placeholderUrl, $escapedUrl );
	$s = preg_replace( $barePattern, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$linkAttributes . ">" . $escapedUrl .
		"</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split on "[protocol:". The space is prepended and
	# removed again so the first chunk is always the text before any link.
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	$plainForm = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$describedForm = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $a as $line ) {
		if ( preg_match( $plainForm, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $describedForm, $line, $m ) ) {
			# [url description]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed link: put the chunk back unchanged.
			$s .= "[{$protocol}:" . $line;
			continue;
		}

		$paren = "";
		if( !( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) ) {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
768
769
# Replaces [[...]] links in $s and returns the resulting text.
#
# Depending on the target this produces a normal link, an inline image,
# a media link, a known link for special pages, or bold text for a link
# to the current page. Category and interlanguage links produce no
# inline link; they are recorded in $this->mOutput instead.
# Chunks that do not form a valid link are copied through unchanged.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages;
	static $fname = "Parser::replaceInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# Split on "[[". The space is prepended and removed again so the
	# first chunk is always the text in front of the first link.
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	# NOTE(review): the prefix is taken once from the text in front of
	# the first link and then reused for every link in the loop below;
	# presumably it should be recomputed per link - verify.
	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	# Every "continue" below used to call wfProfileOut( $fname ) as
	# well, closing a profiling section that is opened once and closed
	# again at the end of the function. Those calls are removed.
	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a usable title: copy the markup through.
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Interlanguage link: recorded, not rendered inline.
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				# Sort key: the link text, or this page's title if none given.
				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a "#" at the very start; the loose
		# "== FALSE" treated that as "no fragment", so use ===.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
925
926 # Some functions here used by doBlockLevels()
927 #
# Returns the closing tag (plus newline) for the element named in
# $this->mLastSection, or "" if none is recorded, and resets both
# mLastSection and mInPre.
/* private */ function closeParagraph()
{
	$closing = ( '' != $this->mLastSection )
		? "</" . $this->mLastSection . ">\n"
		: "";
	$this->mInPre = false;
	$this->mLastSection = "";
	return $closing;
}
938 # getCommon() returns the length of the longest common substring
939 # of both arguments, starting at the beginning of both.
940 #
# $st1, $st2  strings to compare
#
# Returns the number of leading characters (bytes) the two strings
# have in common; 0 if either string is empty.
/* private */ function getCommon( $st1, $st2 )
{
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square brackets instead of the curly-brace string offset syntax,
	# which later PHP versions no longer accept.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
952 # These next three functions open, continue, and close the list
953 # element appropriate to the prefix character passed into them.
954 #
# $char  list prefix character: "*", "#", ":" or ";"
#
# Closes the current paragraph and returns the markup that opens the
# matching list together with its first item. For ";" the definition
# term is marked as open. Any other character yields an error comment.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	if ( "*" == $char ) { $result .= "<ul><li>"; }
	else if ( "#" == $char ) { $result .= "<ol><li>"; }
	else if ( ":" == $char ) { $result .= "<dl><dd>"; }
	else if ( ";" == $char ) {
		$result .= "<dl><dt>";
		$this->mDTopen = true;
	}
	# Append instead of assign: closeParagraph() has already reset the
	# section state, so discarding its closing tag left an element open.
	else { $result .= "<!-- ERR 1 -->"; }

	return $result;
}
970
# Returns the HTML that ends the current list item and starts the next one
# for the prefix character $char. For definition lists (":" and ";") the
# closing tag depends on $this->mDTopen, which is then updated to reflect
# whether a term (<dt>) is now open. Unknown characters yield "ERR 2".
/* private */ function nextItem( $char )
{
    switch ( $char ) {
        case "*":
        case "#":
            return "</li><li>";

        case ":":
        case ";":
            $startsTerm = ( ";" == $char );
            $closing = $this->mDTopen ? "</dt>" : "</dd>";
            $this->mDTopen = $startsTerm;
            return $closing . ( $startsTerm ? "<dt>" : "<dd>" );
    }
    return "<!-- ERR 2 -->";
}
987
# Returns the HTML (with trailing newline) that closes the innermost list of
# the kind selected by $char: "*", "#" or ":". For ":" an open definition
# term is closed instead of a definition and $this->mDTopen is cleared.
# Any other character yields "ERR 3" (without a newline).
/* private */function closeList( $char )
{
    if ( "*" == $char ) {
        return "</li></ul>\n";
    }
    if ( "#" == $char ) {
        return "</li></ol>\n";
    }
    if ( ":" != $char ) {
        return "<!-- ERR 3 -->";
    }

    # Definition list: the last open item is either a <dd> or a <dt>.
    if ( !$this->mDTopen ) {
        return "</dd></dl>\n";
    }
    $this->mDTopen = false;
    return "</dt></dl>\n";
}
1003
# Helper for doBlockLevels(): handles the "; term : definition" shorthand.
# If $line contains a ":" preceded by whitespace (or &nbsp;), returns
# array( term text followed by nextItem( ":" ), definition text ).
# Otherwise returns array( '', $line ) and does not call nextItem().
# FIXME: This is not foolproof. Something better in Tokenizer might help.
/* private */ function splitDefinitionTerm( $line )
{
    if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $line, $match ) ) {
        return array( $match[1] . $this->nextItem( ":" ), $match[2] );
    }
    return array( '', $line );
}

# Walks through $text line by line and generates the block-level structure:
# paragraphs, <pre> sections (lines starting with a space) and nested lists
# from lines starting with * # : ;
#
# $text:      text to process, lines separated by "\n"
# $linestart: if false, the first line is copied to the output unprocessed
# Returns the processed text.
/* private */ function doBlockLevels( $text, $linestart ) {
    $fname = "Parser::doBlockLevels";
    wfProfileIn( $fname );

    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # false, or the pending paragraph markup to emit before the next text line
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match( "/<\\/pre/i", $oLine );
        $preOpenMatch = preg_match( "/<pre/i", $oLine );
        if ( !$this->mInPre ) {
            $this->mInPre = !empty( $preOpenMatch );
        }
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, "*#:;" );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 treats ";" like ":" so that a term line and a definition
            # line are recognised as belonging to the same list.
            $pref2 = str_replace( ";", ":", $pref );
            $t = substr( $oLine, $prefixLength );
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( ";" == substr( $pref, -1 ) ) {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                list( $termHtml, $t ) = $this->splitDefinitionTerm( $t );
                $output .= $termHtml;
            }
        } elseif ( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            while ( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ";" == $char ) {
                    list( $termHtml, $t ) = $this->splitDefinitionTerm( $t );
                    $output .= $termHtml;
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if ( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
            $closematch = preg_match(
                "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
                "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                # closeParagraph() clears mInPre; restore it for an unclosed <pre>
                if ( $preOpenMatch and !$preCloseMatch ) {
                    $this->mInPre = true;
                }
                if ( $closematch ) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # The emptiness test comes first so that offset 0 is never
                # read from an empty line.
                if ( trim( $t ) != '' and " " == $t[0] ) {
                    // pre
                    if ( $this->mLastSection != 'pre' ) {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                } else {
                    // paragraph
                    if ( '' == trim( $t ) ) {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack.'<br/>';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            if ( $this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = "<p>";
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ( $this->mLastSection != 'p' ) {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        # A blank line that only queued paragraph markup is not copied to the output
        if ( $paragraphStack === false ) {
            $output .= $t."\n";
        }
    }
    # Close whatever lists the last line left open
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( "" != $this->mLastSection ) {
        $output .= "</" . $this->mLastSection . ">";
        $this->mLastSection = "";
    }

    wfProfileOut( $fname );
    return $output;
}
1162
# Returns the value for the variable identified by the magic word ID $index,
# or NULL when $index is not one of the IDs handled here.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;

    switch ( $index ) {
        case MAG_CURRENTMONTH:
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Namespace text as given by $wgLang (patch by Dori), rather than
            # Namespace::getCanonicalName().
            $value = $wgLang->getNsText( $this->mTitle->getNamespace() );
            break;
        case MAG_CURRENTDAYNAME:
            # date("w") is 0-based, hence the +1
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }

    return $value;
}
1196
# (Re)builds $this->mVariables: for every ID in $wgVariableIDs the matching
# MagicWord adds its entry, with the value from getVariableValue(), to the array.
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1206
# Replaces {{variables}}, {{{arguments}}} and {{templates}} in $text.
#
# $text: text to process
# $args: template arguments in effect for this call; pushed onto
#        $this->mArgStack for the duration of the call
# Variable and argument substitution only happen for HTML output; the
# template pass (wfBraceSubstitution) runs for every output type.
/* private */ function replaceVariables( $text, $args = array() )
{
    global $wgLang, $wgScript, $wgArticlePath;

    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }

    $titleChars = Title::legalChars();
    $nonBraceChars = str_replace( array( "{", "}" ), "", $titleChars );

    # This function is called recursively, so the arguments live on a stack
    $this->mArgStack[] = $args;

    # The callbacks are plain functions; they find this parser through the global.
    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    $variablePattern = "/{{([$nonBraceChars]*?)}}/";
    $argumentPattern = "/(\\n?){{{([$titleChars]*?)}}}/";
    $templatePattern = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";

    if ( OT_HTML == $this->mOutputType ) {
        $text = preg_replace_callback( $variablePattern, "wfVariableSubstitution", $text );
        $text = preg_replace_callback( $argumentPattern, "wfArgSubstitution", $text );
    }
    $text = preg_replace_callback( $templatePattern, "wfBraceSubstitution", $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1244
# Callback body for {{variable}} matches: returns the stored value when
# $matches[1] is a key of $this->mVariables (and flags the output as
# containing old magic), otherwise returns the matched text unchanged.
function variableSubstitution( $matches )
{
    $name = $matches[1];

    if ( !array_key_exists( $name, $this->mVariables ) ) {
        return $matches[0];
    }

    $this->mOutput->mContainsOldMagic = true;
    return $this->mVariables[$name];
}
1255
# Callback body for {{...}} matches produced by replaceVariables().
#
# $matches[1] is an optional newline before the braces, $matches[2] the part
# before the first "|" and $matches[3] the remainder (empty, or starting
# with "|"). The handlers below are tried in order; the first one that sets
# $found wins. If none does, the matched text is returned unchanged.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;
    $fname = "Parser::braceSubstitution";

    $found = false;       # a handler has produced $text
    $escapeText = false;  # MSGNW: escape the result instead of parsing it
    $skipParse = false;   # return $text without recursive parsing
    $title = NULL;

    $newline = $matches[1];
    $part1 = $matches[2];
    # Arguments are numbered from 0 and do not include $part1
    $args = ( $matches[3] !== "" ) ? explode( "|", substr( $matches[3], 1 ) ) : array();
    $argc = count( $args );
    $isHtml = ( $this->mOutputType == OT_HTML );

    # {{{}}} -- leave triple braces alone
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $skipParse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        $hasSubst = $mwSubst->matchStartAndRemove( $part1 );
        $isPreSave = ( $this->mOutputType == OT_WIKI );

        if ( $hasSubst && !$isPreSave ) {
            # Invalid SUBST not replaced at PST time
            # Return without further processing
            $text = $matches[0];
            $found = true;
            $skipParse = true;
        } elseif ( !$hasSubst && $isPreSave ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $escapeText = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 )
            && $this->incrementIncludeCount( "int:$part1" ) ) {
            $text = wfMsgReal( $part1, $args, true );
            $found = true;
        }
    }

    # NS: (namespace expansion), by number or by canonical name
    if ( !$found ) {
        $mwNs = MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            $nsIndex = intval( $part1 );
            if ( !$nsIndex ) {
                $nsIndex = Namespace::getCanonicalIndex( strtolower( $part1 ) );
            }
            if ( !is_null( $nsIndex ) ) {
                $text = $wgLang->getNsText( $nsIndex );
                $found = true;
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal = MagicWord::get( MAG_LOCALURL );
        $mwLocalE = MagicWord::get( MAG_LOCALURLE );

        $func = '';
        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is passed through to the URL method
                $text = ( $argc > 0 ) ? $title->$func( $args[0] ) : $title->$func();
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # (Lookup of $part1 among the caller's input arguments is disabled here.)

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $isHtml && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $escapeText && $found && $isHtml ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $isHtml && $found && !$skipParse ) {
        # Clean up argument array: "name=value" becomes a named argument,
        # anything else gets the next number, counting from 1. Both name and
        # value are trimmed.
        $assocArgs = array();
        $position = 1;
        foreach ( $args as $arg ) {
            if ( strpos( $arg, "=" ) === false ) {
                $assocArgs[$position++] = $arg;
            } else {
                $pair = explode( "=", $arg, 2 );
                $assocArgs[ trim( $pair[0] ) ] = trim( $pair[1] );
            }
        }

        # Do not enter included links in link table
        $hasTitle = !is_null( $title );
        if ( $hasTitle ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, (bool)$newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( $hasTitle ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    return $found ? $text : $matches[0];
}
1454
# Triple brace replacement -- used for template arguments.
# Looks the (trimmed) argument name $matches[2] up in the innermost entry of
# $this->mArgStack. A known argument is run through stripParse(); an unknown
# one leaves the matched text unchanged.
function argSubstitution( $matches )
{
    $argName = trim( $matches[2] );
    $currentArgs = end( $this->mArgStack );

    if ( !array_key_exists( $argName, $currentArgs ) ) {
        return $matches[0];
    }

    $atLineStart = (bool)$matches[1];
    return $this->stripParse( $currentArgs[$argName], $atLineStart, array() );
}
1469
# Counts one more inclusion of the entity identified by $dbk and returns
# true while the count stays within MAX_INCLUDE_REPEAT, false once it is
# exceeded. The counter is incremented in either case.
function incrementIncludeCount( $dbk )
{
    $count = array_key_exists( $dbk, $this->mIncludeCount )
        ? $this->mIncludeCount[$dbk]
        : 0;

    $count++;
    $this->mIncludeCount[$dbk] = $count;

    return $count <= MAX_INCLUDE_REPEAT;
}
1482
1483
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split at every "<". A piece that starts with a tag from the
# whitelist is kept, with its attributes run through fixTagAttributes();
# everything else has its "<" and ">" escaped. Without $wgUseTidy a tag stack
# is used to reject badly nested tags and to close tags left open at the end.
# With $wgUserHtml off the whitelist is empty, so every tag is escaped.
#
# $text: text to clean
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    if( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
            "td", "th", "tr"
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # The result is not used here; the call is kept as it was.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments. The pattern has no second group, so "$2"
    # substitutes an empty string.
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );

    # optional "/", tag name, attributes, optional "/" plus ">", trailing text
    $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    if(!$wgUseTidy) {
        $tagstack = array(); $tablestack = array();
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                # Not a tag at all: escape it
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                continue;
            }
            list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
            $t = strtolower( $t );

            $badtag = 0 ;
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag: it must match the innermost open tag.
                    # The popped name itself is compared with $t; the earlier
                    # expression compared a boolean and so accepted any
                    # closing tag whenever the stack was non-empty.
                    if ( !in_array( $t, $htmlsingle ) ) {
                        $ot = count( $tagstack ) ? array_pop( $tagstack ) : '';
                        if ( $ot !== $t ) {
                            if ( $ot !== '' ) {
                                array_push( $tagstack, $ot );
                            }
                            $badtag = 1;
                        }
                    }
                    if ( !$badtag ) {
                        if ( $t == "table" ) {
                            # Back to the tags that were open outside the table
                            $tagstack = array_pop( $tablestack );
                        }
                        $newparams = "";
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                        ! in_array( "table", $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                        ! in_array ( $t , $htmlnest ) ) {
                        $badtag = 1 ;
                    } else if ( ! in_array( $t, $htmlsingle ) ) {
                        if ( $t == "table" ) {
                            # Each table gets a fresh stack
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes($params);
                }
                if ( ! $badtag ) {
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( $t = array_pop( $tagstack ) ) {
            $text .= "</$t>\n";
            if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            $isTag = preg_match( $tagPattern, $x, $regs );
            if ( $isTag ) {
                list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
                $t = strtolower( $t );
            }
            if ( $isTag && in_array( $t, $htmlelements ) ) {
                $newparams = $this->fixTagAttributes($params);
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
            } else {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
            }
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1600
1601
/*
 * Post-processes the <h1>..<h6> headings found in $text:
 *  1) prefixes headings with their number if that option is enabled
 *  2) adds an [edit] link per section if the option is enabled and the
 *     user may edit the page
 *  3) builds a table of contents and inserts it after the first block
 *  4) puts a named anchor in front of every heading
 *
 * All headings are collected first; the text is then split at the headings
 * and put back together with the newly formatted ones.
 *
 * $text:   HTML to process
 * $isMain: the table of contents is only inserted when this is true
 * Returns the processed HTML.
 */

/* private */ function formatHeadings( $text, $isMain=true )
{
    global $wgInputEncoding;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();

    # No section edit links on pages the user cannot edit
    $showEditLink = 0;
    $rightClickHack = 0;
    if ( $this->mTitle->userCanEdit() ) {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $noEditWord =& MagicWord::get( MAG_NOEDITSECTION );
    if ( $noEditWord->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }

    # __NOTOC__ in the text: do not add a TOC
    $noTocWord =& MagicWord::get( MAG_NOTOC );
    if ( $noTocWord->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if ( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Collect all headlines; [1] = level, [2] = rest of the opening tag, [3] = content
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if ( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # __FORCETOC__ overrides the conditions above and always shows the TOC
    $forceTocWord =& MagicWord::get( MAG_FORCETOC );
    if ( $forceTocWord->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # Numbers are computed both for numbered headings and for the TOC
    $needNumbering = ( $doNumberHeadings || $doShowToc );
    $totalHeadlines = count( $matches[3] );

    $headlineCount = 0;         # index of the headline being processed
    $toclevel = 0;              # current TOC indentation depth
    $toc = "";
    $full = "";
    $head = array();            # formatted headline per index
    $sublevelCount = array();   # headlines seen so far, per level
    $anchorUses = array();      # canonized headline => number of occurrences
    $level = 0;
    $prevlevel = 0;

    foreach ( $matches[3] as $headline ) {
        $numbering = "";
        if ( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];

        if ( $needNumbering ) {
            if ( $prevlevel && $level > $prevlevel ) {
                # reset when we enter a new level
                $sublevelCount[$level] = 0;
                $toc .= $sk->tocIndent( $level - $prevlevel );
                $toclevel += $level - $prevlevel;
            } elseif ( $level < $prevlevel ) {
                # reset when we step back a level
                $sublevelCount[$level+1] = 0;
                $toc .= $sk->tocUnindent( $prevlevel - $level );
                $toclevel -= $prevlevel - $level;
            }
        }

        # count number of headlines for each level
        if ( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;

        if ( $needNumbering ) {
            # Join the non-empty counters of all levels up to this one with dots
            $needDot = 0;
            for ( $i = 1; $i <= $level; $i++ ) {
                if ( empty( $sublevelCount[$i] ) ) {
                    continue;
                }
                if ( $needDot ) {
                    $numbering .= ".";
                }
                $numbering .= $sublevelCount[$i];
                $needDot = 1;
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

        # strip out HTML
        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
        $tocline = trim( $canonized_headline );
        $decoded = do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding );
        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( $decoded ) );
        # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
        $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

        # count occurrences so that duplicate anchors can be told apart
        if ( !isset( $anchorUses[$canonized_headline] ) ) {
            $anchorUses[$canonized_headline] = 0;
        }
        $anchorUses[$canonized_headline]++;
        $occurrence = $anchorUses[$canonized_headline];

        # Prepend the number to the heading text
        if ( $needNumbering ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if ( $doNumberHeadings && $totalHeadlines > 1 ) {
                # the two are different if the line contains a link
                $headline = $numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section;
        # repeated headlines get "_2", "_3", ... appended
        $anchor = $canonized_headline;
        if ( $occurrence > 1 ) {
            $anchor .= "_" . $occurrence;
        }
        if ( $doShowToc ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }

        # Sections are numbered from 1 for editing
        $sectionHtml = "";
        if ( $showEditLink ) {
            $sectionHtml .= $sk->editSectionLink($headlineCount+1);
        }

        # Add the edit section span
        if ( $rightClickHack ) {
            $headline = $sk->editSectionScript($headlineCount+1,$headline);
        }

        # give headline the correct <h#> tag
        $head[$headlineCount] = $sectionHtml . "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

        $headlineCount++;
    }

    if ( $doShowToc ) {
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines
    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
    $i = 0;

    foreach ( $blocks as $block ) {
        # An [edit] link for the top block of text used to be added here; it is
        # disabled because it broke block formatting (for example, a bullet
        # point in the top line).
        $full .= $block;
        if ( $doShowToc && !$i && $isMain ) {
            # Top anchor now in skin
            $full = $full.$toc;
        }

        if ( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1800
# Turns every "ISBN <number>" in $text into a link to Special:Booksources.
#
# The number is the run of digits, hyphens and upper-case letters following
# "ISBN " and any extra spaces; hyphens are removed for the link target. If
# no number follows, the text is left as it was.
#
# $text: text to process
# Returns the processed text.
/* private */ function magicISBN( $text )
{
    # explode() instead of the regex-based split(), which PHP 7 removed;
    # the separator contains no regex syntax, so the pieces are the same.
    $a = explode( "ISBN ", " $text" );
    if ( count ( $a ) < 2 ) return $text;
    $text = (string)substr( array_shift( $a ), 1);
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    foreach ( $a as $x ) {
        # Measure the runs with strspn() instead of peeling off one character
        # at a time, so that an empty remainder ends the scan.
        $blankLength = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $blankLength );
        $x = (string)substr( $x, $blankLength );

        $isbnLength = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLength );
        $x = (string)substr( $x, $isbnLength );

        $num = str_replace( "-", "", $isbn );
        $num = str_replace( " ", "", $num );

        if ( "" == $num ) {
            # Nothing usable followed; put back everything that was consumed,
            # including a run consisting only of hyphens.
            $text .= "ISBN $blank$isbn$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
            $text .= "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    return $text;
}
# Turns every "RFC <digits>" in $text into an external link. The URL is the
# "rfcurl" message with "$1" replaced by the number. If no digits follow,
# the text is left as it was.
#
# $text: text to process
# Returns the processed text.
/* private */ function magicRFC( $text )
{
    # explode() instead of the regex-based split(), which PHP 7 removed;
    # the separator contains no regex syntax, so the pieces are the same.
    $a = explode( "RFC ", " $text" );
    if ( count ( $a ) < 2 ) return $text;
    $text = (string)substr( array_shift( $a ), 1);
    $valid = "0123456789";

    foreach ( $a as $x ) {
        # Measure the runs with strspn() instead of peeling off one character
        # at a time, so that an empty remainder ends the scan.
        $blankLength = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $blankLength );
        $x = (string)substr( $x, $blankLength );

        $rfcLength = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLength );
        $x = (string)substr( $x, $rfcLength );

        if ( "" == $rfc ) {
            $text .= "RFC $blank$x";
        } else {
            $url = wfmsg( "rfcurl" );
            $url = str_replace( "$1", $rfc, $url);
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
1867
# Transforms wiki markup before it is saved (output type OT_WIKI).
#
# $text:       markup to transform
# $title:      stored as $this->mTitle
# $user:       passed on to pstPass2()
# $options:    stored as $this->mOptions
# $clearState: whether to call clearState() first
# Returns the transformed markup.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_WIKI;

    if ( $clearState ) {
        $this->clearState();
    }

    # Normalise line endings
    $normalized = str_replace( "\r\n", "\n", $text );
    # (A regex pass that rewrote <br> variants is disabled here.)

    # Strip, run the pre-save pass, then put the stripped parts back
    $stripState = false;
    $stripped = $this->strip( $normalized, $stripState, false );
    $transformed = $this->pstPass2( $stripped, $user );
    return $this->unstrip( $transformed, $stripState );
}
1896
# Second pre-save pass: substitutes {{subst:...}}, expands signatures
# (~~~, ~~~~, ~~~~~) and context links ([[|name]], [[name (context)|]],
# [[ns:name|]]), then trims trailing whitespace.
#
# $text: wiki markup
# $user: supplies the name and the "nickname" option for signatures
# Returns the transformed markup.
/* private */ function pstPass2( $text, &$user )
{
    global $wgLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) { $k = $n; }
    if(isset($wgLocaltimezone)) {
        $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
        " (" . date( "T" ) . ")";
    if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

    # Plain string replacement, longest marker first. With preg_replace() a
    # "$1" or "\1" in the user name or nickname would be read as a
    # backreference and mangle the signature.
    $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace( "~~~~~", $d, $text );
    $text = str_replace( "~~~~", "$userLink $d", $text );
    $text = str_replace( "~~~", $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
    # [[ns:page (cont)|]]

    # The context is the parenthesised suffix of the current page title, if any
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # (A separate MAG_SUBST substitution through a forked parser is disabled here.)

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
1967
# Sets the title, options and output type that parse() would normally set,
# so that an external function can call individual class members.
# Unless $clearState is false the parser state is cleared as well.
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
    $this->mTitle =& $title;
    $this->mOptions = $options;
    $this->mOutputType = $outputType;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
1979
# Runs variable/template replacement on a message text with output type
# OT_MSG, using $wgTitle as the title. A call made while another call is
# still in progress returns $text unchanged (guard against infinite recursion).
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $executing = false;

    if ( !$executing ) {
        $executing = true;

        $this->mTitle = $wgTitle;
        $this->mOptions = $options;
        $this->mOutputType = OT_MSG;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $executing = false;
    }

    return $text;
}
1999 }
2000
# Holds the result of a parse: the generated text, the language and
# category links collected along the way, and the "old magic" flag.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor. Every argument is optional; mCacheTime always starts
	# out as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each one delegates to wfSetVar() and returns its result
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold the metadata of another ParserOutput into this one.
	# Language links and category links of $other are appended to the
	# respective lists of this object, and the old-magic flag is OR-ed.
	# mText and mCacheTime are left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links must come from $other's category list; merging in
		# $this->mLanguageLinks here would drop $other's categories and
		# pollute the category list with language links.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2034
# Bundle of the settings the parser consults while rendering.
# Site-wide switches are copied from globals, per-user preferences are
# read from a User object; see initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessor pairs, one getter and one setter per option.
	# Setters hand the work to wfSetVar() (wfSetRef() for the skin)
	# and return whatever that helper returns.

	function getUseTeX() { return $this->mUseTeX; }
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

	function getSkin() { return $this->mSkin; }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

	function getDateFormat() { return $this->mDateFormat; }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

	function getEditSection() { return $this->mEditSection; }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	function getNumberHeadings() { return $this->mNumberHeadings; }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

	function getShowToc() { return $this->mShowToc; }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Factory: build a ParserOptions object and fill it from $user.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Fill every option from the global settings and from the
	# preferences of $userInput. When $userInput evaluates to false,
	# a fresh User object marked as loaded is used instead.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide switches
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user preferences
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}

}
2107
2108 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
	# Plain-function trampoline: forwards the regex match array to the
	# braceSubstitution() method of the parser held in $wgCurParser.
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2114
function wfArgSubstitution( $matches )
{
	# Plain-function trampoline: forwards the regex match array to the
	# argSubstitution() method of the parser held in $wgCurParser.
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2120
function wfVariableSubstitution( $matches )
{
	# Plain-function trampoline: forwards the regex match array to the
	# variableSubstitution() method of the parser held in $wgCurParser.
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2126
2127 ?>