Use isset instead of defined. Fix BUG #963397
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
# Optional extensions: each one is only loaded when its setting is switched on.
# empty() is used so that a setting that was never defined counts as "off"
# instead of raising an undefined-index notice.
if( !empty( $GLOBALS['wgUseWikiHiero'] ) ){
	require_once('extensions/wikihiero/wikihiero.php');
}
if( !empty( $GLOBALS['wgUseTimeline'] ) ){
	require_once('extensions/timeline/Timeline.php');
}
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Cap on repeated inclusions of one page (see the O(N^2) note above)
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# String parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix for escaping, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor (PHP 4 style: a method named after the class).
# A new parser starts out in the state produced by clearState().
function Parser()
{
	$this->clearState();
}
69
# Resets the per-parse members to their initial values.
# Called from the constructor and, unless the caller opts out, from parse().
function clearState()
{
	$this->mOutput = new ParserOutput;
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	# $mInPre is declared among the members "cleared with clearState()";
	# reset it so an unclosed <pre> from one parse cannot leak into the next.
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();
}
81
82 # First pass--just handle <nowiki> sections, pass the rest off
83 # to internalParse() which does all the real work.
84 #
85 # Returns a ParserOutput
86 #
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	# Pipeline: strip() -> internalParse() -> unstrip() -> character and
	# tag clean-up -> doBlockLevels() -> optional tidy().
	# The resulting text is stored in $this->mOutput, which is returned.
	#
	# $text       wiki markup to convert
	# $title      title object of the page being parsed (kept by reference)
	# $options    ParserOptions for this run
	# $linestart  passed on to internalParse() and doBlockLevels()
	# $clearState when true, clearState() is called first
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			"/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<hr *>/i" => '<hr />',
			"/<br *>/i" => '<br />',
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# Hex digits are [0-9A-Fa-f]; the former "A-f" range also let
			# punctuation such as "[" and "_" pass as hex digits.
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# With tidy enabled, only the substitutions listed here are applied.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			"/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
142
# Returns two mt_rand() values (each 0..0x7fffffff) as concatenated
# lower-case hex; used to make strip markers hard to guess.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
147
148 # Replaces all occurrences of <$tag>content</$tag> in the text
149 # with a random marker and returns the new text. the output parameter
150 # $content will be an associative array filled with data on the form
151 # $unique_marker => content.
152
153 # If $content is already set, the additional entries will be appended
154
155 # If $tag is set to STRIP_COMMENTS, the function will extract
156 # <!-- HTML comments -->
157
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	# $tag          tag name, or STRIP_COMMENTS for <!-- --> comments
	# $text         text to scan
	# $content      in/out: marker => extracted content (entries are appended)
	# $uniq_prefix  prepended to every generated marker
	# Returns $text with each matched section replaced by its marker.
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = "";

	# The opening/closing patterns never change inside the loop
	if( $tag == STRIP_COMMENTS ) {
		$openPattern = "/<!--/i";
		$closePattern = "/-->/i";
	} else {
		$openPattern = "/<\\s*$tag\\s*>/i";
		$closePattern = "/<\\/\\s*$tag\\s*>/i";
	}

	while ( "" != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No (further) opening tag, or nothing after it
			$text = "";
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# An unclosed tag swallows the rest of the text, so there is no
			# $q[1]; stop cleanly instead of reading an undefined offset.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
189
190 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
191 # If $render is set, performs necessary rendering operations on plugins
192 # Returns the text, and fills an array with data needed in unstrip()
193 # If the $state is already a valid strip state, it adds to the state
194
195 # When $stripcomments is set, HTML comments <!-- like this -->
196 # will be stripped in addition to other tags. This is important
197 # for section editing, where these comments cause confusion when
198 # counting the sections in the wikisource
function strip( $text, &$state, $stripcomments = false )
{
	# Each supported tag is pulled out with extractTags() and its content is
	# either rendered (when producing HTML) or wrapped in its original tag
	# again, so that unstrip() can restore it later.
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# NOTE(review): when an extension is switched off, the hiero/timeline/math
	# fallbacks below put $content back without escaping - confirm that this
	# is acceptable for HTML output.
	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseWikiHiero']){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseTimeline']){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# Show the tag literally; the closing tag needs its slash
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	$found = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one.
	# A pre-existing state may lack some of the keys (insertStripItem() can
	# create one), so every key is checked before the array union.
	if ( $state ) {
		foreach ( $found as $type => $items ) {
			$state[$type] = isset( $state[$type] ) ? $state[$type] + $items : $items;
		}
	} else {
		$state = $found;
	}
	return $text;
}
288
# Puts the stripped sections back: every marker recorded in $state is
# replaced in $text by the content stored for it.
# $state is a two-level array: section type => ( marker => content ).
function unstrip( $text, &$state )
{
	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state, true ) as $markers ) {
		foreach ( array_reverse( $markers, true ) as $marker => $original ) {
			$text = str_replace( $marker, $original, $text );
		}
	}

	return $text;
}
301
302 # Add an item to the strip state
303 # Returns the unique tag which must be inserted into the stripped text
304 # The tag will be replaced with the original text in unstrip()
305
function insertStripItem( $text, &$state )
{
	# $text   text to store
	# $state  in/out strip state; created here when it is still empty
	# Returns the generated marker under which $text was stored.
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same set of keys that strip() builds, plus 'item', so that a later
		# strip() can merge into this state without hitting a missing key.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'item' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
320
321 # This method generates the list of subcategories and pages for a category
function categoryMagic ()
{
	# Returns an HTML fragment, or "" when category magic is switched off
	# or the current title is not in the category namespace.
	global $wgLang , $wgUser ;
	# Always return a string: the caller appends the result to the page text
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";


	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;
	# NOTE(review): the result is not used below; the call is kept in case
	# getArticleID() has side effects - confirm and remove if it has none.
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			$children[] = $sk->makeLink ( $t ) ; # Subcategory
		} else {
			$articles[] = $sk->makeLink ( $t ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}


	return $r ;
}
378
# Returns the whitelist of attribute names that fixTagAttributes() keeps.
# Each name is listed once ("width" used to appear twice).
function getHTMLattrs ()
{
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", /* BR */ "noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
		/* FONT */ "type", "start", "value", "compact",
		/* For various lists, mostly deprecated but safe */
		"summary", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan", /* Tables */
		"id", "class", "name", "style" /* For CSS */
	);
	return $htmlattrs ;
}
395
# Filters the attribute part of an HTML tag.
# $t is the raw attribute text; the return value contains only attributes
# whose name is listed by getHTMLattrs(), or "" when nothing is left or a
# suspicious style value was found.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier: /e evaluated the
	# matched (user supplied) text as part of a double-quoted PHP string,
	# which allows code injection and mangles single quotes.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'filterTagAttribute' ),
		$t);
	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (when present) its value including any
# quotes. Returns the attribute unchanged when its name is whitelisted,
# otherwise "".
/* private */ function filterTagAttribute( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	if ( isset( $m[3] ) && $m[3] !== "" ) {
		return $m[1] . "=" . $m[3];
	}
	return $m[1];
}
418
419 /* interface with html tidy, used if $wgUseTidy = true */
# Pipes $text through the external tidy binary and returns its output.
# When tidy produces nothing, the original $text is returned with an
# HTML comment appended.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	$cleansource = '';
	# Work on a copy: appending to the global itself made the encoding
	# switch pile up on every call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	# tidy is given a complete document; $text itself stays untouched so
	# that it can be returned as-is if tidy fails.
	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),
		1 => array("pipe", "w"),
		2 => array("file", "/dev/null", "a")
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
466
# Converts wiki table markup ({| ... |}) into HTML table markup.
# The text is processed line by line; four parallel stacks (one entry per
# currently open table) track the open cell and row. Returns the text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after "{|" is attribute text. fixTagAttributes() trims
			# its result, so taking offset 2 also works for "{|attr" (offset 3
			# cut off the first attribute character in that case).
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		/* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
		{
			$z = trim ( substr ( $x , 2 ) ) ;
			$t[$k] = "<caption>{$z}</caption>\n" ;
		}*/
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			# Attributes are remembered until the row's first cell opens the <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				# "attributes|content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that opened it (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
566
567 # Parses the text and adds the result to the strip state
568 # Returns the strip tag
function stripParse( $text, $linestart, $args )
{
	# strip() and internalParse() run against the parser's own strip state;
	# the parsed result is then stored as a strip item of that same state.
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, $linestart, $args, false );
	$item = $linestart ? "\n" . $parsed : $parsed;
	return $this->insertStripItem( $item, $this->mStripState );
}
578
# Runs the individual markup passes over $text, in a fixed order, and
# returns the result.
# $args is handed to replaceVariables(), $isMain to formatHeadings().
# NOTE(review): $linestart is not used inside this method.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$profileName = "Parser::internalParse";
	wfProfileIn( $profileName );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

	$text = $this->doHeadings( $text );
	if( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	# The internal link pass is run twice
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	# The category listing is appended only by the first call that gets here
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $profileName );
	return $text;
}
614
615
# Turns "== Heading ==" style lines into <h1>..<h6> elements.
# Levels are tried from six "=" signs down to one, so the longest run of
# "=" signs decides the heading level.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$text = preg_replace( "/^{$marks}(.+){$marks}(\\s|$)/m",
			"<h{$level}>\\1</h{$level}>\\2", $text );
		--$level;
	}
	return $text;
}
625
# Applies doQuotes() to every line of $text separately and joins the
# lines again with newlines.
/* private */ function doAllQuotes( $text )
{
	$rendered = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$rendered[] = $this->doQuotes( "", $line, "" );
	}
	return implode( "\n", $rendered );
}
635
# Recursively converts '' and ''' quote markup of one line into <em> and
# <strong>.
# $pre   text already consumed that still belongs to the open element(s)
# $text  remaining text to scan
# $mode  which element(s) are open: "", "em", "strong", "emstrong",
#        "strongem" or "both"
/* private */ function doQuotes( $pre, $text, $mode )
{
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No further quote marker: wrap what is left according to $mode
		$strongText = ($text == "") ? "" : "<strong>{$text}</strong>";
		$emText = ($text == "") ? "" : "<em>{$text}</em>";
		switch ( $mode ) {
			case "":
				return $pre . $text;
			case "em":
				return $pre . $emText;
			case "strong":
				return $pre . $strongText;
			case "strongem":
				return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$emText}</strong>";
			default:
				return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$strongText}</em>";
		}
	}

	$before = $m[1];
	$rest = $m[2];
	$beforeStrong = ($before == "") ? "" : "<strong>{$before}</strong>";
	$beforeEm = ($before == "") ? "" : "<em>{$before}</em>";

	if ( substr( $rest, 0, 1 ) == "'" ) {
		# Three apostrophes: a strong marker
		$rest = substr( $rest, 1 );
		switch ( $mode ) {
			case "em":
				return $this->doQuotes( $before, $rest, ($before == "") ? "both" : "emstrong" );
			case "strong":
				return $beforeStrong . $this->doQuotes( "", $rest, "" );
			case "emstrong":
			case "both":
				return $this->doQuotes( "", $pre.$beforeStrong.$rest, "em" );
			case "strongem":
				return "<strong>{$pre}{$beforeEm}</strong>" . $this->doQuotes( "", $rest, "em" );
			default:
				return $before . $this->doQuotes( "", $rest, "strong" );
		}
	}

	# Two apostrophes: an em marker
	switch ( $mode ) {
		case "strong":
			return $this->doQuotes( $before, $rest, ($before == "") ? "both" : "strongem" );
		case "em":
			return $beforeEm . $this->doQuotes( "", $rest, "" );
		case "emstrong":
			return "<em>{$pre}{$beforeStrong}</em>" . $this->doQuotes( "", $rest, "strong" );
		case "strongem":
		case "both":
			return $this->doQuotes( "", $pre.$beforeEm.$rest, "strong" );
		default:
			return $before . $this->doQuotes( "", $rest, "em" );
	}
}
683
684 # Note: we have to do external links before the internal ones,
685 # and otherwise take great care in the order of things here, so
686 # that we don't end up interpreting some URLs twice.
687
/* private */ function replaceExternalLinks( $text )
{
	$profileName = "Parser::replaceExternalLinks";
	wfProfileIn( $profileName );

	# protocol => autonumber flag, handled in exactly this order
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $profileName );
	return $text;
}
702
# Replaces the external links of one protocol in $s.
# First free-standing URLs are linked (or, for autonumbered protocols with
# external images allowed, turned into images); then bracketed links of the
# form [url] and [url text] are handled.
# $autonumber: when true, a bare [url] is labelled with a running number.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Placeholder that stands in for the protocol while free URLs are being
	# replaced; it is swapped back by the str_replace() further down.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$freePattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$skin =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $skin->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
	}

	$placeholderUrl = "{$unique}:\\3";
	$attribs = $skin->getExternalLinkAttributes( $placeholderUrl, wfEscapeHTML( $placeholderUrl ) );
	$anchor = "<a href=\"{$unique}:\\3\"" . $attribs . ">" . wfEscapeHTML( $placeholderUrl ) . "</a>";
	$s = preg_replace( $freePattern, "\\1" . $anchor . "\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split at "[protocol:"; the first piece is plain text
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = substr( array_shift( $pieces ), 1 );

	$unlabelled = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$labelled = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $piece ) {
		if ( preg_match( $unlabelled, $piece, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			$text = $autonumber
				? "[" . ++$this->mAutonumber . "]"
				: wfEscapeHTML( $link );
		} else if ( preg_match( $labelled, $piece, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed bracketed link: put the text back untouched
			$s .= "[{$protocol}:" . $piece;
			continue;
		}

		$sameAsLink = ( $link == $text
			|| preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) );
		if ( $sameAsLink ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $skin->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
769
770
# Replaces [[...]] links in $s with the HTML produced by the skin.
# Language (interwiki) links and category links are not rendered in place;
# they are collected on $this->mOutput instead.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::replaceInternalLink" ;
	# Profiling is entered once here and left once at the end; the loop below
	# must not call wfProfileOut( $fname ) on its "continue" paths, otherwise
	# the in/out calls no longer pair up.
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a "#" in first position, so the result has to
		# be compared with === ; "== FALSE" treated such a link as having no
		# fragment at all.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
926
927 # Some functions here used by doBlockLevels()
928 #
# Returns the closing tag for the section recorded in $this->mLastSection
# ("" when none is recorded) and resets mLastSection and mInPre.
/* private */ function closeParagraph()
{
	$closing = ( '' != $this->mLastSection )
		? "</" . $this->mLastSection . ">\n"
		: "";
	$this->mInPre = false;
	$this->mLastSection = "";
	return $closing;
}
939 # getCommon() returns the length of the longest common substring
940 # of both arguments, starting at the beginning of both.
941 #
/* private */ function getCommon( $st1, $st2 )
{
	# Only the length of the shorter string needs to be examined
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square brackets are used for the character access; the curly brace
	# form ($st1{$i}) is no longer accepted by current PHP versions.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	# $i is the index of the first difference, i.e. the common prefix length
	return $i;
}
953 # These next three functions open, continue, and close the list
954 # element appropriate to the prefix character passed into them.
955 #
# Closes any open paragraph, then opens the list (and its first item) that
# belongs to the prefix character $char.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix: the marker replaces whatever was collected so far
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
971
# nextItem() returns the markup that ends the current list item and
# starts the next one within the same list. For definition lists it
# also tracks in $this->mDTopen whether the new item is a term (<dt>).
/* private */ function nextItem( $char )
{
    switch ( $char ) {
        case "*":
        case "#":
            return "</li><li>";

        case ":":
        case ";":
            # Close whichever definition-list element is currently open
            $close = $this->mDTopen ? "</dt>" : "</dd>";
            $opensTerm = ( ";" == $char );
            $this->mDTopen = $opensTerm;
            return $close . ( $opensTerm ? "<dt>" : "<dd>" );
    }
    return "<!-- ERR 2 -->";
}
988
# closeList() returns the markup (followed by a newline) that closes
# the innermost item and its list for the given prefix character.
# Only "*", "#" and ":" are recognised; anything else yields an error
# comment without a trailing newline.
/* private */function closeList( $char )
{
    switch ( $char ) {
        case "*":
            $markup = "</li></ul>";
            break;
        case "#":
            $markup = "</li></ol>";
            break;
        case ":":
            if ( $this->mDTopen ) {
                # A term was still open: close it instead of a <dd>
                $markup = "</dt></dl>";
                $this->mDTopen = false;
            } else {
                $markup = "</dd></dl>";
            }
            break;
        default:
            return "<!-- ERR 3 -->";
    }
    return $markup . "\n";
}
1004
# doBlockLevels() walks the text line by line and produces block-level
# markup: paragraphs, preformatted blocks (lines starting with a space)
# and nested lists from lines starting with * # : ;
#
# $text      - text to process
# $linestart - when false, the first line is copied to the output
#              untouched and processing starts with the second line
#
# Returns the processed text.
/* private */ function doBlockLevels( $text, $linestart ) {
    $fname = "Parser::doBlockLevels";
    wfProfileIn( $fname );

    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $pref = $pref2 = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # Pending paragraph markup that is only emitted once the next
    # line is known; false when nothing is pending.
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
        $preOpenMatch = preg_match("/<pre/i", $oLine );
        if (!$this->mInPre) {
            $this->mInPre = !empty($preOpenMatch);
        }
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, "*#:;" );
            $pref = substr( $oLine, 0, $prefixLength );

            # ";" and ":" are treated as the same list kind when the prefix
            # is compared with the previous line, so normalise ";" to ":".
            $pref2 = str_replace( ";", ":", $pref );
            $t = substr( $oLine, $prefixLength );
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( ";" == substr( $pref, -1 ) ) {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                # FIXME: This is not foolproof. Something better in Tokenizer might help.
                if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                    $term = $match[1];
                    $output .= $term . $this->nextItem( ":" );
                    $t = $match[2];
                }
            }
        } elseif( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            # Close the levels of the previous line that are not shared
            while( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( substr( $lastPrefix, $lastPrefixLength-1, 1 ) );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( substr( $pref, $commonPrefixLength-1, 1 ) );
            }
            # Open the levels that are new on this line
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ";" == $char ) {
                    # FIXME: This is dupe of code above
                    if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                        $term = $match[1];
                        $output .= $term . $this->nextItem( ":" );
                        $t = $match[2];
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
            $closematch = preg_match(
                "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
                "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                if($preOpenMatch and !$preCloseMatch) {
                    $this->mInPre = true;
                }
                if ( $closematch ) {
                    $inBlockElem = false;
                } else {
                    $inBlockElem = true;
                }
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # substr() is used rather than a direct string offset so that
                # an empty line does not read an uninitialised offset.
                if ( " " == substr( $t, 0, 1 ) and trim($t) != '' ) {
                    // pre
                    if ($this->mLastSection != 'pre') {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                } else {
                    // paragraph
                    if ( '' == trim($t) ) {
                        if ( $paragraphStack ) {
                            # Second blank line in a row: emit the pending markup plus a break
                            $output .= $paragraphStack.'<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            if ($this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = "<p>";
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ($this->mLastSection != 'p') {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        # While paragraph markup is pending the (blank) line itself is not output
        if ($paragraphStack === false) {
            $output .= $t."\n";
        }
    }
    # Close any lists still open after the last line
    while ( $prefixLength ) {
        $output .= $this->closeList( substr( $pref2, $prefixLength-1, 1 ) );
        --$prefixLength;
    }
    if ( "" != $this->mLastSection ) {
        $output .= "</" . $this->mLastSection . ">";
        $this->mLastSection = "";
    }

    wfProfileOut( $fname );
    return $output;
}
1163
# getVariableValue() returns the current value of the magic variable
# identified by $index (one of the MAG_* constants handled below),
# or NULL for an index that is not handled here.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;

    switch ( $index ) {
        case MAG_CURRENTMONTH:
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Uses the namespace text from $wgLang rather than
            # Namespace::getCanonicalName() (patch by Dori)
            $value = $wgLang->getNsText($this->mTitle->getNamespace());
            break;
        case MAG_CURRENTDAYNAME:
            # date("w") is 0-based; getWeekdayName() is called with a 1-based index
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }

    return $value;
}
1197
# initialiseVariables() rebuilds $this->mVariables from scratch: for
# every ID in $wgVariableIDs the matching magic word adds its entries,
# with the value computed by getVariableValue().
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1207
# replaceVariables() expands {{variables}}, {{{arguments}}} and
# {{templates}} in $text.
#
# $text - text to process
# $args - arguments available to {{{...}}} substitution while this
#         call is active
#
# Variable and argument substitution only happen for HTML output;
# template (brace) substitution is attempted for every output type.
# Returns the processed text.
/* private */ function replaceVariables( $text, $args = array() )
{
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }
    $titleChars = Title::legalChars();
    $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # The substitution callbacks are plain functions, so publish this
    # parser through a global for them.
    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    if ( $this->mOutputType == OT_HTML ) {
        # Variable substitution
        $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

        # Argument substitution
        $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
    }
    # Template substitution
    $regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
    $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1245
# variableSubstitution() is the worker for {{variable}} matches.
# $matches[0] is the whole match, $matches[1] the text between the braces.
# Known variables are replaced by their value (and the output is flagged
# as containing old magic); anything else is returned unchanged.
function variableSubstitution( $matches )
{
    $name = $matches[1];

    if ( !array_key_exists( $name, $this->mVariables ) ) {
        return $matches[0];
    }

    $value = $this->mVariables[$name];
    $this->mOutput->mContainsOldMagic = true;
    return $value;
}
1256
# braceSubstitution() is the worker for {{...}} matches found by
# replaceVariables().
#
# $matches[1] - optional newline preceding the braces
# $matches[2] - the part before the first |
# $matches[3] - "" or the | followed by the |-separated arguments
#
# The handlers below are tried in order (SUBST, MSG/MSGNW/INT, NS,
# LOCALURL/LOCALURLE, internal variables, page inclusion); the first
# one that produces text wins. Returns the replacement text, or the
# original match when nothing applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;
    $found = false;
    $nowiki = false;
    $noparse = false;

    $title = NULL;

    # $newline is an optional newline character before the braces
    # $part1 is the bit before the first |, and must contain only title characters
    # $args is a list of arguments, starting from index 0, not including $part1

    $newline = $matches[1];
    $part1 = $matches[2];
    # If the third subpattern matched anything, it will start with |
    if ( $matches[3] !== "" ) {
        $args = explode( "|", substr( $matches[3], 1 ) );
    } else {
        $args = array();
    }
    $argc = count( $args );

    # {{{}}}
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse= true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs =& MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            if ( intval( $part1 ) ) {
                # Numeric (non-zero) namespace index
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                # Otherwise try it as a canonical namespace name
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $wgLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal =& MagicWord::get( MAG_LOCALURL );
        $mwLocalE =& MagicWord::get( MAG_LOCALURLE );

        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is handed to the URL method
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }
    /*
    # Arguments input from the caller
    $inputArgs = end( $this->mArgStack );
    if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
        $text = $inputArgs[$part1];
        $found = true;
    }
    */
    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
        # Clean up argument array: "name=value" arguments are stored under
        # their name, all others under consecutive numbers starting at 1
        $assocArgs = array();
        $index = 1;
        foreach( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                # trim() always yields a string, so no further checks are needed
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, (bool)$newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    if ( !$found ) {
        return $matches[0];
    } else {
        return $text;
    }
}
1455
# Triple brace replacement -- used for template arguments
#
# $matches[1] is the optional newline before the braces, $matches[2]
# the argument name. When the innermost entry of the argument stack
# has a value for that name, the parsed value is returned; otherwise
# the match is returned unchanged.
function argSubstitution( $matches )
{
    $argName = trim( $matches[2] );
    $currentArgs = end( $this->mArgStack );

    if ( !array_key_exists( $argName, $currentArgs ) ) {
        return $matches[0];
    }

    $atLineStart = (bool)$matches[1];
    return $this->stripParse( $currentArgs[$argName], $atLineStart, array() );
}
1470
# Returns true if the function is allowed to include this entity
#
# Every call counts as one inclusion of $dbk; inclusion is allowed
# until the count exceeds MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk )
{
    $previous = array_key_exists( $dbk, $this->mIncludeCount )
        ? $this->mIncludeCount[$dbk]
        : 0;

    $this->mIncludeCount[$dbk] = $previous + 1;

    return ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
1483
1484
# Cleans up HTML, removes dangerous tags and attributes
#
# HTML comments are stripped first. The text is then split at every
# "<"; a fragment that is a whitelisted tag is kept (its attributes
# filtered through fixTagAttributes()), any other fragment has its
# "<" and ">" escaped. The whitelist is empty unless $wgUserHtml is set.
#
# Without $wgUseTidy, tag nesting is also checked: closing tags that do
# not match the innermost open tag are escaped, and tags still open at
# the end are closed. With $wgUseTidy only the whitelist is applied.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    if( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
            "td", "th", "tr"
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    # Table cell/row tags are treated like single tags: they are never
    # pushed on the tag stack.
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used in this function; the call is
    # kept in case getHTMLattrs() has side effects -- confirm.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );

    # optional slash, tag name, attributes, closing brace, trailing text
    $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    if(!$wgUseTidy) {
        $tagstack = array(); $tablestack = array();
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                # Not tag-shaped at all: escape it and move on
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                continue;
            }
            list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;

            $badtag = 0 ;
            if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    if ( ! in_array( $t, $htmlsingle ) ) {
                        # It must match the innermost open tag
                        $ot = count( $tagstack ) ? array_pop( $tagstack ) : "";
                        if ( $ot != $t ) {
                            # Mismatch: restore the stack and reject this tag
                            if ( !empty( $ot ) ) array_push( $tagstack, $ot );
                            $badtag = 1;
                        }
                    }
                    if ( ! $badtag ) {
                        if ( $t == "table" ) {
                            # Back to the tag stack of the enclosing context
                            $tagstack = array_pop( $tablestack );
                        }
                        $newparams = "";
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                      ! in_array( "table", $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                      ! in_array ( $t , $htmlnest ) ) {
                        $badtag = 1 ;
                    } else if ( ! in_array( $t, $htmlsingle ) ) {
                        if ( $t == "table" ) {
                            # A table starts a fresh stack; the outer one is saved
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes($params);
                }
                if ( ! $badtag ) {
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( $t = array_pop( $tagstack ) ) {
            $text .= "</$t>\n";
            if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                continue;
            }
            list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
            if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
                $newparams = $this->fixTagAttributes($params);
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
            } else {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
            }
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1601
1602
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text   - HTML text containing <h1>..<h6> headlines
 * $isMain - the table of contents is only inserted when this is true
 *
 * Returns the text with the rebuilt headlines (and the TOC, if shown).
 *
 */

/* private */ function formatHeadings( $text, $isMain=true )
{
    global $wgInputEncoding;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    if( !$this->mTitle->userCanEdit() ) {
        $showEditLink = 0;
        $rightClickHack = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC
    $mw =& MagicWord::get( MAG_FORCETOC );
    if ($mw->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }


    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toclevel = 0;
    $toc = "";
    $full = "";
    $head = array();          # rebuilt headline markup, by headline index
    $sublevelCount = array(); # headline counters, by heading level
    $refers = array();        # occurrences of each canonized headline
    $refcount = array();      # occurrence number, by headline index
    $level = 0;
    $prevlevel = 0;
    foreach( $matches[3] as $headline ) {
        $numbering = "";
        if( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];
        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1]=0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        # count number of headlines for each level
        if( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;
        if( $doNumberHeadings || $doShowToc ) {
            # Build the dotted number from the counters of all levels
            # up to this one that have been used
            $dot = 0;
            for( $i = 1; $i <= $level; $i++ ) {
                if( !empty( $sublevelCount[$i] ) ) {
                    if( $dot ) {
                        $numbering .= ".";
                    }
                    $numbering .= $sublevelCount[$i];
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

        # strip out HTML
        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
        $tocline = trim( $canonized_headline );
        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
        # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
        $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

        # count how many in assoc. array so we can track dupes in anchors
        if( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $refcount[$headlineCount] = $refers[$canonized_headline];

        # Prepend the number to the heading text

        if( $doNumberHeadings || $doShowToc ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if( $doNumberHeadings && count( $matches[3] ) > 1) {
                # the two are different if the line contains a link
                $headline=$numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section
        $anchor = $canonized_headline;
        if($refcount[$headlineCount] > 1 ) {
            # Repeated headline text: make the anchor unique
            $anchor .= "_" . $refcount[$headlineCount];
        }
        if( $doShowToc ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }
        if( empty( $head[$headlineCount] ) ) {
            $head[$headlineCount] = "";
        }
        if( $showEditLink ) {
            $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
        }

        # Add the edit section span
        if( $rightClickHack ) {
            $headline = $sk->editSectionScript($headlineCount+1,$headline);
        }

        # give headline the correct <h#> tag
        $head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

        $headlineCount++;
    }

    if( $doShowToc ) {
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines

    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
    $i = 0;

    foreach( $blocks as $block ) {
        # An [edit] link for the top block of text used to be added here when
        # section editing is enabled. It is disabled because it broke block
        # formatting, for example a bullet point in the top line.
        $full .= $block;
        if( $doShowToc && !$i && $isMain) {
            # Top anchor now in skin
            $full = $full.$toc;
        }

        if( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1801
# magicISBN() turns every "ISBN <number>" in $text into a link to the
# Booksources special page.
#
# The number is the run of digits, hyphens and upper-case letters that
# follows "ISBN " (after optional extra blanks); hyphens are removed
# for the link target. An "ISBN " not followed by such a number is
# left exactly as it was. Returns the processed text.
/* private */ function magicISBN( $text )
{
    $a = explode( "ISBN ", $text );
    if ( count ( $a ) < 2 ) return $text;
    $text = array_shift( $a );
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    foreach ( $a as $x ) {
        # Leading blanks, then the longest run of valid ISBN characters.
        # strspn() copes with an empty remainder, unlike reading $x{0}.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        $num = str_replace( "-", "", $isbn );

        if ( "" == $num ) {
            # No number: restore the text unchanged, including any
            # hyphens that were consumed as part of $isbn
            $text .= "ISBN $blank$isbn$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
            $text .= "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    return $text;
}
# magicRFC() turns every "RFC <digits>" in $text into an external link.
#
# The URL comes from the "rfcurl" message with $1 replaced by the
# number. An "RFC " not followed by digits (after optional extra
# blanks) is left as plain text. Returns the processed text.
/* private */ function magicRFC( $text )
{
    $a = explode( "RFC ", $text );
    if ( count ( $a ) < 2 ) return $text;
    $text = array_shift( $a );
    $valid = "0123456789";

    foreach ( $a as $x ) {
        # Leading blanks, then the longest run of digits.
        # strspn() copes with an empty remainder, unlike reading $x{0}.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( "" == $rfc ) {
            $text .= "RFC $blank$x";
        } else {
            $url = wfmsg( "rfcurl" );
            $url = str_replace( "$1", $rfc, $url);
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
1868
# preSaveTransform() is the entry point that produces altered wiki
# markup (output type OT_WIKI).
#
# $text       - wikitext to transform
# $title      - title stored in $this->mTitle for this run
# $user       - user passed on to pstPass2()
# $options    - options stored in $this->mOptions
# $clearState - whether to call clearState() first
#
# Line endings are normalised, then the text goes through strip(),
# pstPass2() and unstrip(). Returns the transformed text.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    $this->mOutputType = OT_WIKI;
    $this->mTitle =& $title;
    $this->mOptions = $options;

    if ( $clearState ) {
        $this->clearState();
    }

    # Normalise Windows line endings
    $normalized = str_replace( "\r\n", "\n", $text );

    $stripState = false;
    $stripped = $this->strip( $normalized, $stripState, false );
    $transformed = $this->pstPass2( $stripped, $user );

    return $this->unstrip( $transformed, $stripState );
}
1897
# pstPass2() performs the pre-save text changes: variable replacement,
# signature expansion (~~~, ~~~~, ~~~~~), completion of context links
# such as [[|name]] and [[name (context)|]], and trimming of trailing
# whitespace.
#
# $text - wikitext to transform
# $user - user whose name and "nickname" option build the signature
#
# Returns the transformed text.
/* private */ function pstPass2( $text, &$user )
{
    global $wgLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) { $k = $n; }
    if(isset($wgLocaltimezone)) {
        $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
        " (" . date( "T" ) . ")";
    if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

    # Plain string replacement is used so that "$" or "\" in the user
    # name or nickname is inserted literally instead of being read as a
    # regex backreference. The array is processed in order, longest
    # run of tildes first.
    $sig = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace(
        array( "~~~~~", "~~~~", "~~~" ),
        array( $d, "$sig $d", $sig ),
        $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
        # [[ns:page (cont)|]]
    # The context is the parenthesised part of the current page title, if any
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
1968
# Set up some variables which are usually set up in parse()
# so that an external function can call some class members with confidence
#
# Stores the title, options and output type on the parser and, unless
# $clearState is false, calls clearState().
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
1980
# transformMsg() runs replaceVariables() over $text with output type
# OT_MSG, using $wgTitle as the title and $options as the options.
# A call made while another one is still running returns $text
# unchanged, which guards against infinite recursion.
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $executing = false;

    if ( !$executing ) {
        $executing = true;

        $this->mTitle = $wgTitle;
        $this->mOptions = $options;
        $this->mOutputType = OT_MSG;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $executing = false;
    }

    return $text;
}
2000 }
2001
# Holds the result of a parse: the produced text together with the
# language links, category links and "old magic" flag gathered with it,
# plus a cache timestamp slot.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor.
	#
	# $text              output text
	# $languageLinks     array of language links
	# $categoryLinks     array of category links
	# $containsOldMagic  flag stored in mContainsOldMagic
	#
	# The cache time always starts out as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Plain accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators: each one hands the member and the new value to wfSetVar()
	# and returns whatever wfSetVar() returns.
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold another ParserOutput into this one.
	#
	# The other object's language links and category links are appended to
	# this object's respective lists, and the "old magic" flag becomes true
	# if it is set on either object. The text and cache time are left alone.
	#
	# $other  the ParserOutput to merge in
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Categories must come from $other's category list; merging in the
		# language links here would drop $other's categories and pollute
		# the category list with interlanguage links.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2035
# Bundle of switches that steer the parser. The values come partly from
# site-wide $wg* globals and partly from a user's preferences; see
# initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	#
	# Read accessors: each returns the matching member unchanged.
	#
	function getUseTeX() {
		return $this->mUseTeX;
	}
	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}
	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}
	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}
	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}
	function getSkin() {
		return $this->mSkin;
	}
	function getDateFormat() {
		return $this->mDateFormat;
	}
	function getEditSection() {
		return $this->mEditSection;
	}
	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}
	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}
	function getShowToc() {
		return $this->mShowToc;
	}

	#
	# Write accessors: each passes the member and the new value to
	# wfSetVar() (wfSetRef() for the skin) and returns that call's result.
	#
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}
	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}
	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}
	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}
	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}
	function setSkin( $x ) {
		return wfSetRef( $this->mSkin, $x );
	}
	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}
	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}
	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}
	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}
	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# Factory: build a ParserOptions and initialise it from $user.
	# Intended to be called statically.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Fill in every option.
	#
	# Site-wide switches are copied from the $wg* globals. The skin and the
	# per-user preferences are read from $userInput; when $userInput is
	# falsy, a fresh User object marked as loaded is used in its place.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			# No user supplied: fall back to a new User flagged as loaded
			$user = new User;
			$user->setLoaded( true );
		}

		# Site configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# User-specific settings
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}


}
2108
2109 # Regex callbacks, used in Parser::replaceVariables
# Free-function regex callback: hands the match array to the
# braceSubstitution() method of the parser held in the global $wgCurParser
# and returns that method's result.
function wfBraceSubstitution( $matches )
{
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
2115
# Free-function regex callback: hands the match array to the
# argSubstitution() method of the parser held in the global $wgCurParser
# and returns that method's result.
function wfArgSubstitution( $matches )
{
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
2121
# Free-function regex callback: hands the match array to the
# variableSubstitution() method of the parser held in the global
# $wgCurParser and returns that method's result.
function wfVariableSubstitution( $matches )
{
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2127
2128 ?>