Only pop an array if it really is an array. BUG # 963554
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
7 }
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
10 }
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Upper bound on repeated inclusions of one page, i.e. the countermeasure
# announced in the "Variable substitution O(N^2) attack" note above.
define( "MAX_INCLUDE_REPEAT", 5 );

# Allowed values for $mOutputType
# OT_HTML is what parse() selects; strip() only renders extension tags in that mode.
define( "OT_HTML", 1 );
# NOTE(review): presumably the mode used by preSaveTransform() - confirm against the rest of the file
define( "OT_WIKI", 2 );
define( "OT_MSG", 3 );

# string parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( "STRIP_COMMENTS", "HTMLCommentStrip" );

# prefix for escaping, used in two functions at least
# (strip() and insertStripItem() both build their placeholder markers from it)
define( "UNIQ_PREFIX", "NaodW29");
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor, PHP 4 style (a method carrying the class name).
# A new parser starts out in the same state that clearState() produces.
function Parser()
{
	$this->clearState();
}
69
# Resets every per-parse member so that one Parser object can be reused
# for several independent parse() calls.
function clearState()
{
	$this->mOutput = new ParserOutput;
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	# $mInPre is declared among the members "cleared with clearState()";
	# without this reset an unclosed <pre> seen by an earlier parse would
	# still be considered open when the next text is processed.
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();
}
81
82 # First pass--just handle <nowiki> sections, pass the rest off
83 # to internalParse() which does all the real work.
84 #
85 # Returns a ParserOutput
86 #
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	# Converts wiki markup to HTML.
	#   $text       wiki markup to convert
	#   $title      title object of the page being parsed (kept by reference)
	#   $options    ParserOptions-style object consulted by the helper passes
	#   $linestart  passed on to internalParse() and doBlockLevels()
	#   $clearState when true, all per-parse state is reset first
	# Returns $this->mOutput with the generated HTML set as its text.
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	# Protected sections are swapped for markers, the remaining text is
	# parsed, and the protected sections are put back afterwards.
	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			"/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<hr *>/i" => '<hr />',
			"/<br *>/i" => '<br />',
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The hexadecimal class is A-F/a-f only: the former "A-f" range
			# also accepted G-Z and punctuation, so malformed references
			# such as "&#xZZ;" kept a raw ampersand.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# Tidy repairs void elements and stray ampersands itself
		$fixtags = array(
			# french spaces, last one Guillemet-left
			"/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
142
# Returns a short pseudo-random string of lowercase hexadecimal digits,
# made of two independent mt_rand() draws written one after the other.
/* static */ function getRandomString()
{
	$pieces = array();
	for ( $draw = 0; $draw < 2; ++$draw ) {
		$pieces[] = dechex( mt_rand( 0, 0x7fffffff ) );
	}
	return implode( '', $pieces );
}
147
# Replaces all occurrences of <$tag>content</$tag> in the text
# with a random marker and returns the new text. The output parameter
# $content will be an associative array filled with data of the form
# $unique_marker => content.

# If $content is already set, the additional entries will be appended.

# If $tag is set to STRIP_COMMENTS, the function will extract
# <!-- HTML comments --> instead.
157
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	#   $tag          tag name, or STRIP_COMMENTS for HTML comments
	#   $text         text to scan
	#   $content      in/out: marker => extracted inner text
	#   $uniq_prefix  string put in front of every generated marker
	# Returns $text with every extracted section replaced by its marker.
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = "";

	# The delimiters do not change while scanning, so build them once
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = "/<!--/i";
		$closePattern = "/-->/i";
	} else {
		$openPattern = "/<\\s*$tag\\s*>/i";
		$closePattern = "/<\\/\\s*$tag\\s*>/i";
	}

	while ( "" != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No further opening delimiter, or nothing follows it
			$text = "";
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# A section that is never closed swallows the rest of the
			# text; in that case there is no second piece to continue
			# with, and reading $q[1] would be an undefined index.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
189
190 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
191 # If $render is set, performs necessary rendering operations on plugins
192 # Returns the text, and fills an array with data needed in unstrip()
193 # If the $state is already a valid strip state, it adds to the state
194
195 # When $stripcomments is set, HTML comments <!-- like this -->
196 # will be stripped in addition to other tags. This is important
197 # for section editing, where these comments cause confusion when
198 # counting the sections in the wikisource
function strip( $text, &$state, $stripcomments = false )
{
	#   $text           text to protect
	#   $state          in/out strip state: bucket name => ( marker => replacement )
	#   $stripcomments  also extract <!-- comments --> when true
	# Returns $text with all protected sections replaced by markers.
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseWikiHiero']){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseTimeline']){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source with its tags escaped.
				# The closing tag needs its slash, otherwise the reader
				# sees two opening tags.
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Buckets in extraction order. unstrip() walks the state backwards,
	# so this order must be kept for nested sections to expand correctly.
	$extracted = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one.
	# A pre-existing state need not contain every bucket (the one built by
	# insertStripItem() has no 'timeline' or 'comment'), so a missing or
	# non-array bucket is treated as empty rather than added to blindly.
	$merged = array();
	foreach ( $extracted as $bucket => $items ) {
		if ( is_array( $state ) && isset( $state[$bucket] ) && is_array( $state[$bucket] ) ) {
			$merged[$bucket] = $state[$bucket] + $items;
		} else {
			$merged[$bucket] = $items;
		}
	}
	# Buckets this function does not know about (such as 'item') stay at
	# the end so that unstrip() still expands them first.
	if ( is_array( $state ) ) {
		foreach ( $state as $bucket => $items ) {
			if ( !isset( $merged[$bucket] ) ) {
				$merged[$bucket] = $items;
			}
		}
	}
	$state = $merged;
	return $text;
}
288
# Puts back everything that strip() and insertStripItem() replaced by markers.
#   $text   text containing markers
#   $state  strip state: bucket name => ( marker => replacement )
# Returns the text with every known marker expanded.
function unstrip( $text, &$state )
{
	if ( !is_array( $state ) ) {
		# Nothing was ever stripped
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# The buckets are iterated as reversed copies instead of with
	# end()/prev(): that walk used "=== false" as its end marker and so
	# stopped early at the first entry that was itself false, leaving all
	# earlier markers in the text.
	foreach ( array_reverse( $state, true ) as $contentDict ) {
		if ( !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, (string)$content, $text );
		}
	}

	return $text;
}
301
302 # Add an item to the strip state
303 # Returns the unique tag which must be inserted into the stripped text
304 # The tag will be replaced with the original text in unstrip()
305
function insertStripItem( $text, &$state )
{
	#   $text   text that unstrip() will later put in place of the marker
	#   $state  in/out strip state; created here when still empty
	# Returns the generated marker.
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Create the same buckets, in the same order, that strip() uses.
		# strip() merges into each of them, and 'timeline' and 'comment'
		# were missing here. 'item' comes last so that unstrip(), which
		# works backwards, expands items before the sections inside them.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'item' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
320
321 # This method generates the list of subcategories and pages for a category
function categoryMagic ()
{
	# Returns the HTML listing of subcategories and member pages when the
	# title being parsed is a category page, and "" otherwise.
	global $wgLang ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	# The skin comes from the parser options, as in the other methods of
	# this class; the file header asks that $wgUser be kept out of the parser.
	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
378
# Returns the whitelist of attribute names that may appear on HTML tags
# in wiki text (no scripting attributes). fixTagAttributes() drops every
# attribute whose name is not in this list.
function getHTMLattrs ()
{
	$common = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" ) ;
	$lineBreak = array( "clear" ) ; # BR
	$rule = array( "noshade" ) ; # HR
	$quotation = array( "cite" ) ; # BLOCKQUOTE, Q
	$font = array( "size", "face", "color" ) ; # FONT
	# For various lists, mostly deprecated but safe
	$lists = array( "type", "start", "value", "compact" ) ;
	$tables = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	) ;
	$css = array( "id", "class", "name", "style" ) ; # For CSS

	return array_merge ( $common , $lineBreak , $rule , $quotation ,
		$font , $lists , $tables , $css ) ;
}
395
# Cleans the attribute part of an HTML tag.
#   $t  raw attribute text, e.g. 'border="1" onclick="x"'
# Returns the text with every attribute that getHTMLattrs() does not allow
# removed, or "" when a style attribute looks like it carries script or
# an external reference.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
	$htmlattrs = $this->getHTMLattrs() ;

	# Strip non-approved attributes from the tag.
	# The matches are walked by hand instead of using preg_replace() with
	# the /e modifier: /e pasted the attribute value into a double-quoted
	# PHP string and evaluated it, which lets input such as {${...}} run
	# code and adds backslashes in front of single quotes.
	$pattern = "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/" ;
	preg_match_all ( $pattern , $t , $found , PREG_SET_ORDER | PREG_OFFSET_CAPTURE ) ;
	$clean = "" ;
	$pos = 0 ;
	foreach ( $found AS $m )
	{
		# Text between two matches is carried over unchanged
		$start = $m[0][1] ;
		$clean .= substr ( $t , $pos , $start - $pos ) ;
		$pos = $start + strlen ( $m[0][0] ) ;

		$name = $m[1][0] ;
		if ( in_array ( strtolower ( $name ) , $htmlattrs ) )
		{
			$clean .= $name ;
			# Group 3 is absent for an attribute without a value
			if ( isset ( $m[3] ) && $m[3][0] !== "" ) $clean .= "=" . $m[3][0] ;
		}
	}
	$clean .= substr ( $t , $pos ) ;
	$t = $clean ;

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}
418
419 /* interface with html tidy, used if $wgUseTidy = true */
# Runs the text through the external tidy program.
#   $text  HTML fragment produced by the parser
# Returns tidy's output, or the unchanged fragment followed by an HTML
# comment when tidy produced nothing.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy of the configured options. Appending to the global
	# itself made the encoding switch pile up, once per call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	# tidy is given a complete document; the fragment itself is kept in
	# $text so that it can be handed back untouched if tidy fails.
	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),
		1 => array("pipe", "w"),
		2 => array("file", "/dev/null", "a")
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
466
# Converts wiki table markup ({| ... |}, |-, |, !, |+) into HTML tables.
#   $t  text, possibly containing table markup
# Returns the text with table lines replaced by HTML.
# Four parallel stacks hold one entry per currently open (nested) table.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after the two-character "{|" is attribute text.
			# Starting at offset 3 lost the first attribute character
			# when no space followed the "{|".
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			# The attributes are kept until the first cell opens the <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# A cell opens its row if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				# "attributes|content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the element that is actually open (td, th or caption)
		# instead of always writing </td>.
		$l = array_pop ( $ltd ) ;
		if ( $l == "" ) $l = "td" ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
566
567 # Parses the text and adds the result to the strip state
568 # Returns the strip tag
function stripParse( $text, $newline, $args )
{
	# $newline is used twice: its truth value tells internalParse() whether
	# the text starts a line, and the value itself is put in front of the marker.
	$atLineStart = (bool)$newline;

	$protected = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $protected, $atLineStart, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );

	return $newline . $marker;
}
575
# Runs the individual markup passes over text whose protected sections have
# already been replaced by strip markers.
#   $text       text to convert
#   $linestart  not used in this function itself
#   $args       handed to replaceVariables()
#   $isMain     handed to formatHeadings()
# Returns the converted text. The passes depend on each other's output,
# so their order must not be changed.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$fname = "Parser::internalParse";
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# A line starting with four or more dashes becomes a horizontal rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

	$text = $this->doHeadings( $text );
	if($this->mOptions->getUseDynamicDates()) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
	}
	$text = $this->doAllQuotes( $text );
	# External links go first, see the note above replaceExternalLinks()
	$text = $this->replaceExternalLinks( $text );
	# NOTE(review): the pass is run twice; presumably so that links left over
	# from the first run (e.g. nested ones) are picked up - confirm before removing
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# The category listing is appended by the first call that gets here;
	# the flag is not reset anywhere in the code visible in this part of the file.
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
611
612
# Turns "== Title ==" style lines into <h1>..<h6> elements.
# The deepest level is handled first so that a run of six equal signs
# is not mistaken for a lower level.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
622
# Applies doQuotes() to every line separately, so that emphasis opened on
# one line never runs into the next one.
/* private */ function doAllQuotes( $text )
{
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( "", $line, "" );
	}
	return implode( "\n", $converted );
}
632
# Converts the apostrophe markup of a single line: '' for <em>, ''' for <strong>.
#   $pre   text already consumed that still belongs inside the currently
#          open outer element
#   $text  remaining text to scan
#   $mode  which elements are open: "", "em", "strong", "emstrong"
#          (em outside, strong inside), "strongem" (strong outside, em
#          inside) or "both" (both open, nesting not decided yet)
# Returns the converted text. The function calls itself for the text after
# each apostrophe group.
/* private */ function doQuotes( $pre, $text, $mode )
{
	# Ungreedy match: $m[1] is the text before the first '', $m[2] the rest
	if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		$m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
		$m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
		if ( substr ($m[2], 0, 1) == "'" ) {
			# A third apostrophe follows: this is a ''' (strong) toggle
			$m[2] = substr ($m[2], 1);
			if ($mode == "em") {
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
			} else if ($mode == "strong") {
				return $m1_strong . $this->doQuotes ( "", $m[2], "" );
			} else if (($mode == "emstrong") || ($mode == "both")) {
				return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
			} else if ($mode == "strongem") {
				return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
			} else {
				return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
			}
		} else {
			# Plain '' (em) toggle
			if ($mode == "strong") {
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
			} else if ($mode == "em") {
				return $m1_em . $this->doQuotes ( "", $m[2], "" );
			} else if ($mode == "emstrong") {
				return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
			} else if (($mode == "strongem") || ($mode == "both")) {
				return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
			} else {
				return $m[1] . $this->doQuotes ( "", $m[2], "em" );
			}
		}
	} else {
		# No apostrophe markup left: whatever is still open is closed at
		# the end of the text
		$text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
		$text_em = ($text == "") ? "" : "<em>{$text}</em>";
		if ($mode == "") {
			return $pre . $text;
		} else if ($mode == "em") {
			return $pre . $text_em;
		} else if ($mode == "strong") {
			return $pre . $text_strong;
		} else if ($mode == "strongem") {
			return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
		} else {
			return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
		}
	}
}
680
681 # Note: we have to do external links before the internal ones,
682 # and otherwise take great care in the order of things here, so
683 # that we don't end up interpreting some URLs twice.
684
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => whether unlabelled bracketed links are numbered
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
699
# Converts the external links of one protocol.
#   $s           text to convert
#   $protocol    scheme without the colon, e.g. "http"
#   $autonumber  true: bracketed links without a label are shown as [1],
#                [2], ...; it is also one of the two conditions for turning
#                image URLs into inline images
# Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the protocol while free links are rewritten, so that
	# text produced by one replacement is not matched again by the next;
	# it is swapped back to the real protocol further down.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	# Characters allowed inside a URL
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	# Characters allowed in the file name part of an image URL
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	# $e1: an unbracketed URL ending in an image file name
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	# $e2: any other unbracketed URL
	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
			"/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
	}
	$s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		"</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split at every "[protocol:". The blank put in front
	# (and taken off again below) keeps a link at the very start of the
	# text from producing an empty first piece.
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# $e1: "url]rest"   $e2: "url label]rest"
	$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $a as $line ) {
		if ( preg_match( $e1, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		} else if ( preg_match( $e2, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed bracketed link: put the text back unchanged
			$s .= "[{$protocol}:" . $line;
			continue;
		}
		# No expansion when the label already shows the URL
		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

	}
	return $s;
}
766
767
# Converts [[internal links]], including image, media, category, special
# page, interlanguage and self links.
#   $s  text to convert
# Returns the converted text. Language links and category links are taken
# out of the text and recorded in $this->mOutput instead.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::replaceInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# Split at every "[["; the blank put in front (and removed again) keeps
	# a link at the very start of the text from producing an empty first piece
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	# wfProfileIn( $fname ) was called once, so wfProfileOut( $fname ) is
	# called once as well, after the loop. The early "continue" paths below
	# must not call it.
	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a valid title: put the text back unchanged
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Interlanguage link: recorded in the output, not shown in the text
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a "#" in first position, so the result
		# is compared with === to tell that apart from "not found".
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
923
924 # Some functions here used by doBlockLevels()
925 #
# Returns the closing tag for the block element named in $mLastSection
# (or "" when none is open) and marks both that element and any <pre>
# as closed.
/* private */ function closeParagraph()
{
	$openSection = $this->mLastSection;

	$this->mInPre = false;
	$this->mLastSection = "";

	if ( '' == $openSection ) {
		return "";
	}
	return "</" . $openSection . ">\n";
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters they share.
#
/* private */ function getCommon( $st1, $st2 )
{
	# Only as many characters as the shorter string has can be shared
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Characters are read with square brackets: the curly-brace form
	# ($st1{$i}) is a fatal error from PHP 8.0 on, while square brackets
	# work in every PHP version.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
950 # These next three functions open, continue, and close the list
951 # element appropriate to the prefix character passed into them.
952 #
/* private */ function openList( $char )
{
	# Any open paragraph is closed before the list starts
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			# Remember that a term, not a definition, is open
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix: only the marker is returned
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
968
# Returns the markup that ends the current list item and starts the
# next one for the list type selected by $char.  For definition lists
# the closing tag depends on whether a <dt> is currently open, and the
# mDTopen flag is updated to reflect the item being started.
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case "*":
		case "#":
			return "</li><li>";

		case ":":
		case ";":
			$closeTag = $this->mDTopen ? "</dt>" : "</dd>";
			$startsTerm = ( ";" == $char );
			$this->mDTopen = $startsTerm;
			return $closeTag . ( $startsTerm ? "<dt>" : "<dd>" );
	}
	return "<!-- ERR 2 -->";
}
985
# Returns the markup (newline-terminated) that closes the innermost
# list of the type selected by $char.  Only "*", "#" and ":" are
# recognised; anything else yields an error marker without a newline.
/* private */function closeList( $char )
{
	if ( "*" == $char ) {
		return "</li></ul>\n";
	}
	if ( "#" == $char ) {
		return "</li></ol>\n";
	}
	if ( ":" != $char ) {
		return "<!-- ERR 3 -->";
	}

	# Definition list: close whichever of <dt>/<dd> is currently open
	if ( $this->mDTopen ) {
		$this->mDTopen = false;
		return "</dt></dl>\n";
	}
	return "</dd></dl>\n";
}
1001
# Converts line-oriented wiki markup into block-level HTML.
#
# $text      - the text to process, lines separated by "\n"
# $linestart - if false, the first line is copied to the output
#              untouched and block processing starts with the second line
#
# Returns the text with lists (* # : ;), paragraphs and
# space-indented preformatted sections wrapped in the matching tags.
# Uses and updates $this->mInPre, $this->mLastSection and $this->mDTopen.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or paragraph markup whose output is deferred until
	# the next line shows whether it is needed.
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		if (!$this->mInPre) {
			$this->mInPre = !empty($preOpenMatch);
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 treats ";" like ":" so that a term line and a
			# definition line count as the same list nesting.
			$pref2 = str_replace( ";", ":", $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ":" );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels of the previous line that this line no longer shares.
			# (Square-bracket string offsets: the curly-brace form is no
			# longer accepted by current PHP versions.)
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			# Open the levels that are new on this line
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# trim() is tested first so that $t[0] is never read on an
				# empty line (which would raise an offset notice/warning).
				if ( trim($t) != '' and " " == $t[0] ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							# Second blank line in a row: emit the pending
							# markup plus an explicit line break
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While paragraph markup is pending, the (blank) line itself is not output
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close every list level still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $output;
}
1160
# Returns the current value of the magic variable identified by the
# MAG_* constant $index, or NULL if $index is not one of the variables
# handled here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;

	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one (patch by Dori)
			$value = $wgLang->getNsText($this->mTitle->getNamespace());
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") is 0-based, hence the +1 passed on to getWeekdayName()
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}

	return $value;
}
1194
# Rebuilds $this->mVariables from scratch: for every ID listed in
# $wgVariableIDs the matching MagicWord adds its entry, together with
# the value from getVariableValue(), to the array.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $magicId ) {
		$magicWord =& MagicWord::get( $magicId );
		$currentValue = $this->getVariableValue( $magicId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1204
# Expands {{...}} and {{{...}}} constructs in $text.
#
# $text - wiki text to process
# $args - arguments made available to {{{name}}} placeholders while this
#         call is running (pushed on $this->mArgStack for the duration)
#
# For OT_HTML output, plain variables and {{{argument}}} placeholders
# are substituted first; the template/brace pass runs for every output
# type.  Returns the processed text.
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;


	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

		# Argument substitution
		$text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
	}
	# Template substitution
	# The third group captures the optional argument list: either
	# "|" followed by anything up to the closing braces, or nothing.
	# The "." is essential: without it the group could only match a
	# run of pipe characters and templates with arguments never matched.
	$regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
	$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1242
# Regex callback body for {{VARIABLE}}: $matches[1] is the text between
# the braces.  Returns the variable's value if it is a known variable
# (and flags the output as containing old-style magic), otherwise
# returns the matched text unchanged.
function variableSubstitution( $matches )
{
	$name = $matches[1];

	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
1253
# Resolves one {{...}} construct matched by the template regex.
#
# $matches - regex match array: [0] whole match, [1] optional leading
#            newline, [2] text before the first "|", [3] the argument
#            part (empty, or starting with "|")
#
# The handlers below are tried in order (SUBST, MSG/MSGNW/INT, NS,
# LOCALURL/LOCALURLE, internal variables, database lookup); the first
# one that sets $found wins.  Returns the replacement text, or
# $matches[0] unchanged when nothing applied.
# NOTE(review): presumably reached through the "wfBraceSubstitution"
# callback registered in replaceVariables() - confirm against that wrapper.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;
	$fname = "Parser::braceSubstitution";
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}
	# A match containing a triple brace is returned as-is and not parsed further
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		# NOTE(review): matchStartAndRemove() appears to strip the matched
		# prefix from $part1 in place - confirm in MagicWord.
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			# Internal messages count against the include limit under an "int:" key
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			# A non-zero number is used as a namespace index directly;
			# anything else is looked up as a canonical namespace name.
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		# $func names the Title method that will build the URL
		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, if present, is passed on to the URL method
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}
	/*
	# Arguments input from the caller
	$inputArgs = end( $this->mArgStack );
	if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
		$text = $inputArgs[$part1];
		$found = true;
	}
	*/
	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;

				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array
		# Arguments without "=" are numbered from 1; "name=value"
		# arguments are stored under their trimmed name.
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				if ( $value === false ) {
					$value = "";
				}
				if ( $name !== false ) {
					$assocArgs[$name] = $value;
				}
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1452
1453 # Triple brace replacement -- used for template arguments
# Regex callback body for {{{name}}}: $matches[1] is an optional leading
# newline, $matches[2] the argument name.  If the arguments on top of
# mArgStack contain that (trimmed) name, its value is run through
# stripParse() and returned; otherwise the matched text is returned
# unchanged.
function argSubstitution( $matches )
{
	$callerArgs = end( $this->mArgStack );
	$argName = trim( $matches[2] );

	if ( !array_key_exists( $argName, $callerArgs ) ) {
		return $matches[0];
	}

	return $this->stripParse( $callerArgs[$argName], $matches[1], array() );
}
1467
1468 # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity keyed by $dbk and reports
# whether the total is still within MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk )
{
	if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk]++;
	} else {
		# First inclusion of this entity
		$this->mIncludeCount[$dbk] = 1;
	}

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1480
1481
1482 # Cleans up HTML, removes dangerous tags and attributes
# Escapes every HTML tag in $text that is not on the whitelist and
# passes the attributes of whitelisted tags through fixTagAttributes().
# HTML comments are removed.  The whitelist is empty unless $wgUserHtml
# is set.
#
# Without $wgUseTidy the function also tracks nesting: closing tags
# that do not match the innermost open tag are escaped, table cells/rows
# outside a table are escaped, and tags still open at the end of the
# text are closed.  With $wgUseTidy only the whitelist check is applied.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
	global $wgUseTidy, $wgUserHtml;
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
			"strike", "strong", "tt", "var", "div", "center",
			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
			"ruby", "rt" , "rb" , "rp", "p"
		);
		$htmlsingle = array(
			"br", "hr", "li", "dt", "dd"
		);
		$htmlnest = array( # Tags that can be nested--??
			"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
			"dl", "font", "big", "small", "sub", "sup"
		);
		$tabletags = array( # Can only appear inside table
			"td", "th", "tr"
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments (a comment alone on its line is removed
	# together with the newline in front of it)
	$text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

	# Groups: 1 = "/" of a closing tag, 2 = tag name, 3 = attributes,
	# 4 = ">" or "/>", 5 = text up to the next "<"
	$tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			if ( !preg_match( $tagPattern, $x, $regs ) ) {
				# Not a well-formed tag at all: escape it
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
				continue;
			}
			list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;

			$badtag = 0 ;
			if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					# Tags that need no closing are accepted as they are;
					# any other closing tag must match the innermost open
					# tag.  The pop and the comparison are kept as separate
					# steps so that the popped NAME is compared with $t.
					$ot = NULL;
					$mismatch = false;
					if ( ! in_array( $t, $htmlsingle ) ) {
						if ( count( $tagstack ) ) {
							$ot = array_pop( $tagstack );
						}
						$mismatch = ( $ot !== $t );
					}
					if ( $mismatch ) {
						# Put back what was popped (nothing if the stack was empty)
						if ( !is_null( $ot ) ) array_push( $tagstack, $ot );
						$badtag = 1;
					} else {
						if ( $t == "table" ) {
							# Restore the tag stack saved when the table was opened
							$tagstack = count( $tablestack ) ? array_pop( $tablestack ) : array();
						}
						$newparams = "";
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					  ! in_array( "table", $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					  ! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == "table" ) {
							# A table starts a fresh tag stack; the outer one is saved
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);

				}
				if ( ! $badtag ) {
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			if ( !preg_match( $tagPattern, $x, $regs ) ) {
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
				continue;
			}
			list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
				$newparams = $this->fixTagAttributes($params);
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1598
1599
1600 /*
1601 *
1602 * This function accomplishes several tasks:
1603 * 1) Auto-number headings if that option is enabled
1604 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1605 * 3) Add a Table of contents on the top for users who have enabled the option
1606 * 4) Auto-anchor headings
1607 *
1608 * It loops through all headlines, collects the necessary data, then splits up the
1609 * string and re-inserts the newly formatted headlines.
1610 *
1611 */
1612
# Rewrites all <h1>..<h6> headings found in $text.
#
# $text   - HTML text containing the headings
# $isMain - when false, a generated table of contents is not inserted
#
# Returns the text with every heading replaced by an anchored (and
# optionally numbered / edit-linked) version, and with the table of
# contents appended to the first text block when one is to be shown.
/* private */ function formatHeadings( $text, $isMain=true )
{
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	# No edit links at all when the title's userCanEdit() check fails
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = heading level, $matches[2] = rest of the opening tag
	# including ">", $matches[3] = heading content
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}


	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();
	$sublevelCount = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = "";
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		# (@ hides the notice raised the first time a level is seen)
		@$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build the dotted section number from the counters of all
			# levels up to the current one, skipping levels not used so far
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
		# strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
		$canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
		$refer[$headlineCount] = $canonized_headline;

		# count how many in assoc. array so we can track dupes in anchors
		@$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		# A repeated heading gets "_<occurrence number>" appended so
		# that every anchor stays unique
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= "_" . $refcount[$headlineCount];
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink ) {
			if ( empty( $head[$headlineCount] ) ) {
				$head[$headlineCount] = "";
			}
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		# (@ because $head[$headlineCount] is unset unless an edit link was added above)
		@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toclines = $headlineCount;
		# Close every TOC indentation level that is still open
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	# $blocks[$i] is the text in front of heading $i; the rebuilt
	# heading $head[$i] is appended right after it
	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1798
# Turns every "ISBN <number>" in $text into a link to the Booksources
# special page.
#
# The number consists of the digits, dashes and upper-case letters that
# directly follow "ISBN " (after optional extra spaces); dashes are
# dropped for the link target.  An "ISBN " that is not followed by such
# a number is left as it was.  Returns the resulting text.
/* private */ function magicISBN( $text )
{
	# The separator is a plain string, so explode() is all that is needed
	# (the POSIX-regex split() is not available in current PHP versions).
	# The extra leading space is stripped again from the first piece below.
	$a = explode( "ISBN ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# strspn() measures the leading run of allowed characters without
		# ever indexing past the end of the string, so a text that ends
		# right after "ISBN " or right after the number is handled cleanly.
		$blankLength = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLength );
		$x = (string)substr( $x, $blankLength );

		$isbnLength = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLength );
		$x = (string)substr( $x, $isbnLength );

		$num = str_replace( "-", "", $isbn );
		$num = str_replace( " ", "", $num );

		if ( "" == $num ) {
			# No usable number: restore the original text
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
			  $titleObj->escapeLocalUrl( "isbn={$num}" ) .
			  "\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
# Turns every "RFC <number>" in $text into an external link.
#
# The number is the run of digits directly following "RFC " (after
# optional extra spaces).  The link target is the "rfcurl" message with
# "$1" replaced by the number.  An "RFC " that is not followed by a
# digit is left as it was.  Returns the resulting text.
/* private */ function magicRFC( $text )
{
	# The separator is a plain string, so explode() is all that is needed
	# (the POSIX-regex split() is not available in current PHP versions).
	# The extra leading space is stripped again from the first piece below.
	$a = explode( "RFC ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# strspn() measures the leading run of allowed characters without
		# ever indexing past the end of the string, so a text that ends
		# right after "RFC " or right after the number is handled cleanly.
		$blankLength = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLength );
		$x = (string)substr( $x, $blankLength );

		$rfcLength = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLength );
		$x = (string)substr( $x, $rfcLength );

		if ( "" == $rfc ) {
			# No number: restore the original text
			$text .= "RFC $blank$x";
		} else {
			$url = wfMsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
1865
# Entry point for transforming wiki text before it is saved.
#
# $text       - the submitted wiki text
# $title      - title of the page (stored by reference in mTitle)
# $user       - user object, handed on to pstPass2()
# $options    - options object stored in mOptions
# $clearState - if true, clearState() is called before processing
#
# Returns the altered wiki markup.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Convert CR+LF line ends to plain LF
	$text = str_replace( array( "\r\n" ), array( "\n" ), $text );

	# (A regex pass that normalised <br> tags used to follow here; it is disabled.)

	# Strip, run the pre-save pass, then put the stripped pieces back
	$stripState = false;
	$stripped = $this->strip( $text, $stripState, false );
	$transformed = $this->pstPass2( $stripped, $user );

	return $this->unstrip( $transformed, $stripState );
}
1894
# Second pass of the pre-save transform.
#
# $text - wiki text (already stripped by the caller)
# $user - user object; getName() and the "nickname" option are used for
#         signatures
#
# Performs, in this order: variable replacement, signature expansion
# (~~~, ~~~~, ~~~~~), completion of context links such as [[|name]] and
# [[name (context)|]], and trimming of trailing whitespace followed by
# removal of the MAG_END marker.  Returns the transformed text.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# The tilde runs are literal strings, so plain str_replace() is used:
	# with preg_replace() a "$1" or "\1" inside the user name, nickname
	# or date would be interpreted as a backreference and corrupted.
	# Longest run first, so that ~~~~~ is not consumed by the shorter forms.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~~", $d, $text );
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
													# [[ns:page (cont)|]]
	# The context of the current page is the parenthesised suffix of its title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1965
1966 # Set up some variables which are usually set up in parse()
1967 # so that an external function can call some class members with confidence
# Stores the title (by reference), options and output type on the
# parser and, unless $clearState is false, calls clearState().
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
1977
# Runs variable replacement over the message text $text in OT_MSG mode,
# using $wgTitle as the title and $options as the parser options.
# While a call is already in progress, any further call returns $text
# unchanged.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion
	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
1997 }
1998
# Holds the result of a parse: the output text together with the language
# links, the category links and the "contains old magic" flag.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor. Every argument is optional; the cache time always starts
	# out as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Read accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Write accessors: each one assigns through wfSetVar() and returns
	# whatever that helper returns.
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold another ParserOutput into this one: its language links and
	# category links are appended to ours, and the old-magic flags are ORed.
	# mText and mCacheTime are left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Append the OTHER object's category links. This used to append our own
		# language links, which dropped $other's categories and polluted the list.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2032
# Bundle of settings handed to the parser. The members are meant to be
# private; go through the get*/set* pairs below. Every setter assigns via
# wfSetVar() (wfSetRef() for the skin) and returns that helper's result.
class ParserOptions
{
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# --- TeX ---
	function getUseTeX() { return $this->mUseTeX; }
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

	# --- Category magic ---
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

	# --- Dynamic dates ---
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

	# --- Interwiki magic ---
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

	# --- External images ---
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

	# --- Skin (the only member set through wfSetRef) ---
	function getSkin() { return $this->mSkin; }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

	# --- Date format ---
	function getDateFormat() { return $this->mDateFormat; }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

	# --- Section editing ---
	function getEditSection() { return $this->mEditSection; }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	# --- Heading numbers ---
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

	# --- Table of contents ---
	function getShowToc() { return $this->mShowToc; }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Factory: build a fresh ParserOptions and fill it in from $user.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Populate every member. Site-wide switches are copied from the $wg*
	# globals; the skin and the remaining preferences are read from the
	# user. A false-ish $userInput is replaced by a new User object that
	# is flagged as loaded.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user settings
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}

}
2105
2106 # Regex callbacks, used in Parser::replaceVariables
# Plain-function callback: hands the match array to the braceSubstitution()
# method of the parser held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2112
# Plain-function callback: hands the match array to the argSubstitution()
# method of the parser held in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2118
# Plain-function callback: hands the match array to the variableSubstitution()
# method of the parser held in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2124
2125 ?>