Don't put literal <nowiki>, <hiero>, or <timeline> into the output. This
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
7 }
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
10 }
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Cap on repeated inclusions of any one page (see the O(N^2) note above)
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): when passed as $tag, HTML comments
# are extracted instead of an <XML>-style tag. This should not be
# anything we may want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the placeholder markers, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
function Parser()
{
	# PHP 4 style constructor: every per-parse member is initialised
	# in one place, clearState().
	$this->clearState();
}
69
function clearState()
{
	# Reset every member that carries state from one parse to the next.

	# Output object and counters
	$this->mOutput = new ParserOutput();
	$this->mAutonumber = 0;

	# Collections
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();

	# Flags and markers
	$this->mVariables = false;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mInNowiki = false;
	$this->mLastSection = "";
}
83
84 # First pass--just handle <nowiki> sections, pass the rest off
85 # to internalParse() which does all the real work.
86 #
87 # Returns a ParserOutput
88 #
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	# Converts wiki markup to HTML.
	#
	# $text       - wiki markup to convert
	# $title      - title object of the page being parsed (kept by reference)
	# $options    - ParserOptions-style object queried by the parsing passes
	# $linestart  - passed through to internalParse() and doBlockLevels()
	# $clearState - when true, clearState() is called first
	#
	# Returns $this->mOutput after setText() has been given the HTML.
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			"/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<hr *>/i" => '<hr />',
			"/<br *>/i" => '<br />',
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The lookahead skips "&" that already starts "amp;", a hex or
			# decimal character reference, or something shaped like a
			# named entity.
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		# With tidy enabled the <hr>/<br> and ampersand fix-ups are not applied here
		$fixtags = array(
			# french spaces, last one Guillemet-left
			"/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
144
/* static */ function getRandomString()
{
	# Two independent 31-bit mt_rand() draws, each written in lowercase
	# hex and concatenated.
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
149
150 # Replaces all occurrences of <$tag>content</$tag> in the text
151 # with a random marker and returns the new text. the output parameter
152 # $content will be an associative array filled with data on the form
153 # $unique_marker => content.
154
155 # If $content is already set, the additional entries will be appended
156
157 # If $tag is set to STRIP_COMMENTS, the function will extract
158 # <!-- HTML comments -->
159
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	# $tag         - tag name, or STRIP_COMMENTS to extract <!-- ... -->
	# $text        - text to scan
	# $content     - in/out map of marker => extracted content; new
	#                entries are added to whatever is already there
	# $uniq_prefix - string every generated marker starts with
	#
	# Returns $text with each extracted section replaced by its marker.
	# An opening tag with no closing tag takes the rest of the text as
	# its content.
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}

	# The delimiters do not change while scanning, so build them once
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = "/<!--/i";
		$closePattern = "/-->/i";
	} else {
		$openPattern = "/<\\s*$tag\\s*>/i";
		$closePattern = "/<\\/\\s*$tag\\s*>/i";
	}

	$n = 1;
	$stripped = "";

	while ( "" != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No opening tag left, or nothing follows it
			$text = "";
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag preg_split() yields a single element;
			# there is then no remaining text to scan.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
191
192 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
193 # If $render is set, performs necessary rendering operations on plugins
194 # Returns the text, and fills an array with data needed in unstrip()
195 # If the $state is already a valid strip state, it adds to the state
196
197 # When $stripcomments is set, HTML comments <!-- like this -->
198 # will be stripped in addition to other tags. This is important
199 # for section editing, where these comments cause confusion when
200 # counting the sections in the wikisource
function strip( $text, &$state, $stripcomments = false )
{
	# $text          - wiki text to process
	# $state         - in/out strip state: map of type => (marker => content).
	#                  An existing state is extended, never replaced.
	# $stripcomments - when true, HTML comments are extracted as well
	#
	# Returns $text with every extracted section replaced by its marker.
	# Extracted content is rendered only when the output type is OT_HTML;
	# otherwise it is stored wrapped in its original tag.
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			//# use span to mark nowiki areas, note the trailing whitespace in span to avoid collisions with other spans
			//$nowiki_content[$marker] = '<span class="nowiki">'.wfEscapeHTMLTagsOnly( $content )."</span >";
			$nowiki_content[$marker] = $content;
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render ) {
			if( $GLOBALS['wgUseWikiHiero'] ) {
				$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
			} else {
				# Extension disabled: show the tag literally
				$hiero_content[$marker] = "&lt;hiero&gt;$content&lt;/hiero&gt;";
			}
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render ) {
			if( $GLOBALS['wgUseTimeline'] ) {
				$timeline_content[$marker] = renderTimeline( $content );
			} else {
				# Extension disabled: show the tag literally
				$timeline_content[$marker] = "&lt;timeline&gt;$content&lt;/timeline&gt;";
			}
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ) {
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	$extracted = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one.
	# A pre-existing state need not have every key (one created by
	# insertStripItem() may lack some), and "null + array" is a fatal
	# error, so each key is merged only if it is already present.
	# Existing entries win on a marker collision.
	if ( $state ) {
		foreach ( $extracted as $type => $items ) {
			$state[$type] = isset( $state[$type] ) ? $state[$type] + $items : $items;
		}
	} else {
		$state = $extracted;
	}
	return $text;
}
300
function unstrip( $text, &$state )
{
	# Puts the stored content back in place of its markers.
	# Must expand in reverse order, otherwise nested tags will be
	# corrupted: groups are walked last-to-first, and so are the markers
	# inside each group.
	foreach ( array_reverse( $state, true ) as $group ) {
		foreach ( array_reverse( $group, true ) as $marker => $replacement ) {
			$text = str_replace( $marker, $replacement, $text );
		}
	}

	return $text;
}
313
314 # Add an item to the strip state
315 # Returns the unique tag which must be inserted into the stripped text
316 # The tag will be replaced with the original text in unstrip()
317
function insertStripItem( $text, &$state )
{
	# $text  - content to store
	# $state - in/out strip state; initialised here when empty
	#
	# Returns the marker that stands for $text until unstrip() runs.
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same keys, in the same order, as the state built by strip(),
		# so that strip() can merge into a state created here.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
332
333 # This method generates the list of subcategories and pages for a category
function categoryMagic ()
{
	# Returns the HTML listing (subcategories, then pages) for the
	# current title, or "" when category magic is switched off or the
	# current title is not in the category namespace.
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";


	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;
	# NOTE(review): the returned ID was never used; the call is kept in
	# case getArticleID() has side effects callers rely on - confirm.
	$this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		# Build "Namespace:Title" (no prefix when the namespace text is empty)
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}


	return $r ;
}
390
function getHTMLattrs ()
{
	# Whitelist of attribute names (lowercase) allowed to survive in
	# user-supplied tags -- no scripting, etc.
	return array(
		"title", "align", "lang", "dir", "width", "height",
		# BR
		"bgcolor", "clear",
		# HR
		"noshade",
		# BLOCKQUOTE, Q
		"cite",
		# FONT
		"size", "face", "color",
		# For various lists, mostly deprecated but safe
		"type", "start", "value", "compact",
		# Tables
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan",
		# For CSS
		"id", "class", "name", "style"
	);
}
407
function fixTagAttributes ( $t )
{
	# $t - raw attribute text of a tag
	#
	# Returns the attribute text with every attribute whose name is not
	# listed by getHTMLattrs() removed, trimmed; or "" when a style
	# attribute looks dangerous.
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier, so attribute values
	# are never evaluated as PHP code.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'fixTagAttributesCallback' ),
		$t);
	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

/* private */ function fixTagAttributesCallback ( $m )
{
	# preg_replace_callback() handler for fixTagAttributes():
	# $m[1] is the attribute name, $m[3] (when present) its value
	# including any quotes. Returns "name", "name=value", or "" when the
	# name is not allowed.
	if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() , true ) ) {
		return "" ;
	}
	$value = isset ( $m[3] ) ? $m[3] : "" ;
	return ( $value !== "" ) ? $m[1] . "=" . $value : $m[1] ;
}
430
431 /* interface with html tidy, used if $wgUseTidy = true */
function tidy ( $text ) {
	# Pipes $text, wrapped in a minimal XHTML document, through the
	# external tidy binary and returns what tidy wrote to stdout.
	# If tidy produced nothing for non-empty input, the original text is
	# returned with an HTML comment appended.
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	# Work on a copy: appending to the global would pile up one more
	# encoding switch on every call.
	$opts = $wgTidyOpts;
	$cleansource = '';
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),
		1 => array("pipe", "w"),
		2 => array("file", "/dev/null", "a")
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$chunk = fgets($pipes[1], 1024);
			if ( $chunk === false ) {
				# Read failed without reaching EOF; stop instead of spinning
				break;
			}
			$cleansource .= $chunk;
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
478
function doTableStuff ( $t )
{
	# Converts wiki table markup to HTML, one line at a time:
	#   {|  opens a table      |}  closes it      |-  starts a new row
	#   |   data cell(s)       !   header cell(s) |+  caption
	# The four stacks below hold one entry per currently open table, so
	# nested tables work. Anything still open at the end of the text is
	# closed. Returns the converted text.
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after "{|" is attribute text; fixTagAttributes()
			# trims it, so "{|border=1" and "{| border=1" behave alike.
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is emitted by the first cell of the row;
			# only its attributes are remembered here.
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row lazily on its first cell
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				# "attributes|content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the element that was actually opened
		# (td, th or caption) rather than always "td".
		$l = array_pop ( $ltd ) ;
		array_pop ( $ltr ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
578
579 # Parses the text and adds the result to the strip state
580 # Returns the strip tag
function stripParse( $text, $newline, $args )
{
	# $newline is used twice: cast to bool as the line-start flag for
	# internalParse(), and verbatim as a prefix of the returned marker.
	$atLineStart = (bool)$newline;
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, $atLineStart, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
587
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	# Runs the markup passes over already-stripped text, in a fixed
	# order, and returns the result.
	#
	# $text      - stripped wiki text
	# $linestart - accepted but not read in this function
	# $args      - handed to replaceVariables()
	# $isMain    - handed to formatHeadings()
	global $wgDateFormatter;
	$profileName = "Parser::internalParse";
	wfProfileIn( $profileName );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	# External links must be done before internal ones
	$text = $this->replaceExternalLinks( $text );
	# NOTE(review): the internal link pass runs twice; presumably on
	# purpose, confirm before collapsing into one call.
	$text = $this->replaceInternalLinks( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doTableStuff( $text );
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	# The category listing is appended by the first call only; the flag
	# is never unset in this function.
	if ( !isset( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic();
		$this->categoryMagicDone = true;
	}

	wfProfileOut( $profileName );
	return $text;
}
623
624
/* private */ function doHeadings( $text )
{
	# Turns "== Title ==" style lines into <hN> elements. Levels are
	# tried from 6 down to 1, so the longest run of "=" is consumed first.
	$level = 6;
	while ( $level >= 1 ) {
		$marker = str_repeat( "=", $level );
		$pattern = '/^' . $marker . '(.+)' . $marker . '(\\s|$)/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>\\2';
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
634
/* private */ function doAllQuotes( $text )
{
	# Applies doQuotes() to every line separately and joins the lines
	# again with "\n".
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( "", $line, "" );
	}
	return implode( "\n", $converted );
}
644
/* private */ function doQuotes( $pre, $text, $mode )
{
	# Recursive converter for '' and ''' quote markup within one line.
	#
	# $pre  - already processed text held back until the current state closes
	# $text - text still to be scanned
	# $mode - current state: "", "em", "strong", "emstrong", "strongem"
	#         or "both"
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No quote run left: wrap what remains according to the open state
		$restStrong = ($text == "") ? "" : "<strong>{$text}</strong>";
		$restEm = ($text == "") ? "" : "<em>{$text}</em>";
		switch ( $mode ) {
			case "":
				return $pre . $text;
			case "em":
				return $pre . $restEm;
			case "strong":
				return $pre . $restStrong;
			case "strongem":
				return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$restEm}</strong>";
			default:
				return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$restStrong}</em>";
		}
	}

	$head = $m[1];
	$tail = $m[2];
	$headStrong = ($head == "") ? "" : "<strong>{$head}</strong>";
	$headEm = ($head == "") ? "" : "<em>{$head}</em>";

	if ( substr( $tail, 0, 1 ) == "'" ) {
		# The run is three apostrophes: drop the third and toggle strong
		$tail = substr( $tail, 1 );
		switch ( $mode ) {
			case "em":
				return $this->doQuotes( $head, $tail, ($head == "") ? "both" : "emstrong" );
			case "strong":
				return $headStrong . $this->doQuotes( "", $tail, "" );
			case "emstrong":
			case "both":
				return $this->doQuotes( "", $pre.$headStrong.$tail, "em" );
			case "strongem":
				return "<strong>{$pre}{$headEm}</strong>" . $this->doQuotes( "", $tail, "em" );
			default:
				return $head . $this->doQuotes( "", $tail, "strong" );
		}
	}

	# The run is two apostrophes: toggle em
	switch ( $mode ) {
		case "strong":
			return $this->doQuotes( $head, $tail, ($head == "") ? "both" : "strongem" );
		case "em":
			return $headEm . $this->doQuotes( "", $tail, "" );
		case "emstrong":
			return "<em>{$pre}{$headStrong}</em>" . $this->doQuotes( "", $tail, "strong" );
		case "strongem":
		case "both":
			return $this->doQuotes( "", $pre.$headEm.$tail, "strong" );
		default:
			return $head . $this->doQuotes( "", $tail, "em" );
	}
}
692
693 # Note: we have to do external links before the internal ones,
694 # and otherwise take great care in the order of things here, so
695 # that we don't end up interpreting some URLs twice.
696
/* private */ function replaceExternalLinks( $text )
{
	# Runs subReplaceExternalLinks() once per supported protocol, in the
	# order listed. The value is the $autonumber flag for that protocol.
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);

	$profileName = "Parser::replaceExternalLinks";
	wfProfileIn( $profileName );
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}
	wfProfileOut( $profileName );
	return $text;
}
711
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Links URLs of one protocol in two passes: first bare URLs in running
	# text, then bracketed [url] / [url label] links.
	#
	# $s          - text to process
	# $protocol   - scheme without the colon, e.g. "http"
	# $autonumber - when true, [url] links without a label are numbered
	#               and (if the options allow it) image URLs become images

	# Stand-in for the protocol while bare URLs are being replaced; it is
	# swapped back to $protocol by the str_replace() below.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$barePattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$skin =& $this->mOptions->getSkin();

	# "\\3" is the back-reference preg_replace() fills in with the URL remainder
	$placeholderUrl = "{$unique}:\\3";

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $skin->makeImage( $placeholderUrl . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
	}
	$linkAttribs = $skin->getExternalLinkAttributes( $placeholderUrl, wfEscapeHTML( $placeholderUrl ) );
	$linkHtml = "<a href=\"{$placeholderUrl}\"" . $linkAttribs . ">" .
		wfEscapeHTML( $placeholderUrl ) . "</a>";
	$s = preg_replace( $barePattern, "\\1" . $linkHtml . "\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Second pass: split on "[protocol:"; the first piece holds no link
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $pieces );
	$s = substr( $s, 1 );

	$unlabelled = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$labelled = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $piece ) {
		if ( preg_match( $unlabelled, $piece, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$label = "[" . ++$this->mAutonumber . "]";
			} else {
				$label = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $labelled, $piece, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$label = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed bracketed link: put the text back untouched
			$s .= "[{$protocol}:" . $piece;
			continue;
		}

		$labelShowsUrl = ( $link == $label )
			|| preg_match( "!$protocol://" . preg_quote( $label, "/" ) . "/?$!", $link );
		if ( $labelShowsUrl ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $skin->getExternalLinkAttributes( $link, $label );
		$s .= "<a href='{$link}'{$la}>{$label}</a>{$paren}{$trail}";
	}
	return $s;
}
778
779
/* private */ function replaceInternalLinks( $s )
{
	# Replaces [[...]] links in $s with HTML and returns the result.
	# Language links and category links are recorded on $this->mOutput
	# and removed from the text; image links go through the skin's image
	# handling; anything that is not a valid link is put back unchanged.
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::replaceInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# Split on "[["; the first piece holds no link
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	# wfProfileIn( $fname ) above is closed exactly once, after this
	# loop; the branches below must not close it themselves.
	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a usable title: put the text back untouched
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Language link: recorded on the output, removed from the text
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a "#" in first position, so the check for
		# "no fragment" has to be strict.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
935
936 # Some functions here used by doBlockLevels()
937 #
/* private */ function closeParagraph()
{
	# Returns the closing tag for the section recorded in mLastSection
	# ("" if none is open) and resets mLastSection and mInPre.
	$closing = ( '' != $this->mLastSection )
		? "</" . $this->mLastSection . ">\n"
		: "";
	$this->mLastSection = "";
	$this->mInPre = false;
	return $closing;
}
948 # getCommon() returns the length of the longest common prefix
949 # of both arguments, i.e. the number of leading characters they share.
950 #
/* private */ function getCommon( $st1, $st2 )
{
	# Returns how many leading characters (bytes) $st1 and $st2 share;
	# 0 when either string is empty.
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets: the curly-brace form is gone in PHP 8
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
962 # These next three functions open, continue, and close the list
963 # element appropriate to the prefix character passed into them.
964 #
# Open a new list for the prefix character $char.
# Any open paragraph is closed first; ";" additionally marks a <dt> as open.
# Returns the markup to append to the output.
/* private */ function openList( $char )
{
	$markup = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$markup .= "<ul><li>";
			break;
		case "#":
			$markup .= "<ol><li>";
			break;
		case ":":
			$markup .= "<dl><dd>";
			break;
		case ";":
			$markup .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix: the marker replaces (does not extend) the markup so far.
			$markup = "<!-- ERR 1 -->";
	}

	return $markup;
}
980
# Close the current list item and start the next one for prefix character $char.
# For definition lists the mDTopen flag decides whether a <dt> or a <dd> is
# being closed, and is updated to reflect the item being opened.
# Returns the markup, or an error comment for an unknown character.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$closing = $this->mDTopen ? "</dt>" : "</dd>";
	$this->mDTopen = ( ";" == $char );

	return $closing . ( $this->mDTopen ? "<dt>" : "<dd>" );
}
997
# Close the list belonging to prefix character $char.
# Only "*", "#" and ":" are handled; for ":" an open <dt> (mDTopen) is closed
# instead of a <dd> and the flag is reset.
# Returns the closing markup followed by a newline, or an error comment
# (without newline) for any other character.
/* private */ function closeList( $char )
{
	if ( "*" == $char ) {
		return "</li></ul>" . "\n";
	}
	if ( "#" == $char ) {
		return "</li></ol>" . "\n";
	}
	if ( ":" == $char ) {
		if ( !$this->mDTopen ) {
			return "</dd></dl>" . "\n";
		}
		$this->mDTopen = false;
		return "</dt></dl>" . "\n";
	}
	return "<!-- ERR 3 -->";
}
1013
# Turn line-oriented wiki markup into block-level HTML: paragraphs,
# space-indented <pre> blocks and lists built from * # : ; prefixes.
#   $text      - text to process
#   $linestart - when false, the first line is copied to the output as-is
#                and block processing starts with the second line
# Returns the processed text.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#

	// Strip nowiki's again.
	$text = $this->strip($text,$dblStripState);
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = $lastLine = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# $paragraphStack is false, or the paragraph markup held back until the
	# next line decides whether it is really needed.
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$nowikiOpenMatch = preg_match("/<span class=\"nowiki\"/", $oLine );
		$nowikiCloseMatch = preg_match("/<\\/span >/", $oLine );
		if($nowikiOpenMatch) $nowikiFullMatch = preg_match("/^(.*)<span class=\"nowiki\"/", $oLine, $nowikiOpenMatches );
		if (!$this->mInPre) {
			$this->mInPre = !empty($preOpenMatch);
		}
		if (!$this->mInNowiki) {
			$this->mInNowiki = !empty($nowikiOpenMatch);
		}
		if (
			!$this->mInPre && (!$this->mInNowiki ||
			($nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0) )
		)
		{
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 treats ";" like ":" so that a term line and a definition
			# line compare as belonging to the same list.
			$pref2 = str_replace( ";", ":", $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ":" );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Square-bracket string offsets below replace the curly-brace
			# form, which PHP 8 no longer parses.
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if (
				!$inBlockElem && !$this->mInPre &&
				(!$this->mInNowiki || ($nowikiOpenMatch && trim($nowikiOpenMatches[1]) == '' ) ) )
			{
				# substr() instead of a direct offset: an empty line has no
				# character 0 to read.
				if ( " " == substr( $t, 0, 1 ) and trim($t) != '' and (!$this->mInNowiki || $nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0 ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		if($nowikiCloseMatch) $this->mInNowiki = false;
		# A line is only emitted when no paragraph markup is being held back.
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close every list level still open after the last line.
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}
	$output = $this->unstrip( $output, $dblStripState );

	wfProfileOut( $fname );
	return $output;
}
1190
# Look up the current value of the magic variable identified by $index
# (one of the MAG_* constants handled below).
# Returns the value, or NULL for an identifier that is not handled here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;

	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Namespace text comes from $wgLang rather than the canonical name.
			$value = $wgLang->getNsText( $this->mTitle->getNamespace() );
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") starts at 0; one is added before the lookup.
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}

	return $value;
}
1224
# Rebuild $this->mVariables from scratch: every magic word listed in
# $wgVariableIDs adds its entries together with the variable's current value.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $magicId ) {
		$magicWord =& MagicWord::get( $magicId );
		$currentValue = $this->getVariableValue( $magicId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1234
# Expand {{...}} and {{{...}}} constructs in $text.
#   $text - text to process
#   $args - template arguments in effect for this (possibly recursive) call
# Variable and argument substitution only run for OT_HTML output; the
# template pass runs for every output type.  Returns the processed text.
/* private */ function replaceVariables( $text, $args = array() )
{
	global $wgLang, $wgScript, $wgArticlePath;

	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( "{", "}" ), "", $titleChars );

	# The callbacks are plain functions, so they reach this parser through
	# the global; the argument stack keeps track of nested calls.
	$this->mArgStack[] = $args;
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		$variablePattern = "/{{([" . $nonBraceChars . "]*?)}}/";
		$argumentPattern = "/(\\n?){{{([" . $titleChars . "]*?)}}}/";

		$text = preg_replace_callback( $variablePattern, "wfVariableSubstitution", $text );
		$text = preg_replace_callback( $argumentPattern, "wfArgSubstitution", $text );
	}

	$templatePattern = "/(\\n?){{([" . $nonBraceChars . "]*)(\\|.*?|)}}/s";
	$text = preg_replace_callback( $templatePattern, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1272
# Replacement handler for a {{name}} match.
#   $matches - regex match: [0] is the whole construct, [1] the name
# Returns the value stored in mVariables for the name (and flags the output
# as containing old-style magic), or the untouched match for unknown names.
function variableSubstitution( $matches )
{
	$name = $matches[1];

	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
1283
# Replacement handler for a {{...}} match.
#   $matches - regex match: [0] whole construct, [1] optional leading
#              newline, [2] text before the first "|", [3] "|"-prefixed
#              argument list or ""
# The handlers below are tried in order (subst:, msgnw:/msg:/int:, ns:,
# localurl:/localurle:, internal variables, page inclusion); the first one
# that produces text wins.  Returns the replacement text, or the untouched
# match when nothing applied.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			# intval() alone is falsy for namespace 0, which would send
			# {{ns:0}} on to the template lookup below; test for it explicitly.
			if ( intval( $part1 ) || trim( $part1 ) === "0" ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}
	/*
	# Arguments input from the caller
	$inputArgs = end( $this->mArgStack );
	if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
		$text = $inputArgs[$part1];
		$found = true;
	}
	*/
	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored under
		# their name, all others are numbered starting from 1.
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always yields a string, so no further checks are needed.
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1482
1483 # Triple brace replacement -- used for template arguments
# Replacement handler for a {{{name}}} match.
#   $matches - regex match: [0] whole construct, [1] optional leading
#              newline, [2] argument name
# Looks the trimmed name up in the argument set on top of mArgStack and
# returns its parsed value; unknown names leave the match untouched.
function argSubstitution( $matches )
{
	$newline = $matches[1];
	$argName = trim( $matches[2] );
	$callerArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $callerArgs ) ) {
		return $matches[0];
	}

	return $this->stripParse( $callerArgs[$argName], $newline, array() );
}
1497
1498 # Returns true if the function is allowed to include this entity
# Record one more inclusion of the entity keyed by $dbk.
# Returns true while the running count (including this call) does not
# exceed MAX_INCLUDE_REPEAT, false afterwards.
function incrementIncludeCount( $dbk )
{
	$previous = array_key_exists( $dbk, $this->mIncludeCount )
		? $this->mIncludeCount[$dbk]
		: 0;

	$current = $previous + 1;
	$this->mIncludeCount[$dbk] = $current;

	return $current <= MAX_INCLUDE_REPEAT;
}
1510
1511
1512 # Cleans up HTML, removes dangerous tags and attributes
# Filters the HTML in $text against a whitelist of tag names.
# Whitelisted tags are kept, with their attributes passed through
# fixTagAttributes(); every other "<" is rewritten as &lt; and stray ">"
# as &gt;.  The whitelist is only populated when $wgUserHtml is set.
# Without $wgUseTidy the nesting of tags is tracked as well and tags still
# open at the end are closed.  Returns the filtered text.
1513 /* private */ function removeHTMLtags( $text )
1514 {
1515 global $wgUseTidy, $wgUserHtml;
1516 $fname = "Parser::removeHTMLtags";
1517 wfProfileIn( $fname );
1518
1519 if( $wgUserHtml ) {
1520 $htmlpairs = array( # Tags that must be closed
1521 "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
1522 "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
1523 "strike", "strong", "tt", "var", "div", "center",
1524 "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
1525 "ruby", "rt" , "rb" , "rp", "p"
1526 );
1527 $htmlsingle = array(
1528 "br", "hr", "li", "dt", "dd"
1529 );
1530 $htmlnest = array( # Tags that can be nested--??
1531 "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
1532 "dl", "font", "big", "small", "sub", "sup"
1533 );
1534 $tabletags = array( # Can only appear inside table
1535 "td", "th", "tr"
1536 );
1537 } else {
1538 $htmlpairs = array();
1539 $htmlsingle = array();
1540 $htmlnest = array();
1541 $tabletags = array();
1542 }
1543
# Table tags are treated like the single tags: they are never pushed on
# the tag stack and their closing tags are not checked against it.
1544 $htmlsingle = array_merge( $tabletags, $htmlsingle );
1545 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
1546
# NOTE(review): $htmlattrs is not used again in this function.
1547 $htmlattrs = $this->getHTMLattrs () ;
1548
1549 # Remove HTML comments
# The pattern has no second capture group, so "$2" replaces each match
# with nothing -- presumably intentional.
1550 $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
1551
# Split on "<": the first piece is plain text, every later piece starts
# with whatever followed a "<".
1552 $bits = explode( "<", $text );
1553 $text = array_shift( $bits );
1554 if(!$wgUseTidy) {
# $tagstack holds the currently open tags; $tablestack saves the tag
# stack that was active outside each open table.
1555 $tagstack = array(); $tablestack = array();
1556 foreach ( $bits as $x ) {
# Notices and warnings are silenced around the match because $regs is
# incomplete when the piece does not look like a tag.
1557 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
1558 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1559 $x, $regs );
1560 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1561 error_reporting( $prev );
1562
1563 $badtag = 0 ;
1564 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1565 # Check our stack
1566 if ( $slash ) {
1567 # Closing a tag...
# A closing tag must match the most recently opened tag; on a mismatch
# the popped entry is pushed back and the tag is rejected.
1568 if ( ! in_array( $t, $htmlsingle ) &&
1569 ( $ot = @array_pop( $tagstack ) ) != $t ) {
1570 @array_push( $tagstack, $ot );
1571 $badtag = 1;
1572 } else {
# Leaving a table restores the tag stack saved when it was opened.
1573 if ( $t == "table" ) {
1574 $tagstack = array_pop( $tablestack );
1575 }
1576 $newparams = "";
1577 }
1578 } else {
1579 # Keep track for later
1580 if ( in_array( $t, $tabletags ) &&
1581 ! in_array( "table", $tagstack ) ) {
1582 $badtag = 1;
1583 } else if ( in_array( $t, $tagstack ) &&
1584 ! in_array ( $t , $htmlnest ) ) {
1585 $badtag = 1 ;
1586 } else if ( ! in_array( $t, $htmlsingle ) ) {
# Entering a table starts a fresh tag stack; the outer one is saved.
1587 if ( $t == "table" ) {
1588 array_push( $tablestack, $tagstack );
1589 $tagstack = array();
1590 }
1591 array_push( $tagstack, $t );
1592 }
1593 # Strip non-approved attributes from the tag
1594 $newparams = $this->fixTagAttributes($params);
1595
1596 }
1597 if ( ! $badtag ) {
1598 $rest = str_replace( ">", "&gt;", $rest );
1599 $text .= "<$slash$t $newparams$brace$rest";
1600 continue;
1601 }
1602 }
# Not whitelisted, or rejected above: output the piece as escaped text.
1603 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1604 }
1605 # Close off any remaining tags
1606 while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
1607 $text .= "</$t>\n";
1608 if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
1609 }
1610 } else {
1611 # this might be possible using tidy itself
# With tidy enabled only the whitelist and attribute filter are applied;
# no nesting is tracked here.
1612 foreach ( $bits as $x ) {
1613 preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
1614 $x, $regs );
1615 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
1616 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
1617 $newparams = $this->fixTagAttributes($params);
1618 $rest = str_replace( ">", "&gt;", $rest );
1619 $text .= "<$slash$t $newparams$brace$rest";
1620 } else {
1621 $text .= "&lt;" . str_replace( ">", "&gt;", $x);
1622 }
1623 }
1624 }
1625 wfProfileOut( $fname );
1626 return $text;
1627 }
1628
1629
1630 /*
1631 *
1632 * This function accomplishes several tasks:
1633 * 1) Auto-number headings if that option is enabled
1634 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1635 * 3) Add a Table of contents on the top for users who have enabled the option
1636 * 4) Auto-anchor headings
1637 *
1638 * It loops through all headlines, collects the necessary data, then splits up the
1639 * string and re-inserts the newly formatted headlines.
1640 *
1641 */
1642
# Parameters:
#   $text   - HTML in which the <h1>..<h6> headings are looked up
#   $isMain - the table of contents is only inserted when this is true
# Returns the text with the rebuilt headings (and, if enabled, the table of
# contents after the first block) spliced back in.
1643 /* private */ function formatHeadings( $text, $isMain=true )
1644 {
1645 global $wgInputEncoding;
1646
1647 $doNumberHeadings = $this->mOptions->getNumberHeadings();
1648 $doShowToc = $this->mOptions->getShowToc();
# Edit links and the right-click hook are only offered when the title
# reports that the user can edit it.
1649 if( !$this->mTitle->userCanEdit() ) {
1650 $showEditLink = 0;
1651 $rightClickHack = 0;
1652 } else {
1653 $showEditLink = $this->mOptions->getEditSection();
1654 $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
1655 }
1656
1657 # Inhibit editsection links if requested in the page
1658 $esw =& MagicWord::get( MAG_NOEDITSECTION );
1659 if( $esw->matchAndRemove( $text ) ) {
1660 $showEditLink = 0;
1661 }
1662 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
1663 # do not add TOC
1664 $mw =& MagicWord::get( MAG_NOTOC );
1665 if( $mw->matchAndRemove( $text ) ) {
1666 $doShowToc = 0;
1667 }
1668
1669 # never add the TOC to the Main Page. This is an entry page that should not
1670 # be more than 1-2 screens large anyway
1671 if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
1672 $doShowToc = 0;
1673 }
1674
1675 # Get all headlines for numbering them and adding funky stuff like [edit]
1676 # links - this is for later, but we need the number of headlines right now
# $matches[1] holds the heading levels, $matches[2] the rest of each opening
# tag up to and including ">", $matches[3] the heading contents.
1677 $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );
1678
1679 # if there are fewer than 4 headlines in the article, do not show TOC
1680 if( $numMatches < 4 ) {
1681 $doShowToc = 0;
1682 }
1683
1684 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
1685 # override above conditions and always show TOC
1686 $mw =& MagicWord::get( MAG_FORCETOC );
1687 if ($mw->matchAndRemove( $text ) ) {
1688 $doShowToc = 1;
1689 }
1690
1691
1692 # We need this to perform operations on the HTML
1693 $sk =& $this->mOptions->getSkin();
1694
1695 # headline counter
1696 $headlineCount = 0;
1697
1698 # Ugh .. the TOC should have neat indentation levels which can be
1699 # passed to the skin functions. These are determined here
1700 $toclevel = 0;
1701 $toc = "";
1702 $full = "";
# $head[n] collects the replacement markup for the n-th heading;
# $sublevelCount[level] counts the headings seen at each level.
1703 $head = array();
1704 $sublevelCount = array();
1705 $level = 0;
1706 $prevlevel = 0;
1707 foreach( $matches[3] as $headline ) {
1708 $numbering = "";
1709 if( $level ) {
1710 $prevlevel = $level;
1711 }
1712 $level = $matches[1][$headlineCount];
1713 if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
1714 # reset when we enter a new level
1715 $sublevelCount[$level] = 0;
1716 $toc .= $sk->tocIndent( $level - $prevlevel );
1717 $toclevel += $level - $prevlevel;
1718 }
1719 if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
1720 # reset when we step back a level
1721 $sublevelCount[$level+1]=0;
1722 $toc .= $sk->tocUnindent( $prevlevel - $level );
1723 $toclevel -= $prevlevel - $level;
1724 }
1725 # count number of headlines for each level
1726 @$sublevelCount[$level]++;
# Build the dotted number from the non-empty counters of levels 1..$level.
1727 if( $doNumberHeadings || $doShowToc ) {
1728 $dot = 0;
1729 for( $i = 1; $i <= $level; $i++ ) {
1730 if( !empty( $sublevelCount[$i] ) ) {
1731 if( $dot ) {
1732 $numbering .= ".";
1733 }
1734 $numbering .= $sublevelCount[$i];
1735 $dot = 1;
1736 }
1737 }
1738 }
1739
1740 # The canonized header is a version of the header text safe to use for links
1741 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
1742 $canonized_headline = $this->unstrip( $headline, $this->mStripState );
1743
1744 # strip out HTML
1745 $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
1746 $tocline = trim( $canonized_headline );
1747 $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
1748 # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
1749 $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
1750 $refer[$headlineCount] = $canonized_headline;
1751
1752 # count how many in assoc. array so we can track dupes in anchors
# $refers and $refcount are created on first use here; the @ hides the
# notice for the not-yet-existing entry.
1753 @$refers[$canonized_headline]++;
1754 $refcount[$headlineCount]=$refers[$canonized_headline];
1755
1756 # Prepend the number to the heading text
1757
1758 if( $doNumberHeadings || $doShowToc ) {
1759 $tocline = $numbering . " " . $tocline;
1760
1761 # Don't number the heading if it is the only one (looks silly)
1762 if( $doNumberHeadings && count( $matches[3] ) > 1) {
1763 # the two are different if the line contains a link
1764 $headline=$numbering . " " . $headline;
1765 }
1766 }
1767
1768 # Create the anchor for linking from the TOC to the section
# Repeated headings get "_2", "_3", ... appended so anchors stay distinct.
1769 $anchor = $canonized_headline;
1770 if($refcount[$headlineCount] > 1 ) {
1771 $anchor .= "_" . $refcount[$headlineCount];
1772 }
1773 if( $doShowToc ) {
1774 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
1775 }
1776 if( $showEditLink ) {
1777 if ( empty( $head[$headlineCount] ) ) {
1778 $head[$headlineCount] = "";
1779 }
1780 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
1781 }
1782
1783 # Add the edit section span
1784 if( $rightClickHack ) {
1785 $headline = $sk->editSectionScript($headlineCount+1,$headline);
1786 }
1787
1788 # give headline the correct <h#> tag
1789 @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
1790
1791 $headlineCount++;
1792 }
1793
1794 if( $doShowToc ) {
# NOTE(review): $toclines is assigned but not read anywhere in this function.
1795 $toclines = $headlineCount;
1796 $toc .= $sk->tocUnindent( $toclevel );
1797 $toc = $sk->tocTable( $toc );
1798 }
1799
1800 # split up and insert constructed headlines
1801
# Splitting on the heading pattern leaves the text between headings;
# block $i is followed by the rebuilt heading $head[$i].
1802 $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
1803 $i = 0;
1804
1805 foreach( $blocks as $block ) {
1806 if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
1807 # This is the [edit] link that appears for the top block of text when
1808 # section editing is enabled
1809
1810 # Disabled because it broke block formatting
1811 # For example, a bullet point in the top line
1812 # $full .= $sk->editSectionLink(0);
1813 }
1814 $full .= $block;
1815 if( $doShowToc && !$i && $isMain) {
1816 # Top anchor now in skin
1817 $full = $full.$toc;
1818 }
1819
1820 if( !empty( $head[$i] ) ) {
1821 $full .= $head[$i];
1822 }
1823 $i++;
1824 }
1825
1826 return $full;
1827 }
1828
# Turn every "ISBN <number>" in $text into a link to Special:Booksources.
# A number is the run of digits, hyphens and capital letters following the
# marker (after optional spaces); hyphens are dropped for the link query but
# kept in the displayed text.  An "ISBN " not followed by a usable number is
# copied through unchanged.  Returns the resulting text.
/* private */ function magicISBN( $text )
{
	# explode() replaces split(), which is gone since PHP 7; the separator is
	# a literal.  The leading space keeps the first piece non-empty even when
	# the text itself starts with "ISBN ".
	$a = explode( "ISBN ", " $text" );
	if ( count( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# strspn() measures the leading runs without reading past the end of
		# the string, which character-by-character scanning did when the
		# text ended inside the number.
		$blankLength = strspn( $x, " " );
		$blank = (string)substr( $x, 0, $blankLength );
		$x = (string)substr( $x, $blankLength );

		$isbnLength = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLength );
		$x = (string)substr( $x, $isbnLength );

		$num = str_replace( "-", "", $isbn );

		if ( "" == $num ) {
			# No number: put back everything that was consumed, including a
			# run consisting only of hyphens.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
# Turn every "RFC <digits>" in $text into an external link.
# The link target is the "rfcurl" message with "$1" replaced by the number.
# An "RFC " not followed by digits (after optional spaces) is copied through
# unchanged.  Returns the resulting text.
/* private */ function magicRFC( $text )
{
	# explode() replaces split(), which is gone since PHP 7; the separator is
	# a literal.  The leading space keeps the first piece non-empty even when
	# the text itself starts with "RFC ".
	$a = explode( "RFC ", " $text" );
	if ( count( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# strspn() measures the leading runs without reading past the end of
		# the string, which character-by-character scanning did when the
		# text ended inside the number.
		$blankLength = strspn( $x, " " );
		$blank = (string)substr( $x, 0, $blankLength );
		$x = (string)substr( $x, $blankLength );

		$rfcLength = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLength );
		$x = (string)substr( $x, $rfcLength );

		if ( "" == $rfc ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
1895
# Transform wiki text before it is saved; the result is still wiki markup.
#   $text       - submitted wiki text
#   $title      - title of the page being saved
#   $user       - user performing the save, handed on to pstPass2()
#   $options    - parser options stored on the parser
#   $clearState - when true, clearState() is called first
# Returns the transformed text.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Convert CRLF line endings to plain LF.
	$text = str_replace( "\r\n", "\n", $text );

	# strip() and unstrip() share $stripState, so the transformation pass
	# in between runs on the stripped text.
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );

	return $this->unstrip( $text, $stripState );
}
1924
# Second pre-save pass: expands {{subst:...}}, signatures (~~~, ~~~~, ~~~~~)
# and the "pipe trick" link shortcuts, then trims trailing whitespace.
#   $text - wiki text
#   $user - user whose name and "nickname" option are used for signatures
# Returns the transformed wiki text.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# Plain string replacement, longest marker first.  A regex replacement
	# would interpret "$1" or "\0" inside a nickname as a backreference and
	# corrupt the signature.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace(
		array( "~~~~~", "~~~~", "~~~" ),
		array( $d, "$userLink $d", $userLink ),
		$text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any.
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1995
# Prime the member variables that parse() would normally initialise, so
# that code outside the class can call individual parser methods directly.
#
# $title:      title object; stored by reference in mTitle
# $options:    stored in mOptions
# $outputType: stored in mOutputType
# $clearState: when true (the default), clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
    $this->mOutputType = $outputType;
    $this->mOptions = $options;
    $this->mTitle =& $title;

    # Caller asked to keep whatever state is already there
    if ( !$clearState ) {
        return;
    }
    $this->clearState();
}
2007
# Run replaceVariables() over $text with the output type set to OT_MSG,
# using the global $wgTitle as the title and $options as the options.
# A static flag guards against re-entry: while one call is in progress,
# nested calls return $text unchanged.
function transformMsg( $text, $options ) {
    global $wgTitle;
    static $inProgress = false;

    if ( !$inProgress ) {
        $inProgress = true;

        $this->mTitle = $wgTitle;
        $this->mOptions = $options;
        $this->mOutputType = OT_MSG;
        $this->clearState();
        $text = $this->replaceVariables( $text );

        $inProgress = false;
    }

    return $text;
}
2027 }
2028
# Holds the result of a parse: the output text together with the language
# links, category links and "contains old magic" flag that go with it.
class ParserOutput
{
    var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
    var $mCacheTime; # Used in ParserCache

    # Constructor. Every argument is optional; the link lists default to
    # empty arrays and mCacheTime always starts out as an empty string.
    function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mText = $text;
        $this->mLanguageLinks = $languageLinks;
        $this->mCategoryLinks = $categoryLinks;
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCacheTime = "";
    }

    # Accessors
    function getText() { return $this->mText; }
    function getLanguageLinks() { return $this->mLanguageLinks; }
    function getCategoryLinks() { return $this->mCategoryLinks; }
    function getCacheTime() { return $this->mCacheTime; }
    function containsOldMagic() { return $this->mContainsOldMagic; }

    # Mutators. Each one delegates to wfSetVar() and returns its result.
    function setText( $text ) { return wfSetVar( $this->mText, $text ); }
    function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
    function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
    function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
    function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

    # Fold the metadata of another ParserOutput into this one: its language
    # links and category links are appended to ours, and the old-magic flag
    # becomes true if it is true on either side. mText and mCacheTime are
    # left untouched.
    function merge( $other ) {
        $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
        # Take the categories from $other. This used to append our own
        # language links here, which dropped $other's categories and put
        # language links into the category list.
        $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
        $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
    }

}
2062
# Bundle of settings handed to the Parser. initialiseFromUser() fills it
# from a handful of $wg* globals plus a user's skin and preferences; each
# setting also has a plain getter and a setter.
class ParserOptions
{
    # All variables are private
    var $mUseTeX;                  # Use texvc to expand <math> tags
    var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
    var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
    var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
    var $mAllowExternalImages;     # Allow external images inline
    var $mSkin;                    # Reference to the preferred skin
    var $mDateFormat;              # Date format index
    var $mEditSection;             # Create "edit section" links
    var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
    var $mNumberHeadings;          # Automatically number headings
    var $mShowToc;                 # Show table of contents

    # --- Getters: each returns the stored value unchanged ---

    function getUseTeX()
    {
        return $this->mUseTeX;
    }

    function getUseCategoryMagic()
    {
        return $this->mUseCategoryMagic;
    }

    function getUseDynamicDates()
    {
        return $this->mUseDynamicDates;
    }

    function getInterwikiMagic()
    {
        return $this->mInterwikiMagic;
    }

    function getAllowExternalImages()
    {
        return $this->mAllowExternalImages;
    }

    function getSkin()
    {
        return $this->mSkin;
    }

    function getDateFormat()
    {
        return $this->mDateFormat;
    }

    function getEditSection()
    {
        return $this->mEditSection;
    }

    function getEditSectionOnRightClick()
    {
        return $this->mEditSectionOnRightClick;
    }

    function getNumberHeadings()
    {
        return $this->mNumberHeadings;
    }

    function getShowToc()
    {
        return $this->mShowToc;
    }

    # --- Setters: each delegates to wfSetVar() (wfSetRef() for the skin)
    # --- and returns that helper's result

    function setUseTeX( $x )
    {
        return wfSetVar( $this->mUseTeX, $x );
    }

    function setUseCategoryMagic( $x )
    {
        return wfSetVar( $this->mUseCategoryMagic, $x );
    }

    function setUseDynamicDates( $x )
    {
        return wfSetVar( $this->mUseDynamicDates, $x );
    }

    function setInterwikiMagic( $x )
    {
        return wfSetVar( $this->mInterwikiMagic, $x );
    }

    function setAllowExternalImages( $x )
    {
        return wfSetVar( $this->mAllowExternalImages, $x );
    }

    function setSkin( $x )
    {
        return wfSetRef( $this->mSkin, $x );
    }

    function setDateFormat( $x )
    {
        return wfSetVar( $this->mDateFormat, $x );
    }

    function setEditSection( $x )
    {
        return wfSetVar( $this->mEditSection, $x );
    }

    function setEditSectionOnRightClick( $x )
    {
        return wfSetVar( $this->mEditSectionOnRightClick, $x );
    }

    function setNumberHeadings( $x )
    {
        return wfSetVar( $this->mNumberHeadings, $x );
    }

    function setShowToc( $x )
    {
        return wfSetVar( $this->mShowToc, $x );
    }

    # Build a new ParserOptions and initialise it from $user.
    /* static */ function newFromUser( &$user )
    {
        $options = new ParserOptions;
        $options->initialiseFromUser( $user );
        return $options;
    }

    # Populate every setting: the first five come from the $wg* globals,
    # the skin and the remaining preferences come from the user object.
    # A false-ish $userInput is replaced by a freshly constructed User.
    function initialiseFromUser( &$userInput )
    {
        global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

        if ( $userInput ) {
            $prefsSource =& $userInput;
        } else {
            # No user supplied: fall back to a blank User object.
            # presumably setLoaded( true ) stops it from being loaded from
            # storage later -- confirm against the User class
            $prefsSource = new User;
            $prefsSource->setLoaded( true );
        }

        # Site-wide switches
        $this->mUseTeX = $wgUseTeX;
        $this->mUseCategoryMagic = $wgUseCategoryMagic;
        $this->mUseDynamicDates = $wgUseDynamicDates;
        $this->mInterwikiMagic = $wgInterwikiMagic;
        $this->mAllowExternalImages = $wgAllowExternalImages;

        # Per-user skin and preferences
        $this->mSkin =& $prefsSource->getSkin();
        $this->mDateFormat = $prefsSource->getOption( "date" );
        $this->mEditSection = $prefsSource->getOption( "editsection" );
        $this->mEditSectionOnRightClick = $prefsSource->getOption( "editsectiononrightclick" );
        $this->mNumberHeadings = $prefsSource->getOption( "numberheadings" );
        $this->mShowToc = $prefsSource->getOption( "showtoc" );
    }

}
2135
2136 # Regex callbacks, used in Parser::replaceVariables
# Forwards the regex match array to braceSubstitution() on the parser
# currently held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->braceSubstitution( $matches );
    return $replacement;
}
2142
# Forwards the regex match array to argSubstitution() on the parser
# currently held in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->argSubstitution( $matches );
    return $replacement;
}
2148
# Forwards the regex match array to variableSubstitution() on the parser
# currently held in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
    global $wgCurParser;

    $replacement = $wgCurParser->variableSubstitution( $matches );
    return $replacement;
}
2154
2155 ?>