/tr added to tags
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
7 }
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
10 }
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Cap on repeated inclusions of one page (see the O(N^2) note above)
define( 'MAX_INCLUDE_REPEAT', 5 );

# Legal values for Parser::$mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): when passed as $tag, HTML comments
# are extracted instead of an <XML>-style tag. Must never collide with
# anything that could be meaningful in wiki syntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Common prefix of the placeholder markers produced while stripping
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor (PHP 4 style: a method named after the class).
# Brings the new parser into its initial state via clearState().
function Parser() {
    $this->clearState();
}
69
# Resets every per-parse member to its starting value so the same Parser
# object can be reused for another document.
function clearState() {
    # Fresh result container
    $this->mOutput = new ParserOutput();

    # Counters and flags
    $this->mAutonumber = 0;
    $this->mDTopen = false;
    $this->mInPre = false;
    $this->mVariables = false;
    $this->mLastSection = "";

    # Collections
    $this->mIncludeCount = array();
    $this->mStripState = array();
    $this->mArgStack = array();
}
82
83 # First pass--just handle <nowiki> sections, pass the rest off
84 # to internalParse() which does all the real work.
85 #
86 # Returns a ParserOutput
87 #
# Converts wiki markup to HTML and returns the parser's ParserOutput.
#
# $text       wiki markup to render
# $title      title object of the page being rendered (stored by reference)
# $options    ParserOptions-style object consulted by the rendering steps
# $linestart  passed through to internalParse() and doBlockLevels()
# $clearState when true (default) the parser state is reset first
#
# Order matters: strip() hides <nowiki>/<pre>/... content behind markers,
# internalParse() renders the rest, unstrip() restores everything except
# <nowiki>, and unstripNoWiki() must run only after doBlockLevels().
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
    global $wgUseTidy;
    $fname = "Parser::parse";
    wfProfileIn( $fname );

    if ( $clearState ) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_HTML;

    $text = $this->strip( $text, $this->mStripState );
    $text = $this->internalParse( $text, $linestart );
    $text = $this->unstrip( $text, $this->mStripState );

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    if( !$wgUseTidy ) {
        $fixtags = array(
            # french spaces, last one Guillemet-left
            # only if there is something before the space
            "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
            # french spaces, Guillemet-right
            "/(\\302\\253) /i"=>"\\1&nbsp;",
            "/<hr *>/i" => '<hr />',
            "/<br *>/i" => '<br />',
            "/<center *>/i"=>'<div class="center">',
            "/<\\/center *>/i" => '</div>',
            # Escape ampersands that do not start a character reference.
            # The look-ahead previously began with "(?!:amp;" (a stray colon)
            # and used the range A-f, which also accepts non-hex characters;
            # "&amp;" itself is covered by the named-entity alternative.
            # We probably ought to be more careful about named entities.
            '/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
        );
    } else {
        # tidy repairs <hr>, <br> and stray ampersands itself
        $fixtags = array(
            # french spaces, last one Guillemet-left
            "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
            # french spaces, Guillemet-right
            "/(\\302\\253) /i"=>"\\1&nbsp;",
            "/<center *>/i"=>'<div class="center">',
            "/<\\/center *>/i" => '</div>'
        );
    }
    $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

    # only once and last
    $text = $this->doBlockLevels( $text, $linestart );
    $text = $this->unstripNoWiki( $text, $this->mStripState );
    if( $wgUseTidy ) {
        $text = $this->tidy( $text );
    }
    $this->mOutput->setText( $text );
    wfProfileOut( $fname );
    return $this->mOutput;
}
144
# Returns a random lowercase hexadecimal string made of two independent
# 31-bit mt_rand() draws; used to make strip markers hard to guess.
/* static */ function getRandomString()
{
    $first = dechex( mt_rand( 0, 0x7fffffff ) );
    $second = dechex( mt_rand( 0, 0x7fffffff ) );
    return $first . $second;
}
149
150 # Replaces all occurrences of <$tag>content</$tag> in the text
151 # with a random marker and returns the new text. the output parameter
152 # $content will be an associative array filled with data on the form
153 # $unique_marker => content.
154
155 # If $content is already set, the additional entries will be appended
156
157 # If $tag is set to STRIP_COMMENTS, the function will extract
158 # <!-- HTML comments -->
159
# Replaces every <$tag>content</$tag> in $text with a unique marker and
# returns the resulting text.
#
# $tag         tag name, or STRIP_COMMENTS to extract <!-- HTML comments -->
# $text        text to scan
# $content     output array, filled as marker => extracted content;
#              existing entries are kept and new ones appended
# $uniq_prefix string every generated marker starts with
#
# An opening tag that is never closed swallows the rest of the text as
# its content.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
    $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
    if ( !$content ) {
        $content = array( );
    }
    $n = 1;
    $stripped = "";

    # The delimiters do not change inside the loop, so build them once
    if( $tag == STRIP_COMMENTS ) {
        $openRegex = "/<!--/i";
        $closeRegex = "/-->/i";
    } else {
        $openRegex = "/<\\s*$tag\\s*>/i";
        $closeRegex = "/<\\/\\s*$tag\\s*>/i";
    }

    while ( "" != $text ) {
        $p = preg_split( $openRegex, $text, 2 );
        $stripped .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
            $text = "";
        } else {
            $q = preg_split( $closeRegex, $p[1], 2 );
            $marker = $rnd . sprintf("%08X", $n++);
            $content[$marker] = $q[0];
            $stripped .= $marker;
            # Without a closing tag preg_split() returns a single element;
            # reading $q[1] then was an undefined offset.
            $text = isset( $q[1] ) ? $q[1] : "";
        }
    }
    return $stripped;
}
191
192 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
193 # If $render is set, performs necessary rendering operations on plugins
194 # Returns the text, and fills an array with data needed in unstrip()
195 # If the $state is already a valid strip state, it adds to the state
196
197 # When $stripcomments is set, HTML comments <!-- like this -->
198 # will be stripped in addition to other tags. This is important
199 # for section editing, where these comments cause confusion when
200 # counting the sections in the wikisource
# Strips <nowiki>, <hiero>, <timeline>, <math> and <pre> sections (and,
# optionally, HTML comments) out of $text, replacing each with a marker.
#
# $text          wiki markup
# $state         strip state, marker lists keyed by tag kind; an existing
#                state is extended rather than replaced
# $stripcomments when true, <!-- comments --> are stripped as well
#
# When the output type is OT_HTML the extracted content is rendered
# (escaped, or passed to the hiero/timeline/math renderers); otherwise it
# is stored wrapped in its original tags. Returns the text with markers.
function strip( $text, &$state, $stripcomments = false )
{
    $render = ($this->mOutputType == OT_HTML);
    $uniq_prefix = UNIQ_PREFIX;

    $nowiki_content = array();
    $hiero_content = array();
    $timeline_content = array();
    $math_content = array();
    $pre_content = array();
    $comment_content = array();

    $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
    foreach( $nowiki_content as $marker => $content ){
        if( $render ){
            $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
        } else {
            $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
        }
    }

    $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
    foreach( $hiero_content as $marker => $content ){
        if( $render && $GLOBALS['wgUseWikiHiero']){
            $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
        } else {
            $hiero_content[$marker] = "<hiero>$content</hiero>";
        }
    }

    $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
    foreach( $timeline_content as $marker => $content ){
        if( $render && $GLOBALS['wgUseTimeline']){
            $timeline_content[$marker] = renderTimeline( $content );
        } else {
            $timeline_content[$marker] = "<timeline>$content</timeline>";
        }
    }

    $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
    foreach( $math_content as $marker => $content ){
        if( $render ) {
            if( $this->mOptions->getUseTeX() ) {
                $math_content[$marker] = renderMath( $content );
            } else {
                # TeX disabled: show the source literally. The closing tag
                # used to be emitted as "&lt;math&gt;", and the content was
                # inserted raw although stripped text is restored into the
                # final HTML without further sanitising.
                $math_content[$marker] = "&lt;math&gt;" .
                    wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
            }
        } else {
            $math_content[$marker] = "<math>$content</math>";
        }
    }

    $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
    foreach( $pre_content as $marker => $content ){
        if( $render ){
            $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
        } else {
            $pre_content[$marker] = "<pre>$content</pre>";
        }
    }

    if( $stripcomments ) {
        $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
        foreach( $comment_content as $marker => $content ){
            $comment_content[$marker] = "<!--$content-->";
        }
    }

    # Listed in extraction order; unstrip() expands in reverse order.
    $extracted = array(
        'nowiki' => $nowiki_content,
        'hiero' => $hiero_content,
        'timeline' => $timeline_content,
        'math' => $math_content,
        'pre' => $pre_content,
        'comment' => $comment_content
    );

    # Merge with the pre-existing state, if there is one. A state created
    # by insertStripItem() may lack some of these keys, so each key is
    # checked instead of being assumed present.
    $merged = array();
    foreach ( $extracted as $kind => $items ) {
        if ( isset( $state[$kind] ) ) {
            $merged[$kind] = $state[$kind] + $items;
        } else {
            $merged[$kind] = $items;
        }
    }
    # Keep any other lists (e.g. 'item') after the tag kinds, so they are
    # still expanded first by unstrip().
    if ( is_array( $state ) ) {
        foreach ( $state as $kind => $items ) {
            if ( !isset( $merged[$kind] ) ) {
                $merged[$kind] = $items;
            }
        }
    }
    $state = $merged;

    return $text;
}
290
291 # always call unstripNoWiki() after this one
# Puts stripped content back into $text for every kind of marker except
# 'nowiki'. Always call unstripNoWiki() after this one.
#
# Both the marker kinds and the markers within each kind are expanded
# last-to-first; otherwise nested tags would be corrupted.
function unstrip( $text, &$state )
{
    foreach ( array_reverse( $state, true ) as $kind => $markers ) {
        if ( $kind == 'nowiki' ) {
            continue;
        }
        foreach ( array_reverse( $markers, true ) as $marker => $replacement ) {
            $text = str_replace( $marker, $replacement, $text );
        }
    }
    return $text;
}
306 # always call this after unstrip() to preserve the order
# Restores the <nowiki> content. Call this after unstrip() so that the
# expansion order is preserved.
#
# Markers are replaced last-to-first; otherwise nested tags would be
# corrupted.
function unstripNoWiki( $text, &$state )
{
    $markers = array_reverse( $state['nowiki'], true );
    foreach ( $markers as $marker => $replacement ) {
        $text = str_replace( $marker, $replacement, $text );
    }
    return $text;
}
316
317 # Add an item to the strip state
318 # Returns the unique tag which must be inserted into the stripped text
319 # The tag will be replaced with the original text in unstrip()
320
# Adds $text to the strip state under a newly generated marker.
#
# $text  content the marker stands for
# $state strip state; created if empty
#
# Returns the marker, which must be inserted into the stripped text and
# is replaced by $text again in unstrip().
function insertStripItem( $text, &$state )
{
    $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
    if ( !$state ) {
        # Create every list strip() merges into, in strip()'s order.
        # 'timeline' and 'comment' used to be missing, although strip()
        # indexes both when it extends an existing state.
        $state = array(
            'nowiki' => array(),
            'hiero' => array(),
            'timeline' => array(),
            'math' => array(),
            'pre' => array(),
            'comment' => array()
        );
    }
    # 'item' is appended after the tag lists, so unstrip() expands it first
    $state['item'][$rnd] = $text;
    return $rnd;
}
335
336 # This method generates the list of subcategories and pages for a category
# Generates the HTML list of subcategories and pages for a category page.
#
# Returns "" when category magic is disabled in the parser options or the
# current title is not in the category namespace; otherwise a clearing
# <br> followed by the "subcategories" and "category_header" sections for
# whichever of the two lists is non-empty.
function categoryMagic ()
{
    global $wgLang ;
    # Doesn't use categories at all. (This used to be a bare "return;";
    # both exits now yield the same empty string.)
    if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;

    $cns = Namespace::getCategory() ;
    if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

    $r = "<br style=\"clear:both;\"/>\n";

    # Take the skin from the parser options like the other methods of this
    # class do; the file header asks to keep $wgUser out of the parser.
    $sk =& $this->mOptions->getSkin() ;

    $articles = array() ;
    $children = array() ;
    $data = array () ;

    # FIXME: add limits
    $t = wfStrencode( $this->mTitle->getDBKey() );
    $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
    $res = wfQuery ( $sql, DB_READ ) ;
    while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

    # For all pages that link to this category
    foreach ( $data AS $x )
    {
        $t = $wgLang->getNsText ( $x->cur_namespace ) ;
        if ( $t != "" ) $t .= ":" ;
        $t .= $x->cur_title ;

        if ( $x->cur_namespace == $cns ) {
            array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
        } else {
            array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
        }
    }
    wfFreeResult ( $res ) ;

    # Showing subcategories
    if ( count ( $children ) > 0 ) {
        $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
        $r .= implode ( ", " , $children ) ;
    }

    # Showing pages in this category
    if ( count ( $articles ) > 0 ) {
        $ti = $this->mTitle->getText() ;
        $h = wfMsg( "category_header", $ti );
        $r .= "<h2>{$h}</h2>\n" ;
        $r .= implode ( ", " , $articles ) ;
    }

    return $r ;
}
393
# Returns the whitelist of HTML attribute names that may be kept on tags
# (no scripting attributes). The list is assembled group by group; order
# and content, including the repeated "width", are unchanged.
function getHTMLattrs ()
{
    $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
    $br = array( "clear" );
    $hr = array( "noshade" );
    $quotes = array( "cite" ); # BLOCKQUOTE, Q
    $font = array( "size", "face", "color" );
    # For various lists, mostly deprecated but safe
    $lists = array( "type", "start", "value", "compact" );
    $tables = array(
        "summary", "width", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan"
    );
    $css = array( "id", "class", "name", "style" );

    return array_merge( $general, $br, $hr, $quotes, $font, $lists, $tables, $css );
}
410
# Sanitises the attribute part of an HTML tag.
#
# $t  raw attribute text, e.g. 'border="1" onclick="..."'
#
# Returns the attribute text with every attribute that is not listed by
# getHTMLattrs() removed, trimmed. If the remaining text contains a style
# attribute with something that looks like a script expression or a URL,
# all attributes are dropped and "" is returned.
function fixTagAttributes ( $t )
{
    if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

    # Strip non-approved attributes from the tag.
    # This used preg_replace() with the /e modifier, which evaluated the
    # attribute text inside a double-quoted PHP string: values containing
    # "{${...}}" were executed and single quotes came back as \'. The /e
    # modifier no longer exists in PHP 7+. A callback has neither problem.
    $t = preg_replace_callback(
        "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
        array( $this, 'filterTagAttribute' ),
        $t );

    # Strip javascript "expression" from stylesheets. Brute force approach:
    # If anything offensive is found, all attributes of the HTML tag are dropped
    if( preg_match(
        "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
        wfMungeToUtf8( $t ) ) )
    {
        $t="";
    }

    return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes(): keeps one
# name[=value] match if the name is whitelisted, otherwise removes it.
/* private */ function filterTagAttribute ( $m )
{
    if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
        return '' ;
    }
    # Group 3 is absent for valueless attributes such as "noshade"
    $value = isset( $m[3] ) ? $m[3] : '' ;
    return ( $value !== '' ) ? "{$m[1]}={$value}" : $m[1] ;
}
433
434 /* interface with html tidy, used if $wgUseTidy = true */
# Interface with HTML tidy, used if $wgUseTidy = true.
#
# $text  HTML fragment to clean
#
# Wraps the fragment in a minimal XHTML document, pipes it through the
# external tidy binary and returns tidy's output. If tidy produced no
# output for non-empty input, the original text is returned with an HTML
# comment noting the failure.
function tidy ( $text ) {
    global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
    global $wgInputEncoding, $wgOutputEncoding;
    $fname = "Parser::tidy";
    wfProfileIn( $fname );

    # Work on a copy: appending to $wgTidyOpts itself made the encoding
    # flag pile up in the global on every call.
    $opts = $wgTidyOpts;
    switch(strtoupper($wgOutputEncoding)) {
        case 'ISO-8859-1':
            $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
            break;
        case 'UTF-8':
            $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
            break;
        default:
            $opts .= ' -raw';
    }

    $cleansource = '';
    $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
        '<head><title>test</title></head><body>'.$text.'</body></html>';
    $descriptorspec = array(
        0 => array("pipe", "r"),              # tidy reads the document from stdin
        1 => array("pipe", "w"),              # cleaned markup is read from stdout
        2 => array("file", "/dev/null", "a")  # diagnostics are discarded
    );
    $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
    if (is_resource($process)) {
        fwrite($pipes[0], $wrappedtext);
        fclose($pipes[0]);
        while (!feof($pipes[1])) {
            $cleansource .= fgets($pipes[1], 1024);
        }
        fclose($pipes[1]);
        proc_close($process);
    }

    wfProfileOut( $fname );

    if( $cleansource == '' && $text != '') {
        wfDebug( "Tidy error detected!\n" );
        return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
    } else {
        return $cleansource;
    }
}
481
# Converts wiki table markup to HTML tables, line by line.
#
# Recognised line starts (after trimming):
#   {|  opens a table, rest of the line = table attributes
#   |}  closes the innermost table
#   |-  starts a new row (any number of dashes), rest = row attributes
#   |   data cells, !  header cells, |+  caption; several cells on one
#       line are separated by || (or !! on header lines); text before a
#       single | inside a cell is taken as that cell's attributes
# Lines outside a table, and other lines inside one, are left untouched.
# Tables still open at the end of the text are closed.
#
# One stack entry per open table is kept in each of the four arrays.
function doTableStuff ( $t )
{
    $t = explode ( "\n" , $t ) ;
    $td = array () ; # Is currently a td tag open?
    $ltd = array () ; # Was it TD or TH?
    $tr = array () ; # Is currently a tr tag open?
    $ltr = array () ; # tr attributes
    foreach ( $t AS $k => $x )
    {
        $x = trim ( $x ) ;
        $fc = substr ( $x , 0 , 1 ) ;
        if ( "{|" == substr ( $x , 0 , 2 ) )
        {
            # Attributes start right after the two-character marker. The
            # offset used to be 3, which swallowed the first attribute
            # character when no space followed "{|".
            $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $tr , false ) ;
            array_push ( $ltr , "" ) ;
        }
        else if ( count ( $td ) == 0 ) { } # Don't do any of the following
        else if ( "|}" == substr ( $x , 0 , 2 ) )
        {
            $z = "</table>\n" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
        }
        else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
        {
            # Drop the leading "|" and every dash that follows it
            $x = ltrim ( substr ( $x , 1 ) , '-' ) ;
            $z = "" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
            array_push ( $tr , false ) ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            # Row attributes are remembered until the first cell opens the <tr>
            array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
        }
        else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
        {
            if ( "|+" == substr ( $x , 0 , 2 ) )
            {
                $fc = "+" ;
                $x = substr ( $x , 1 ) ;
            }
            $after = substr ( $x , 1 ) ;
            if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
            $after = explode ( "||" , $after ) ;
            $t[$k] = "" ;
            foreach ( $after AS $theline )
            {
                $z = "" ;
                if ( $fc != "+" )
                {
                    # Cells need an open row; captions do not
                    $tra = array_pop ( $ltr ) ;
                    if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                    array_push ( $tr , true ) ;
                    array_push ( $ltr , "" ) ;
                }

                # Close the previous cell of this table, if any
                $l = array_pop ( $ltd ) ;
                if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                if ( $fc == "|" ) $l = "td" ;
                else if ( $fc == "!" ) $l = "th" ;
                else if ( $fc == "+" ) $l = "caption" ;
                else $l = "" ;
                array_push ( $ltd , $l ) ;
                $y = explode ( "|" , $theline , 2 ) ;
                if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                $t[$k] .= $y ;
                array_push ( $td , true ) ;
            }
        }
    }

    # Closing open td, tr && table
    while ( count ( $td ) > 0 )
    {
        # Close the cell with the tag that opened it; this used to emit
        # "</td>" even for an open <th> or <caption>.
        $l = array_pop ( $ltd ) ;
        if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
        if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
        array_pop ( $ltr ) ;
        $t[] = "</table>" ;
    }

    $t = implode ( "\n" , $t ) ;
    return $t ;
}
581
582 # Parses the text and adds the result to the strip state
583 # Returns the strip tag
# Parses $text and stores the result in the strip state.
#
# $text     wiki markup to parse
# $newline  string prepended to the returned marker; its boolean value is
#           passed to internalParse() as $linestart
# $args     passed through to internalParse()
#
# Returns $newline followed by the strip marker of the parsed text.
function stripParse( $text, $newline, $args )
{
    $stripped = $this->strip( $text, $this->mStripState );
    $atLineStart = (bool)$newline;
    $parsed = $this->internalParse( $stripped, $atLineStart, $args, false );
    $marker = $this->insertStripItem( $parsed, $this->mStripState );
    return $newline . $marker;
}
590
# Runs the rendering steps on already-stripped text and returns the result.
#
# $text      wiki markup with strip markers in place of stripped sections
# $linestart accepted for the callers' sake; not read inside this method
# $args      passed to replaceVariables()
# $isMain    passed to formatHeadings(); stripParse() calls with false
#
# The steps below are order-dependent; do not reorder them.
591 function internalParse( $text, $linestart, $args = array(), $isMain=true )
592 {
593 $fname = "Parser::internalParse";
594 wfProfileIn( $fname );
595
596 $text = $this->removeHTMLtags( $text );
597 $text = $this->replaceVariables( $text, $args );
598
# Four or more dashes at the start of a line become a horizontal rule
599 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );
600
601 $text = $this->doHeadings( $text );
602 if($this->mOptions->getUseDynamicDates()) {
603 global $wgDateFormatter;
604 $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
605 }
606 $text = $this->doAllQuotes( $text );
# External links have to be replaced before internal ones
607 $text = $this->replaceExternalLinks( $text );
# NOTE(review): replaceInternalLinks() is run twice in a row; presumably
# deliberate (a second pass over text produced by the first) - confirm
# before removing either call.
608 $text = $this->replaceInternalLinks ( $text );
609 $text = $this->replaceInternalLinks ( $text );
610 //$text = $this->doTokenizedParser ( $text );
611 $text = $this->doTableStuff ( $text ) ;
612 $text = $this->magicISBN( $text );
613 $text = $this->magicRFC( $text );
614 $text = $this->formatHeadings( $text, $isMain );
615 $sk =& $this->mOptions->getSkin();
616 $text = $sk->transformContent( $text );
617
# The category listing is appended by whichever call reaches this point
# first. categoryMagicDone is created here on the fly and is not reset by
# clearState(), so it stays set for the lifetime of the object.
618 if ( !isset ( $this->categoryMagicDone ) ) {
619 $text .= $this->categoryMagic () ;
620 $this->categoryMagicDone = true ;
621 }
622
623 wfProfileOut( $fname );
624 return $text;
625 }
626
627
# Turns "== heading ==" style lines into <h1>..<h6> elements.
# Deeper levels are handled first so that a run of six "=" is not taken
# for a shallower heading.
/* private */ function doHeadings( $text )
{
    $level = 6;
    while ( $level >= 1 ) {
        $marks = str_repeat( "=", $level );
        $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
        $replacement = "<h{$level}>\\1</h{$level}>\\2";
        $text = preg_replace( $pattern, $replacement, $text );
        $level--;
    }
    return $text;
}
637
# Applies doQuotes() to every line of $text separately and joins the
# results with newlines again, so quote markup never spans lines.
/* private */ function doAllQuotes( $text )
{
    $rendered = array();
    foreach ( explode( "\n", $text ) as $line ) {
        $rendered[] = $this->doQuotes( "", $line, "" );
    }
    return implode( "\n", $rendered );
}
647
# Converts '' (emphasis) and ''' (strong) quote markup in $text to
# <em>/<strong>, one quote run per recursion step.
#
# $pre   already-seen text that still belongs inside the emphasis that is
#        currently open (used by the combined modes)
# $text  remaining text to scan
# $mode  which emphasis is open at the start of $text:
#        ""         none
#        "em"       <em> only
#        "strong"   <strong> only
#        "emstrong" <em> opened first, <strong> opened after some text
#        "strongem" <strong> opened first, <em> opened after some text
#        "both"     both opened at the same position (no text in between)
#
# Returns the converted text. Emphasis still open at the end of $text is
# closed there.
648 /* private */ function doQuotes( $pre, $text, $mode )
649 {
# The U (ungreedy) modifier makes the first group stop at the first ''
650 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
651 $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
652 $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
# A third apostrophe makes this a strong toggle; otherwise an em toggle
653 if ( substr ($m[2], 0, 1) == "'" ) {
654 $m[2] = substr ($m[2], 1);
655 if ($mode == "em") {
656 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
657 } else if ($mode == "strong") {
658 return $m1_strong . $this->doQuotes ( "", $m[2], "" );
659 } else if (($mode == "emstrong") || ($mode == "both")) {
660 return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
661 } else if ($mode == "strongem") {
662 return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
663 } else {
664 return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
665 }
666 } else {
# Mirror image of the branch above with em and strong swapped
667 if ($mode == "strong") {
668 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
669 } else if ($mode == "em") {
670 return $m1_em . $this->doQuotes ( "", $m[2], "" );
671 } else if ($mode == "emstrong") {
672 return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
673 } else if (($mode == "strongem") || ($mode == "both")) {
674 return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
675 } else {
676 return $m[1] . $this->doQuotes ( "", $m[2], "em" );
677 }
678 }
679 } else {
# No quote run left: close whatever is still open. Empty text is never
# wrapped, so no empty <em></em> or <strong></strong> is produced.
680 $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
681 $text_em = ($text == "") ? "" : "<em>{$text}</em>";
682 if ($mode == "") {
683 return $pre . $text;
684 } else if ($mode == "em") {
685 return $pre . $text_em;
686 } else if ($mode == "strong") {
687 return $pre . $text_strong;
688 } else if ($mode == "strongem") {
689 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
690 } else {
# Reached for "emstrong" and "both"
691 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
692 }
693 }
694 }
695
696 # Note: we have to do external links before the internal ones,
697 # and otherwise take great care in the order of things here, so
698 # that we don't end up interpreting some URLs twice.
699
# Replaces external links for every supported protocol, in a fixed order.
# The flag per protocol is handed to subReplaceExternalLinks() as
# $autonumber; only http and https have it set.
/* private */ function replaceExternalLinks( $text )
{
    $fname = "Parser::replaceExternalLinks";
    wfProfileIn( $fname );

    $protocols = array(
        "http" => true,
        "https" => true,
        "ftp" => false,
        "irc" => false,
        "gopher" => false,
        "news" => false,
        "mailto" => false
    );
    foreach ( $protocols as $protocol => $autonumber ) {
        $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
    }

    wfProfileOut( $fname );
    return $text;
}
714
# Replaces the external links of one protocol in $s and returns the result.
#
# $s          text to process
# $protocol   URL scheme without the colon, e.g. "http"
# $autonumber when true, bracketed links without a label are shown as
#             "[n]" (counted in $this->mAutonumber) and, if the parser
#             options allow external images, bare image URLs become
#             images; when false, unlabelled links show the escaped URL
#
# Works in two phases: bare URLs first (regex replacement on the whole
# text), then bracketed links "[protocol:... label]" piece by piece.
715 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
716 {
# Placeholder written in place of the protocol into generated markup and
# swapped back below; presumably it keeps the second regex from matching
# URLs inside markup the first one has just produced.
717 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
# Characters allowed in a URL
718 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
719
720 # this is the list of separators that should be ignored if they
721 # are the last character of an URL but that should be included
722 # if they occur within the URL, e.g. "go to www.foo.com, where .."
723 # in this case, the last comma should not become part of the URL,
724 # but in "www.foo.com/123,2342,32.htm" it should.
725 $sep = ",;\.:";
# Characters allowed in an image file name, and recognised image extensions
726 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
727 $images = "gif|png|jpg|jpeg";
728
729 # PLEASE NOTE: The curly braces { } are not part of the regex,
730 # they are interpreted as part of the string (used to tell PHP
731 # that the content of the string should be inserted there).
# $e1: bare URL ending in an image file name, not preceded by "["
732 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
733 "((?i){$images})([^{$uc}]|$)/";
734
# $e2: any other bare URL not preceded by "["; a trailing separator is
# left outside the link
735 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
736 $sk =& $this->mOptions->getSkin();
737
738 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
739 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
740 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
741 }
742 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
743 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
744 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
745 "</a>\\5", $s );
746 $s = str_replace( $unique, $protocol, $s );
747
# Second phase: split at every "[protocol:". The leading space guarantees
# a non-empty first piece; it is removed again right after.
748 $a = explode( "[{$protocol}:", " " . $s );
749 $s = array_shift( $a );
750 $s = substr( $s, 1 );
751
# From here on $e1 matches "url]rest" and $e2 matches "url label]rest"
752 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
753 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
754
755 foreach ( $a as $line ) {
756 if ( preg_match( $e1, $line, $m ) ) {
757 $link = "{$protocol}:{$m[1]}";
758 $trail = $m[2];
759 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
760 else { $text = wfEscapeHTML( $link ); }
761 } else if ( preg_match( $e2, $line, $m ) ) {
762 $link = "{$protocol}:{$m[1]}";
763 $text = $m[2];
764 $trail = $m[3];
765 } else {
# Not a well-formed bracketed link: put the text back unchanged
766 $s .= "[{$protocol}:" . $line;
767 continue;
768 }
# No URL expansion when the label is the URL itself, or the URL without
# its "protocol://" prefix and optional trailing slash
769 if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
770 $paren = "";
771 } else {
772 # Expand the URL for printable version
773 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
774 }
775 $la = $sk->getExternalLinkAttributes( $link, $text );
776 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
777
778 }
779 return $s;
780 }
781
782
# Replaces [[internal links]] in $s with HTML and returns the result.
#
# Handles, in this order: language (interwiki) links, image links and
# category links (unless the target starts with ":"), self links, media
# and special-page links, and finally ordinary page links. Language and
# category links are recorded in $this->mOutput instead of being rendered
# in place. Text that looks like a link but cannot be parsed is output
# unchanged.
/* private */ function replaceInternalLinks( $s )
{
    global $wgLang, $wgLinkCache;
    global $wgNamespacesWithSubpages, $wgLanguageCode;
    static $fname = "Parser::replaceInternalLinks" ;
    wfProfileIn( $fname );

    wfProfileIn( "$fname-setup" );
    static $tc = FALSE;
    # the % is needed to support urlencoded titles as well
    if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
    $sk =& $this->mOptions->getSkin();

    # Split at every "[["; the leading space guarantees a non-empty first
    # piece and is removed again right after.
    $a = explode( "[[", " " . $s );
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

    $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
    # Special and Media are pseudo-namespaces; no pages actually exist in them
    static $image = FALSE;
    static $special = FALSE;
    static $media = FALSE;
    static $category = FALSE;
    if ( !$image ) { $image = Namespace::getImage(); }
    if ( !$special ) { $special = Namespace::getSpecial(); }
    if ( !$media ) { $media = Namespace::getMedia(); }
    if ( !$category ) { $category = Namespace::getCategory(); }

    $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

    if ( $useLinkPrefixExtension ) {
        if ( preg_match( $e2, $s, $m ) ) {
            $first_prefix = $m[2];
            $s = $m[1];
        } else {
            $first_prefix = false;
        }
    } else {
        $prefix = '';
    }

    wfProfileOut( "$fname-setup" );

    # Note: the loop body used to call wfProfileOut( $fname ) before every
    # "continue" although wfProfileIn( $fname ) runs only once, leaving
    # the profiler calls unbalanced. The function-level section is now
    # closed exactly once, after the loop.
    foreach ( $a as $line ) {
        wfProfileIn( "$fname-prefixhandling" );
        if ( $useLinkPrefixExtension ) {
            if ( preg_match( $e2, $s, $m ) ) {
                $prefix = $m[2];
                $s = $m[1];
            } else {
                $prefix='';
            }
            # first link
            if($first_prefix) {
                $prefix = $first_prefix;
                $first_prefix = false;
            }
        }
        wfProfileOut( "$fname-prefixhandling" );

        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
            $text = $m[2];
            # fix up urlencoded title texts
            if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
            $trail = $m[3];
        } else { # Invalid form; output directly
            $s .= $prefix . "[[" . $line ;
            continue;
        }

        /* Valid link forms:
        Foobar -- normal
        :Foobar -- override special treatment of prefix (images, language links)
        /Foobar -- convert to CurrentPage/Foobar
        /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
        */
        $c = substr($m[1],0,1);
        $noforce = ($c != ":");
        if( $c == "/" ) { # subpage
            if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
                $m[1]=substr($m[1],1,strlen($m[1])-2);
                $noslash=$m[1];
            } else {
                $noslash=substr($m[1],1);
            }
            if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
                $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
                if( "" == $text ) {
                    $text= $m[1];
                } # this might be changed for ugliness reasons
            } else {
                $link = $noslash; # no subpage allowed, use standard link
            }
        } elseif( $noforce ) { # no subpage
            $link = $m[1];
        } else {
            $link = substr( $m[1], 1 );
        }
        $wasblank = ( "" == $text );
        if( $wasblank )
            $text = $link;

        $nt = Title::newFromText( $link );
        if( !$nt ) {
            $s .= $prefix . "[[" . $line;
            continue;
        }
        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();
        if( $noforce ) {
            if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
                $tmp = $prefix . $trail ;
                $s .= (trim($tmp) == '')? '': $tmp;
                continue;
            }
            if ( $ns == $image ) {
                $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                $wgLinkCache->addImageLinkObj( $nt );
                continue;
            }
            if ( $ns == $category ) {
                $t = $nt->getText() ;
                $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

                $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
                $wgLinkCache->resume();

                $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
                $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                $this->mOutput->mCategoryLinks[] = $t ;
                $s .= $prefix . $trail ;
                continue;
            }
        }
        # strpos() returns 0 for a "#" in first position, which "== FALSE"
        # mistook for "no # present"; compare strictly.
        if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
            ( strpos( $link, "#" ) === FALSE ) ) {
            # Self-links are handled specially; generally de-link and change to bold.
            $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
            continue;
        }

        if( $ns == $media ) {
            $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            continue;
        } elseif( $ns == $special ) {
            $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
            continue;
        }
        $s .= $sk->makeLinkObj( $nt, $text, "", $trail, $prefix );
    }
    wfProfileOut( $fname );
    return $s;
}
954
955 # Some functions here used by doBlockLevels()
956 #
# Used by doBlockLevels(): returns the closing tag (plus newline) for the
# block element named in $this->mLastSection, or "" if none is open, and
# clears the section and <pre> tracking.
/* private */ function closeParagraph()
{
    $closing = ( '' != $this->mLastSection )
        ? "</" . $this->mLastSection . ">\n"
        : "";

    $this->mLastSection = "";
    $this->mInPre = false;
    return $closing;
}
967 # getCommon() returns the length of the longest common substring
968 # of both arguments, starting at the beginning of both.
969 #
# Counts how many leading characters the two strings have in common.
#
# $st1, $st2: strings to compare (list prefixes such as "**#" in doBlockLevels()).
# Returns the length of the shared prefix; 0 when either string is empty or
# the first characters already differ.
/* private */ function getCommon( $st1, $st2 )
{
	# Only the overlap of the two strings can match.
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets are used because the curly-brace form
	# ($str{$i}) is no longer accepted by current PHP versions.
	$i = 0;
	while ( $i < $limit && $st1[$i] === $st2[$i] ) {
		++$i;
	}
	return $i;
}
981 # These next three functions open, continue, and close the list
982 # element appropriate to the prefix character passed into them.
983 #
# Opens a new list level for the given prefix character.
#
# $char: one of "*" (unordered), "#" (ordered), ":" (definition data) or
#        ";" (definition term).
# Returns the HTML that closes any open paragraph (via closeParagraph())
# followed by the opening list tags. For an unknown character only the
# marker comment "<!-- ERR 1 -->" is returned; closeParagraph() has still
# been called, but its output is discarded.
# Side effect: sets $this->mDTopen when a definition term is opened.
/* private */ function openList( $char )
{
	$html = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			$html .= '<ul><li>';
			break;
		case '#':
			$html .= '<ol><li>';
			break;
		case ':':
			$html .= '<dl><dd>';
			break;
		case ';':
			$html .= '<dl><dt>';
			$this->mDTopen = true;
			break;
		default:
			$html = '<!-- ERR 1 -->';
	}

	return $html;
}
999
# Closes the current list item and opens the next one at the same level.
#
# $char: list prefix character of the new item ("*", "#", ":" or ";").
# Returns the close/open tag pair, or "<!-- ERR 2 -->" for an unknown
# character.
# Side effect: for definition lists, $this->mDTopen is updated to record
# whether the item now open is a term (<dt>) or a definition (<dd>).
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';

		case ':':
		case ';':
			# Close whichever definition-list element is currently open.
			$close = $this->mDTopen ? '</dt>' : '</dd>';
			if ( ';' == $char ) {
				$this->mDTopen = true;
				return $close . '<dt>';
			}
			$this->mDTopen = false;
			return $close . '<dd>';
	}
	return '<!-- ERR 2 -->';
}
1016
# Closes one list level for the given prefix character.
#
# $char: "*", "#" or ":" (doBlockLevels() maps ";" to ":" before calling).
# Returns the closing tags followed by a newline, or "<!-- ERR 3 -->"
# (without newline) for any other character.
# Side effect: clears $this->mDTopen when an open definition term is closed.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case '*':
			$tags = '</li></ul>';
			break;
		case '#':
			$tags = '</li></ol>';
			break;
		case ':':
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$tags = '</dt></dl>';
			} else {
				$tags = '</dd></dl>';
			}
			break;
		default:
			return '<!-- ERR 3 -->';
	}
	return $tags . "\n";
}
1032
# Turns line-oriented wiki markup into block-level HTML.
#
# The text is walked line by line. Lines starting with *, #, : or ; are
# turned into (possibly nested) lists; other lines are wrapped in <p>, or
# in <pre> when they start with a space. Lines containing block-level HTML
# tags suspend paragraph handling.
#
# $text:      the text to process.
# $linestart: when false, the first line is copied to the output untouched
#             instead of being treated as the start of a line.
# Returns the processed text.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or the pending paragraph tag(s) to emit before the next
	# non-blank line. While it is not false, the current line is not output.
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match( "/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match( "/<pre/i", $oLine );
		if ( !$this->mInPre ) {
			$this->mInPre = !empty( $preOpenMatch );
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is the prefix with ";" mapped to ":", so a term and
			# its definitions count as the same list level.
			$pref2 = str_replace( ";", ":", $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ":" );
					$t = $match[2];
				}
			}
		} elseif ( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels of the previous line that this line does not share.
			while ( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength - 1] );
			}
			# Open the levels this line adds.
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					# FIXME: This is dupe of code above
					if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if ( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match( "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|" . UNIQ_PREFIX . "-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				# closeParagraph() clears mInPre; restore it when this line
				# opens a <pre> without closing it.
				if ( $preOpenMatch and !$preCloseMatch ) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				$isBlank = ( trim( $t ) === '' );
				# The first character is only inspected on non-blank lines,
				# so an empty line never triggers an offset read.
				if ( !$isBlank and " " == $t[0] ) {
					// pre
					if ( $this->mLastSection != 'pre' ) {
						$paragraphStack = false;
						$output .= $this->closeParagraph() . '<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( $isBlank ) {
						if ( $paragraphStack ) {
							# Second blank line in a row
							$output .= $paragraphStack . '<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ( $this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ( $this->mLastSection != 'p' ) {
							$output .= $this->closeParagraph() . '<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		if ( $paragraphStack === false ) {
			$output .= $t . "\n";
		}
	}
	# Close every list level still open after the last line.
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength - 1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $output;
}
1191
# Returns the current value of a built-in variable.
#
# $index: one of the MAG_* variable identifiers handled below.
# Returns the value, or NULL when $index is not one of them.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	switch ( $index ) {
		case MAG_CURRENTMONTH:
			return date( 'm' );
		case MAG_CURRENTMONTHNAME:
			return $wgLang->getMonthName( date( 'n' ) );
		case MAG_CURRENTMONTHNAMEGEN:
			return $wgLang->getMonthNameGen( date( 'n' ) );
		case MAG_CURRENTDAY:
			return date( 'j' );
		case MAG_PAGENAME:
			return $this->mTitle->getText();
		case MAG_NAMESPACE:
			# Namespace name as given by $wgLang, not the canonical one
			# (patch by Dori).
			$namespaceIndex = $this->mTitle->getNamespace();
			return $wgLang->getNsText( $namespaceIndex );
		case MAG_CURRENTDAYNAME:
			# date( 'w' ) starts at 0; one is added before the lookup.
			return $wgLang->getWeekdayName( date( 'w' ) + 1 );
		case MAG_CURRENTYEAR:
			return date( 'Y' );
		case MAG_CURRENTTIME:
			return $wgLang->time( wfTimestampNow(), false );
		case MAG_NUMBEROFARTICLES:
			return wfNumberOfArticles();
		case MAG_SITENAME:
			return $wgSitename;
		case MAG_SERVER:
			return $wgServer;
		default:
			return NULL;
	}
}
1225
# Rebuilds $this->mVariables from scratch: for every ID in $wgVariableIDs
# the matching MagicWord adds itself to the array together with the value
# computed by getVariableValue().
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$value = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $value );
	}
}
1235
# Expands {{...}} and {{{...}}} constructs in $text.
#
# $text: text to process.
# $args: arguments of the enclosing inclusion; pushed on $this->mArgStack
#        for the duration of the call so that the callbacks can read them.
# Returns the text with the substitutions applied.
#
# Plain variable and argument substitution only run for HTML output; the
# template pass always runs.
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# The callbacks named below are plain functions; this global is set so
	# they can find the parser instance.
	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

		# Argument substitution
		$text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
	}
	# Template substitution
	$regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
	$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1273
# Replaces a single {{name}} match with the value of the variable "name".
#
# $matches: regex match array; [0] is the whole match, [1] the name.
# Returns the variable's value, or the original text when the name is not
# a key of $this->mVariables.
# Side effect: flags the output as containing old-style magic on success.
function variableSubstitution( $matches )
{
	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
1284
# Expands one {{...}} match.
#
# $matches: regex match array from replaceVariables():
#   [0] whole match, [1] optional leading newline, [2] the part before the
#   first "|", [3] the rest, which starts with "|" when non-empty.
# Returns the replacement text, or $matches[0] unchanged when nothing applies.
#
# The handlers below are tried in order; the first one that sets $found
# wins. Several of them strip a recognised keyword from the front of
# $part1 (via matchStartAndRemove) before later handlers see it.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;

	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1
	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}: leave the text alone and do not parse it further
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse = true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs =& MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal =& MagicWord::get( MAG_LOCALURL );
		$mwLocalE =& MagicWord::get( MAG_LOCALURLE );

		# Name of the Title method that produces the URL
		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
		# Clean up argument array: "name=value" arguments are stored under
		# their trimmed name, all others under consecutive numbers from 1.
		$assocArgs = array();
		$index = 1;
		foreach ( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always yields a string, so no further checks are needed
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos + 1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	}
	return $text;
}
1483
1484 # Triple brace replacement -- used for template arguments
# Replaces one {{{name}}} match with the matching argument of the
# innermost inclusion.
#
# $matches: regex match array; [0] whole match, [1] optional leading
#           newline, [2] the argument name.
# Returns the argument value run through stripParse(), or the original
# text when the top of $this->mArgStack has no such argument.
function argSubstitution( $matches )
{
	$newline = $matches[1];
	$argName = trim( $matches[2] );
	$callerArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $callerArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $callerArgs[$argName], $newline, array() );
}
1498
1499 # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity identified by $dbk.
#
# Returns true while the running count (including this call) does not
# exceed MAX_INCLUDE_REPEAT, false afterwards. The counter is incremented
# in both cases.
function incrementIncludeCount( $dbk )
{
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1511
1512
1513 # Cleans up HTML, removes dangerous tags and attributes
# Filters HTML in $text down to a whitelist of elements.
#
# HTML comments are removed first. The text is then split at every "<";
# chunks that start with a whitelisted tag are re-emitted with their
# attributes passed through fixTagAttributes(), every other chunk is
# escaped so that it shows up as literal text. Without $wgUseTidy the
# nesting of tags is also tracked: misplaced tags are escaped and tags
# left open are closed at the end.
#
# When $wgUserHtml is off the whitelist is empty, so every tag is escaped.
#
# $text: text to clean.
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
	global $wgUseTidy, $wgUserHtml;
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	if ( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
			"strike", "strong", "tt", "var", "div", "center",
			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
			"ruby", "rt" , "rb" , "rp", "p"
		);
		$htmlsingle = array(
			"br", "hr", "li", "dt", "dd"
		);
		$htmlnest = array( # Tags that can be nested--??
			"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
			"dl", "font", "big", "small", "sub", "sup"
		);
		$tabletags = array( # Can only appear inside table
			"td", "th", "tr"
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table cell/row tags are treated like single tags: they are never
	# pushed on the tag stack.
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this method; the call is kept
	# in case getHTMLattrs() has side effects -- confirm and remove.
	$htmlattrs = $this->getHTMLattrs();

	# Remove HTML comments. A comment alone on a line is removed together
	# with its leading newline.
	$text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", '', $text );

	# Splits a chunk into: optional "/", tag name, attributes, closing
	# bracket, and the text up to the next tag.
	$tagRegex = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	if ( !$wgUseTidy ) {
		$tagstack = array();
		# Tag stacks of the enclosing tables; each table gets a fresh stack
		$tablestack = array();
		foreach ( $bits as $x ) {
			# Chunks that do not match leave $regs empty; silence the
			# resulting notices from list().
			$prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
			preg_match( $tagRegex, $x, $regs );
			list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			error_reporting( $prev );

			$badtag = 0;
			$t = strtolower( $t );
			if ( in_array( $t, $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( !in_array( $t, $htmlsingle ) &&
					  ( $ot = @array_pop( $tagstack ) ) != $t ) {
						# Not the innermost open tag: undo the pop and reject
						@array_push( $tagstack, $ot );
						$badtag = 1;
					} else {
						if ( $t == "table" ) {
							$tagstack = array_pop( $tablestack );
						}
						$newparams = "";
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					  !in_array( "table", $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					  !in_array( $t, $htmlnest ) ) {
						$badtag = 1;
					} else if ( !in_array( $t, $htmlsingle ) ) {
						if ( $t == "table" ) {
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes( $params );
				}
				if ( !$badtag ) {
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			# Unknown or misplaced tag: show it as literal text
			$text .= "&lt;" . str_replace( ">", "&gt;", $x );
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
			$text .= "</$t>\n";
			if ( $t == "table" ) {
				$tagstack = array_pop( $tablestack );
			}
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			preg_match( $tagRegex, $x, $regs );
			@list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			$t = strtolower( $t );
			if ( in_array( $t, $htmlelements ) ) {
				$newparams = $this->fixTagAttributes( $params );
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= "&lt;" . str_replace( ">", "&gt;", $x );
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1629
1630
1631 /*
1632 *
1633 * This function accomplishes several tasks:
1634 * 1) Auto-number headings if that option is enabled
1635 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1636 * 3) Add a Table of contents on the top for users who have enabled the option
1637 * 4) Auto-anchor headings
1638 *
1639 * It loops through all headlines, collects the necessary data, then splits up the
1640 * string and re-inserts the newly formatted headlines.
1641 *
1642 */
1643
# Numbers headings, adds section edit links and anchors, and builds the
# table of contents.
#
# $text:   HTML text containing <h1>..<h6> headings.
# $isMain: when false the TOC is never inserted into the returned text.
# Returns the text with every heading replaced by its decorated version.
/* private */ function formatHeadings( $text, $isMain=true )
{
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$mwNoEditSection =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $mwNoEditSection->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mwNoToc =& MagicWord::get( MAG_NOTOC );
	if ( $mwNoToc->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, [2] = rest of the opening tag incl. ">",
	# [3] = heading text.
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if ( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mwForceToc =& MagicWord::get( MAG_FORCETOC );
	if ( $mwForceToc->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();           # decorated heading HTML, by heading index
	$sublevelCount = array();  # headings seen so far, by level
	$refer = array();          # anchor base name, by heading index
	$refers = array();         # occurrences of each anchor base name
	$refcount = array();       # occurrence number, by heading index
	$level = 0;
	$prevlevel = 0;
	foreach ( $matches[3] as $headline ) {
		$numbering = "";
		if ( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if ( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level + 1] = 0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if ( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if ( $doNumberHeadings || $doShowToc ) {
			# Join the non-empty counters of all levels up to this one: "2.1.3"
			$dot = 0;
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $sublevelCount[$i] ) ) {
					if ( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		# The second pass must work on the result of the first one,
		# otherwise the first expansion is thrown away.
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/", "", $canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace( "/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
		$canonized_headline = str_replace( '%', '.', $canonized_headline );
		$refer[$headlineCount] = $canonized_headline;

		# count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount] = $refers[$canonized_headline];

		# Prepend the number to the heading text
		if ( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if ( $doNumberHeadings && count( $matches[3] ) > 1 ) {
				# the two are different if the line contains a link
				$headline = $numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		# Repeated headings get "_2", "_3", ... appended
		$anchor = $canonized_headline;
		if ( $refcount[$headlineCount] > 1 ) {
			$anchor .= "_" . $refcount[$headlineCount];
		}
		if ( $doShowToc ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		if ( empty( $head[$headlineCount] ) ) {
			$head[$headlineCount] = "";
		}
		if ( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink( $headlineCount + 1 );
		}

		# Add the edit section span
		if ( $rightClickHack ) {
			$headline = $sk->editSectionScript( $headlineCount + 1, $headline );
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h" . $level . $matches[2][$headlineCount] . $headline . "</h" . $level . ">";

		$headlineCount++;
	}

	if ( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach ( $blocks as $block ) {
		# An [edit] link for the top block of text used to be inserted
		# here; it is disabled because it broke block formatting (for
		# example, a bullet point in the top line).
		$full .= $block;
		if ( $doShowToc && !$i && $isMain ) {
			# Top anchor now in skin
			$full = $full . $toc;
		}

		# Block $i is followed by heading $i
		if ( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1829
# Turns "ISBN <number>" into a link to Special:Booksources.
#
# $text: text to scan.
# Returns the text with every "ISBN " that is followed by at least one
# digit or capital letter (hyphens are allowed in between) replaced by a
# link; everything else is returned unchanged.
/* private */ function magicISBN( $text )
{
	# The leading space guarantees a non-empty first chunk even when the
	# text starts with "ISBN "; it is removed again below.
	$a = explode( "ISBN ", " $text" );
	if ( count( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# Extra blanks between "ISBN " and the number
		$blankLen = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Longest run of valid characters; works for an empty remainder too
		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( "-", "", $isbn );
		$num = str_replace( " ", "", $num );

		if ( "" == $num ) {
			# Not a number. $isbn can only hold hyphens here; put them
			# back so that no input is lost.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
# Turns "RFC <number>" into an external link.
#
# $text: text to scan.
# Returns the text with every "RFC " that is followed by at least one
# digit replaced by a link whose URL is the "rfcurl" message with $1
# replaced by the number; everything else is returned unchanged.
/* private */ function magicRFC( $text )
{
	# The leading space guarantees a non-empty first chunk even when the
	# text starts with "RFC "; it is removed again below.
	$a = explode( "RFC ", " $text" );
	if ( count( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# Extra blanks between "RFC " and the number
		$blankLen = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLen );
		$x = (string)substr( $x, $blankLen );

		# Longest run of digits; works for an empty remainder too, e.g.
		# when the number is the last thing in the text.
		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( "" == $rfc ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfMsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
1896
# Entry point for the transformation applied to wiki text before saving.
#
# $text:       wiki text as submitted.
# $title:      title of the page, stored by reference in $this->mTitle.
# $user:       user saving the page; passed on to pstPass2().
# $options:    stored in $this->mOptions.
# $clearState: when true, clearState() is called first.
# Returns the transformed wiki text.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise line endings
	$text = str_replace( "\r\n", "\n", $text );

	# The sections taken out by strip() are not seen by pstPass2() and are
	# put back afterwards.
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	$text = $this->unstripNoWiki( $text, $stripState );

	return $text;
}
1926
# Second pass of the pre-save transform: expands {{subst:...}}, signatures
# and the "pipe trick" link shortcuts, then trims trailing whitespace.
#
# $text: wiki text.
# $user: the saving user; getName() and the "nickname" option are read.
# Returns the transformed wiki text.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( "TZ" );
		putenv( "TZ=$wgLocaltimezone" );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	if ( isset( $wgLocaltimezone ) ) putenv( "TZ=$oldtz" );

	# The tilde runs are literal, so plain string replacement is used: with
	# preg_replace() a "$1" or "\0" inside the name or nickname would be
	# taken as a backreference. Longest run first.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":" . $n . "|" . $k . "]]";
	$text = str_replace( "~~~~~", $d, $text );
	$text = str_replace( "~~~~", $userLink . " " . $d, $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
													# [[ns:page (cont)|]]
	# The context is the parenthesised suffix of the current page title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	# $p4 runs before $p3 and $p1, which handle the less specific forms
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1997
1998 # Set up some variables which are usually set up in parse()
1999 # so that an external function can call some class members with confidence
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	# Prime the parser members that later calls read, without running a
	# full parse.
	#   $title       - title object; stored by reference, so mTitle aliases
	#                  the caller's variable
	#   $options     - stored as-is in mOptions
	#   $outputType  - stored as-is in mOutputType (presumably one of the
	#                  OT_* constants; confirm against callers)
	#   $clearState  - when true (the default), clearState() is invoked
	#                  after the members have been assigned
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		# Caller asked to keep whatever state the parser already holds
		return;
	}
	$this->clearState();
}
2009
# Run variable replacement over a message text.
#   $text     - the text to transform
#   $options  - stored in mOptions for the duration of the call
# Returns the text after replaceVariables(); if this function is already
# running (re-entrant call), the text is returned untouched.
# Uses the global $wgTitle as the title and sets the output type to OT_MSG.
function transformMsg( $text, $options ) {
	global $wgTitle;
	# Shared across all calls: true while a transformation is in progress
	static $inProgress = false;

	if ( !$inProgress ) {
		# Raise the flag first so a nested call falls straight through,
		# which prevents infinite recursion
		$inProgress = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$inProgress = false;
	}

	return $text;
}
2029 }
2030
# Container for the result of a parse: the output text plus the language
# links, category links and "contains old magic" flag that go with it.
# mCacheTime starts out as an empty string and is used by ParserCache.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor.
	#   $text              - output text
	#   $languageLinks     - array of language links
	#   $categoryLinks     - array of category links
	#   $containsOldMagic  - boolean flag, stored as given
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Plain accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each one delegates to wfSetVar() and returns its result
	# (presumably the previous value -- confirm against wfSetVar)
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold another ParserOutput's link data into this one.
	#   $other - the ParserOutput whose language links and category links
	#            are appended to ours; the old-magic flag is OR-ed.
	# mText and mCacheTime are left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links must come from $other's category links; merging our
		# own language links here would mix the two lists and lose $other's
		# categories.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2064
# Bundle of settings consulted while parsing. Values are filled in from
# site-wide globals and from a user's preferences by initialiseFromUser().
# Every setter hands off to wfSetVar()/wfSetRef() and returns its result.
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# --- TeX rendering ---
	function getUseTeX() { return $this->mUseTeX; }
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

	# --- Category magic ---
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

	# --- Dynamic dates ---
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

	# --- Interwiki magic ---
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

	# --- External images ---
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

	# --- Skin (the setter goes through wfSetRef rather than wfSetVar) ---
	function getSkin() { return $this->mSkin; }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

	# --- Date format ---
	function getDateFormat() { return $this->mDateFormat; }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

	# --- Section edit links ---
	function getEditSection() { return $this->mEditSection; }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

	# --- Section edit on right click ---
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	# --- Heading numbering ---
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

	# --- Table of contents ---
	function getShowToc() { return $this->mShowToc; }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Factory: build a ParserOptions and initialise it from $user.
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Populate every option.
	#   $userInput - user object to read preferences from; when it is
	#                empty/false a fresh User is created and flagged as
	#                loaded via setLoaded( true ), and that one is used.
	# Site-level switches come from the $wg* globals; the skin and the
	# remaining options are read from the user.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates;
		global $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Settings taken from site configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Settings taken from the user
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}

}
2137
2138 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
	# Free-function shim so a regex callback can reach the active parser:
	# hands $matches to $wgCurParser->braceSubstitution() and passes the
	# result straight back.
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
2144
function wfArgSubstitution( $matches ) {
	# Free-function shim so a regex callback can reach the active parser:
	# hands $matches to $wgCurParser->argSubstitution() and passes the
	# result straight back.
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
2150
function wfVariableSubstitution( $matches ) {
	# Free-function shim so a regex callback can reach the active parser:
	# hands $matches to $wgCurParser->variableSubstitution() and passes the
	# result straight back.
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2156
2157 ?>