performance fix
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
# Optional extensions: each one is only loaded when its setting is switched on.
# empty() is used so that a setting which was never defined simply counts as
# "off" instead of raising an undefined-index notice.
if( !empty( $GLOBALS['wgUseWikiHiero'] ) ){
	require_once('extensions/wikihiero/wikihiero.php');
}
if( !empty( $GLOBALS['wgUseTimeline'] ) ){
	require_once('extensions/timeline/Timeline.php');
}
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Upper bound on how often any single page may be included while rendering
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for Parser::$mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): when it is passed as the tag,
# <!-- HTML comments --> are extracted instead of a regular <XML>-style
# tag pair. This should not be anything we may want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the placeholder markers, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
# Cleared with clearState():
var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
# Also reset by clearState(); declared here so it is not created on the fly
var $mInNowiki;

# Temporary:
var $mOptions, $mTitle, $mOutputType;
64
# Constructor: puts the parser into its initial, empty state.
# PHP 5 and later call __construct() on "new Parser"; PHP 8 no longer treats
# a method named after the class as a constructor at all, so without this
# method clearState() would never run on construction there.
function __construct()
{
	$this->clearState();
}

# PHP 4 style constructor. Kept so that PHP 4 (which does not know
# __construct) and any explicit $parser->Parser() call keep working.
function Parser()
{
	$this->__construct();
}
69
# Resets every member that holds per-parse state to its initial value.
# Called from the constructor and, unless the caller opts out, at the
# start of parse().
function clearState()
{
	# Fresh container for the generated output
	$this->mOutput = new ParserOutput;

	# Collections start out empty
	$this->mStripState = array();
	$this->mIncludeCount = array();
	$this->mArgStack = array();

	# Counters and markers
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mVariables = false;

	# Block-level flags
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mInNowiki = false;
}
83
84 # First pass--just handle <nowiki> sections, pass the rest off
85 # to internalParse() which does all the real work.
86 #
87 # Returns a ParserOutput
88 #
# Main entry point for rendering: converts wiki markup to HTML.
#
# $text       wiki markup to convert
# $title      title object of the page being parsed (stored by reference in mTitle)
# $options    parser options object (stored in mOptions)
# $linestart  passed on to internalParse() and doBlockLevels()
# $clearState when true, clearState() is called before parsing
#
# Returns $this->mOutput with the generated HTML set as its text.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	# Protected sections are swapped for markers, the rest is parsed,
	# then the markers are swapped back.
	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			"/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<hr *>/i" => '<hr />',
			"/<br *>/i" => '<br />',
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# An ampersand is left alone when it starts a hexadecimal,
			# decimal or named character reference (the named form also
			# covers &amp;). The hex class is A-Fa-f: the former A-fa-f
			# range also let through G-Z and some punctuation.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		# With tidy enabled the tag clean-up rules above are left out
		$fixtags = array(
			# french spaces, last one Guillemet-left
			"/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
144
# Returns two random numbers in the range 0..0x7fffffff, written in
# lower-case hexadecimal and glued together.
/* static */ function getRandomString()
{
	$upper = 0x7fffffff;
	$firstHalf = dechex( mt_rand( 0, $upper ) );
	$secondHalf = dechex( mt_rand( 0, $upper ) );
	return $firstHalf . $secondHalf;
}
149
150 # Replaces all occurrences of <$tag>content</$tag> in the text
151 # with a random marker and returns the new text. the output parameter
152 # $content will be an associative array filled with data on the form
153 # $unique_marker => content.
154
155 # If $content is already set, the additional entries will be appended
156
157 # If $tag is set to STRIP_COMMENTS, the function will extract
158 # <!-- HTML comments -->
159
# $tag          tag name, or STRIP_COMMENTS to extract HTML comments
# $text         text to scan
# $content      in/out: array of marker => extracted inner text; new entries
#               are appended, a false/empty value is replaced by an array
# $uniq_prefix  string placed at the start of every marker
#
# Returns $text with each extracted section replaced by its marker.
# A section whose closing tag is missing extends to the end of the text.
# An opening tag that is the very last thing in the text is dropped.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = "";

	# The opening and closing patterns do not change while scanning,
	# so they are built once up front.
	if( $tag == STRIP_COMMENTS ) {
		$openPattern = "/<!--/i";
		$closePattern = "/-->/i";
	} else {
		$openPattern = "/<\\s*$tag\\s*>/i";
		$closePattern = "/<\\/\\s*$tag\\s*>/i";
	}

	while ( "" != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag preg_split() yields a single element;
			# nothing is left to scan in that case.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
191
192 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
193 # If $render is set, performs necessary rendering operations on plugins
194 # Returns the text, and fills an array with data needed in unstrip()
195 # If the $state is already a valid strip state, it adds to the state
196
197 # When $stripcomments is set, HTML comments <!-- like this -->
198 # will be stripped in addition to other tags. This is important
199 # for section editing, where these comments cause confusion when
200 # counting the sections in the wikisource
# $text          text to process
# $state         in/out: strip state, an array of buckets (one per tag type)
#                mapping marker => replacement text; an empty value is
#                initialised, an existing state is extended
# $stripcomments when true, HTML comments are extracted as well
#
# Returns the text with all extracted sections replaced by markers.
# Rendering (as opposed to keeping the original tag) only happens when
# $this->mOutputType is OT_HTML.
function strip( $text, &$state, $stripcomments = false )
{
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			# use span to mark nowiki areas, note the trailing whitespace in span to avoid collisions with other spans
			$nowiki_content[$marker] = '<span class="nowiki">'.wfEscapeHTMLTagsOnly( $content )."</span >";
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		# empty() so that an undefined setting counts as "off"
		if( $render && !empty( $GLOBALS['wgUseWikiHiero'] ) ){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && !empty( $GLOBALS['wgUseTimeline'] ) ){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX is off: show the tag pair as escaped text. The closing
				# tag needs its slash, it used to be emitted as "<math>".
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Buckets in the order unstrip() relies on
	$extracted = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		$merged = array();
		foreach( $extracted as $type => $items ){
			# A state created elsewhere may lack some buckets; adding an
			# array to a missing (NULL) entry would be a fatal error.
			$merged[$type] = isset( $state[$type] ) ? $state[$type] + $items : $items;
		}
		# Buckets this function does not know about (such as 'item' from
		# insertStripItem()) are kept, after the tag buckets.
		foreach( $state as $type => $items ){
			if( !isset( $merged[$type] ) ){
				$merged[$type] = $items;
			}
		}
		$state = $merged;
	} else {
		$state = $extracted;
	}
	return $text;
}
291
# Puts the stored replacement texts back in place of their markers.
#
# $text   text containing markers
# $state  strip state: array of buckets, each mapping marker => replacement
#
# Returns the text with the markers replaced.
function unstrip( $text, &$state )
{
	# Must expand in reverse order, otherwise nested tags will be corrupted:
	# last bucket first, and inside each bucket the last entry first.
	foreach ( array_reverse( $state, true ) as $bucket ) {
		foreach ( array_reverse( $bucket, true ) as $marker => $replacement ) {
			$text = str_replace( $marker, $replacement, $text );
		}
	}

	return $text;
}
304
305 # Add an item to the strip state
306 # Returns the unique tag which must be inserted into the stripped text
307 # The tag will be replaced with the original text in unstrip()
308
# $text   text to store
# $state  in/out: strip state; an empty value is initialised here
#
# Returns the newly generated marker under which $text was stored in the
# 'item' bucket of $state.
function insertStripItem( $text, &$state )
{
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same buckets, in the same order, as strip() creates. 'timeline'
		# and 'comment' used to be missing here although strip() merges
		# into both of them.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
323
324 # This method generates the list of subcategories and pages for a category
# Builds the HTML listing shown on a category page: first the
# subcategories, then the other pages linking to the category.
#
# Returns "" when category magic is switched off in the parser options or
# when the current title is not in the category namespace; otherwise the
# generated HTML.
function categoryMagic ()
{
	global $wgLang ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	# The skin comes from the parser options, as everywhere else in this
	# class; the parser is meant to stay away from $wgUser.
	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		# Rebuild the prefixed title text: "Namespace:Title", or just
		# "Title" when the namespace has no name
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
381
# Returns the list of attribute names that fixTagAttributes() lets through.
# Allowed attributes--no scripting, etc.
function getHTMLattrs ()
{
	return array(
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor",
		"clear", /* BR */
		"noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */
		"size", "face", "color", /* FONT */
		/* For various lists, mostly deprecated but safe */
		"type", "start", "value", "compact",
		/* Tables */
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan",
		/* For CSS */
		"id", "class", "name", "style"
	);
}
398
# Filters the attribute part of an HTML tag.
#
# $t  attribute text as written by the user, e.g. 'border="1" onclick=x'
#
# Returns the trimmed attribute text containing only attributes listed by
# getHTMLattrs(), or "" when nothing is left or when a style attribute
# looks dangerous.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used to be a preg_replace() with the /e modifier, which runs the
	# replacement through eval(): attribute text such as {${...}} was then
	# interpolated as PHP code. The callback treats the match as plain data.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( &$this, 'filterTagAttribute' ),
		$t);
	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (when present) its value including
# any double quotes. Returns name=value, the bare name when there is no
# value, or "" when the name is not an approved attribute.
/* private */ function filterTagAttribute ( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return "" ;
	}
	$value = isset( $m[3] ) ? $m[3] : "" ;
	return ( $value !== "" ) ? "{$m[1]}={$value}" : $m[1] ;
}
421
422 /* interface with html tidy, used if $wgUseTidy = true */
# Pipes the generated HTML through the external tidy program.
#
# $text  HTML fragment; it is wrapped in a minimal XHTML document before
#        being written to tidy's standard input
#
# Returns whatever tidy wrote to standard output. When that is empty
# although $text was not, $text is returned with an HTML comment appended
# and a debug message is logged.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	# Work on a copy of the configured options: appending to the global
	# itself made the encoding switch pile up with every call.
	$opts = $wgTidyOpts;
	$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );

	$cleansource = '';
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= $sameEncoding ? ' -latin1' : ' -raw';
			break;
		case 'UTF-8':
			$opts .= $sameEncoding ? ' -utf8' : ' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),              # tidy reads the document from here
		1 => array("pipe", "w"),              # tidy writes the result here
		2 => array("file", "/dev/null", "a")  # diagnostics are discarded
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
469
# Converts wiki table markup to HTML tables, line by line.
#
#   {| attributes    opens a table
#   |}               closes the innermost table
#   |- attributes    starts a new row (any number of dashes)
#   |+ caption       table caption
#   | a || b         data cells, "||" separates cells on one line
#   ! a !! b         header cells
#   attributes | x   inside a cell, text before the first "|" is the
#                    attribute list
#
# Lines outside of any table are left untouched. Tables may be nested;
# the four stacks below hold one entry per open table.
#
# $t  text to convert
# Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after "{|" is the attribute list. Taking it from
			# offset 2 (not 3) keeps the first character when no blank
			# follows the "{|"; fixTagAttributes() trims the result.
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			# Close cell, row and table, in that order
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		/* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
		{
			$z = trim ( substr ( $x , 2 ) ) ;
			$t[$k] = "<caption>{$z}</caption>\n" ;
		}*/
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is only written with the first cell of the
			# row; its attributes are remembered until then.
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc ) # Cell, header cell or caption ("|+")
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Captions live outside of rows; anything else opens
					# a row when none is open yet.
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, if any, with its own tag name
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Use the remembered tag name so that an open <th> or <caption>
		# is not closed with </td>
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
569
570 # Parses the text and adds the result to the strip state
571 # Returns the strip tag
# $text     wiki text to parse
# $newline  string put in front of the returned marker; its boolean value
#           is handed to internalParse() as the line-start flag
# $args     arguments handed on to internalParse()
#
# Returns $newline followed by the marker under which the parsed text was
# stored in $this->mStripState.
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
578
# Runs the individual conversion passes over already stripped text.
# The passes depend on each other and must stay in this order.
#
# $text       stripped wiki text
# $linestart  not used inside this function
# $args       passed to replaceVariables()
# $isMain     passed to formatHeadings()
#
# Returns the converted text.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$profileKey = "Parser::internalParse";
	wfProfileIn( $profileKey );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

	$text = $this->doHeadings( $text );

	if( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	# NOTE(review): the internal link pass runs twice in a row; this looks
	# deliberate but the reason is not visible here -- confirm before changing
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	# The category listing is appended by the first call only; the flag
	# is a member so that later calls on this object see it as well.
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $profileKey );
	return $text;
}
614
615
# Turns "== Heading ==" style lines into <h1>..<h6> elements.
# The deepest level is handled first, so that a run of six equal signs
# is not mistaken for a lower level.
#
# $text  text to convert
# Returns the converted text.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		# Marker for this level: $level equal signs
		$bar = str_repeat( "=", $level );
		$text = preg_replace( "/^{$bar}(.+){$bar}(\\s|$)/m",
			"<h{$level}>\\1</h{$level}>\\2", $text );
		--$level;
	}
	return $text;
}
625
# Applies doQuotes() to every line of the text on its own, so that quote
# markup never spans a line break.
#
# $text  text to convert
# Returns the converted lines, joined with newlines again.
/* private */ function doAllQuotes( $text )
{
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( "", $line, "" );
	}
	return implode( "\n", $converted );
}
635
# Recursive worker behind doAllQuotes(): converts runs of apostrophes into
# <em> (two apostrophes) and <strong> (three apostrophes) markup.
#
# $pre   text collected so far that still has to go inside the element
#        that is being built
# $text  remaining text to scan
# $mode  what is currently open: "" (nothing), "em", "strong",
#        "emstrong", "strongem" or "both"
#
# Returns the converted text.
/* private */ function doQuotes( $pre, $text, $mode )
{
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No further quote run: wrap what is left according to the mode
		$strongText = ($text == "") ? "" : "<strong>{$text}</strong>";
		$emText = ($text == "") ? "" : "<em>{$text}</em>";
		switch ( $mode ) {
			case "":
				return $pre . $text;
			case "em":
				return $pre . $emText;
			case "strong":
				return $pre . $strongText;
			case "strongem":
				return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$emText}</strong>";
			default:
				return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$strongText}</em>";
		}
	}

	# $m[1] is the text in front of the quote run, $m[2] what follows it
	$headStrong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
	$headEm = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";

	if ( substr ($m[2], 0, 1) == "'" ) {
		# A third apostrophe follows: this run toggles <strong>
		$m[2] = substr ($m[2], 1);
		switch ( $mode ) {
			case "em":
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
			case "strong":
				return $headStrong . $this->doQuotes ( "", $m[2], "" );
			case "emstrong":
			case "both":
				return $this->doQuotes ( "", $pre.$headStrong.$m[2], "em" );
			case "strongem":
				return "<strong>{$pre}{$headEm}</strong>" . $this->doQuotes ( "", $m[2], "em" );
			default:
				return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
		}
	}

	# Exactly two apostrophes: this run toggles <em>
	switch ( $mode ) {
		case "strong":
			return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
		case "em":
			return $headEm . $this->doQuotes ( "", $m[2], "" );
		case "emstrong":
			return "<em>{$pre}{$headStrong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
		case "strongem":
		case "both":
			return $this->doQuotes ( "", $pre.$headEm.$m[2], "strong" );
		default:
			return $m[1] . $this->doQuotes ( "", $m[2], "em" );
	}
}
683
684 # Note: we have to do external links before the internal ones,
685 # and otherwise take great care in the order of things here, so
686 # that we don't end up interpreting some URLs twice.
687
# Converts external links for every supported protocol, one protocol
# after the other.
#
# $text  text to convert
# Returns the converted text.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => value of the $autonumber argument of
	# subReplaceExternalLinks(), in the order they are processed
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
702
# Converts the external links of one protocol.
#
# $s           text to convert
# $protocol    protocol name without the colon, e.g. "http"
# $autonumber  when true, bracketed links without a label are shown as
#              running numbers ("[1]", "[2]", ...) and, if the parser
#              options allow external images, image URLs become images
#
# Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the protocol name while bare URLs are converted; it is
	# swapped back to the real protocol once that pass is done
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$barePattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	# Pass 1: URLs that are not inside brackets
	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
	}
	$bareAttribs = $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) );
	$bareLabel = wfEscapeHTML( "{$unique}:\\3" );
	$s = preg_replace( $barePattern, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$bareAttribs . ">" . $bareLabel . "</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Pass 2: bracketed links. The text is cut at every "[protocol:";
	# the first piece precedes any such link and is kept as it is.
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $pieces );
	$s = substr( $s, 1 );

	$plainForm = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$labelledForm = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $piece ) {
		if ( preg_match( $plainForm, $piece, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$label = "[" . ++$this->mAutonumber . "]";
			} else {
				$label = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $labelledForm, $piece, $m ) ) {
			# [url label]
			$link = "{$protocol}:{$m[1]}";
			$label = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed link: put the text back unchanged
			$s .= "[{$protocol}:" . $piece;
			continue;
		}

		$labelIsUrl = ( $link == $label
			|| preg_match( "!$protocol://" . preg_quote( $label, "/" ) . "/?$!", $link ) );
		if( $labelIsUrl ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$attribs = $sk->getExternalLinkAttributes( $link, $label );
		$s .= "<a href='{$link}'{$attribs}>{$label}</a>{$paren}{$trail}";
	}
	return $s;
}
769
770
# Converts [[internal links]] to HTML.
#
# Besides producing the link markup this registers language links and
# category links on $this->mOutput and records image and category links
# in $wgLinkCache.
#
# $s  text to convert
# Returns the converted text.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::replaceInternalLink" ;
	# Opened exactly once here and closed exactly once before the final
	# return; the loop below must not close it on its "continue" paths.
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# Cut the text at every "[["; the first piece precedes any link
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a usable title: put the text back unchanged
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Language link: collected on the output object, not shown inline
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a "#" in the first position, so the
		# "no fragment" test has to be a strict comparison with FALSE
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
926
927 # Some functions here used by doBlockLevels()
928 #
# Closes the block element named in $this->mLastSection, if there is one,
# and resets mLastSection and mInPre.
#
# Returns the closing tag followed by a newline, or "" when nothing was open.
/* private */ function closeParagraph()
{
	$closing = ( '' != $this->mLastSection )
		? "</" . $this->mLastSection . ">\n"
		: "";
	$this->mLastSection = "";
	$this->mInPre = false;
	return $closing;
}
939 # getCommon() returns the length of the longest common substring
940 # of both arguments, starting at the beginning of both.
941 #
# $st1, $st2  strings to compare
#
# Returns the number of leading bytes the two strings have in common;
# 0 when either string is empty.
/* private */ function getCommon( $st1, $st2 )
{
	# No need to look beyond the end of the shorter string
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square brackets are the string offset syntax every PHP version
	# understands; the curly-brace form is a fatal error as of PHP 8.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
953 # These next three functions open, continue, and close the list
954 # element appropriate to the prefix character passed into them.
955 #
# Opens the list element that belongs to the prefix character $char
# ("*", "#", ":" or ";"), closing any open paragraph first.
#
# Returns the HTML to emit. For ";" the mDTopen flag is raised so that
# the matching close/next call knows a <dt> is pending.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			# NOTE(review): this replaces, rather than appends to, the
			# text returned by closeParagraph() -- confirm that dropping
			# the closing tag here is intended.
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
971
# Produces the markup that separates two items of the list type given
# by the prefix character $char.
#
# For definition lists (":" and ";") the tag being closed depends on
# mDTopen, which is then updated to reflect the item being opened.
# Any other character yields an error comment.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$close = $this->mDTopen ? "</dt>" : "</dd>";
	$isTerm = ( ";" == $char );
	$this->mDTopen = $isTerm;

	return $close . ( $isTerm ? "<dt>" : "<dd>" );
}
988
# Closes the list element that belongs to the prefix character $char.
#
# Only "*", "#" and ":" are handled; for ":" the tag closed depends on
# (and clears) mDTopen. Any other character, including ";", returns an
# error comment without a trailing newline.
/* private */function closeList( $char )
{
	if ( "*" == $char ) {
		return "</li></ul>\n";
	}
	if ( "#" == $char ) {
		return "</li></ol>\n";
	}
	if ( ":" == $char ) {
		if ( !$this->mDTopen ) {
			return "</dd></dl>\n";
		}
		$this->mDTopen = false;
		return "</dt></dl>\n";
	}
	return "<!-- ERR 3 -->";
}
1004
# Walks through $text line by line and generates the block-level
# structure: paragraphs, indented <pre> sections and the lists made
# from lines starting with * # : or ;
#
# $text:      the text to process
# $linestart: when false, the first line is copied to the output
#             untouched (and without a trailing newline) before the
#             line-by-line processing starts
#
# Returns the processed text. mInPre, mInNowiki, mLastSection and
# mDTopen are used as state while the lines are processed.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$pref2 = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$nowikiOpenMatch = preg_match("/<span class=\"nowiki\"/", $oLine );
		$nowikiCloseMatch = preg_match("/<\\/span >/", $oLine );
		if ( $nowikiOpenMatch ) {
			# Capture whatever precedes the nowiki span on this line;
			# $nowikiOpenMatches is only read when $nowikiOpenMatch is set.
			preg_match("/^(.*)<span class=\"nowiki\"/", $oLine, $nowikiOpenMatches );
		}
		if (!$this->mInPre) {
			$this->mInPre = !empty($preOpenMatch);
		}
		if (!$this->mInNowiki) {
			$this->mInNowiki = !empty($nowikiOpenMatch);
		}
		if (
			!$this->mInPre && (!$this->mInNowiki ||
			($nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0) )
		)
		{
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is the prefix with ";" folded into ":"; it is what
			# gets remembered as $lastPrefix and compared against it.
			$pref2 = str_replace( ";", ":", $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ":" );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				# A closing match wins when a line has both kinds.
				$inBlockElem = !$closematch;
			} else if (
				!$inBlockElem && !$this->mInPre &&
				(!$this->mInNowiki || ($nowikiOpenMatch && trim($nowikiOpenMatches[1]) == '' ) ) )
			{
				# substr() rather than a direct offset read, so that an
				# empty line does not touch a non-existent offset 0.
				if ( " " == substr( $t, 0, 1 ) and trim($t) != '' and (!$this->mInNowiki || $nowikiOpenMatch && strlen($nowikiOpenMatches[1]) > 0 ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		if($nowikiCloseMatch) $this->mInNowiki = false;
		# While a paragraph break is pending ($paragraphStack holds the
		# tags to emit), the current line itself is not output.
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close every list level still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $output;
}
1177
# Returns the current value of the variable identified by the magic
# word ID $index, or NULL when $index is not one of the IDs handled
# here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Namespace name as given by the language object
			$value = $wgLang->getNsText($this->mTitle->getNamespace());
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") counts from 0; one is added before the lookup
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1211
# Rebuilds mVariables from scratch: for every ID in $wgVariableIDs the
# matching magic word adds the variable's current value to the array.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $id ) {
		$mw =& MagicWord::get( $id );
		$value = $this->getVariableValue( $id );
		$mw->addToArray( $this->mVariables, $value );
	}
}
1221
# Expands double- and triple-brace constructs in $text through the
# wfVariableSubstitution, wfArgSubstitution and wfBraceSubstitution
# callbacks.
#
# $text: text to process
# $args: arguments of the enclosing inclusion; they are pushed on
#        mArgStack for the duration of the call
#
# Variable and argument substitution only run for OT_HTML output; the
# final brace substitution pass runs for every output type.
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( "{", "}" ), "", $titleChars );

	$variableRegex = "/{{([$nonBraceChars]*?)}}/";
	$argumentRegex = "/(\\n?){{{([$titleChars]*?)}}}/";
	$templateRegex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";

	# This function is called recursively, so the arguments live on a stack
	$this->mArgStack[] = $args;

	# The callbacks find the parser through this global; rebinding a
	# global by reference has to go through the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		$text = preg_replace_callback( $variableRegex, "wfVariableSubstitution", $text );
		$text = preg_replace_callback( $argumentRegex, "wfArgSubstitution", $text );
	}
	$text = preg_replace_callback( $templateRegex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1259
# Replaces one double-brace match with the value of the variable named
# inside it.
#
# $matches: regex match array; [0] is the whole match, [1] the name
# Returns the variable's value and flags the output as containing old
# magic, or returns the match unchanged when the name is unknown.
function variableSubstitution( $matches )
{
	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
1270
# Expands one double-brace construct.
#
# $matches: regex match array
#   [0] the whole match
#   [1] an optional newline character before the braces
#   [2] the bit before the first |, containing only title characters
#   [3] the rest, starting with | when present
#
# The handlers are tried in this order: text containing a triple brace
# (returned untouched), SUBST, MSGNW/MSG/INT, NS, LOCALURL/LOCALURLE,
# internal variables and finally loading a page (default namespace
# NS_TEMPLATE). For OT_HTML output the result is then escaped (MSGNW)
# or run through stripParse() with the arguments of the inclusion.
#
# Returns the replacement text, or the match unchanged when nothing
# applied.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			# intval() alone is falsy for namespace 0, so "0" has to
			# be accepted explicitly as a numeric index.
			if ( intval( $part1 ) || $part1 == "0" ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored
		# under their trimmed name, all others get indexes 1, 2, ...
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always yields a string, so no further checks
				# on $name and $value are needed.
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1469
# Triple brace replacement -- used for template arguments
#
# $matches: regex match array; [0] is the whole match, [1] an optional
#           leading newline, [2] the argument name
# Returns the argument from the top of mArgStack run through
# stripParse(), or the match unchanged when no such argument exists.
function argSubstitution( $matches )
{
	$inputArgs = end( $this->mArgStack );
	$arg = trim( $matches[2] );

	if ( !array_key_exists( $arg, $inputArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $inputArgs[$arg], $matches[1], array() );
}
1484
# Counts one more inclusion of the entity $dbk and returns true as long
# as the count has not gone past MAX_INCLUDE_REPEAT, i.e. while the
# caller is still allowed to include this entity.
function incrementIncludeCount( $dbk )
{
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk]++;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1497
1498
# Cleans up HTML, removes dangerous tags and attributes
#
# $text: the text to clean
#
# HTML comments are removed first. The text is then cut at every "<";
# a fragment that parses as one of the allowed elements is re-emitted
# with its attributes passed through fixTagAttributes(), anything else
# gets its angle brackets escaped. With $wgUserHtml off no element is
# allowed. Unless $wgUseTidy is set, nesting is checked with a stack
# and tags left open at the end are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
	global $wgUseTidy, $wgUserHtml;
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
			"strike", "strong", "tt", "var", "div", "center",
			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
			"ruby", "rt" , "rb" , "rp", "p"
		);
		$htmlsingle = array(
			"br", "hr", "li", "dt", "dd"
		);
		$htmlnest = array( # Tags that can be nested--??
			"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
			"dl", "font", "big", "small", "sub", "sup"
		);
		$tabletags = array( # Can only appear inside table
			"td", "th", "tr"
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function; the call is
	# kept in case it matters elsewhere -- confirm and remove.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

	# Pieces of a fragment: optional slash, tag name, parameters,
	# closing bracket and the text up to the next "<"
	$tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			# A fragment that does not look like a tag at all is
			# escaped below, just like a tag that is not allowed.
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				$slash = $regs[1];
				$t = strtolower( $regs[2] );
				$params = $regs[3];
				$brace = $regs[4];
				$rest = $regs[5];

				$badtag = 0 ;
				if ( in_array( $t, $htmlelements ) ) {
					# Check our stack
					if ( $slash ) {
						# Closing a tag...
						if ( ! in_array( $t, $htmlsingle ) &&
						  ( $ot = array_pop( $tagstack ) ) != $t ) {
							# Not the innermost open tag: undo the pop
							array_push( $tagstack, $ot );
							$badtag = 1;
						} else {
							if ( $t == "table" ) {
								# Back to the stack saved when the
								# table was opened
								$tagstack = array_pop( $tablestack );
							}
							$newparams = "";
						}
					} else {
						# Keep track for later
						if ( in_array( $t, $tabletags ) &&
						  ! in_array( "table", $tagstack ) ) {
							$badtag = 1;
						} else if ( in_array( $t, $tagstack ) &&
						  ! in_array ( $t , $htmlnest ) ) {
							$badtag = 1 ;
						} else if ( ! in_array( $t, $htmlsingle ) ) {
							if ( $t == "table" ) {
								array_push( $tablestack, $tagstack );
								$tagstack = array();
							}
							array_push( $tagstack, $t );
						}
						# Strip non-approved attributes from the tag
						$newparams = $this->fixTagAttributes($params);
					}
					if ( ! $badtag ) {
						$rest = str_replace( ">", "&gt;", $rest );
						$text .= "<$slash$t $newparams$brace$rest";
						continue;
					}
				}
			}
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				$slash = $regs[1];
				$t = strtolower( $regs[2] );
				$params = $regs[3];
				$brace = $regs[4];
				$rest = $regs[5];

				if ( in_array( $t, $htmlelements ) ) {
					$newparams = $this->fixTagAttributes($params);
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1615
1616
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text:   HTML text containing the <h1> to <h6> headlines
 * $isMain: the table of contents is only inserted when this is true
 *
 * Returns the text with the rebuilt headlines (and the TOC, if any).
 *
 */

/* private */ function formatHeadings( $text, $isMain=true )
{
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();
	$sublevelCount = array();
	$refers = array();    # canonized headline => number of times seen
	$refcount = array();  # headline index => occurrence number of its anchor
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = "";
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Join the counters of all levels in use with dots
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
		# strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
		$canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= "_" . $refcount[$headlineCount];
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		# Each index is visited exactly once, so start from an empty string
		$head[$headlineCount] = "";
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# No [edit] link is added for the top block of text: it was
		# disabled because it broke block formatting, for example a
		# bullet point in the top line.
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1815
# Turns every "ISBN <number>" in $text into a link to the Booksources
# special page.
#
# The number is the run of digits, dashes and upper-case letters that
# follows "ISBN " and any further blanks; dashes are dropped from the
# number passed in the link's query. Where no number follows, the text
# is left exactly as it was.
#
# Returns the text with the links inserted.
/* private */ function magicISBN( $text )
{
	# explode() instead of the regex-based split(), which was removed
	# in PHP 7; the delimiter is a plain string anyway.
	$a = explode( "ISBN ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# strspn() measures the leading runs without reading offset 0
		# of a string that may be empty.
		$blankLength = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLength );
		$x = (string)substr( $x, $blankLength );

		$isbnLength = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLength );
		$x = (string)substr( $x, $isbnLength );

		$num = str_replace( "-", "", $isbn );

		if ( "" == $num ) {
			# Not a number after all: put back everything that was
			# consumed, including a run made of dashes only.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
# Turns every "RFC <number>" in $text into an external link.
#
# The number is the run of digits that follows "RFC " and any further
# blanks. The link target is the "rfcurl" message with $1 replaced by
# the number. Where no digits follow, the text is left as it was.
#
# Returns the text with the links inserted.
/* private */ function magicRFC( $text )
{
	# explode() instead of the regex-based split(), which was removed
	# in PHP 7; the delimiter is a plain string anyway.
	$a = explode( "RFC ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# strspn() measures the leading runs without reading offset 0
		# of a string that may be empty.
		$blankLength = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLength );
		$x = (string)substr( $x, $blankLength );

		$rfcLength = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLength );
		$x = (string)substr( $x, $rfcLength );

		if ( "" == $rfc ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
1882
# Entry point producing altered wiki markup (output type OT_WIKI).
#
# $text:       wiki markup to transform
# $title:      title object, stored by reference in mTitle
# $user:       user object handed on to pstPass2()
# $options:    parser options, stored in mOptions
# $clearState: whether to call clearState() first
#
# Returns the transformed markup.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Turn CR+LF line endings into plain LF
	$text = str_replace( "\r\n", "\n", $text );

	# Stripped sections are kept out of the second pass and put back
	# afterwards
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );

	return $this->unstrip( $text, $stripState );
}
1911
# Second pass of the pre-save transform.
#
# In order: runs replaceVariables(), expands the ~~~, ~~~~ and ~~~~~
# signatures, completes the context link shortcuts ([[|name]],
# [[name (context)|]], [[namespace:page|]]), trims trailing whitespace
# and removes the MAG_END marker.
#
# $text: wiki markup
# $user: user object supplying the name and "nickname" option used in
#        signatures
#
# Returns the transformed markup.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# Plain string replacement, longest marker first. With a regex
	# replacement, a "$" or "\" followed by a digit inside the name or
	# nickname would be taken for a backreference.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~~", $d, $text );
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised part of the current page's title
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1982
# Prime the member variables that parse() would otherwise initialise, so that
# code outside the class can safely call individual member functions.
#
#   $title       page title; stored in mTitle by reference
#   $options     stored in mOptions (presumably a ParserOptions object)
#   $outputType  stored in mOutputType; see the OT_* constants
#   $clearState  when true (the default) clearState() is invoked afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	# Caller asked to keep whatever state is already there
	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
1994
# Run replaceVariables() over a message text in OT_MSG mode, using the
# global $wgTitle as the title. A static flag makes nested invocations
# return their input unchanged, which prevents infinite recursion.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $busy = false;

	if ( $busy ) {
		# Already inside a transformMsg() call: hand the text back untouched
		return $text;
	}
	$busy = true;

	$this->mOutputType = OT_MSG;
	$this->mOptions = $options;
	$this->mTitle = $wgTitle;
	$this->clearState();

	$expanded = $this->replaceVariables( $text );

	$busy = false;
	return $expanded;
}
2014 }
2015
# Value object holding the result of a parse: the output text, the language
# and category links collected along the way, and a flag recording whether
# "old magic" was encountered.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor. Every argument is optional; mCacheTime always starts out
	# as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators. Each one returns whatever wfSetVar() returns
	# (presumably the previous value -- confirm against wfSetVar()).
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold the link lists and the old-magic flag of another ParserOutput into
	# this one. The text and the cache time are left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Append the OTHER object's category links. This used to append our own
		# language links, which corrupted the category list and lost $other's.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2049
# Bundle of settings that steer a parse. Values are copied from the site-wide
# $wg* globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	#
	# Read accessors
	#
	function getUseTeX() {
		return $this->mUseTeX;
	}
	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}
	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}
	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}
	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}
	function getSkin() {
		return $this->mSkin;
	}
	function getDateFormat() {
		return $this->mDateFormat;
	}
	function getEditSection() {
		return $this->mEditSection;
	}
	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}
	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}
	function getShowToc() {
		return $this->mShowToc;
	}

	#
	# Write accessors; each hands back the result of wfSetVar() / wfSetRef()
	#
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}
	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}
	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}
	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}
	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}
	function setSkin( $x ) {
		# The skin is the only option stored through wfSetRef()
		return wfSetRef( $this->mSkin, $x );
	}
	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}
	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}
	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}
	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}
	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# Factory: build a fresh ParserOptions and fill it in from $user
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Populate every option. Site-wide switches come from the $wg* globals;
	# skin and per-user preferences come from $userInput. When $userInput is
	# false-ish a new User object, flagged as loaded, is used instead.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site configuration
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseTeX = $wgUseTeX;

		# User preferences
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}


}
2122
2123 # Regex callbacks, used in Parser::replaceVariables
# Free-function shim: hands the match array to braceSubstitution() on the
# parser currently stored in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
	$replacement = $GLOBALS['wgCurParser']->braceSubstitution( $matches );
	return $replacement;
}
2129
# Free-function shim: hands the match array to argSubstitution() on the
# parser currently stored in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
	$replacement = $GLOBALS['wgCurParser']->argSubstitution( $matches );
	return $replacement;
}
2135
# Free-function shim: hands the match array to variableSubstitution() on the
# parser currently stored in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
	$replacement = $GLOBALS['wgCurParser']->variableSubstitution( $matches );
	return $replacement;
}
2141
2142 ?>