section anchor cleanup, unurlencode :
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
# Optional extensions: each one is loaded only when its switch is turned on.
# !empty() is used so that a switch which has never been defined counts as
# "off" instead of raising an undefined-index notice.
if ( !empty( $GLOBALS['wgUseWikiHiero'] ) ) {
	require_once( 'extensions/wikihiero/wikihiero.php' );
}
if ( !empty( $GLOBALS['wgUseTimeline'] ) ) {
	require_once( 'extensions/timeline/Timeline.php' );
}
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Value of the inclusion limit described in the comment block above
define( 'MAX_INCLUDE_REPEAT', 5 );

# Values stored in Parser::$mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): when passed as $tag, HTML comments
# (<!-- ... -->) are extracted instead of an <XML>-style tag pair.
# It must not collide with anything usable in wiki syntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Common prefix of the placeholder markers built by extractTags()
# and insertStripItem()
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor (PHP 4 style: named after the class).
# A new parser starts out with freshly reset per-parse state.
function Parser()
{
	$this->clearState();
}
69
# Reset every member that accumulates data during a parse, so the
# object can be reused for another text.
function clearState()
{
	# Collected output
	$this->mOutput = new ParserOutput();

	# Strip markers, template arguments and inclusion counters
	$this->mStripState = array();
	$this->mArgStack = array();
	$this->mIncludeCount = array();
	$this->mVariables = false;

	# Counter used for numbering bare external links
	$this->mAutonumber = 0;

	# Block-level bookkeeping
	$this->mLastSection = "";
	$this->mInPre = false;
	$this->mDTopen = false;
}
82
83 # First pass--just handle <nowiki> sections, pass the rest off
84 # to internalParse() which does all the real work.
85 #
86 # Returns a ParserOutput
87 #
# Convert wiki markup to HTML.
#
# Parameters:
#   $text        the wiki markup
#   $title       title object for the text; kept by reference in $this->mTitle
#   $options     options object; kept in $this->mOptions
#   $linestart   handed on to internalParse() and doBlockLevels()
#   $clearState  when true, clearState() is called before anything else
#
# Returns $this->mOutput with the rendered text set on it.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			"/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<hr *>/i" => '<hr />',
			"/<br *>/i" => '<br />',
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The lookahead leaves alone anything that already looks like an
			# entity: &name; &#123; or &#xAB; (hex digits only).
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		# With tidy enabled only the typographic and <center> fixes are applied here
		$fixtags = array(
			# french spaces, last one Guillemet-left
			"/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	# <nowiki> content goes back in after block-level processing
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
144
# Returns a random lowercase hexadecimal string: two numbers drawn from
# mt_rand( 0, 0x7fffffff ), each written with dechex() and joined together.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
149
150 # Replaces all occurrences of <$tag>content</$tag> in the text
151 # with a random marker and returns the new text. the output parameter
152 # $content will be an associative array filled with data on the form
153 # $unique_marker => content.
154
155 # If $content is already set, the additional entries will be appended
156
157 # If $tag is set to STRIP_COMMENTS, the function will extract
158 # <!-- HTML comments -->
159
# Parameters:
#   $tag          tag name without angle brackets, or STRIP_COMMENTS
#   $text         text to scan
#   $content      in/out: array of marker => extracted content; created when empty
#   $uniq_prefix  string placed at the start of every generated marker
#
# Returns $text with every <$tag>...</$tag> (or <!-- ... -->) replaced by
# its marker. A tag that is opened but never closed takes everything up to
# the end of the text as its content.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}

	# The delimiters do not change while scanning, so build them once
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = "/<!--/i";
		$closePattern = "/-->/i";
	} else {
		$openPattern = "/<\\s*$tag\\s*>/i";
		$closePattern = "/<\\/\\s*$tag\\s*>/i";
	}

	$n = 1;
	$stripped = "";

	while ( "" != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No (further) opening tag, or nothing follows it
			$text = "";
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag there is no second piece; stop
			# cleanly instead of reading an undefined offset
			$text = ( count( $q ) > 1 ) ? $q[1] : "";
		}
	}
	return $stripped;
}
191
192 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
193 # If $render is set, performs necessary rendering operations on plugins
194 # Returns the text, and fills an array with data needed in unstrip()
195 # If the $state is already a valid strip state, it adds to the state
196
197 # When $stripcomments is set, HTML comments <!-- like this -->
198 # will be stripped in addition to other tags. This is important
199 # for section editing, where these comments cause confusion when
200 # counting the sections in the wikisource
# Parameters:
#   $text           text to process
#   $state          in/out strip state: array of tag type => ( marker => replacement )
#   $stripcomments  when true, HTML comments are extracted as well
#
# Replacements are rendered when $this->mOutputType is OT_HTML; otherwise
# the original tag is stored back unchanged around its content.
# Returns the text with the extracted sections replaced by markers.
function strip( $text, &$state, $stripcomments = false )
{
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseWikiHiero']){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseTimeline']){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX is off: show the source literally. The content is
				# escaped the same way <nowiki> content is above, because
				# strip items are put back after the HTML tag cleanup in
				# internalParse() has already run.
				$math_content[$marker] = "&lt;math&gt;" .
					wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	$stripped = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $stripped as $type => $items ) {
			# A pre-existing state may lack some of the types;
			# start those from the new items alone
			$state[$type] = isset( $state[$type] ) ? $state[$type] + $items : $items;
		}
	} else {
		$state = $stripped;
	}
	return $text;
}
290
291 # always call unstripNoWiki() after this one
# Put back every stripped section except the 'nowiki' ones.
#
# Parameters:
#   $text   text containing strip markers
#   $state  strip state: array of tag type => ( marker => replacement )
#
# Returns the text with the markers replaced. A state that is not an
# array leaves the text unchanged.
function unstrip( $text, &$state )
{
	if ( !is_array( $state ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state, true ) as $type => $contentDict ) {
		if ( $type === 'nowiki' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
306 # always call this after unstrip() to preserve the order
# Put back the 'nowiki' sections of the strip state.
#
# Parameters:
#   $text   text containing strip markers
#   $state  strip state; only $state['nowiki'] ( marker => replacement ) is read
#
# Returns the text with the nowiki markers replaced. A state without a
# 'nowiki' entry leaves the text unchanged.
function unstripNoWiki( $text, &$state )
{
	if ( !isset( $state['nowiki'] ) || !is_array( $state['nowiki'] ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state['nowiki'], true ) as $marker => $content ) {
		$text = str_replace( $marker, $content, $text );
	}

	return $text;
}
316
317 # Add an item to the strip state
318 # Returns the unique tag which must be inserted into the stripped text
319 # The tag will be replaced with the original text in unstrip()
320
# Parameters:
#   $text   text to store
#   $state  in/out strip state; created when empty
#
# The text is stored under $state['item'] and the newly generated marker
# is returned.
function insertStripItem( $text, &$state )
{
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same set of tag types, in the same order, as strip() creates,
		# so that strip() can later merge into this state
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
335
336 # This method generates the list of subcategories and pages for a category
# Builds the HTML listing for a category page: a "subcategories" section
# followed by a section for the other pages, each a comma-separated list
# of links.
#
# Returns nothing when category magic is switched off in the options,
# "" when the current title is not in the category namespace, and the
# HTML fragment otherwise.
function categoryMagic ()
{
	global $wgLang , $wgUser ;

	# Doesn't use categories at all
	if ( !$this->mOptions->getUseCategoryMagic() ) {
		return ;
	}

	# This ain't a category page
	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) {
		return "" ;
	}

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$subcategoryLinks = array() ;
	$pageLinks = array() ;
	$rows = array () ;
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	# Read the whole result first; the links are made afterwards
	while ( $row = wfFetchObject ( $res ) ) {
		$rows[] = $row ;
	}

	# For all pages that link to this category
	foreach ( $rows as $row ) {
		$name = $wgLang->getNsText ( $row->cur_namespace ) ;
		if ( $name != "" ) {
			$name .= ":" ;
		}
		$name .= $row->cur_title ;

		if ( $row->cur_namespace == $cns ) {
			# Subcategory
			$subcategoryLinks[] = $sk->makeLink ( $name ) ;
		} else {
			# Page in this category
			$pageLinks[] = $sk->makeLink ( $name ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $subcategoryLinks ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $subcategoryLinks ) ;
	}

	# Showing pages in this category
	if ( count ( $pageLinks ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $pageLinks ) ;
	}

	return $r ;
}
393
# Returns the list of attribute names that fixTagAttributes() lets
# through (no scripting attributes). The list is assembled from groups;
# "width" appears in both the general and the table group.
function getHTMLattrs ()
{
	$general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
	$br = array( "clear" );
	$hr = array( "noshade" );
	$quote = array( "cite" ); # BLOCKQUOTE, Q
	$font = array( "size", "face", "color" );
	# For various lists, mostly deprecated but safe
	$lists = array( "type", "start", "value", "compact" );
	$tables = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	);
	$css = array( "id", "class", "name", "style" );

	return array_merge( $general, $br, $hr, $quote, $font, $lists, $tables, $css );
}
410
# Filter the attribute part of a tag.
#
# Parameters:
#   $t  attribute text, e.g. ' border="1" onclick="..."'
#
# Returns the trimmed text with every attribute whose name is not in
# getHTMLattrs() removed, or "" when a style attribute contains
# something matched by the check below.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier so that attribute
	# text coming from the page is never evaluated as PHP code.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( &$this, 'fixTagAttributesCallback' ),
		$t);
	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name and $m[3], when present, its value
# (including any surrounding double quotes).
# Returns 'name' or 'name=value' for an allowed name, '' otherwise.
/* private */ function fixTagAttributesCallback( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	$value = isset( $m[3] ) ? $m[3] : '';
	return ( $value !== '' ) ? $m[1] . '=' . $value : $m[1];
}
433
434 /* interface with html tidy, used if $wgUseTidy = true */
# Run a piece of HTML through the external tidy program.
#
# Parameters:
#   $text  HTML fragment; it is wrapped in a minimal XHTML document
#          before being piped to tidy
#
# Returns tidy's output. If tidy produced nothing for a non-empty input,
# the original text is returned with an HTML comment noting the failure.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy: appending to the global itself would add another
	# encoding flag on every call
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	# stdin and stdout are pipes, stderr is discarded
	$descriptorspec = array(
		0 => array("pipe", "r"),
		1 => array("pipe", "w"),
		2 => array("file", "/dev/null", "a")
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
481
# Convert wiki table markup to HTML tables, working line by line:
#   {|  opens a table (the rest of the line is its attributes)
#   |}  closes the current table
#   |-  starts a new row (the rest of the line is the row's attributes)
#   |   data cells, !  header cells, |+  caption
# Several cells may share a line, separated by || (or !! on a header line);
# within one cell, a single | separates attributes from content.
#
# The four stacks below hold one entry per currently open table, so
# tables may be nested. Tables still open at the end of the text are closed.
#
# Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after the two-character marker is attribute text;
			# fixTagAttributes() trims it, so a separating space is optional
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is written when the first cell of the row arrives
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Cells need a row; open one if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell of this table, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$parts = explode ( "|" , $theline , 2 ) ;
				if ( count ( $parts ) == 1 ) $cell = "{$z}<{$l}>{$parts[0]}" ;
				else $cell = "{$z}<{$l} ".$this->fixTagAttributes($parts[0]).">{$parts[1]}" ;
				$t[$k] .= $cell ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag it was opened with (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
581
582 # Parses the text and adds the result to the strip state
583 # Returns the strip tag
# Parameters:
#   $text     wiki markup to parse
#   $newline  string put in front of the returned marker; its boolean
#             value is also handed to internalParse() as $linestart
#   $args     handed to internalParse()
#
# Returns $newline followed by the strip marker standing for the parsed text.
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
590
# Runs the individual conversion passes over the text, in a fixed order.
#
# Parameters:
#   $text       text to convert (strip() has normally been applied already)
#   $linestart  accepted for the callers' benefit; not used in this function
#   $args       handed to replaceVariables()
#   $isMain     handed to formatHeadings()
#
# Returns the converted text. The first call on a parser object also
# appends the result of categoryMagic().
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$fname = "Parser::internalParse";
	wfProfileIn( $fname );

	# HTML cleanup and variable replacement come first
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

	$text = $this->doHeadings( $text );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	$text = $this->doAllQuotes( $text );

	# External links must be done before internal ones
	$text = $this->replaceExternalLinks( $text );
	# NOTE(review): the internal link pass runs twice; presumably so that
	# links left over from the first pass get resolved -- confirm before changing
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );

	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# The category listing is appended once per parser object
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
626
627
# Turns lines of the form "== Title ==" into <hN> elements, N being the
# number of equals signs (1 to 6). The longest markers are handled first
# so that "===" is not taken for "==".
/* private */ function doHeadings( $text )
{
	$fname = "Parser::doHeadings";
	wfProfileIn( $fname );
	for ( $level = 6; $level >= 1; --$level ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	wfProfileOut( $fname );
	return $text;
}
640
# Applies doQuotes() to every line of the text separately, so quote
# markup never spans a line break. Returns the lines joined again with "\n".
/* private */ function doAllQuotes( $text )
{
	$fname = "Parser::doAllQuotes";
	wfProfileIn( $fname );
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes ( "", $line, "" );
	}
	$outtext = implode( "\n", $converted );
	wfProfileOut( $fname );
	return $outtext;
}
654
# Recursive worker for '' (emphasis) and ''' (strong) markup in one line.
#
# Parameters:
#   $pre   text carried over from an enclosing call, still to be wrapped
#   $text  remaining text to scan
#   $mode  which elements are currently open: "", "em", "strong",
#          "emstrong", "strongem" or "both"
#
# Returns the HTML for $pre and $text. Elements still open when the text
# runs out are closed around whatever text remains.
/* private */ function doQuotes( $pre, $text, $mode )
{
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No quote markup left: close whatever is still open
		$strongText = ($text == "") ? "" : "<strong>{$text}</strong>";
		$emText = ($text == "") ? "" : "<em>{$text}</em>";
		switch ( $mode ) {
			case "":
				return $pre . $text;
			case "em":
				return $pre . $emText;
			case "strong":
				return $pre . $strongText;
			case "strongem":
				return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$emText}</strong>";
			default:
				return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$strongText}</em>";
		}
	}

	# $head is the text before the first '' and $rest what follows it
	$head = $m[1];
	$rest = $m[2];
	$headStrong = ($head == "") ? "" : "<strong>{$head}</strong>";
	$headEm = ($head == "") ? "" : "<em>{$head}</em>";

	if ( substr ($rest, 0, 1) == "'" ) {
		# A third apostrophe: this is a strong toggle
		$rest = substr ($rest, 1);
		switch ( $mode ) {
			case "em":
				return $this->doQuotes ( $head, $rest, ($head == "") ? "both" : "emstrong" );
			case "strong":
				return $headStrong . $this->doQuotes ( "", $rest, "" );
			case "emstrong":
			case "both":
				return $this->doQuotes ( "", $pre.$headStrong.$rest, "em" );
			case "strongem":
				return "<strong>{$pre}{$headEm}</strong>" . $this->doQuotes ( "", $rest, "em" );
			default:
				return $head . $this->doQuotes ( "", $rest, "strong" );
		}
	}

	# Exactly two apostrophes: this is an emphasis toggle
	switch ( $mode ) {
		case "strong":
			return $this->doQuotes ( $head, $rest, ($head == "") ? "both" : "strongem" );
		case "em":
			return $headEm . $this->doQuotes ( "", $rest, "" );
		case "emstrong":
			return "<em>{$pre}{$headStrong}</em>" . $this->doQuotes ( "", $rest, "strong" );
		case "strongem":
		case "both":
			return $this->doQuotes ( "", $pre.$headEm.$rest, "strong" );
		default:
			return $head . $this->doQuotes ( "", $rest, "em" );
	}
}
702
703 # Note: we have to do external links before the internal ones,
704 # and otherwise take great care in the order of things here, so
705 # that we don't end up interpreting some URLs twice.
706
# Runs subReplaceExternalLinks() once per supported protocol, in the
# order listed. The value attached to each protocol is the $autonumber
# argument: only http and https links are numbered.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}
	wfProfileOut( $fname );
	return $text;
}
721
# Converts the external links of one protocol.
#
# Parameters:
#   $s           text to process
#   $protocol    protocol name without the colon, e.g. "http"
#   $autonumber  when true, bracketed links without a label are shown as
#                [1], [2], ...; image URLs are turned into images only
#                when this is true and the options allow external images
#
# Bare URLs are handled first with regular expressions, then the
# bracketed forms [url] and [url label] are handled piece by piece.
# Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the protocol name while bare URLs are converted, so that
	# a URL already turned into a link is not matched a second time
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
			"/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
	}
	$s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		"</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split at every "[protocol:". The space added in
	# front is removed again from the first piece.
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# $e1 matches "url]rest", $e2 matches "url label]rest"
	$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $a as $line ) {
		if ( preg_match( $e1, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		} else if ( preg_match( $e2, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed bracketed link; put the piece back untouched
			$s .= "[{$protocol}:" . $line;
			continue;
		}
		# The label is quoted for the "!" delimiter used by this pattern
		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "!" ) . "/?$!", $link ) ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

	}
	return $s;
}
788
789
# Converts [[...]] links.
#
# The text is split at every "[[" and each piece is matched against the
# pattern "target|label]]trail". Depending on the target the piece becomes
# a language link or category link (recorded on the output object, nothing
# shown in place), an image, a media or special-page link, a self-link,
# or an ordinary page link. Pieces that do not match, or whose target is
# not a valid title, are put back unchanged.
#
# Returns the converted text.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# The space added in front is removed again from the first piece
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $useLinkPrefixExtension ) {
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	wfProfileOut( "$fname-setup" );

	foreach ( $a as $line ) {
		wfProfileIn( "$fname-prefixhandling" );
		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( "$fname-prefixhandling" );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Language link: recorded on the output, nothing shown in place
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				# Without a label the current page's own title is the sort key
				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a leading "#", so the test for "no anchor
		# in the link" has to be a strict comparison with FALSE
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
			( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
953
954 # Some functions here used by doBlockLevels()
955 #
# Returns the closing tag (plus newline) for the section recorded in
# $this->mLastSection, or "" when none is recorded, and resets both the
# recorded section and the "inside <pre>" flag.
/* private */ function closeParagraph()
{
	$closing = ( '' != $this->mLastSection )
		? "</" . $this->mLastSection . ">\n"
		: "";
	$this->mLastSection = "";
	$this->mInPre = false;
	return $closing;
}
966 # getCommon() returns the length of the longest common substring
967 # of both arguments, starting at the beginning of both.
968 #
# Returns the number of leading characters that $st1 and $st2 share,
# i.e. the length of their common prefix (0 when either is empty).
#
# Uses square-bracket string offsets: the curly-brace form ($s{$i})
# is no longer accepted by current PHP versions.
/* private */ function getCommon( $st1, $st2 )
{
    # Only the overlap of the two strings can possibly match.
    $limit = min( strlen( $st1 ), strlen( $st2 ) );

    for ( $i = 0; $i < $limit; ++$i ) {
        if ( $st1[$i] !== $st2[$i] ) {
            break;
        }
    }
    # $i is the index of the first mismatch, or $limit if none was found.
    return $i;
}
980 # These next three functions open, continue, and close the list
981 # element appropriate to the prefix character passed into them.
982 #
# Opens a new list for the given prefix character.
#
# Any open paragraph section is closed first; the returned string is that
# closing markup followed by the opening list and item tags:
#   "*" -> <ul><li>, "#" -> <ol><li>, ":" -> <dl><dd>, ";" -> <dl><dt>
# (";" also marks a definition term as open). For any other character only
# an error comment is returned.
/* private */ function openList( $char )
{
    $result = $this->closeParagraph();

    switch ( $char ) {
        case "*":
            $result .= "<ul><li>";
            break;
        case "#":
            $result .= "<ol><li>";
            break;
        case ":":
            $result .= "<dl><dd>";
            break;
        case ";":
            $result .= "<dl><dt>";
            $this->mDTopen = true;
            break;
        default:
            # Unknown prefix: the closing markup gathered above is discarded.
            $result = "<!-- ERR 1 -->";
    }

    return $result;
}
998
# Returns the markup that ends the current list item and starts the next
# one for the given prefix character.
#
# For definition lists (":" and ";") the closing tag depends on whether a
# <dt> is currently open, and $this->mDTopen is updated to reflect the
# kind of item being started. Unknown characters yield an error comment.
/* private */ function nextItem( $char )
{
    if ( "*" == $char || "#" == $char ) {
        return "</li><li>";
    }
    if ( ":" != $char && ";" != $char ) {
        return "<!-- ERR 2 -->";
    }

    # Close whichever definition-list element is currently open.
    $close = $this->mDTopen ? "</dt>" : "</dd>";

    # ";" starts a term, ":" starts a definition.
    $startsTerm = ( ";" == $char );
    $this->mDTopen = $startsTerm;

    return $close . ( $startsTerm ? "<dt>" : "<dd>" );
}
1015
# Returns the markup that closes the list identified by the given prefix
# character, terminated by a newline.
#
# ":" closes a definition list; if a <dt> is still open it is closed
# (and the open-term flag cleared) instead of a <dd>. Any other character,
# including ";", yields an error comment without a trailing newline.
/* private */function closeList( $char )
{
    if ( "*" == $char ) {
        return "</li></ul>" . "\n";
    }
    if ( "#" == $char ) {
        return "</li></ol>" . "\n";
    }
    if ( ":" == $char ) {
        if ( !$this->mDTopen ) {
            return "</dd></dl>" . "\n";
        }
        $this->mDTopen = false;
        return "</dt></dl>" . "\n";
    }
    return "<!-- ERR 3 -->";
}
1031
# Walks the text line by line and produces the block-level structure:
# paragraphs, preformatted sections and (nested) lists built from lines
# starting with "*", "#", ":" or ";".
#
# $text      - markup to process, lines separated by "\n"
# $linestart - when false, the first line is copied to the output
#              unprocessed
# Returns the processed text.
#
# String offsets use square brackets (the curly-brace form is rejected by
# current PHP), and the first character of a line is read with substr()
# so that blank lines do not trigger an uninitialised-offset error.
/* private */ function doBlockLevels( $text, $linestart ) {
    $fname = "Parser::doBlockLevels";
    wfProfileIn( $fname );

    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # Either false, or pending paragraph markup whose emission is deferred
    # until the next line has been inspected.
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match( "/<\\/pre/i", $oLine );
        $preOpenMatch = preg_match( "/<pre/i", $oLine );
        if ( !$this->mInPre ) {
            $this->mInPre = !empty( $preOpenMatch );
        }
        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, "*#:;" );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 treats ";" like ":" so that a term line and its
            # definition lines compare as the same list.
            $pref2 = str_replace( ";", ":", $pref );
            $t = substr( $oLine, $prefixLength );
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( ";" == substr( $pref, -1 ) ) {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                # FIXME: This is not foolproof. Something better in Tokenizer might help.
                if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                    $term = $match[1];
                    $output .= $term . $this->nextItem( ":" );
                    $t = $match[2];
                }
            }
        } elseif ( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            # Close the levels of the previous line that this line left.
            while ( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            # Open the levels that are new on this line.
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ";" == $char ) {
                    # FIXME: This is dupe of code above
                    if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                        $term = $match[1];
                        $output .= $term . $this->nextItem( ":" );
                        $t = $match[2];
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }

        if ( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i", $t );
            $closematch = preg_match(
                "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
                "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
            if ( $openmatch or $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                if ( $preOpenMatch and !$preCloseMatch ) {
                    $this->mInPre = true;
                }
                $inBlockElem = $closematch ? false : true;
            } else if ( !$inBlockElem && !$this->mInPre ) {
                # substr() instead of an offset read: $t may be empty here.
                if ( " " == substr( $t, 0, 1 ) and trim( $t ) != '' ) {
                    // pre
                    if ( $this->mLastSection != 'pre' ) {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph().'<pre>';
                        $this->mLastSection = 'pre';
                    }
                } else {
                    // paragraph
                    if ( '' == trim( $t ) ) {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack.'<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            if ( $this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = "<p>";
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ( $this->mLastSection != 'p' ) {
                            $output .= $this->closeParagraph().'<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        # While paragraph markup is pending, the (blank) line is not emitted.
        if ( $paragraphStack === false ) {
            $output .= $t."\n";
        }
    }

    # Close any lists still open after the last line.
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( "" != $this->mLastSection ) {
        $output .= "</" . $this->mLastSection . ">";
        $this->mLastSection = "";
    }

    wfProfileOut( $fname );
    return $output;
}
1190
# Returns the current value for the magic variable identified by $index
# (one of the MAG_* constants handled below), or NULL for any other index.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;

    switch ( $index ) {
        case MAG_CURRENTMONTH:
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Namespace name as given by the language object (patch by Dori),
            # rather than the canonical namespace name.
            $value = $wgLang->getNsText( $this->mTitle->getNamespace() );
            break;
        case MAG_CURRENTDAYNAME:
            # date("w") is 0-based; the language object is passed w + 1.
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }

    return $value;
}
1224
# Rebuilds $this->mVariables from scratch: for every id listed in
# $wgVariableIDs the corresponding magic word adds itself to the array
# together with the value reported by getVariableValue().
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1234
# Expands double- and triple-brace constructs in $text.
#
# $text - markup to process
# $args - arguments of the enclosing template call; pushed on
#         $this->mArgStack for the duration of the call because this
#         function is re-entered recursively
# Returns the text after substitution.
#
# Variable ({{X}}) and argument ({{{X}}}) substitution only run for HTML
# output; the template/brace substitution pass runs for every output type.
# The callbacks are global functions, so the parser publishes itself as
# $wgCurParser before invoking them.
/* private */ function replaceVariables( $text, $args = array() )
{
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }
    $titleChars = Title::legalChars();
    $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    if ( $this->mOutputType == OT_HTML ) {
        # Variable substitution
        $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

        # Argument substitution
        $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
    }

    # Template substitution
    $regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
    $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1272
# Callback body for {{variable}} matches.
#
# $matches[1] is the text between the braces. When it names a known
# variable its value is returned and the output is flagged as containing
# old magic; otherwise the whole match is returned untouched.
function variableSubstitution( $matches )
{
    $name = $matches[1];

    if ( !array_key_exists( $name, $this->mVariables ) ) {
        return $matches[0];
    }

    $this->mOutput->mContainsOldMagic = true;
    return $this->mVariables[$name];
}
1283
# Callback body for {{...}} matches found by replaceVariables().
#
# $matches[1] - optional newline character before the braces
# $matches[2] - the part before the first "|" (title characters only)
# $matches[3] - "" or the argument list, starting with "|"
#
# The handlers below are tried in order until one marks the construct as
# found: triple braces, SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE,
# internal variables, and finally loading a page from the database.
# Returns the replacement text, or the unchanged match if nothing applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;

    $found = false;
    $nowiki = false;
    $noparse = false;
    $title = NULL;

    $newline = $matches[1];
    $part1 = $matches[2];
    # If the third subpattern matched anything, it will start with |
    if ( $matches[3] !== "" ) {
        $args = explode( "|", substr( $matches[3], 1 ) );
    } else {
        $args = array();
    }
    $argc = count( $args );

    # {{{}}} : leave triple-brace constructs alone
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse = true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs = MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            if ( intval( $part1 ) ) {
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $index ) ) {
                    $text = $wgLang->getNsText( $index );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal = MagicWord::get( MAG_LOCALURL );
        $mwLocalE = MagicWord::get( MAG_LOCALURLE );

        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if any, is passed on to the URL method.
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
        # Clean up argument array: "name=value" arguments are stored under
        # their trimmed name, all others under consecutive numbers from 1.
        $assocArgs = array();
        $index = 1;
        foreach ( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$index++] = $arg;
            } else {
                # trim() always yields a string, so no further checks are
                # needed before storing the pair.
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, $newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    if ( !$found ) {
        return $matches[0];
    }
    return $text;
}
1482
1483 # Triple brace replacement -- used for template arguments
# Callback body for {{{argument}}} matches.
#
# $matches[1] is the optional newline before the braces and $matches[2]
# the argument name. If the name (trimmed) exists in the argument set on
# top of $this->mArgStack, its value is run through stripParse() and
# returned; otherwise the match is returned unchanged.
function argSubstitution( $matches )
{
    $inputArgs = end( $this->mArgStack );
    $argName = trim( $matches[2] );

    if ( !array_key_exists( $argName, $inputArgs ) ) {
        return $matches[0];
    }

    return $this->stripParse( $inputArgs[$argName], $matches[1], array() );
}
1497
1498 # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity identified by $dbk and reports
# whether it is still within the MAX_INCLUDE_REPEAT limit.
#
# Returns true while the (incremented) count does not exceed the limit,
# false afterwards. The counter is incremented in either case.
function incrementIncludeCount( $dbk )
{
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }
    $this->mIncludeCount[$dbk]++;

    return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1510
1511
1512 # Cleans up HTML, removes dangerous tags and attributes
# Cleans up HTML in $text and returns the result.
#
# HTML comments are removed. The text is then split at every "<"; each
# piece that parses as a tag from the whitelist (only populated when
# $wgUserHtml is set) is re-emitted with its attributes passed through
# fixTagAttributes(), every other piece has its "<" and ">" escaped.
#
# Without $wgUseTidy the function additionally tracks open tags so that
# mismatched closing tags, table cells outside a table and illegal
# nesting are escaped, and still-open tags are closed at the end.
#
# Pieces that do not look like a tag are detected through the return
# value of preg_match() instead of silencing the resulting notices.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    if ( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
            "td", "th", "tr"
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    # Table tags are treated like single tags: they are never pushed on
    # the tag stack.
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used in this function; the call is
    # kept in case getHTMLattrs() has side effects - confirm and remove.
    $htmlattrs = $this->getHTMLattrs();

    # Remove HTML comments
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

    # Groups: 1 = "/" of a closing tag, 2 = tag name, 3 = attributes,
    # 4 = closing bracket (optionally "/>"), 5 = text up to the next "<".
    $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    if ( !$wgUseTidy ) {
        $tagstack = array(); $tablestack = array();
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                # Not a tag at all: escape it.
                $text .= "&lt;" . str_replace( ">", "&gt;", $x );
                continue;
            }
            $slash = $regs[1];
            $t = strtolower( $regs[2] );
            $params = $regs[3];
            $brace = $regs[4];
            $rest = $regs[5];

            $badtag = 0;
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    if ( ! in_array( $t, $htmlsingle ) &&
                      ( $ot = @array_pop( $tagstack ) ) != $t ) {
                        # Does not match the innermost open tag: put that
                        # tag back and reject this one.
                        @array_push( $tagstack, $ot );
                        $badtag = 1;
                    } else {
                        if ( $t == "table" ) {
                            # Restore the stack saved when the table opened.
                            $tagstack = array_pop( $tablestack );
                        }
                        $newparams = "";
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                      ! in_array( "table", $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                      ! in_array( $t, $htmlnest ) ) {
                        $badtag = 1;
                    } else if ( ! in_array( $t, $htmlsingle ) ) {
                        if ( $t == "table" ) {
                            # A table starts a fresh stack; the outer one
                            # is saved until the table is closed.
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes( $params );
                }
                if ( ! $badtag ) {
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            $text .= "&lt;" . str_replace( ">", "&gt;", $x );
        }
        # Close off any remaining tags
        while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
            $text .= "</$t>\n";
            if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x );
                continue;
            }
            $slash = $regs[1];
            $t = strtolower( $regs[2] );
            $params = $regs[3];
            $brace = $regs[4];
            $rest = $regs[5];

            if ( in_array( $t, $htmlelements ) ) {
                $newparams = $this->fixTagAttributes( $params );
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
            } else {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x );
            }
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1628
1629
1630 /*
1631 *
1632 * This function accomplishes several tasks:
1633 * 1) Auto-number headings if that option is enabled
1634 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1635 * 3) Add a Table of contents on the top for users who have enabled the option
1636 * 4) Auto-anchor headings
1637 *
1638 * It loops through all headlines, collects the necessary data, then splits up the
1639 * string and re-inserts the newly formatted headlines.
1640 *
1641 */
1642
# Post-processes the <h1>..<h6> headings of already rendered text:
#   1) auto-numbers headings if that option is enabled
#   2) adds an [edit] link / right-click script to each section when the
#      options ask for it and the title is editable
#   3) builds a table of contents and inserts it after the first block
#      of text (only when $isMain is true)
#   4) gives every heading an anchor derived from its text
#
# $text   - rendered text containing the heading tags
# $isMain - when false, the table of contents is not inserted
# Returns the text with the rebuilt headings.
#
# The anchor text is now derived from the fully unstripped heading: the
# result of unstrip() is passed on to unstripNoWiki() instead of being
# overwritten by a second pass over the raw heading.
/* private */ function formatHeadings( $text, $isMain=true )
{
    global $wgInputEncoding;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    if ( !$this->mTitle->userCanEdit() ) {
        $showEditLink = 0;
        $rightClickHack = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if ( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if ( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now
    # $matches[1] = level digit, [2] = rest of the opening tag, [3] = content
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if ( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC
    $mw =& MagicWord::get( MAG_FORCETOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toclevel = 0;
    $toc = "";
    $full = "";
    $head = array();          # rebuilt heading markup, by heading index
    $sublevelCount = array(); # number of headings seen per level
    $refers = array();        # occurrences of each canonized heading
    $refcount = array();      # occurrence number of each heading, by index
    $level = 0;
    $prevlevel = 0;
    foreach ( $matches[3] as $headline ) {
        $numbering = "";
        if ( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];
        if ( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if ( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1] = 0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        # count number of headlines for each level
        if ( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;
        if ( $doNumberHeadings || $doShowToc ) {
            # Join the non-empty counters of all levels up to this one
            # with dots, e.g. "2.1.3".
            $dot = 0;
            for ( $i = 1; $i <= $level; $i++ ) {
                if ( !empty( $sublevelCount[$i] ) ) {
                    if ( $dot ) {
                        $numbering .= ".";
                    }
                    $numbering .= $sublevelCount[$i];
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );
        $canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

        # strip out HTML
        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
        $tocline = trim( $canonized_headline );
        $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
        # Turn the URL encoding into anchor-friendly text: keep colons
        # readable and use "." instead of "%" as the escape character.
        $replacearray = array(
            '%3A' => ':',
            '%' => '.'
        );
        $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

        # count how many in assoc. array so we can track dupes in anchors
        if ( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $refcount[$headlineCount] = $refers[$canonized_headline];

        # Prepend the number to the heading text
        if ( $doNumberHeadings || $doShowToc ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if ( $doNumberHeadings && count( $matches[3] ) > 1 ) {
                # the two are different if the line contains a link
                $headline = $numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section;
        # repeated headings get "_2", "_3", ... appended.
        $anchor = $canonized_headline;
        if ( $refcount[$headlineCount] > 1 ) {
            $anchor .= "_" . $refcount[$headlineCount];
        }
        if ( $doShowToc ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }

        $sectionHtml = "";
        if ( $showEditLink ) {
            $sectionHtml .= $sk->editSectionLink($headlineCount+1);
        }

        # Add the edit section span
        if ( $rightClickHack ) {
            $headline = $sk->editSectionScript($headlineCount+1,$headline);
        }

        # give headline the correct <h#> tag
        $sectionHtml .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
        $head[$headlineCount] = $sectionHtml;

        $headlineCount++;
    }

    if ( $doShowToc ) {
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines
    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
    $i = 0;

    foreach ( $blocks as $block ) {
        # (An [edit] link for the top block used to be added here; it is
        # disabled because it broke block formatting, for example a
        # bullet point in the top line.)
        $full .= $block;
        if ( $doShowToc && !$i && $isMain ) {
            # Top anchor now in skin
            $full .= $toc;
        }

        if ( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1832
# Turns every "ISBN <number>" in $text into a link to the Booksources
# special page and returns the result.
#
# The number is the run of characters from $valid that follows "ISBN "
# (after optional extra spaces); dashes are dropped for the link target.
# When no usable number follows, the text is put back without a link
# (any run of dashes directly after the spaces is dropped in that case).
#
# Uses explode() and strspn() rather than the removed split() function
# and per-character offset reads, which also makes a trailing "ISBN "
# at the very end of the text safe.
/* private */ function magicISBN( $text )
{
    # The leading space guarantees a non-empty first piece even when the
    # text itself starts with "ISBN ".
    $a = explode( "ISBN ", " $text" );
    if ( count( $a ) < 2 ) {
        return $text;
    }
    $text = substr( array_shift( $a ), 1 );
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    foreach ( $a as $x ) {
        # Extra spaces between "ISBN " and the number.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        # The number itself, then whatever follows it.
        $isbnLen = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLen );
        $x = (string)substr( $x, $isbnLen );

        $num = str_replace( "-", "", $isbn );
        $num = str_replace( " ", "", $num );

        if ( "" == $num ) {
            $text .= "ISBN $blank$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
            $text .= "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    return $text;
}
# Turns every "RFC <digits>" in $text into an external link and returns
# the result.
#
# The link target is the "rfcurl" message with "$1" replaced by the RFC
# number. When no digits follow "RFC " the text is put back unchanged.
#
# Uses explode() and strspn() rather than the removed split() function
# and per-character offset reads, which also makes a trailing "RFC " at
# the very end of the text safe.
/* private */ function magicRFC( $text )
{
    # The leading space guarantees a non-empty first piece even when the
    # text itself starts with "RFC ".
    $a = explode( "RFC ", " $text" );
    if ( count( $a ) < 2 ) {
        return $text;
    }
    $text = substr( array_shift( $a ), 1 );
    $valid = "0123456789";

    foreach ( $a as $x ) {
        # Extra spaces between "RFC " and the number.
        $blankLen = strspn( $x, " " );
        $blank = str_repeat( " ", $blankLen );
        $x = (string)substr( $x, $blankLen );

        # The number itself, then whatever follows it.
        $rfcLen = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLen );
        $x = (string)substr( $x, $rfcLen );

        if ( "" == $rfc ) {
            $text .= "RFC $blank$x";
        } else {
            $url = wfmsg( "rfcurl" );
            $url = str_replace( "$1", $rfc, $url);
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
1899
# Entry point producing altered wiki markup (output type OT_WIKI).
#
# $text       - the submitted wiki text
# $title      - title being saved, stored by reference as $this->mTitle
# $user       - passed on to pstPass2()
# $options    - stored as $this->mOptions
# $clearState - when true, clearState() is called before processing
# Returns the transformed wiki text.
#
# Steps: normalise "\r\n" line ends to "\n", strip protected sections,
# run pstPass2(), then put the stripped sections back.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_WIKI;

    if ( $clearState ) {
        $this->clearState();
    }

    $text = str_replace( "\r\n", "\n", $text );

    $stripState = false;
    $text = $this->strip( $text, $stripState, false );
    $text = $this->pstPass2( $text, $user );
    $text = $this->unstrip( $text, $stripState );
    $text = $this->unstripNoWiki( $text, $stripState );

    return $text;
}
1929
# Second pass of the pre-save transform. Returns the transformed text.
#
#  - runs replaceVariables() (with output type OT_WIKI only
#    {{subst:xxx}} type tags are processed)
#  - expands signatures: ~~~~~ -> date, ~~~~ -> user link + date,
#    ~~~ -> user link
#  - completes context links such as [[|name]] and [[name (context)|]]
#  - trims trailing whitespace and removes the MAG_END marker
#
# Signatures are expanded with str_replace(): the user name and nickname
# are inserted literally, so "$1" or "\1" inside a nickname can no longer
# be misread as a pattern backreference.
/* private */ function pstPass2( $text, &$user )
{
    global $wgLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) { $k = $n; }
    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
      " (" . date( "T" ) . ")";
    if ( isset( $wgLocaltimezone ) ) putenv("TZ=$oldtz");

    $userLink = "[[" . $wgLang->getNsText(
      Namespace::getUser() ) . ":$n|$k]]";

    # Longest run of tildes first, so shorter forms only see what is left.
    $text = str_replace( "~~~~~", $d, $text );
    $text = str_replace( "~~~~", "$userLink $d", $text );
    $text = str_replace( "~~~", $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
      # [[ns:page (cont)|]]

    # The context of the page being saved is the parenthesised part of
    # its own title, if it has one.
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
2000
2001 # Set up some variables which are usually set up in parse()
2002 # so that an external function can call some class members with confidence
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	# Store the caller-supplied context on the parser.
	#   $title       stored by reference in mTitle
	#   $options     stored in mOptions
	#   $outputType  stored in mOutputType
	#   $clearState  when true (the default), clearState() is called
	#                once the three members above have been set
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		# Caller asked to keep the current parser state
		return;
	}
	$this->clearState();
}
2012
function transformMsg( $text, $options ) {
	# Runs replaceVariables() over $text with the output type set to
	# OT_MSG, using the global $wgTitle as the title and $options as
	# the parser options. Returns the resulting text.
	global $wgTitle;

	# Flag that stays true while a call is in progress; a nested call
	# made during that time gets its input back untouched, which
	# prevents infinite recursion.
	static $inProgress = false;

	if ( !$inProgress ) {
		$inProgress = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$expanded = $this->replaceVariables( $text );

		$inProgress = false;
		$text = $expanded;
	}

	return $text;
}
2032 }
2033
# Holds the result of a parse: the output text together with the
# language links, category links and "contains old magic" flag that
# belong to it, plus a cache timestamp.
class ParserOutput
{
	var $mText;             # Output text
	var $mLanguageLinks;    # Array of language links
	var $mCategoryLinks;    # Array of category links
	var $mContainsOldMagic; # Boolean flag; OR-ed together by merge()
	var $mCacheTime;        # Used in ParserCache

	# Constructor.
	#   $text              initial output text
	#   $languageLinks     initial array of language links
	#   $categoryLinks     initial array of category links
	#   $containsOldMagic  initial value of the old-magic flag
	# The cache time always starts out as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators. Each one hands the member and the new value to wfSetVar()
	# and returns whatever wfSetVar() returns (presumably the previous
	# value -- confirm against wfSetVar's definition).
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold the link metadata of another ParserOutput into this one.
	# Language links and category links of $other are appended to the
	# corresponding lists of this object; the old-magic flag becomes
	# true if it is set on either object. mText and mCacheTime are
	# left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Categories must come from $other's category list. Merging in
		# $this->mLanguageLinks here would drop $other's categories and
		# pollute the category list with language links.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2067
# Bundle of settings handed to the parser. Values are filled in from
# site-wide globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	#
	# Read accessors: each returns the matching member unchanged.
	#
	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	#
	# Write accessors: each passes the member and the new value to
	# wfSetVar() (wfSetRef() for the skin) and returns that helper's result.
	#
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setSkin( $x ) {
		return wfSetRef( $this->mSkin, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# Build a new ParserOptions object and initialise it from $user.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Fill in every option. Site-wide settings are copied from the
	# $wg* globals; the skin and the remaining preferences are read
	# from the user object. When $userInput is empty, a fresh User
	# marked as loaded is used in its place.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates;
		global $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$prefSource =& $userInput;
		} else {
			$prefSource = new User;
			$prefSource->setLoaded( true );
		}

		# Site configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user preferences
		$this->mSkin =& $prefSource->getSkin();
		$this->mDateFormat = $prefSource->getOption( "date" );
		$this->mEditSection = $prefSource->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $prefSource->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $prefSource->getOption( "numberheadings" );
		$this->mShowToc = $prefSource->getOption( "showtoc" );
	}

}
2140
2141 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
	# Plain-function callback: hands the regex match array to the
	# braceSubstitution() method of the parser currently stored in
	# the global $wgCurParser and returns its result.
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
2147
function wfArgSubstitution( $matches )
{
	# Plain-function callback: hands the regex match array to the
	# argSubstitution() method of the parser currently stored in
	# the global $wgCurParser and returns its result.
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
2153
function wfVariableSubstitution( $matches )
{
	# Plain-function callback: hands the regex match array to the
	# variableSubstitution() method of the parser currently stored in
	# the global $wgCurParser and returns its result.
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2159
2160 ?>