Fixed bug in edit conflict merge feature -- didn't decompress old_text
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
7 }
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
10 }
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Maximum number of inclusions of any given page; see the
# "Variable substitution O(N^2) attack" note above.
define( 'MAX_INCLUDE_REPEAT', 5 );

# Values accepted by Parser::$mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): passing it as $tag makes that
# function extract <!-- HTML comments --> instead of <XML>-style
# tags. It must never collide with anything usable in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Common prefix of the placeholder markers generated while stripping
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor (PHP 4 style: named after the class).
# Puts the freshly created parser into its initial, empty state.
function Parser()
{
	$this->clearState();
}
69
# Resets all per-parse state so the same Parser object can be reused
# for another text. Called by the constructor and, unless the caller
# opts out, at the start of parse().
#
# NOTE(review): internalParse() sets $this->categoryMagicDone, which is
# not reset here -- confirm whether that is intended.
function clearState()
{
	$this->mOutput = new ParserOutput;
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mDTopen = false;
	# $mInPre is declared as "cleared with clearState()" but used to be
	# left untouched, so a stale value could leak into the next parse.
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();
}
81
82 # First pass--just handle <nowiki> sections, pass the rest off
83 # to internalParse() which does all the real work.
84 #
85 # Returns a ParserOutput
86 #
# Main entry point for rendering: converts wiki markup to HTML.
#
# $text       wiki markup to convert
# $title      title object of the page being parsed (kept by reference)
# $options    ParserOptions object controlling the conversion
# $linestart  passed on to internalParse() and doBlockLevels()
# $clearState when true (default) the parser state is reset first
#
# Returns the ParserOutput object ($this->mOutput) with its text set.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before
	# doBlockLevels. The table is built in the order the replacements
	# must run; the ampersand rule has to stay last so that it sees the
	# &nbsp; entities inserted by the french-space rules.
	$fixtags = array(
		# french spaces, last one Guillemet-left
		"/ (\\?|:|!|\\302\\273)/i" => "&nbsp;\\1",
		# french spaces, Guillemet-right
		"/(\\302\\253) /i" => "\\1&nbsp;"
	);
	if ( !$wgUseTidy ) {
		# Without tidy we have to close these empty elements ourselves
		$fixtags["/<hr *>/i"] = '<hr/>';
		$fixtags["/<br *>/i"] = '<br/>';
	}
	$fixtags["/<center *>/i"] = '<div class="center">';
	$fixtags["/<\\/center *>/i"] = '</div>';
	if ( !$wgUseTidy ) {
		# Clean up spare ampersands; note that we probably ought to be
		# more careful about named entities.
		# Fixed: the lookahead used to start with a stray ':' and the
		# hex class was "A-fa-f", which also accepted G-Z and punctuation.
		$fixtags['/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/'] = '&amp;';
	}
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	if ( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
141
# Builds the random part of a placeholder marker: two random integers
# in the range 0..0x7fffffff, each rendered as lower-case hex, joined.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
146
147 # Replaces all occurrences of <$tag>content</$tag> in the text
148 # with a random marker and returns the new text. the output parameter
149 # $content will be an associative array filled with data on the form
150 # $unique_marker => content.
151
152 # If $content is already set, the additional entries will be appended
153
154 # If $tag is set to STRIP_COMMENTS, the function will extract
155 # <!-- HTML comments -->
156
# Replaces every <$tag>content</$tag> section of $text with a unique
# marker and returns the resulting text.
#
# $tag         tag name, or STRIP_COMMENTS to extract <!-- comments -->
# $text        text to scan
# $content     in/out: associative array marker => extracted content;
#              existing entries are kept and new ones appended
# $uniq_prefix string every generated marker starts with
#
# A tag that is opened but never closed swallows the rest of the text.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = "";

	# The delimiters do not change while scanning, so build them once
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = "/<!--/i";
		$closePattern = "/-->/i";
	} else {
		$openPattern = "/<\\s*$tag\\s*>/i";
		$closePattern = "/<\\/\\s*$tag\\s*>/i";
	}

	while ( "" != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No (further) opening tag, or nothing follows it
			$text = "";
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing tag there is no second piece; this used
			# to read an undefined array element.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
188
189 # Strips and renders <nowiki>, <hiero>, <timeline>, <math>, <pre>
190 # If $render is set, performs necessary rendering operations on plugins
191 # Returns the text, and fills an array with data needed in unstrip()
192 # If the $state is already a valid strip state, it adds to the state
193
194 # When $stripcomments is set, HTML comments <!-- like this -->
195 # will be stripped in addition to other tags. This is important
196 # for section editing, where these comments cause confusion when
197 # counting the sections in the wikisource
# Removes <nowiki>, <hiero>, <timeline>, <math> and <pre> sections (and
# optionally HTML comments) from $text, replacing each with a marker.
#
# $text          text to process
# $state         in/out strip state: array of tag type => (marker =>
#                replacement text); merged with any existing state
# $stripcomments when true, <!-- comments --> are stripped as well
#
# When the output type is OT_HTML the stored replacement is the rendered
# or escaped form of the section; otherwise it is the original markup,
# so that unstrip() restores the text unchanged.
# Returns the text with markers in place of the stripped sections.
function strip( $text, &$state, $stripcomments = false )
{
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseWikiHiero']){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseTimeline']){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source literally. The closing tag
				# used to be emitted as "&lt;math&gt;" (missing slash), and
				# the content bypassed all escaping because stripped text
				# is re-inserted after the HTML clean-up; escape it the
				# same way as <nowiki> content.
				$math_content[$marker] = "&lt;math&gt;" .
					wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Listed in extraction order; unstrip() relies on walking the state
	# backwards.
	$extracted = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $extracted as $type => $items ) {
			# A state started by insertStripItem() may lack some keys;
			# adding an array to a missing entry is a fatal error.
			if ( isset( $state[$type] ) ) {
				$state[$type] = $state[$type] + $items;
			} else {
				$state[$type] = $items;
			}
		}
	} else {
		$state = $extracted;
	}
	return $text;
}
287
# Puts the content removed by strip() / insertStripItem() back into
# $text by replacing every marker with its stored replacement.
#
# $text  text containing markers
# $state strip state: array of tag type => (marker => replacement)
#
# Both levels are walked in reverse order; otherwise nested tags would
# be corrupted. Returns the text with the markers expanded.
function unstrip( $text, &$state )
{
	if ( !is_array( $state ) ) {
		return $text;
	}
	# Iterating reversed copies neither moves the internal pointer of the
	# caller's array nor stops early on an entry that happens to be false,
	# as the previous end()/prev() loops did.
	foreach ( array_reverse( $state, true ) as $contentDict ) {
		if ( !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
300
301 # Add an item to the strip state
302 # Returns the unique tag which must be inserted into the stripped text
303 # The tag will be replaced with the original text in unstrip()
304
# Stores $text in the strip state under a freshly generated marker.
#
# $text  text that unstrip() should later put in place of the marker
# $state in/out strip state; created here when still empty
#
# Returns the marker, which the caller must insert into the stripped
# text at the position where $text belongs.
function insertStripItem( $text, &$state )
{
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same keys and order as the state built by strip(), which merges
		# into every one of them, plus the 'item' bucket filled below.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'item' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
319
320 # This method generates the list of subcategories and pages for a category
# Generates the list of subcategories and pages for a category page.
#
# Returns an HTML fragment, or "" when category magic is disabled in
# the parser options or the current title is not in the category
# namespace.
function categoryMagic ()
{
	global $wgLang ;
	# Always return a string (this used to be a bare "return;")
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	# Take the skin from the parser options like the rest of this class;
	# $wgUser is supposed to be kept out of the parser.
	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$children = array() ;
	$data = array () ;
	# Return value unused; call kept in case it has side effects.
	# NOTE(review): confirm and drop if it has none.
	$this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
	# All rows are buffered in $data, so the result can go right away
	wfFreeResult ( $res ) ;

	# For all pages that link to this category
	foreach ( $data AS $x )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
		}
	}

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
377
# Returns the whitelist of HTML attribute names that may be kept on
# tags (no scripting attributes). The list is returned in a fixed
# order and deliberately left as is, including the repeated "width".
function getHTMLattrs ()
{
	$general = array( "title", "align", "lang", "dir", "width", "height" );
	# BR and HR
	$breaks = array( "bgcolor", "clear", "noshade" );
	# BLOCKQUOTE, Q and FONT
	$quoteAndFont = array( "cite", "size", "face", "color" );
	# For various lists, mostly deprecated but safe
	$lists = array( "type", "start", "value", "compact" );
	# Tables
	$tables = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	);
	# For CSS
	$css = array( "id", "class", "name", "style" );

	return array_merge( $general, $breaks, $quoteAndFont, $lists, $tables, $css );
}
394
# Sanitises the attribute part of an HTML tag.
#
# $t  attribute text as written inside the tag
#
# Attributes whose name is not returned by getHTMLattrs() are removed.
# If what remains contains a style attribute with a javascript
# "expression", a URL scheme ending in "tp://" / "tps://", or "url(",
# all attributes are dropped. Returns the trimmed, cleaned text.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used the /e modifier, which evaluated the replacement as PHP
	# code with user-supplied attribute text interpolated into it, left
	# stray backslashes before single quotes, and no longer exists in
	# PHP 7+. A callback yields the same result without evaluating
	# anything.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'filterTagAttribute' ),
		$t);

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# Callback for fixTagAttributes(): decides the fate of one attribute.
# $m is the match array: [1] attribute name, [3] value (when present).
# Returns name, or name=value, for whitelisted names and '' otherwise.
/* private */ function filterTagAttribute( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	$value = isset( $m[3] ) ? $m[3] : '';
	return ( $value !== '' ) ? $m[1] . "=" . $value : $m[1];
}
417
418 /* interface with html tidy, used if $wgUseTidy = true */
/* interface with html tidy, used if $wgUseTidy = true */
# Pipes $text, wrapped in a minimal XHTML document, through the external
# tidy binary and returns tidy's output.
#
# $text  HTML fragment to clean up
#
# If tidy produced no output for non-empty input, the wrapped input is
# returned with an HTML comment appended that reports the failure.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	# Work on a copy: appending to the global itself made the encoding
	# switch pile up on the command line with every call.
	$opts = $wgTidyOpts;
	$sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= $sameEncoding ? ' -latin1' : ' -raw';
			break;
		case 'UTF-8':
			$opts .= $sameEncoding ? ' -utf8' : ' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$cleansource = '';
	$text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),             # tidy's stdin: we write to it
		1 => array("pipe", "w"),             # tidy's stdout: we read from it
		2 => array("file", "/dev/null", "a") # discard tidy's diagnostics
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $text);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
465
# Converts wiki table markup to HTML tables, line by line.
#
#   {| attrs   opens a table         |}         closes it
#   |- attrs   starts a new row      |+ text    caption
#   | cell     data cell             ! cell     header cell
#   "||" (or "!!" on header lines) separates several cells on one line;
#   "attrs | text" inside a cell sets that cell's attributes.
#
# Tables may nest: every state variable below is a stack with one entry
# per open table. Lines outside any table are left untouched.
# Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Attributes start right after "{|". This used to skip one
			# more character, eating the first letter of "{|border=1";
			# a separating space is trimmed by fixTagAttributes().
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		/* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
		{
			$z = trim ( substr ( $x , 2 ) ) ;
			$t[$k] = "<caption>{$z}</caption>\n" ;
		}*/
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			# Close the open cell and row; the new <tr> itself is only
			# written once the first cell of the row shows up.
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row lazily, using the attributes remembered
					# from the "|-" line
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that actually opened it; this used
		# to emit </td> even for an open <th> or <caption>.
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		array_pop ( $ltr ) ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
565
566 # Parses the text and adds the result to the strip state
567 # Returns the strip tag
# Parses $text as a nested (non-main) piece of wiki markup and stores
# the result in the parser's strip state.
#
# $text      wiki markup
# $linestart when true, a newline is put in front of the parsed text
# $args      arguments handed on to internalParse()
#
# Returns the marker that stands for the parsed text.
function stripParse( $text, $linestart, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, $linestart, $args, false );
	if ( $linestart ) {
		$parsed = "\n" . $parsed;
	}
	return $this->insertStripItem( $parsed, $this->mStripState );
}
577
# Runs the individual conversion passes over already stripped text.
# The order of the passes matters and must not be changed.
#
# $text      stripped wiki markup
# $linestart accepted for the callers' benefit; not read in this method
# $args      arguments handed on to replaceVariables()
# $isMain    handed on to formatHeadings()
#
# Returns the converted text. On the first call per Parser object the
# output of categoryMagic() is appended.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$fname = "Parser::internalParse";
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# A line starting with four or more dashes becomes a horizontal rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );
	# The internal link pass is run twice
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	# Append the category listing once only
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
613
614
# Turns "== Heading ==" style lines into <h1>..<h6> elements.
# Level 6 is handled first and level 1 last, so that the longest run
# of "=" signs wins. The heading must start at the beginning of a line
# and its closing "=" run must be followed by whitespace or the end of
# the line.
/* private */ function doHeadings( $text )
{
	for ( $level = 6; $level >= 1; --$level ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^" . $marks . "(.+)" . $marks . "(\\s|$)/m";
		$replacement = "<h" . $level . ">\\1</h" . $level . ">\\2";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
624
# Applies doQuotes() to every line of $text separately, so that quote
# markup never spans a line break, and returns the lines re-joined
# with "\n".
/* private */ function doAllQuotes( $text )
{
	$converted = "";
	$lines = explode( "\n", $text );
	$lineCount = count( $lines );
	for ( $idx = 0; $idx < $lineCount; ++$idx ) {
		$converted .= $this->doQuotes ( "", $lines[$idx], "" ) . "\n";
	}
	# Drop the newline added after the last line
	return substr( $converted, 0, -1 );
}
634
# Converts the quote markup of one line ('' and ''') into <em> and
# <strong> elements. Works recursively: each call handles the first
# run of two or three apostrophes in $text and recurses on the rest.
#
# $pre   text that was already scanned but not yet emitted, because it
#        belongs inside an element whose wrapper is still undecided
# $text  remaining text to scan
# $mode  which elements are currently open:
#          ""         none
#          "em"       <em>
#          "strong"   <strong>
#          "emstrong" <em> opened first, <strong> inside it
#          "strongem" <strong> opened first, <em> inside it
#          "both"     both opened with no text in between
#
# Returns the converted text. Elements still open at the end of the
# text are closed there; elements without any content are left out.
635 /* private */ function doQuotes( $pre, $text, $mode )
636 {
# Ungreedy match: $m[1] is the text before the first '', $m[2] the rest
637 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
638 $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
639 $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
# A third apostrophe right after the '' makes this a ''' token
640 if ( substr ($m[2], 0, 1) == "'" ) {
641 $m[2] = substr ($m[2], 1);
642 if ($mode == "em") {
643 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
644 } else if ($mode == "strong") {
645 return $m1_strong . $this->doQuotes ( "", $m[2], "" );
646 } else if (($mode == "emstrong") || ($mode == "both")) {
# The wrapped text is put back in front of the remaining text and
# scanned again in "em" mode
647 return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
648 } else if ($mode == "strongem") {
649 return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
650 } else {
651 return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
652 }
653 } else {
# Plain '' token; mirrors the ''' branch with em and strong swapped
654 if ($mode == "strong") {
655 return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
656 } else if ($mode == "em") {
657 return $m1_em . $this->doQuotes ( "", $m[2], "" );
658 } else if ($mode == "emstrong") {
659 return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
660 } else if (($mode == "strongem") || ($mode == "both")) {
661 return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
662 } else {
663 return $m[1] . $this->doQuotes ( "", $m[2], "em" );
664 }
665 }
666 } else {
# No quote markup left: close whatever is still open according to $mode
667 $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
668 $text_em = ($text == "") ? "" : "<em>{$text}</em>";
669 if ($mode == "") {
670 return $pre . $text;
671 } else if ($mode == "em") {
672 return $pre . $text_em;
673 } else if ($mode == "strong") {
674 return $pre . $text_strong;
675 } else if ($mode == "strongem") {
676 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
677 } else {
# Remaining modes: "emstrong" and "both"
678 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
679 }
680 }
681 }
682
683 # Note: we have to do external links before the internal ones,
684 # and otherwise take great care in the order of things here, so
685 # that we don't end up interpreting some URLs twice.
686
# Converts external links of all supported protocols in $text, one
# protocol after the other. The flag stored with each protocol is the
# $autonumber argument for subReplaceExternalLinks(); it is set for
# http and https only.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => autonumber, in processing order
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
701
# Converts the external links of one protocol found in $s to HTML.
#
# $s          text to process
# $protocol   URL scheme without the colon, e.g. "http"
# $autonumber when true, bracketed links without a description are
#             labelled [1], [2], ... using $this->mAutonumber, and
#             (if the parser options allow external images) URLs
#             ending in an image extension become images
#
# Two passes are made: first over free-standing URLs (not preceded by
# "["), then over bracketed links of the form [url] and
# [url description]. Returns the converted text.
702 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
703 {
# Placeholder written in place of the protocol while free URLs are
# converted and swapped back afterwards; presumably keeps the second
# preg_replace from matching URLs the first one already handled.
704 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
# Characters allowed in a URL
705 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
706
707 # this is the list of separators that should be ignored if they
708 # are the last character of an URL but that should be included
709 # if they occur within the URL, e.g. "go to www.foo.com, where .."
710 # in this case, the last comma should not become part of the URL,
711 # but in "www.foo.com/123,2342,32.htm" it should.
712 $sep = ",;\.:";
# Characters allowed in the file name part of an image URL
713 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
714 $images = "gif|png|jpg|jpeg";
715
716 # PLEASE NOTE: The curly braces { } are not part of the regex,
717 # they are interpreted as part of the string (used to tell PHP
718 # that the content of the string should be inserted there).
# $e1: free-standing URL whose path ends in an image file name
719 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
720 "((?i){$images})([^{$uc}]|$)/";
721
# $e2: any free-standing URL; a trailing separator is not part of it
722 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
723 $sk =& $this->mOptions->getSkin();
724
725 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
726 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
727 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
728 }
729 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
730 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
731 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
732 "</a>\\5", $s );
733 $s = str_replace( $unique, $protocol, $s );
734
# Second pass: bracketed links. Split at every "[protocol:"; the first
# piece is plain text (the space added in front is removed again).
735 $a = explode( "[{$protocol}:", " " . $s );
736 $s = array_shift( $a );
737 $s = substr( $s, 1 );
738
# $e1 is reused for "[url]", $e2 for "[url description]"
739 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
740 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
741
742 foreach ( $a as $line ) {
743 if ( preg_match( $e1, $line, $m ) ) {
744 $link = "{$protocol}:{$m[1]}";
745 $trail = $m[2];
746 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
747 else { $text = wfEscapeHTML( $link ); }
748 } else if ( preg_match( $e2, $line, $m ) ) {
749 $link = "{$protocol}:{$m[1]}";
750 $text = $m[2];
751 $trail = $m[3];
752 } else {
# Not a well-formed bracketed link: put the piece back unchanged
753 $s .= "[{$protocol}:" . $line;
754 continue;
755 }
# No URL expansion when the link text already is the URL, with or
# without the "protocol://" part and a trailing slash
756 if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
757 $paren = "";
758 } else {
759 # Expand the URL for printable version
760 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
761 }
762 $la = $sk->getExternalLinkAttributes( $link, $text );
763 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
764
765 }
766 return $s;
767 }
768
769
# Converts [[internal links]] in $s to HTML.
#
# Besides ordinary page links this handles:
#  - interlanguage links (collected in mOutput->mLanguageLinks and
#    removed from the text),
#  - image links, media links and links to special pages,
#  - category links (collected in mOutput->mCategoryLinks and removed
#    from the text),
#  - links to the page itself,
#  - subpage links written with a leading "/".
# A leading ":" switches off the special treatment of interlanguage,
# image and category links. Anything that does not form a valid link
# is copied to the output unchanged.
#
# Returns the converted text.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages;
	static $fname = "Parser::replaceInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# Split at every "[["; the first piece is plain text (the space added
	# in front is removed again)
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	# NOTE(review): the prefix is taken from the text before the first
	# link only and then used for every link of the text -- confirm.
	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	# wfProfileIn( $fname ) was called exactly once above, so it must be
	# closed exactly once, after the loop. Every "continue" below used to
	# be preceded by an additional wfProfileOut( $fname ).
	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a valid title; output directly
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Interlanguage link: recorded, not rendered in place
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				# Without explicit link text the page sorts under its own name
				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# Strict comparison: a "#" at offset 0 makes strpos() return 0,
		# which the previous "== FALSE" treated as "no fragment".
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
925
926 # Some functions here used by doBlockLevels()
927 #
# Ends the block recorded in $this->mLastSection, if any.
# Returns the closing tag followed by a newline, or "" when no block
# is open. In both cases the "in <pre>" flag and the last-section
# marker are reset.
/* private */ function closeParagraph()
{
	$closing = ( '' != $this->mLastSection )
		? "</" . $this->mLastSection . ">\n"
		: "";
	$this->mInPre = false;
	$this->mLastSection = "";
	return $closing;
}
938 # getCommon() returns the length of the longest common substring
939 # of both arguments, starting at the beginning of both.
940 #
# Returns the length of the common prefix of $st1 and $st2, i.e. the
# number of leading characters (bytes) both strings share. The result
# is 0 when either string is empty.
/* private */ function getCommon( $st1, $st2 )
{
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	# Square brackets instead of the {} string offset syntax, which is
	# deprecated as of PHP 7.4 and a parse error in PHP 8.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
952 # These next three functions open, continue, and close the list
953 # element appropriate to the prefix character passed into them.
954 #
# Opens the list that belongs to the prefix character $char:
#   "*" unordered list, "#" ordered list,
#   ":" definition list starting with a description (<dd>),
#   ";" definition list starting with a term (<dt>); this also sets
#       $this->mDTopen.
# An open paragraph is closed first. Returns the HTML to emit; for an
# unknown character an "ERR 1" HTML comment is appended.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	if ( "*" == $char ) { $result .= "<ul><li>"; }
	else if ( "#" == $char ) { $result .= "<ol><li>"; }
	else if ( ":" == $char ) { $result .= "<dl><dd>"; }
	else if ( ";" == $char ) {
		$result .= "<dl><dt>";
		$this->mDTopen = true;
	}
	# Append rather than assign: closeParagraph() has already reset the
	# paragraph state, so dropping its closing tag here left the
	# paragraph unclosed in the output.
	else { $result .= "<!-- ERR 1 -->"; }

	return $result;
}
970
# Returns the markup that ends the current list item and starts the next one
# at the same nesting level, for the prefix character $char.
# For definition lists (":" and ";") the closing tag depends on whether a
# <dt> is currently open ($this->mDTopen), and mDTopen is updated to reflect
# the item that is being opened. Unknown characters yield an "ERR 2" comment.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	# Definition list: close whichever of <dt>/<dd> is open ...
	$closing = $this->mDTopen ? "</dt>" : "</dd>";

	# ... and open a term for ";" or a definition for ":".
	if ( ";" == $char ) {
		$this->mDTopen = true;
		$opening = "<dt>";
	} else {
		$this->mDTopen = false;
		$opening = "<dd>";
	}
	return $closing . $opening;
}
987
# Returns the markup (terminated by a newline) that closes the innermost list
# for the prefix character $char. Only "*", "#" and ":" are recognised; for
# ":" the item tag closed is </dt> if a term is open ($this->mDTopen, which is
# then cleared) and </dd> otherwise. Anything else, including ";", yields an
# "ERR 3" comment without a trailing newline.
/* private */ function closeList( $char )
{
	if ( "*" == $char ) {
		return "</li></ul>" . "\n";
	}
	if ( "#" == $char ) {
		return "</li></ol>" . "\n";
	}
	if ( ":" != $char ) {
		return "<!-- ERR 3 -->";
	}

	if ( $this->mDTopen ) {
		$this->mDTopen = false;
		return "</dt></dl>" . "\n";
	}
	return "</dd></dl>" . "\n";
}
1003
# Converts line-oriented wiki text into block-level HTML.
#
# The text is processed line by line: leading runs of "*", "#", ":" and ";"
# become (nested) lists, lines starting with a space become <pre> blocks and
# remaining text is wrapped in paragraphs. Lines containing block-level HTML
# tags close the current paragraph instead of being wrapped.
#
#   $text      - the text to process
#   $linestart - if false, the first line is copied to the output untouched
#                (it is treated as the continuation of an earlier line)
#
# Returns the processed text. Uses and updates $this->mInPre,
# $this->mLastSection and $this->mDTopen.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	# $pref2 is read after the loop, so give it a value even when the loop
	# body never runs.
	$pref2 = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or the paragraph markup that is pending for a blank line.
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		if (!$this->mInPre) {
			$this->mInPre = !empty($preOpenMatch);
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = (string)substr( $oLine, 0, $prefixLength );

			# ";" and ":" both live in a <dl>, so for the purpose of
			# comparing nesting they are treated as the same character.
			$pref2 = str_replace( ";", ":", $pref );
			$t = (string)substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ":" );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close every level of the previous line that is not shared.
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			# Open every level of this line that is new.
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# Test for non-blank content first: that guarantees $t has a
				# first character, so blank lines no longer read an
				# uninitialised string offset.
				if ( trim($t) != '' and " " == $t[0] ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							# Second blank line in a row: emit the pending
							# markup plus an explicit line break.
							$output .= $paragraphStack.'<br/>';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							# First blank line: remember what to emit, but
							# wait to see what the next line is.
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# A line that only set up a pending paragraph is not output itself.
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close any lists still open after the last line.
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $output;
}
1162
# Returns the current value of the built-in variable identified by the magic
# word ID $index (one of the MAG_* constants handled below), or NULL when the
# ID is not a known variable.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one
			# (patch by Dori).
			$value = $wgLang->getNsText($this->mTitle->getNamespace());
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") is 0-based; getWeekdayName() is called with 1..7.
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1196
# (Re)builds $this->mVariables: for every ID in $wgVariableIDs the matching
# magic word adds itself to the array together with the variable's current
# value from getVariableValue().
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1206
# Expands {{...}} constructs in $text.
#
# For HTML output, {{variable}} and {{{argument}}} are substituted first; for
# every output type, {{template|args}} constructs are then handed to
# wfBraceSubstitution. $args holds the template arguments that apply to this
# text; it is pushed on $this->mArgStack for the duration of the call because
# this function is re-entered recursively.
#
# Returns the expanded text.
/* private */ function replaceVariables( $text, $args = array() )
{
	global $wgLang, $wgScript, $wgArticlePath;

	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	# Character classes for the regexes below: everything legal in a title,
	# and the same set without the braces themselves.
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( "{", "}" ), "", $titleChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$variablePattern = "/{{([$nonBraceChars]*?)}}/";
		$text = preg_replace_callback( $variablePattern, "wfVariableSubstitution", $text );

		# Argument substitution
		$argumentPattern = "/(\\n?){{{([$titleChars]*?)}}}/";
		$text = preg_replace_callback( $argumentPattern, "wfArgSubstitution", $text );
	}

	# Template substitution
	$templatePattern = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
	$text = preg_replace_callback( $templatePattern, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1244
# Regex callback for {{variable}}: $matches[1] is the text between the braces.
# Returns the variable's value if it is a key of $this->mVariables (and flags
# the output as containing "old magic"); otherwise returns the matched text
# unchanged.
function variableSubstitution( $matches )
{
	$variableName = $matches[1];

	if ( !array_key_exists( $variableName, $this->mVariables ) ) {
		return $matches[0];
	}

	$value = $this->mVariables[$variableName];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
1255
# Regex callback for {{...}} constructs matched by replaceVariables().
#
#   $matches[0] - the whole match
#   $matches[1] - an optional newline character before the braces
#   $matches[2] - the bit before the first |, containing only title characters
#   $matches[3] - "" or "|arg|arg...", the raw argument list
#
# The construct is resolved by the first handler that claims it, in this
# order: leftover {{{...}}}, SUBST:, MSGNW:/MSG:/INT:, NS:, LOCALURL:/
# LOCALURLE:, built-in variables, and finally a page loaded from the template
# namespace. For HTML output the resulting text is either escaped (MSGNW) or
# run through the parser again with the arguments. Returns the replacement
# text, or the original match if nothing claimed it.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;

	$found = false;
	$nowiki = false;
	$noparse = false;
	$title = NULL;

	$newline = $matches[1];
	$part1 = $matches[2];

	# If the third subpattern matched anything, it starts with |
	# $args is indexed from 0 and does not include $part1.
	$args = array();
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	}
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		$hasSubst = $mwSubst->matchStartAndRemove( $part1 );
		if ( $hasSubst && $this->mOutputType != OT_WIKI ) {
			# Invalid SUBST not replaced at PST time
			# Return without further processing
			$text = $matches[0];
			$found = true;
			$noparse = true;
		} elseif ( !$hasSubst && $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 )
			&& $this->incrementIncludeCount( "int:$part1" ) )
		{
			$text = wfMsgReal( $part1, $args, true );
			$found = true;
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			# A non-zero number is used as the namespace index directly;
			# anything else is looked up as a canonical namespace name.
			$nsIndex = intval( $part1 );
			if ( !$nsIndex ) {
				$nsIndex = Namespace::getCanonicalIndex( strtolower( $part1 ) );
			}
			if ( !is_null( $nsIndex ) ) {
				$text = $wgLang->getNsText( $nsIndex );
				$found = true;
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		# Name of the Title method that builds the URL, '' if neither matched
		$func = '';
		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, if any, is passed on to the URL method
				$text = ( $argc > 0 )
					? $title->$func( $args[0] )
					: $title->$func();
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}
	/*
	# Arguments input from the caller
	$inputArgs = end( $this->mArgStack );
	if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
		$text = $inputArgs[$part1];
		$found = true;
	}
	*/
	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			if ( $this->incrementIncludeCount( $title->getPrefixedDBkey() ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$text = $articleContent;
					$found = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && $this->mOutputType == OT_HTML ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	# (mOutputType is deliberately re-read here rather than cached: the calls
	# above may re-enter this parser object.)
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
		# Clean up argument array: "name=value" arguments are stored under
		# their trimmed name, the others are numbered from 1.
		$assocArgs = array();
		$index = 1;
		foreach ( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
				continue;
			}
			$name = trim( substr( $arg, 0, $eqpos ) );
			$assocArgs[$name] = trim( substr( $arg, $eqpos+1 ) );
		}

		# Do not enter included links in link table
		$isPageInclusion = !is_null( $title );
		if ( $isPageInclusion ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, (bool)$newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( $isPageInclusion ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	return $found ? $text : $matches[0];
}
1454
1455 # Triple brace replacement -- used for template arguments
# Regex callback for {{{argument}}}: $matches[1] is an optional leading
# newline, $matches[2] the argument name. If the argument set on top of
# $this->mArgStack has an entry for the (trimmed) name, its value is parsed
# with stripParse() and returned; otherwise the matched text is returned
# unchanged.
function argSubstitution( $matches )
{
	$argName = trim( $matches[2] );
	$inputArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $inputArgs ) ) {
		return $matches[0];
	}

	$atLineStart = (bool)$matches[1];
	return $this->stripParse( $inputArgs[$argName], $atLineStart, array() );
}
1469
1470 # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity identified by $dbk in
# $this->mIncludeCount and returns true while the count, including this
# inclusion, does not exceed MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk )
{
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}
	$this->mIncludeCount[$dbk] += 1;

	return ( $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT );
}
1482
1483
1484 # Cleans up HTML, removes dangerous tags and attributes
# Cleans up HTML in $text: HTML comments are removed, tags that are not on the
# whitelist are escaped, and whitelisted tags have their attributes filtered
# by fixTagAttributes().
#
# The whitelist is only populated when $wgUserHtml is set; otherwise every tag
# is escaped. Unless $wgUseTidy is set, tag nesting is also checked: closing
# tags that do not match the innermost open tag, table cells/rows outside a
# table and illegally nested tags are escaped, and tags still open at the end
# of the text are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
	global $wgUseTidy, $wgUserHtml;
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
			"strike", "strong", "tt", "var", "div", "center",
			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
			"ruby", "rt" , "rb" , "rp", "p"
		);
		$htmlsingle = array(
			"br", "hr", "li", "dt", "dd"
		);
		$htmlnest = array( # Tags that can be nested--??
			"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
			"dl", "font", "big", "small", "sub", "sup"
		);
		$tabletags = array( # Can only appear inside table
			"td", "th", "tr"
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table tags are tracked like single tags: they are never put on the stack.
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used below; presumably
	# fixTagAttributes() fetches the attribute list itself -- confirm before
	# removing this call.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );

	# Everything before the first "<" is kept as is; each remaining piece
	# starts right after a "<" and is examined as a potential tag.
	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	$tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			# Pieces that do not look like a tag leave every part empty, so
			# they fall through to the escaping branch at the bottom.
			$regs = array();
			preg_match( $tagPattern, $x, $regs );
			list( , $slash, $t, $params, $brace, $rest ) = array_pad( $regs, 6, '' );

			$badtag = 0 ;
			$t = strtolower( $t );
			if ( in_array( $t, $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag: unless it is a single tag, it must match
					# the innermost open tag. The popped tag is compared by
					# name (comparing the result of "count() && pop" instead
					# accepted any closing tag while the stack was non-empty).
					$mismatch = false;
					if ( ! in_array( $t, $htmlsingle ) ) {
						if ( count( $tagstack ) == 0 ) {
							$mismatch = true;
						} else {
							$ot = array_pop( $tagstack );
							if ( $ot != $t ) {
								# Not ours: put it back.
								array_push( $tagstack, $ot );
								$mismatch = true;
							}
						}
					}
					if ( $mismatch ) {
						$badtag = 1;
					} else {
						if ( $t == "table" ) {
							# Leaving a table restores the stack that was
							# saved when the table was opened.
							$tagstack = array_pop( $tablestack );
						}
						$newparams = "";
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					  ! in_array( "table", $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					  ! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == "table" ) {
							# A table starts a fresh nesting context.
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);

				}
				if ( ! $badtag ) {
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
		# Close off any remaining tags
		while ( $t = array_pop( $tagstack ) ) {
			$text .= "</$t>\n";
			if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			$regs = array();
			preg_match( $tagPattern, $x, $regs );
			list( , $slash, $t, $params, $brace, $rest ) = array_pad( $regs, 6, '' );

			$t = strtolower( $t );
			if ( in_array( $t, $htmlelements ) ) {
				$newparams = $this->fixTagAttributes($params);
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1600
1601
1602 /*
1603 *
1604 * This function accomplishes several tasks:
1605 * 1) Auto-number headings if that option is enabled
1606 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1607 * 3) Add a Table of contents on the top for users who have enabled the option
1608 * 4) Auto-anchor headings
1609 *
1610 * It loops through all headlines, collects the necessary data, then splits up the
1611 * string and re-inserts the newly formatted headlines.
1612 *
1613 */
1614
# Post-processes the <h1>..<h6> headings found in the HTML $text:
#   1) numbers the headings if that option is enabled
#   2) adds an [edit] link (and optionally a right-click hook) to each section
#      when the title is editable and the options ask for it
#   3) builds a table of contents and inserts it after the first block of
#      text (only when $isMain is true)
#   4) puts a named anchor in front of every heading
#
# The magic words NOEDITSECTION, NOTOC and FORCETOC are removed from the text
# and switch the edit links / TOC off or on. Without FORCETOC, no TOC is shown
# on the main page or when there are fewer than four headings.
#
# Returns the text with the rebuilt headings.
/* private */ function formatHeadings( $text, $isMain=true )
{
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = heading level, [2] = rest of the opening tag, [3] = content
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();           # rebuilt heading HTML, by headline index
	$sublevelCount = array();  # number of headings seen so far, per level
	$refers = array();         # how often each anchor name has occurred
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = "";
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build "1.2.3" from the counters of all levels up to this one,
			# skipping levels that have not been used.
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
		# strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
		$canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

		# count how often this anchor name occurred so dupes can be told apart
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$dupeCount = $refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section;
		# repeated names get a "_2", "_3", ... suffix
		$anchor = $canonized_headline;
		if( $dupeCount > 1 ) {
			$anchor .= "_" . $dupeCount;
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		$head[$headlineCount] = "";
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# No [edit] link is added for the top block of text: it used to be
		# inserted here but was disabled because it broke block formatting
		# (for example, a bullet point in the top line).
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		# Block $i is followed by heading $i, if there is one.
		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1800
# Turns every "ISBN <number>" in $text into a link to Special:Booksources.
#
# The number is the run of digits, dashes and capital letters that follows
# "ISBN " (after optional extra spaces); dashes are removed for the link
# target but kept in the link text. An "ISBN " that is not followed by at
# least one digit or capital letter is left exactly as it was.
#
# Returns the text with the links inserted.
/* private */ function magicISBN( $text )
{
	# The space in front guarantees a non-empty first piece even when the
	# text itself starts with "ISBN "; it is stripped again below.
	$a = explode( "ISBN ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# Split the piece into leading blanks, candidate number and the rest.
		# strspn() copes with empty pieces (text ending in "ISBN "), where
		# reading the first character would be an invalid offset.
		$blankLength = strspn( $x, " " );
		$isbnLength = (int)strspn( $x, $valid, $blankLength );
		$blank = (string)substr( $x, 0, $blankLength );
		$isbn = (string)substr( $x, $blankLength, $isbnLength );
		$rest = (string)substr( $x, $blankLength + $isbnLength );

		$num = str_replace( "-", "", $isbn );

		if ( "" === $num ) {
			# Not a number after all: restore the text unchanged, including
			# any dashes that were consumed as part of the candidate.
			$text .= "ISBN $blank$isbn$rest";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $rest;
		}
	}
	return $text;
}
# Turns every "RFC <number>" in $text into an external link. The link target
# is the "rfcurl" message with "$1" replaced by the number; the number is the
# run of digits that follows "RFC " (after optional extra spaces). An "RFC "
# that is not followed by a digit is left exactly as it was.
#
# Returns the text with the links inserted.
/* private */ function magicRFC( $text )
{
	# The space in front guarantees a non-empty first piece even when the
	# text itself starts with "RFC "; it is stripped again below.
	$a = explode( "RFC ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# Split the piece into leading blanks, number and the rest.
		# strspn() copes with empty pieces (text ending in "RFC "), where
		# reading the first character would be an invalid offset.
		$blankLength = strspn( $x, " " );
		$rfcLength = (int)strspn( $x, $valid, $blankLength );
		$blank = (string)substr( $x, 0, $blankLength );
		$rfc = (string)substr( $x, $blankLength, $rfcLength );
		$rest = (string)substr( $x, $blankLength + $rfcLength );

		if ( "" === $rfc ) {
			$text .= "RFC $blank$rest";
		} else {
			$url = wfmsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$rest}";
		}
	}
	return $text;
}
1867
# Transforms wiki text before it is saved and returns the altered wiki markup.
#
#   $text       - the submitted wiki text
#   $title      - title of the page being saved (stored by reference)
#   $user       - the saving user, passed on to pstPass2()
#   $options    - parser options to use
#   $clearState - whether to reset the parser state first
#
# Line endings and <br> variants are normalised, then pstPass2() is run on
# the text with the sections handled by strip() taken out and put back
# afterwards.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	$stripState = false;
	$pairs = array(
		"\r\n" => "\n",
	);
	$text = str_replace(array_keys($pairs), array_values($pairs), $text);
	// now with regexes
	$pairs = array(
		# [^>]+ keeps the match inside a single tag. With ".+" the match
		# could start at an earlier <br> on the same line and swallow all
		# text up to a later <br clear=all>.
		"/<br[^>]+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
		"/<br *?>/i" => "<br/>",
	);
	$text = preg_replace(array_keys($pairs), array_values($pairs), $text);
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $text;
}
1894
# Second pass of the pre-save transform. In order:
#   - runs replaceVariables() (with mOutputType OT_WIKI this only processes
#     {{subst:xxx}} type tags)
#   - expands signatures: ~~~~~ (timestamp), ~~~~ (user link and timestamp),
#     ~~~ (user link)
#   - completes "pipe trick" links such as [[|name]] and [[name (context)|]]
#   - trims trailing whitespace and removes the MAG_END marker
#
#   $text - wiki text, already stripped by the caller
#   $user - the saving user; name and "nickname" option are used for signatures
#
# Returns the transformed wiki text.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# Plain string replacement, longest marker first. The markers are literal
	# text, and the nickname is user-supplied: used as a regex replacement
	# string, sequences like "$1" or "\1" in it would be interpreted as
	# backreferences.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~~", $d, $text );
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
													# [[ns:page (cont)|]]
	# If the page being saved is itself called "name (context)", that context
	# is used to complete [[|page]] links.
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1965
1966 # Set up some variables which are usually set up in parse()
1967 # so that an external function can call some class members with confidence
# Stores the title (by reference), the options and the output type on the
# parser and, unless $clearState is false, resets the parser state, so that
# outside code can call individual members without going through parse().
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
1977
# Expands {{...}} constructs in a message text, using $wgTitle as the title
# and OT_MSG as the output type. The parser state is cleared first.
# A call made while another one is still running returns $text untouched.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $inProgress = false;

	# Guard against infinite recursion
	if ( $inProgress ) {
		return $text;
	}
	$inProgress = true;

	$this->mTitle = $wgTitle;
	$this->mOptions = $options;
	$this->mOutputType = OT_MSG;
	$this->clearState();
	$transformed = $this->replaceVariables( $text );

	$inProgress = false;
	return $transformed;
}
1997 }
1998
# Value holder for the result of a parse: the output text together with the
# language links, category links and "contains old magic" flag, plus a
# "touched" value used for caching.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mTouched; # Used for caching

	# Constructor. Every argument is optional; mTouched always starts out
	# as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mTouched = "";
	}

	# Plain accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getTouched() { return $this->mTouched; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators. Each one hands the member and the new value to wfSetVar() and
	# returns its result (presumably the previous value -- confirm against
	# wfSetVar's definition, which lives outside this file section).
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setTouched( $t ) { return wfSetVar( $this->mTouched, $t ); }

	# Fold another ParserOutput into this one: the link arrays are combined
	# with array_merge() and the old-magic flags are OR-ed together.
	# mText and mTouched of this object are not modified.
	#
	# $other - the ParserOutput whose links and flag are merged in
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# The second operand must be $other's category links. It used to be
		# $this->mLanguageLinks, which discarded $other's categories and
		# polluted this object's category list with its own language links.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2032
# Bundle of the settings that influence a parse. The values are copied from
# site-wide globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessor pairs, one getter and one setter per option. Setters delegate
	# to wfSetVar() (wfSetRef() for the skin) and return that helper's result.

	function getUseTeX() { return $this->mUseTeX; }
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

	function getSkin() { return $this->mSkin; }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

	function getDateFormat() { return $this->mDateFormat; }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

	function getEditSection() { return $this->mEditSection; }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

	function getNumberHeadings() { return $this->mNumberHeadings; }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

	function getShowToc() { return $this->mShowToc; }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Factory: build a ParserOptions object and fill it in from $user.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Populate every option. The first five come from the $wg* globals, the
	# rest from the user's skin and preferences. When $userInput is falsy a
	# fresh User object, marked as loaded, is used in its place.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user preferences
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}

}
2105
2106 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
	# Forward the regex match array to the parser held in the
	# $wgCurParser global and pass its return value straight back.
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2112
function wfArgSubstitution( $matches )
{
	# Forward the regex match array to the parser held in the
	# $wgCurParser global and pass its return value straight back.
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2118
function wfVariableSubstitution( $matches )
{
	# Forward the regex match array to the parser held in the
	# $wgCurParser global and pass its return value straight back.
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2124
2125 ?>