workaround, strip nowikis again in doBlockLevels. BUG #96447
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
# Optional parser extensions. Each file is loaded only when the matching
# site setting is truthy. strip() later calls WikiHiero() and
# renderTimeline() under the same settings, so those functions are
# presumably defined by these files -- TODO confirm.
if( $GLOBALS['wgUseWikiHiero'] ){
	require_once('extensions/wikihiero/wikihiero.php');
}
if( $GLOBALS['wgUseTimeline'] ){
	require_once('extensions/timeline/Timeline.php');
}
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Inclusion limit referred to by the "Variable substitution O(N^2) attack"
# note above (presumably the per-page repeat cap; the code that enforces
# it is not in this part of the file).
define( "MAX_INCLUDE_REPEAT", 5 );

# Allowed values for $mOutputType
# parse() sets OT_HTML; strip() only renders extension tags when the
# output type is OT_HTML.
define( "OT_HTML", 1 );
define( "OT_WIKI", 2 );
define( "OT_MSG", 3 );

# string parameter for extractTags which will cause it
# to strip HTML comments in addition to regular
# <XML>-style tags. This should not be anything we
# may want to use in wikisyntax
define( "STRIP_COMMENTS", "HTMLCommentStrip" );

# prefix for escaping, used in two functions at least
# (strip() and insertStripItem() both build their markers from it)
define( "UNIQ_PREFIX", "NaodW29");
55
56 class Parser
57 {
# Cleared with clearState():
# NOTE(review): clearState() also assigns $this->mInNowiki, and
# internalParse() sets $this->categoryMagicDone; neither is declared here.
var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

# Temporary:
# Assigned at the start of parse() (options, title reference, output type).
var $mOptions, $mTitle, $mOutputType;
64
# Constructor: puts the parser into a clean initial state.
# Declared as __construct() so that PHP 5 and later (including PHP 8,
# which no longer treats a method named after the class as a constructor)
# run it on "new Parser".
function __construct()
{
	$this->clearState();
}

# PHP 4 style constructor, kept for engines that only recognise the
# class-name form and for any code that calls it explicitly.
function Parser()
{
	$this->__construct();
}
69
# Reset all per-parse state: a fresh ParserOutput plus the counters,
# flags and stacks that the parsing passes update as they go.
function clearState()
{
	$this->mOutput = new ParserOutput;

	# Scalar / array members and the value each one starts from.
	$defaults = array(
		'mAutonumber' => 0,
		'mLastSection' => "",
		'mDTopen' => false,
		'mVariables' => false,
		'mIncludeCount' => array(),
		'mStripState' => array(),
		'mArgStack' => array(),
		'mInPre' => false,
		'mInNowiki' => false
	);
	foreach ( $defaults as $member => $initial ) {
		$this->$member = $initial;
	}
}
83
84 # First pass--just handle <nowiki> sections, pass the rest off
85 # to internalParse() which does all the real work.
86 #
87 # Returns a ParserOutput
88 #
# Main entry point for producing HTML.
#
# $text       wiki markup to convert
# $title      title object the text belongs to (stored by reference in mTitle)
# $options    parser options object (stored in mOptions)
# $linestart  passed through to internalParse() and doBlockLevels()
# $clearState when true, clearState() is called first
#
# Pipeline: strip() -> internalParse() -> unstrip() -> character/tag
# clean-up -> doBlockLevels() -> optional tidy(). Returns $this->mOutput
# after storing the resulting text in it.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if( !$wgUseTidy ) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			"/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<hr *>/i" => '<hr />',
			"/<br *>/i" => '<br />',
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The lookahead skips anything that already looks like a hex,
			# decimal or named reference ("amp;" is covered by the named
			# alternative). Hex digits are limited to 0-9A-Fa-f.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		# With tidy enabled only the typographic and <center> fixes are
		# applied here.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			"/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	if( $wgUseTidy ) {
		$text = $this->tidy( $text );
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
144
# Returns a short random lowercase-hex string: two mt_rand() draws in the
# range 0..0x7fffffff, each rendered with dechex() and concatenated.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
149
150 # Replaces all occurrences of <$tag>content</$tag> in the text
151 # with a random marker and returns the new text. the output parameter
152 # $content will be an associative array filled with data on the form
153 # $unique_marker => content.
154
155 # If $content is already set, the additional entries will be appended
156
157 # If $tag is set to STRIP_COMMENTS, the function will extract
158 # <!-- HTML comments -->
159
# Replace every <$tag>...</$tag> section (or every <!-- ... --> comment when
# $tag is STRIP_COMMENTS) with a unique marker.
#
# $tag          tag name, or STRIP_COMMENTS
# $text         text to scan
# $content      in/out: marker => inner text; existing entries are kept
# $uniq_prefix  string prepended to every generated marker
#
# Returns the text with markers in place of the extracted sections.
# An opening tag without a closing tag swallows the rest of the text.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = "";

	# The patterns do not change between iterations, so build them once.
	if ( $tag == STRIP_COMMENTS ) {
		$openRegex = "/<!--/i";
		$closeRegex = "/-->/i";
	} else {
		# Quote the tag so regex metacharacters in it are matched literally.
		$quotedTag = preg_quote( $tag, "/" );
		$openRegex = "/<\\s*$quotedTag\\s*>/i";
		$closeRegex = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	while ( "" != $text ) {
		$p = preg_split( $openRegex, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( $closeRegex, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# No closing tag: everything after the opening tag was taken
			# as content, so nothing is left to scan.
			$text = ( count( $q ) < 2 ) ? "" : $q[1];
		}
	}
	return $stripped;
}
191
192 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
193 # If $render is set, performs necessary rendering operations on plugins
194 # Returns the text, and fills an array with data needed in unstrip()
195 # If the $state is already a valid strip state, it adds to the state
196
197 # When $stripcomments is set, HTML comments <!-- like this -->
198 # will be stripped in addition to other tags. This is important
199 # for section editing, where these comments cause confusion when
200 # counting the sections in the wikisource
# Pull <nowiki>, <hiero>, <timeline>, <math>, <pre> (and optionally HTML
# comments) out of $text, replacing each with a unique marker.
#
# $text           text to process
# $state          in/out strip state: tag type => ( marker => replacement ).
#                 New entries are merged into an existing state.
# $stripcomments  when true, <!-- comments --> are extracted as well
#
# When the output type is OT_HTML the extension tags are rendered
# (hieroglyphs, timelines, math, escaped <pre>); otherwise the original
# markup is stored so unstrip() restores it unchanged.
# Returns the text containing the markers.
function strip( $text, &$state, $stripcomments = false )
{
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		# nowiki sections are stored still wrapped in their tags
		# (doBlockLevels() strips them a second time).
		$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
	}

	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseWikiHiero']){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseTimeline']){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the markup literally, with a proper
				# closing tag.
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	$extracted = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $extracted as $type => $items ) {
			# A state built elsewhere (e.g. by insertStripItem()) may lack
			# some tag types; array + null would be a fatal error.
			if ( isset( $state[$type] ) ) {
				$state[$type] = $state[$type] + $items;
			} else {
				$state[$type] = $items;
			}
		}
	} else {
		$state = $extracted;
	}
	return $text;
}
291
# Put stripped content back: every marker in $text is replaced by the
# value stored for it in $state ( tag type => ( marker => replacement ) ).
# Tag types, and the markers within each type, are processed last-to-first;
# otherwise nested tags would be corrupted.
function unstrip( $text, &$state )
{
	foreach ( array_reverse( $state, true ) as $markers ) {
		foreach ( array_reverse( $markers, true ) as $marker => $replacement ) {
			$text = str_replace( $marker, $replacement, $text );
		}
	}

	return $text;
}
304
305 # Add an item to the strip state
306 # Returns the unique tag which must be inserted into the stripped text
307 # The tag will be replaced with the original text in unstrip()
308
# Store $text in the strip state under a fresh marker and return that
# marker; unstrip() later swaps the marker back for $text.
#
# $text   content to protect from further processing
# $state  in/out strip state; created if empty
function insertStripItem( $text, &$state )
{
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same tag types, in the same order, as strip() creates, so a
		# later strip() call can merge into every one of them.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
323
324 # This method generates the list of subcategories and pages for a category
# Build the HTML listing appended to a category page: a "subcategories"
# section followed by a section for the other member pages.
# Returns nothing when category magic is disabled in the parser options,
# an empty string when the current title is not in the category
# namespace, and the HTML fragment otherwise.
function categoryMagic ()
{
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$html = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$pageLinks = array() ;
	$subcatLinks = array() ;
	$rows = array () ;
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	# Fetch every row first; links are only built afterwards.
	while ( $row = wfFetchObject ( $res ) ) {
		$rows[] = $row ;
	}

	# Sort each member into subcategories or ordinary pages
	foreach ( $rows as $row ) {
		$name = $wgLang->getNsText ( $row->cur_namespace ) ;
		if ( $name != "" ) {
			$name .= ":" ;
		}
		$name .= $row->cur_title ;

		if ( $row->cur_namespace == $cns ) {
			$subcatLinks[] = $sk->makeLink ( $name ) ;
		} else {
			$pageLinks[] = $sk->makeLink ( $name ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Subcategories first
	if ( count ( $subcatLinks ) > 0 ) {
		$html .= "<h2>" . wfMsg( "subcategories" ) . "</h2>\n" ;
		$html .= implode ( ", " , $subcatLinks ) ;
	}

	# Then the pages in this category
	if ( count ( $pageLinks ) > 0 ) {
		$heading = wfMsg( "category_header", $this->mTitle->getText() );
		$html .= "<h2>{$heading}</h2>\n" ;
		$html .= implode ( ", " , $pageLinks ) ;
	}

	return $html ;
}
381
# Whitelist of attribute names that may survive on user-supplied HTML
# tags (no scripting attributes). fixTagAttributes() drops anything not
# listed here.
function getHTMLattrs ()
{
	return array(
		# General
		"title", "align", "lang", "dir", "width", "height",
		# BR, HR
		"bgcolor", "clear", "noshade",
		# BLOCKQUOTE, Q, FONT
		"cite", "size", "face", "color",
		# For various lists, mostly deprecated but safe
		"type", "start", "value", "compact",
		# Tables
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan",
		# For CSS
		"id", "class", "name", "style"
	);
}
398
# Sanitise the attribute part of an HTML tag.
#
# $t  raw attribute text, e.g. 'border="1" onclick="..."'
#
# Every name[=value] pair whose name is not in getHTMLattrs() is removed.
# If what remains contains a style attribute with "expression", a URL
# scheme or "url(", all attributes are dropped. Returns the trimmed result.
#
# The filtering uses a callback rather than the /e modifier: /e evaluated
# the attribute text as PHP source (allowing code injection and adding
# backslashes before apostrophes) and is not supported by PHP 7+.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'fixTagAttributesCallback' ),
		$t);

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (when present) its value including
# any surrounding double quotes. Returns "name" or "name=value" for
# whitelisted names and "" for everything else.
function fixTagAttributesCallback( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs(), true ) ) {
		return "" ;
	}
	$value = isset( $m[3] ) ? $m[3] : "" ;
	return ( $value !== "" ) ? "{$m[1]}={$value}" : $m[1] ;
}
421
422 /* interface with html tidy, used if $wgUseTidy = true */
# Interface with html tidy, used if $wgUseTidy = true.
#
# $text  HTML fragment to clean up
#
# The fragment is wrapped in a minimal XHTML document, piped through the
# external tidy binary ($wgTidyBin with $wgTidyConf / $wgTidyOpts plus an
# encoding switch), and tidy's output is returned. If tidy produces no
# output for non-empty input, the original text is returned with an HTML
# comment noting the failure.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy of the configured options: appending to the global
	# would make the encoding switch pile up on every call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),              # tidy's stdin: we write to it
		1 => array("pipe", "w"),              # tidy's stdout: we read from it
		2 => array("file", "/dev/null", "a")  # discard tidy's diagnostics
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
469
# Convert wiki table markup to HTML tables, line by line.
#
#   {| attrs    opens a table          |}        closes it
#   |- attrs    starts a new row       |+ text   caption
#   | cell || cell                     ! head !! head
#
# A cell may be written as "attrs | content". Attributes go through
# fixTagAttributes(). Tables may nest: the four stacks below hold one
# entry per currently open table. Tables still open at the end of the
# text are closed automatically.
# Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			# Row attributes are kept until the first cell emits the <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row lazily, on its first cell
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, then remember the new cell type
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the open cell with its own element name (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
569
570 # Parses the text and adds the result to the strip state
571 # Returns the strip tag
# Strip and parse $text as a non-main fragment, park the result in the
# parser's strip state, and return $newline followed by the marker that
# stands for it. $newline also decides (cast to bool) whether the
# fragment is treated as starting on a new line; $args is handed to
# internalParse().
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
578
# Run the inline / structural conversion passes over already-stripped text.
#
# $text       text with strip markers in place of protected sections
# $linestart  accepted for the caller's benefit; not used in this body
# $args       passed to replaceVariables()
# $isMain     passed to formatHeadings()
#
# The passes run in a fixed order and each works on the output of the one
# before it. Returns the converted text (block-level handling is done
# later by the caller).
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$fname = "Parser::internalParse";
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

	$text = $this->doHeadings( $text );
	if($this->mOptions->getUseDynamicDates()) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
	}
	$text = $this->doAllQuotes( $text );
	# External links must be handled before internal ones
	$text = $this->replaceExternalLinks( $text );
	# NOTE(review): replaceInternalLinks() is run twice in a row; presumably
	# so that links nested inside other links are picked up -- confirm
	# before removing either call.
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# Append the category listing once only. The flag is set on the first
	# call and nothing visible here (including clearState()) resets it.
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
614
615
# Turn "== Heading ==" style lines into <h1>..<h6> elements.
# Levels are handled from 6 down to 1 so that longer runs of "=" are
# consumed before shorter ones can match them.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
625
# Apply doQuotes() to each line of $text separately, so that quote markup
# never spans a line break, and join the lines back together.
/* private */ function doAllQuotes( $text )
{
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( "", $line, "" );
	}
	return implode( "\n", $converted );
}
635
# Recursively convert '' and ''' quote markup in one line to <em> / <strong>.
#
# $pre   text already consumed that still has to go inside the element(s)
#        currently being built
# $text  remaining text to scan
# $mode  which markup is open at this point: "" (none), "em", "strong",
#        "emstrong" / "strongem" (both open, named outer-first), or "both"
#        (both opened together with nothing in between yet)
#
# Each call finds the first '' in $text, decides from $mode and from
# whether a third ' follows what that marker opens or closes, and recurses
# on the rest. Returns the converted text.
/* private */ function doQuotes( $pre, $text, $mode )
{
	# Ungreedy match: $m[1] is the text before the first '', $m[2] the rest
	if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# Wrapped forms of the leading text; empty text yields no empty element
		$m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
		$m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
		if ( substr ($m[2], 0, 1) == "'" ) {
			# Three apostrophes: a strong marker
			$m[2] = substr ($m[2], 1);
			if ($mode == "em") {
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
			} else if ($mode == "strong") {
				return $m1_strong . $this->doQuotes ( "", $m[2], "" );
			} else if (($mode == "emstrong") || ($mode == "both")) {
				return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
			} else if ($mode == "strongem") {
				return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
			} else {
				return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
			}
		} else {
			# Two apostrophes: an em marker
			if ($mode == "strong") {
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
			} else if ($mode == "em") {
				return $m1_em . $this->doQuotes ( "", $m[2], "" );
			} else if ($mode == "emstrong") {
				return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
			} else if (($mode == "strongem") || ($mode == "both")) {
				return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
			} else {
				return $m[1] . $this->doQuotes ( "", $m[2], "em" );
			}
		}
	} else {
		# No further markers: close whatever is still open around the rest
		$text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
		$text_em = ($text == "") ? "" : "<em>{$text}</em>";
		if ($mode == "") {
			return $pre . $text;
		} else if ($mode == "em") {
			return $pre . $text_em;
		} else if ($mode == "strong") {
			return $pre . $text_strong;
		} else if ($mode == "strongem") {
			return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
		} else {
			return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
		}
	}
}
683
684 # Note: we have to do external links before the internal ones,
685 # and otherwise take great care in the order of things here, so
686 # that we don't end up interpreting some URLs twice.
687
# Convert external links for every supported URL scheme, one scheme at a
# time and in a fixed order. The flag attached to each scheme is passed to
# subReplaceExternalLinks() as its $autonumber argument (true only for
# http and https).
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	$schemes = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $schemes as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
702
# Convert external links of one URL scheme.
#
# $s           text to process
# $protocol    scheme name without the colon, e.g. "http"
# $autonumber  when true, bare bracketed links ("[url]") are shown as
#              running numbers and (if the options allow external images)
#              image URLs become image tags
#
# Two passes: first free-standing URLs not preceded by "[", then bracketed
# "[url]" / "[url text]" links. Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the protocol name inside generated markup during the
	# first pass; it is swapped back with str_replace() below. Presumably
	# this keeps the second regex from re-matching URLs the first one has
	# already converted.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	# Characters allowed inside a URL
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	# Characters allowed in the file-name part of an image URL
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	# $e1: free-standing URL ending in an image file extension
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	# $e2: any other free-standing URL; a trailing separator is left out
	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
			"/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
	}
	$s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		"</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Second pass: bracketed links. Split on "[protocol:"; the leading
	# space guarantees a first piece, and is removed again right after.
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# $e1: "url]rest"   $e2: "url text]rest"
	$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $a as $line ) {
		if ( preg_match( $e1, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		} else if ( preg_match( $e2, $line, $m ) ) {
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed bracketed link: put the text back untouched
			$s .= "[{$protocol}:" . $line;
			continue;
		}
		# Skip the printable expansion when the link text already is the URL
		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

	}
	return $s;
}
769
770
# Convert [[internal links]] to HTML.
#
# $s  text to process
#
# The text is split on "[[" and each piece is matched against the link
# pattern. Depending on the target the link becomes: a language link
# (recorded in the output, nothing emitted), an image, a category link
# (recorded, nothing emitted), a self-link, a media or special-page link,
# or an ordinary page link. Pieces that do not form a valid link are put
# back unchanged. Returns the converted text.
#
# Profiling: $fname is entered once at the top and left once at the end;
# the early "continue" paths must not call wfProfileOut() themselves.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::replaceInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# The leading space guarantees a first piece before any "[["
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a usable title: put the original text back
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Language link: recorded in the output, nothing shown inline
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a leading "#"; only a strict false means
		# the link has no fragment.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
926
927 # Some functions here used by doBlockLevels()
928 #
# Close the block element recorded in mLastSection, if any.
# Returns the closing tag followed by a newline (or "" when nothing is
# open) and resets mLastSection and mInPre.
/* private */ function closeParagraph()
{
	$closing = ( '' != $this->mLastSection )
		? "</" . $this->mLastSection . ">\n"
		: "";
	$this->mLastSection = "";
	$this->mInPre = false;
	return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters they share.
#
# $st1, $st2: strings to compare. Returns the number of leading
# characters that are identical in both (0 if either is empty).
# Characters are read with [] offsets; the {} offset form is rejected by
# PHP 8.
/* private */ function getCommon( $st1, $st2 )
{
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
953 # These next three functions open, continue, and close the list
954 # element appropriate to the prefix character passed into them.
955 #
# Opens the list element for one prefix character.
# Any open paragraph is closed first and its closing markup is prepended.
# ";" additionally records that a <dt> is open (mDTopen).
# An unknown character yields only the "ERR 1" marker.
/* private */ function openList( $char )
{
    $closedParagraph = $this->closeParagraph();

    switch ( $char ) {
        case "*":
            return $closedParagraph . "<ul><li>";
        case "#":
            return $closedParagraph . "<ol><li>";
        case ":":
            return $closedParagraph . "<dl><dd>";
        case ";":
            $this->mDTopen = true;
            return $closedParagraph . "<dl><dt>";
    }

    # The marker replaces the paragraph-closing markup rather than following it.
    return "<!-- ERR 1 -->";
}
971
# Returns the markup that ends the current list item and starts the next
# one for the given prefix character. For definition lists the closing tag
# depends on whether a <dt> is currently open, and mDTopen is updated to
# reflect the item being opened. Unknown characters yield the "ERR 2" marker.
/* private */ function nextItem( $char )
{
    switch ( $char ) {
        case "*":
        case "#":
            return "</li><li>";

        case ":":
        case ";":
            $closeTag = $this->mDTopen ? "</dt>" : "</dd>";
            $opensTerm = ( ";" == $char );
            $this->mDTopen = $opensTerm;
            return $closeTag . ( $opensTerm ? "<dt>" : "<dd>" );
    }
    return "<!-- ERR 2 -->";
}
988
# Returns the markup closing the list opened for the given prefix character,
# followed by a newline. ":" closes a definition list and clears mDTopen if
# a <dt> was open. Any other character (including ";") yields the "ERR 3"
# marker without a trailing newline.
/* private */function closeList( $char )
{
    switch ( $char ) {
        case "*":
            $closing = "</li></ul>";
            break;
        case "#":
            $closing = "</li></ol>";
            break;
        case ":":
            if ( $this->mDTopen ) {
                $this->mDTopen = false;
                $closing = "</dt></dl>";
            } else {
                $closing = "</dd></dl>";
            }
            break;
        default:
            return "<!-- ERR 3 -->";
    }
    return $closing . "\n";
}
1004
# Walks the text line by line and produces block-level markup: paragraphs,
# <pre> sections for lines starting with a space, and lists built from
# lines starting with * # : ;
#
# $text      - text to process
# $linestart - when false, the first line is copied to the output untouched
# Returns the processed text.
#
# The text is run through strip() on entry and unstrip() on exit.
/* private */ function doBlockLevels( $text, $linestart ) {
    $fname = "Parser::doBlockLevels";
    wfProfileIn( $fname );

    # Strip nowiki's again.
    $text = $this->strip( $text, $dblStripState );
    $textLines = explode( "\n", $text );

    $lastPrefix = $output = '';
    $this->mDTopen = $inBlockElem = false;
    $prefixLength = 0;
    # false, or the paragraph markup held back until the next line is seen
    $paragraphStack = false;

    if ( !$linestart ) {
        $output .= array_shift( $textLines );
    }
    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preOpenMatch = preg_match( "/<pre/i", $oLine );
        $preCloseMatch = preg_match( "/<\\/pre/i", $oLine );
        $nowikiOpenMatch = preg_match( "/<span class=\"nowiki\"/", $oLine );
        $nowikiCloseMatch = preg_match( "/<\\/span >/", $oLine );
        if ( $nowikiOpenMatch ) {
            # $nowikiOpenMatches[1] is whatever precedes the nowiki span on this line
            preg_match( "/^(.*)<span class=\"nowiki\"/", $oLine, $nowikiOpenMatches );
        }
        if ( !$this->mInPre ) {
            $this->mInPre = !empty( $preOpenMatch );
        }
        if ( !$this->mInNowiki ) {
            $this->mInNowiki = !empty( $nowikiOpenMatch );
        }
        if (
            !$this->mInPre && ( !$this->mInNowiki ||
            ( $nowikiOpenMatch && strlen( $nowikiOpenMatches[1] ) > 0 ) )
        )
        {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, "*#:;" );
            $pref = substr( $oLine, 0, $prefixLength );

            # $pref2 treats ";" like ":" so that term and definition lines
            # compare as the same list level.
            $pref2 = str_replace( ";", ":", $pref );
            $t = substr( $oLine, $prefixLength );
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            $paragraphStack = false;

            if ( ";" == substr( $pref, -1 ) ) {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                # FIXME: This is not foolproof. Something better in Tokenizer might help.
                if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                    $term = $match[1];
                    $output .= $term . $this->nextItem( ":" );
                    $t = $match[2];
                }
            }
        } elseif ( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
            $paragraphStack = false;

            while ( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( substr( $lastPrefix, $lastPrefixLength - 1, 1 ) );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( substr( $pref, $commonPrefixLength - 1, 1 ) );
            }
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );

                if ( ";" == $char ) {
                    # FIXME: This is dupe of code above
                    if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
                        $term = $match[1];
                        $output .= $term . $this->nextItem( ":" );
                        $t = $match[2];
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }
        if ( 0 == $prefixLength ) {
            # No prefix (not in list)--go to paragraph mode
            $uniq_prefix = UNIQ_PREFIX;
            // XXX: use a stack for nestable elements like span, table and div
            $openmatch = preg_match( "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
            $closematch = preg_match(
                "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
                "<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
            if ( $openmatch || $closematch ) {
                $paragraphStack = false;
                $output .= $this->closeParagraph();
                # closeParagraph() cleared mInPre; restore it for an unclosed <pre>
                if ( $preOpenMatch && !$preCloseMatch ) {
                    $this->mInPre = true;
                }
                $inBlockElem = !$closematch;
            } else if (
                !$inBlockElem && !$this->mInPre &&
                ( !$this->mInNowiki || ( $nowikiOpenMatch && trim( $nowikiOpenMatches[1] ) == '' ) ) )
            {
                # substr() instead of $t{0}: $t is empty for blank lines, and
                # reading offset 0 of an empty string raises a notice.
                if ( " " == substr( $t, 0, 1 ) && trim( $t ) != '' &&
                    ( !$this->mInNowiki || ( $nowikiOpenMatch && strlen( $nowikiOpenMatches[1] ) > 0 ) ) )
                {
                    // pre
                    if ( $this->mLastSection != 'pre' ) {
                        $paragraphStack = false;
                        $output .= $this->closeParagraph() . '<pre>';
                        $this->mLastSection = 'pre';
                    }
                } else {
                    // paragraph
                    if ( '' == trim( $t ) ) {
                        if ( $paragraphStack ) {
                            # Second blank line in a row: emit the held markup plus a break
                            $output .= $paragraphStack . '<br />';
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else {
                            # First blank line: hold the paragraph markup back
                            if ( $this->mLastSection != 'p' ) {
                                $output .= $this->closeParagraph();
                                $this->mLastSection = '';
                                $paragraphStack = "<p>";
                            } else {
                                $paragraphStack = '</p><p>';
                            }
                        }
                    } else {
                        if ( $paragraphStack ) {
                            $output .= $paragraphStack;
                            $paragraphStack = false;
                            $this->mLastSection = 'p';
                        } else if ( $this->mLastSection != 'p' ) {
                            $output .= $this->closeParagraph() . '<p>';
                            $this->mLastSection = 'p';
                        }
                    }
                }
            }
        }
        if ( $nowikiCloseMatch ) $this->mInNowiki = false;
        # While paragraph markup is held back the (blank) line itself is not emitted
        if ( $paragraphStack === false ) {
            $output .= $t . "\n";
        }
    }
    # Close whatever list levels the last line left open
    while ( $prefixLength ) {
        $output .= $this->closeList( substr( $pref2, $prefixLength - 1, 1 ) );
        --$prefixLength;
    }
    if ( "" != $this->mLastSection ) {
        $output .= "</" . $this->mLastSection . ">";
        $this->mLastSection = "";
    }
    $output = $this->unstrip( $output, $dblStripState );

    wfProfileOut( $fname );
    return $output;
}
1181
# Returns the current value for the magic variable identified by $index
# (one of the MAG_* constants handled below), or NULL for any other index.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;
    switch ( $index ) {
        case MAG_CURRENTMONTH:
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Namespace name as given by $wgLang (patch by Dori), rather than
            # Namespace::getCanonicalName().
            $value = $wgLang->getNsText( $this->mTitle->getNamespace() );
            break;
        case MAG_CURRENTDAYNAME:
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }
    return $value;
}
1215
# Rebuilds mVariables from scratch: for every ID in $wgVariableIDs the
# matching magic word adds itself to mVariables with the variable's
# current value.
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();
    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1225
# Expands {{variable}}, {{{argument}}} and {{template}} constructs in $text.
#
# $text - text to process
# $args - arguments of the template currently being expanded; pushed onto
#         mArgStack for the duration of the call
# Returns the text with substitutions applied.
#
# Variable and argument substitution only happen for HTML output; brace
# (template) substitution runs for every output type.
/* private */ function replaceVariables( $text, $args = array() )
{
    global $wgLang, $wgScript, $wgArticlePath;

    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }

    # Character classes for the patterns below; the brace-free variant is
    # used where the match must not run across nested braces.
    $titleChars = Title::legalChars();
    $nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # The callbacks are plain functions and find the parser through this global.
    # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
    $GLOBALS['wgCurParser'] =& $this;

    if ( $this->mOutputType == OT_HTML ) {
        # Variable substitution
        $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

        # Argument substitution
        $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
    }

    # Template substitution
    $text = preg_replace_callback( "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s", "wfBraceSubstitution", $text );

    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1263
# Regex callback for {{name}}: returns the value of the variable named by
# $matches[1], flagging the output as containing old magic, or the whole
# match unchanged when no such variable exists.
function variableSubstitution( $matches )
{
    $variableName = $matches[1];

    if ( !array_key_exists( $variableName, $this->mVariables ) ) {
        return $matches[0];
    }

    $this->mOutput->mContainsOldMagic = true;
    return $this->mVariables[$variableName];
}
1274
# Regex callback for {{...}} constructs.
#
# $matches[1] - optional newline before the braces
# $matches[2] - the part before the first "|" (title characters only)
# $matches[3] - "" or the "|"-prefixed argument list
#
# Tries, in order: triple braces, SUBST, MSGNW/MSG/INT, NS, LOCALURL(E),
# internal variables and finally loading a template from the database.
# Returns the replacement text, or the original match when nothing applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;

    $found = false;    # a replacement has been determined
    $nowiki = false;   # MSGNW: escape the result instead of parsing it
    $noparse = false;  # return the text without running the parser on it
    $title = NULL;

    $newline = $matches[1];
    $part1 = $matches[2];

    # If the third subpattern matched anything, it will start with |
    $args = ( $matches[3] !== "" )
        ? explode( "|", substr( $matches[3], 1 ) )
        : array();
    $argc = count( $args );

    # {{{}}}
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse = true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs = MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            $namespaceNumber = intval( $part1 );
            if ( $namespaceNumber ) {
                $text = $wgLang->getNsText( $namespaceNumber );
                $found = true;
            } else {
                # Not a non-zero number: try it as a canonical namespace name
                $namespaceIndex = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $namespaceIndex ) ) {
                    $text = $wgLang->getNsText( $namespaceIndex );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal = MagicWord::get( MAG_LOCALURL );
        $mwLocalE = MagicWord::get( MAG_LOCALURLE );

        # Name of the Title method to call, or '' when neither word matched
        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                # The first argument, if present, is passed on to the URL method
                if ( $argc > 0 ) {
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }

    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
        # Clean up argument array: "name=value" arguments are keyed by name,
        # all others are numbered from 1 in order of appearance.
        $assocArgs = array();
        $position = 1;
        foreach ( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$position++] = $arg;
            } else {
                # trim() always yields a string, so no further checks are needed
                $name = trim( substr( $arg, 0, $eqpos ) );
                $assocArgs[$name] = trim( substr( $arg, $eqpos+1 ) );
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, $newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    return $found ? $text : $matches[0];
}
1473
1474 # Triple brace replacement -- used for template arguments
# Regex callback for {{{name}}}: looks the trimmed name up in the argument
# set on top of mArgStack. A known argument is run through stripParse();
# otherwise the match is returned unchanged.
function argSubstitution( $matches )
{
    $newline = $matches[1];
    $argName = trim( $matches[2] );
    $currentArgs = end( $this->mArgStack );

    if ( !array_key_exists( $argName, $currentArgs ) ) {
        return $matches[0];
    }
    return $this->stripParse( $currentArgs[$argName], $newline, array() );
}
1488
1489 # Returns true if the function is allowed to include this entity
# Counts one more inclusion of the entity keyed by $dbk and reports whether
# the count is still within MAX_INCLUDE_REPEAT (true) or not (false).
function incrementIncludeCount( $dbk )
{
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }
    $this->mIncludeCount[$dbk]++;

    return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1501
1502
1503 # Cleans up HTML, removes dangerous tags and attributes
# Cleans up HTML in $text: removes HTML comments, keeps only whitelisted
# tags (none unless $wgUserHtml is set), filters their attributes through
# fixTagAttributes() and escapes everything else that starts with "<".
#
# Without $wgUseTidy the tag nesting is also tracked: mismatched closing
# tags, table cells outside a table and disallowed self-nesting are
# escaped, and tags still open at the end are closed.
# With $wgUseTidy only the whitelist and attribute filtering are applied.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    if ( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
            "td", "th", "tr"
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments. A comment alone on its line is removed together
    # with the line break before it.
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

    # Groups: 1 = "/" of a closing tag, 2 = tag name, 3 = attributes,
    # 4 = ">" or "/>", 5 = text up to the next "<".
    $tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    if ( !$wgUseTidy ) {
        $tagstack = array(); $tablestack = array();
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagPattern, $x, $regs ) ) {
                # Not a tag at all: show the "<" literally
                $text .= "&lt;" . str_replace( ">", "&gt;", $x );
                continue;
            }
            list( , $slash, $t, $params, $brace, $rest ) = $regs;
            $t = strtolower( $t );

            $badtag = 0 ;
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    if ( ! in_array( $t, $htmlsingle ) &&
                        ( $ot = @array_pop( $tagstack ) ) != $t ) {
                        # Does not match the innermost open tag: put that one back
                        @array_push( $tagstack, $ot );
                        $badtag = 1;
                    } else {
                        if ( $t == "table" ) {
                            # Back to the tag stack saved when the table was opened
                            $tagstack = array_pop( $tablestack );
                        }
                        $newparams = "";
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                        ! in_array( "table", $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                        ! in_array ( $t , $htmlnest ) ) {
                        $badtag = 1 ;
                    } else if ( ! in_array( $t, $htmlsingle ) ) {
                        if ( $t == "table" ) {
                            # Each table gets a fresh tag stack of its own
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes($params);
                }
                if ( ! $badtag ) {
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
            $text .= "</$t>\n";
            if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            if ( preg_match( $tagPattern, $x, $regs ) ) {
                list( , $slash, $t, $params, $brace, $rest ) = $regs;
                $t = strtolower( $t );
                if ( in_array( $t, $htmlelements ) ) {
                    $newparams = $this->fixTagAttributes($params);
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1619
1620
1621 /*
1622 *
1623 * This function accomplishes several tasks:
1624 * 1) Auto-number headings if that option is enabled
1625 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1626 * 3) Add a Table of contents on the top for users who have enabled the option
1627 * 4) Auto-anchor headings
1628 *
1629 * It loops through all headlines, collects the necessary data, then splits up the
1630 * string and re-inserts the newly formatted headlines.
1631 *
1632 */
1633
# Rewrites the <h1>..<h6> headings found in $text.
#
# $text   - HTML text containing the headings
# $isMain - when false, no table of contents is inserted
# Returns the text with every heading replaced by its anchored (and
# optionally numbered / edit-linked) version, and the table of contents
# appended to the block before the first heading when it is to be shown.
/* private */ function formatHeadings( $text, $isMain=true )
{
    global $wgInputEncoding;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    if ( $this->mTitle->userCanEdit() ) {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    } else {
        $showEditLink = 0;
        $rightClickHack = 0;
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if ( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if ( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now.
    # $matches[1] = level digit, [2] = rest of the opening tag, [3] = heading text.
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if ( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
    # override above conditions and always show TOC
    $mw =& MagicWord::get( MAG_FORCETOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # Numbers are needed both for numbered headings and for TOC lines
    $wantNumbers = ( $doNumberHeadings || $doShowToc );

    $headlineCount = 0;         # headline counter
    $toclevel = 0;              # current TOC indentation depth
    $toc = "";
    $head = array();            # rebuilt heading markup, by headline index
    $sublevelCount = array();   # headings seen so far, per level
    $anchorUses = array();      # occurrences of each canonical anchor
    $level = 0;
    $prevlevel = 0;

    foreach ( $matches[3] as $headline ) {
        if ( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];

        if ( $wantNumbers && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if ( $wantNumbers && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1] = 0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }

        # count number of headlines for each level
        if ( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;

        # Dotted section number built from the non-empty counters up to this level
        $numbering = "";
        if ( $wantNumbers ) {
            $numberParts = array();
            for ( $i = 1; $i <= $level; $i++ ) {
                if ( !empty( $sublevelCount[$i] ) ) {
                    $numberParts[] = $sublevelCount[$i];
                }
            }
            $numbering = implode( ".", $numberParts );
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

        # strip out HTML
        $canonized_headline = preg_replace( "/<.*?" . ">/", "", $canonized_headline );
        $tocline = trim( $canonized_headline );
        $canonized_headline = preg_replace( "/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
        # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
        $canonized_headline = str_replace( '%C2%A0', '_', $canonized_headline );

        # count how many times this anchor occurred so dupes can be told apart
        if ( !isset( $anchorUses[$canonized_headline] ) ) {
            $anchorUses[$canonized_headline] = 0;
        }
        $anchorUses[$canonized_headline]++;
        $occurrence = $anchorUses[$canonized_headline];

        # Prepend the number to the heading text
        if ( $wantNumbers ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if ( $doNumberHeadings && count( $matches[3] ) > 1 ) {
                # the two are different if the line contains a link
                $headline = $numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section;
        # repeated headings get "_2", "_3", ... appended
        $anchor = $canonized_headline;
        if ( $occurrence > 1 ) {
            $anchor .= "_" . $occurrence;
        }
        if ( $doShowToc ) {
            $toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
        }

        $headMarkup = "";
        if ( $showEditLink ) {
            $headMarkup .= $sk->editSectionLink( $headlineCount+1 );
        }

        # Add the edit section span
        if ( $rightClickHack ) {
            $headline = $sk->editSectionScript( $headlineCount+1, $headline );
        }

        # give headline the correct <h#> tag
        $headMarkup .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
        $head[$headlineCount] = $headMarkup;

        $headlineCount++;
    }

    if ( $doShowToc ) {
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines
    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );

    $full = "";
    foreach ( $blocks as $i => $block ) {
        $full .= $block;
        if ( $doShowToc && !$i && $isMain ) {
            # Top anchor now in skin
            $full = $full . $toc;
        }
        if ( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
    }

    return $full;
}
1819
# Turns "ISBN <number>" sequences in $text into links to Special:Booksources.
#
# After each "ISBN " any spaces are skipped, then the longest run of
# digits, hyphens and upper-case letters is taken as the number. If that
# run contains nothing but hyphens (or is empty) the text is left as it was.
# Returns the text with the links inserted.
/* private */ function magicISBN( $text )
{
    # The delimiter is a literal string, so explode() does the job of the
    # removed POSIX split(). The leading space guarantees that the first
    # piece is never the start of a candidate.
    $a = explode( "ISBN ", " $text" );
    if ( count ( $a ) < 2 ) return $text;
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    foreach ( $a as $x ) {
        # strspn() measures the runs without reading offsets of a
        # possibly empty string.
        $blankLength = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $blankLength );
        $x = (string)substr( $x, $blankLength );

        $isbnLength = strspn( $x, $valid );
        $isbn = (string)substr( $x, 0, $isbnLength );
        $x = (string)substr( $x, $isbnLength );

        $num = str_replace( array( "-", " " ), "", $isbn );

        if ( "" === $num ) {
            # Not a number after all: put back everything that was consumed,
            # including any hyphens.
            $text .= "ISBN $blank$isbn$x";
        } else {
            $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
            $text .= "<a href=\"" .
                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                "\" class=\"internal\">ISBN $isbn</a>";
            $text .= $x;
        }
    }
    return $text;
}
# Turns "RFC <number>" sequences in $text into external links.
#
# After each "RFC " any spaces are skipped, then the longest run of digits
# is taken as the RFC number. The link target is the "rfcurl" message with
# "$1" replaced by the number. Text without a number is left unchanged.
# Returns the text with the links inserted.
/* private */ function magicRFC( $text )
{
    # The delimiter is a literal string, so explode() does the job of the
    # removed POSIX split().
    $a = explode( "RFC ", " $text" );
    if ( count ( $a ) < 2 ) return $text;
    $text = (string)substr( array_shift( $a ), 1 );
    $valid = "0123456789";

    foreach ( $a as $x ) {
        # strspn() measures the runs without reading offsets of a
        # possibly empty string.
        $blankLength = strspn( $x, " " );
        $blank = (string)substr( $x, 0, $blankLength );
        $x = (string)substr( $x, $blankLength );

        $rfcLength = strspn( $x, $valid );
        $rfc = (string)substr( $x, 0, $rfcLength );
        $x = (string)substr( $x, $rfcLength );

        if ( "" === $rfc ) {
            $text .= "RFC $blank$x";
        } else {
            $url = wfMsg( "rfcurl" );
            $url = str_replace( "$1", $rfc, $url);
            $sk =& $this->mOptions->getSkin();
            $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
            $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
        }
    }
    return $text;
}
1886
# Entry point producing altered wiki markup (output type OT_WIKI).
#
# $text       - wikitext to transform
# $title      - title of the page, stored by reference in mTitle
# $user       - user passed on to pstPass2()
# $options    - parser options, stored in mOptions
# $clearState - when true, clearState() is called before processing
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_WIKI;

    if ( $clearState ) {
        $this->clearState();
    }

    # Normalise line endings
    $text = str_replace( "\r\n", "\n", $text );

    # Stripped sections are set aside during the pass and restored afterwards
    $stripState = false;
    $text = $this->strip( $text, $stripState, false );
    $text = $this->pstPass2( $text, $user );

    return $this->unstrip( $text, $stripState );
}
1915
# Second pass of the pre-save transform.
#
# Runs replaceVariables(), expands signatures (~~~, ~~~~, ~~~~~), completes
# context links such as [[|name]] and [[name (context)|]], trims trailing
# whitespace and removes the MAG_END marker.
#
# $text - wikitext to transform
# $user - user whose name and "nickname" option are used for signatures
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
    global $wgLang, $wgLocaltimezone;

    # Variable replacement
    # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
    $text = $this->replaceVariables( $text );

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) { $k = $n; }
    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv( "TZ" ); putenv( "TZ=$wgLocaltimezone" );
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
        " (" . date( "T" ) . ")";
    if ( isset( $wgLocaltimezone ) ) putenv( "TZ=$oldtz" );

    # The tilde runs are literal, so str_replace() is used: with
    # preg_replace() a "$1" or "\1" inside the name or nickname would be
    # treated as a back-reference and corrupt the signature.
    # Longest run first, so "~~~~~" is not consumed as "~~~~" plus "~".
    $userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace( "~~~~~", $d, $text );
    $text = str_replace( "~~~~", "$userLink $d", $text );
    $text = str_replace( "~~~", $userLink, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
    # [[ns:page (cont)|]]

    # The context is the parenthesised suffix of the current page title, if any
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
1986
1987 # Set up some variables which are usually set up in parse()
1988 # so that an external function can call some class members with confidence
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	# Fill in the members that parse() would otherwise have set, so that
	# code outside the class can call individual parser methods safely.
	#   $title       title object, stored by reference in mTitle
	#   $options     stored in mOptions
	#   $outputType  stored in mOutputType
	#   $clearState  when true (the default) clearState() is called afterwards
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	# Caller asked to keep the existing parser state: nothing more to do.
	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
1998
function transformMsg( $text, $options ) {
	# Run variable replacement over $text with the output type set to OT_MSG,
	# using the global $wgTitle as the title and $options as the options.
	# Returns the transformed text. If this method is entered again while a
	# call is still in progress, the text is handed back untouched.
	global $wgTitle;
	static $inProgress = false;

	# The static flag guards against infinite recursion.
	if ( !$inProgress ) {
		$inProgress = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$inProgress = false;
	}

	return $text;
}
2018 }
2019
# Holds the result of a parse: the output text together with the language
# links, the category links, the "contains old magic" flag and a cache time.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor. Every argument is optional; the cache time starts out as
	# an empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors: each returns the corresponding member unchanged.
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators: each hands the member and the new value to wfSetVar() and
	# returns whatever wfSetVar() returns.
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold another ParserOutput into this one: its language links and category
	# links are appended to ours and the old-magic flags are OR-ed together.
	# The text and the cache time of this object are left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# The category links have to come from $other. The previous code appended
		# $this->mLanguageLinks here, which lost $other's categories and mixed
		# language links into the category list.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2053
# Set of switches and preferences consulted while parsing. Part of it is
# copied from site-wide globals, the rest from a user's options; see
# initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                   # Use texvc to expand <math> tags
	var $mUseCategoryMagic;         # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;          # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;           # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;      # Allow external images inline
	var $mSkin;                     # Reference to the preferred skin
	var $mDateFormat;               # Date format index
	var $mEditSection;              # Create "edit section" links
	var $mEditSectionOnRightClick;  # Generate JavaScript to edit section on right click
	var $mNumberHeadings;           # Automatically number headings
	var $mShowToc;                  # Show table of contents

	#
	# Getters: each returns the matching member as stored.
	#
	function getUseTeX()
	{
		return $this->mUseTeX;
	}

	function getUseCategoryMagic()
	{
		return $this->mUseCategoryMagic;
	}

	function getUseDynamicDates()
	{
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic()
	{
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages()
	{
		return $this->mAllowExternalImages;
	}

	function getSkin()
	{
		return $this->mSkin;
	}

	function getDateFormat()
	{
		return $this->mDateFormat;
	}

	function getEditSection()
	{
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick()
	{
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings()
	{
		return $this->mNumberHeadings;
	}

	function getShowToc()
	{
		return $this->mShowToc;
	}

	#
	# Setters: each passes the member and the new value to wfSetVar()
	# (wfSetRef() for the skin) and returns that helper's result.
	#
	function setUseTeX( $x )
	{
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseCategoryMagic( $x )
	{
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function setUseDynamicDates( $x )
	{
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x )
	{
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x )
	{
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setSkin( $x )
	{
		return wfSetRef( $this->mSkin, $x );
	}

	function setDateFormat( $x )
	{
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x )
	{
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x )
	{
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x )
	{
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x )
	{
		return wfSetVar( $this->mShowToc, $x );
	}

	# Factory: build a fresh ParserOptions and initialise it from $user.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Populate every member. The first five come from the $wg* globals, the
	# remainder from the user's skin and options. When $userInput is empty a
	# new User object, flagged as loaded, is used in its place.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$prefsUser =& $userInput;
		} else {
			$prefsUser = new User;
			$prefsUser->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user settings
		$this->mSkin =& $prefsUser->getSkin();
		$this->mDateFormat = $prefsUser->getOption( "date" );
		$this->mEditSection = $prefsUser->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $prefsUser->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $prefsUser->getOption( "numberheadings" );
		$this->mShowToc = $prefsUser->getOption( "showtoc" );
	}

}
2126
2127 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches )
{
	# Plain-function entry point so a regex callback can reach the parser
	# object held in the global $wgCurParser; the match array is passed
	# through and the method's result is returned as-is.
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
2133
function wfArgSubstitution( $matches )
{
	# Plain-function entry point so a regex callback can reach the parser
	# object held in the global $wgCurParser; the match array is passed
	# through and the method's result is returned as-is.
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
2139
function wfVariableSubstitution( $matches )
{
	# Plain-function entry point so a regex callback can reach the parser
	# object held in the global $wgCurParser; the match array is passed
	# through and the method's result is returned as-is.
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2145
2146 ?>