section anchor encoding:
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
# Load the optional extensions only when their setting is switched on.
# !empty() is used instead of a bare array read so that a configuration
# which never defines the setting does not raise an undefined-index notice.
if( !empty( $GLOBALS['wgUseWikiHiero'] ) ){
	require_once('extensions/wikihiero/wikihiero.php');
}
if( !empty( $GLOBALS['wgUseTimeline'] ) ){
	require_once('extensions/timeline/Timeline.php');
}
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Upper bound on how many times one page may be included while parsing
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name understood by extractTags(): when passed as $tag it makes
# extractTags() strip <!-- HTML comments --> instead of an <XML>-style tag.
# The value must never collide with anything usable in wiki syntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the placeholder markers generated while stripping
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor: puts the parser into its initial (cleared) state.
#
# __construct() is the form PHP 5 and later look for, and the only form
# PHP 8 still treats as a constructor. The class-named method below is kept
# so that PHP 4 (where it IS the constructor) and any explicit
# $parser->Parser() calls continue to work.
function __construct()
{
	$this->clearState();
}

function Parser()
{
	$this->__construct();
}
69
# Resets every per-parse member to its initial value and installs a fresh
# ParserOutput object.
function clearState()
{
	# Result container
	$this->mOutput = new ParserOutput;

	# Counters and flags
	$this->mAutonumber = 0;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mLastSection = "";

	# Collections
	$this->mIncludeCount = array();
	$this->mStripState = array();
	$this->mArgStack = array();
}
82
83 # First pass--just handle <nowiki> sections, pass the rest off
84 # to internalParse() which does all the real work.
85 #
86 # Returns a ParserOutput
87 #
# Converts wiki markup to HTML.
#
# $text       - wiki markup to convert
# $title      - title object of the page being parsed (stored by reference)
# $options    - ParserOptions object
# $linestart  - passed through to internalParse() and doBlockLevels()
# $clearState - when true, clearState() is called before parsing
#
# Returns $this->mOutput with the generated HTML set as its text.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			"/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<hr *>/i" => '<hr />',
			"/<br *>/i" => '<br />',
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The lookahead lists what counts as an existing entity. It used to
			# read "(?!:amp;" (a stray colon) and "[0-9A-fa-f]" (a range that
			# also admits [ \ ] ^ _ and the backtick); both are corrected here.
			'/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		# When tidy post-processes the output, only the french spacing and
		# the <center> rewrite are applied here.
		$fixtags = array(
			# french spaces, last one Guillemet-left
			"/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			"/<center *>/i"=>'<div class="center">',
			"/<\\/center *>/i" => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	# <nowiki> content is put back only after block-level processing
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
144
# Returns a string of lowercase hex digits built from two random
# 31-bit integers.
/* static */ function getRandomString()
{
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
149
150 # Replaces all occurrences of <$tag>content</$tag> in the text
151 # with a random marker and returns the new text. the output parameter
152 # $content will be an associative array filled with data on the form
153 # $unique_marker => content.
154
155 # If $content is already set, the additional entries will be appended
156
157 # If $tag is set to STRIP_COMMENTS, the function will extract
158 # <!-- HTML comments -->
159
# Replaces every <$tag>...</$tag> section of $text with a unique marker.
#
# $tag         - tag name, or STRIP_COMMENTS to extract <!-- ... --> comments
# $text        - text to process
# $content     - (in/out) array of marker => extracted content; new entries
#                are added to whatever is already there
# $uniq_prefix - string prepended to every marker
#
# Returns the text with the sections replaced by their markers. A section
# whose closing tag is missing extends to the end of the text.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = "";

	# The opening/closing patterns do not change inside the loop
	if($tag==STRIP_COMMENTS) {
		$start = "/<!--/i";
		$end = "/-->/i";
	} else {
		# Quote the tag name so it can never act as regex syntax
		$quoted = preg_quote( $tag, "/" );
		$start = "/<\\s*$quoted\\s*>/i";
		$end = "/<\\/\\s*$quoted\\s*>/i";
	}

	while ( "" != $text ) {
		$p = preg_split( $start, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			# No (further) opening tag, or nothing follows it
			$text = "";
		} else {
			$q = preg_split( $end, $p[1], 2 );
			$marker = $rnd . sprintf("%08X", $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# With no closing tag preg_split() returns a single element;
			# reading $q[1] unguarded raised an undefined-offset notice.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped;
}
191
192 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
193 # If $render is set, performs necessary rendering operations on plugins
194 # Returns the text, and fills an array with data needed in unstrip()
195 # If the $state is already a valid strip state, it adds to the state
196
197 # When $stripcomments is set, HTML comments <!-- like this -->
198 # will be stripped in addition to other tags. This is important
199 # for section editing, where these comments cause confusion when
200 # counting the sections in the wikisource
# Pulls <nowiki>, <hiero>, <timeline>, <math> and <pre> sections (and,
# when $stripcomments is set, HTML comments) out of $text, replacing each with
# a unique marker.
#
# $text          - wiki text
# $state         - (in/out) strip state; the extracted sections are merged
#                  into it, or it is created when empty
# $stripcomments - also extract <!-- comments -->
#
# When the output type is OT_HTML the extracted sections are stored in their
# rendered form, otherwise they are stored wrapped in their original tags.
# Returns the text containing the markers.
function strip( $text, &$state, $stripcomments = false )
{
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$hiero_content = array();
	$timeline_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
	foreach( $hiero_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseWikiHiero']){
			$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
		} else {
			# NOTE(review): when rendering with the extension disabled the
			# content is stored unescaped -- confirm whether that is intended.
			$hiero_content[$marker] = "<hiero>$content</hiero>";
		}
	}

	$text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
	foreach( $timeline_content as $marker => $content ){
		if( $render && $GLOBALS['wgUseTimeline']){
			$timeline_content[$marker] = renderTimeline( $content );
		} else {
			# NOTE(review): same unescaped fallback as for <hiero> above.
			$timeline_content[$marker] = "<timeline>$content</timeline>";
		}
	}

	$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source literally. The closing tag
				# used to be emitted without its slash, and the content was
				# inserted unescaped; it is now escaped the same way
				# <nowiki> content is.
				$math_content[$marker] = "&lt;math&gt;" .
					wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	$text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	$extracted = array(
		'nowiki' => $nowiki_content,
		'hiero' => $hiero_content,
		'timeline' => $timeline_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content
	);

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		# A state created elsewhere may lack some of these keys; adding an
		# array to a missing (null) entry would be a fatal error, so each
		# key is checked individually.
		foreach ( $extracted as $kind => $items ) {
			if ( isset( $state[$kind] ) && is_array( $state[$kind] ) ) {
				$state[$kind] = $state[$kind] + $items;
			} else {
				$state[$kind] = $items;
			}
		}
	} else {
		$state = $extracted;
	}
	return $text;
}
290
291 # always call unstripNoWiki() after this one
# Puts back everything recorded in the strip state except the 'nowiki'
# sections (those are restored by unstripNoWiki()).
#
# $text  - text containing strip markers
# $state - strip state: array of kind => ( marker => replacement )
#
# Returns the text with the markers replaced. A state that is not an array
# leaves the text unchanged.
function unstrip( $text, &$state )
{
	if ( !is_array( $state ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted
	foreach ( array_reverse( $state, true ) as $kind => $contentDict ) {
		# Strict comparison: an integer key must not compare equal to 'nowiki'
		if ( $kind === 'nowiki' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
306 # always call this after unstrip() to preserve the order
# Replaces the markers recorded under $state['nowiki'] with their stored
# content, walking the entries from last to first.
function unstripNoWiki( $text, &$state )
{
	# Must expand in reverse order, otherwise nested tags will be corrupted
	$content = end( $state['nowiki'] );
	while ( $content !== false ) {
		$marker = key( $state['nowiki'] );
		$text = str_replace( $marker, $content, $text );
		$content = prev( $state['nowiki'] );
	}

	return $text;
}
316
317 # Add an item to the strip state
318 # Returns the unique tag which must be inserted into the stripped text
319 # The tag will be replaced with the original text in unstrip()
320
# Stores $text in the strip state under a new unique marker.
#
# $text  - text to store
# $state - (in/out) strip state; created when empty
#
# Returns the marker that was recorded under $state['item'].
function insertStripItem( $text, &$state )
{
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Create the same set of keys that strip() produces, so that a later
		# strip() call can merge into this state without hitting missing
		# 'timeline' / 'comment' entries.
		$state = array(
			'nowiki' => array(),
			'hiero' => array(),
			'timeline' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
335
336 # This method generates the list of subcategories and pages for a category
# Builds the HTML listing of subcategories and pages for a category page.
#
# Returns "" when category magic is disabled or the current title is not in
# the category namespace; otherwise an HTML fragment with the subcategory
# and article lists.
function categoryMagic ()
{
	global $wgLang ;
	# Always return a string (this path used to return nothing at all)
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	# Take the skin from the parser options, as the other methods of this
	# class do, rather than from the $wgUser global.
	$sk =& $this->mOptions->getSkin() ;

	$articles = array() ;
	$children = array() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;

	# For all pages that link to this category
	while ( $x = wfFetchObject ( $res ) )
	{
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			$children[] = $sk->makeLink ( $t ) ; # Subcategory
		} else {
			$articles[] = $sk->makeLink ( $t ) ; # Page in this category
		}
	}
	wfFreeResult ( $res ) ;

	# Showing subcategories
	if ( count ( $children ) > 0 ) {
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Showing pages in this category
	if ( count ( $articles ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( "category_header", $ti );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
393
# Returns the list of attribute names that fixTagAttributes() lets through
# (no scripting attributes).
function getHTMLattrs ()
{
	return array(
		# Generic
		"title", "align", "lang", "dir", "width", "height",
		# BR, HR
		"bgcolor", "clear", "noshade",
		# BLOCKQUOTE, Q, FONT
		"cite", "size", "face", "color",
		# For various lists, mostly deprecated but safe
		"type", "start", "value", "compact",
		# Tables
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan",
		# For CSS
		"id", "class", "name", "style"
	);
}
410
# Filters the attribute part of an HTML tag.
#
# $t - attribute text of a tag, e.g. 'border="1" onclick="..."'
#
# Returns the attribute text with every attribute not listed by
# getHTMLattrs() removed, or "" when a style attribute containing
# "expression", a "tp://"-style URL or "url(" is found.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This used preg_replace() with the /e modifier, which evaluated the
	# matched attribute text as PHP code (and no longer exists in PHP 7+).
	# A callback performs the same filtering without evaluating anything.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'filterTagAttribute' ),
		$t);
	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# Callback for fixTagAttributes(): returns the matched attribute (with its
# value, if it has one) when its name is approved, and "" otherwise.
# $m[1] is the attribute name, $m[3] the value when one was matched.
/* private */ function filterTagAttribute ( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return "" ;
	}
	if ( isset( $m[3] ) && $m[3] !== "" ) {
		return $m[1] . "=" . $m[3] ;
	}
	return $m[1] ;
}
433
434 /* interface with html tidy, used if $wgUseTidy = true */
# Pipes $text, wrapped in a minimal XHTML document, through the external
# tidy program configured by $wgTidyBin / $wgTidyConf / $wgTidyOpts.
#
# Returns tidy's output, or the original text followed by an HTML comment
# when tidy produced nothing for a non-empty input.
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = "Parser::tidy";
	wfProfileIn( $fname );

	$cleansource = '';

	# Build the option string in a local variable. Appending to the global
	# $wgTidyOpts made the encoding flag pile up on every call.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array("pipe", "r"),              # tidy's stdin: we write to it
		1 => array("pipe", "w"),              # tidy's stdout: we read from it
		2 => array("file", "/dev/null", "a")  # tidy's stderr is discarded
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
481
# Converts wiki table markup ({| ... |}) into HTML table markup.
#
# $t - text to process
#
# Works line by line. Four parallel stacks (one entry per currently open
# table, so nested tables work) record: whether a cell is open ($td), which
# tag that cell uses ($ltd), whether a row is open ($tr), and the attributes
# waiting for the next row ($ltr).
# Returns the converted text; tables still open at the end are closed.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Attributes start right after the two-character "{|". Starting
			# at offset 3 lost the first character of "{|attr=..." lines;
			# leading whitespace is trimmed by fixTagAttributes() anyway.
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			# End of table: close the open cell and row, then the table
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		/* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
		{
			$z = trim ( substr ( $x , 2 ) ) ;
			$t[$k] = "<caption>{$z}</caption>\n" ;
		}*/
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			# Row separator: whatever follows the dashes becomes the
			# attribute string of the next row
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			# "!!" separates header cells the way "||" separates data cells
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open a row first if none is open (captions sit outside rows)
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, then open the new one
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "td" ;
				else if ( $fc == "!" ) $l = "th" ;
				else if ( $fc == "+" ) $l = "caption" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				# "attributes|content": a single "|" splits off the cell attributes
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag it was opened with (td, th or caption)
		# instead of always emitting </td>
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
581
582 # Parses the text and adds the result to the strip state
583 # Returns the strip tag
# Strips and parses $text, stores the result in the parser's strip state and
# returns $newline followed by the marker standing for the stored result.
function stripParse( $text, $newline, $args )
{
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline.$marker;
}
590
# Runs the individual markup transformations over already-stripped text.
#
# $text      - stripped wiki text
# $linestart - accepted for the callers' benefit; not read in this method
# $args      - arguments handed to replaceVariables()
# $isMain    - handed to formatHeadings()
#
# The order of the steps below matters; do not rearrange them.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
	$fname = "Parser::internalParse";
	wfProfileIn( $fname );

	# Sanitise HTML and expand variables
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

	$text = $this->doHeadings( $text );

	if( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	$text = $this->doAllQuotes( $text );

	# External links first, then two passes over the internal links
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->replaceInternalLinks ( $text );
	//$text = $this->doTokenizedParser ( $text );

	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# The category listing is appended only the first time through
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
626
627
# Turns "== Heading ==" style lines into <h1>..<h6> elements. The deepest
# level (six equals signs) is handled first so that shorter runs do not
# consume longer ones.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
637
# Applies doQuotes() to every line of $text separately and joins the
# results with newlines again.
/* private */ function doAllQuotes( $text )
{
	$rendered = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$rendered[] = $this->doQuotes ( "", $line, "" );
	}
	return implode( "\n", $rendered );
}
647
# Recursively converts '' and ''' quote markup within one line into
# <em> / <strong> elements.
#
# $pre  - text collected so far that belongs inside the pending element
# $text - remaining text to scan
# $mode - current state: "", "em", "strong", "emstrong", "strongem" or "both"
#
# Returns the converted text.
/* private */ function doQuotes( $pre, $text, $mode )
{
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No quote markup left: close whatever is still open
		$strongText = ($text == "") ? "" : "<strong>{$text}</strong>";
		$emText = ($text == "") ? "" : "<em>{$text}</em>";
		$nothing = ($pre == "") && ($text == "");
		switch ( $mode ) {
			case "":
				return $pre . $text;
			case "em":
				return $pre . $emText;
			case "strong":
				return $pre . $strongText;
			case "strongem":
				return $nothing ? "" : "<strong>{$pre}{$emText}</strong>";
			default:
				return $nothing ? "" : "<em>{$pre}{$strongText}</em>";
		}
	}

	$before = $m[1];
	$after = $m[2];
	$beforeStrong = ($before == "") ? "" : "<strong>{$before}</strong>";
	$beforeEm = ($before == "") ? "" : "<em>{$before}</em>";

	if ( substr ($after, 0, 1) == "'" ) {
		# A third apostrophe follows: this is a ''' (strong) toggle
		$after = substr ($after, 1);
		switch ( $mode ) {
			case "em":
				return $this->doQuotes ( $before, $after, ($before == "") ? "both" : "emstrong" );
			case "strong":
				return $beforeStrong . $this->doQuotes ( "", $after, "" );
			case "emstrong":
			case "both":
				return $this->doQuotes ( "", $pre.$beforeStrong.$after, "em" );
			case "strongem":
				return "<strong>{$pre}{$beforeEm}</strong>" . $this->doQuotes ( "", $after, "em" );
			default:
				return $before . $this->doQuotes ( "", $after, "strong" );
		}
	}

	# Exactly two apostrophes: this is a '' (em) toggle
	switch ( $mode ) {
		case "strong":
			return $this->doQuotes ( $before, $after, ($before == "") ? "both" : "strongem" );
		case "em":
			return $beforeEm . $this->doQuotes ( "", $after, "" );
		case "emstrong":
			return "<em>{$pre}{$beforeStrong}</em>" . $this->doQuotes ( "", $after, "strong" );
		case "strongem":
		case "both":
			return $this->doQuotes ( "", $pre.$beforeEm.$after, "strong" );
		default:
			return $before . $this->doQuotes ( "", $after, "em" );
	}
}
695
696 # Note: we have to do external links before the internal ones,
697 # and otherwise take great care in the order of things here, so
698 # that we don't end up interpreting some URLs twice.
699
# Runs subReplaceExternalLinks() once per supported protocol, in a fixed
# order. The flag says whether unlabelled bracketed links of that protocol
# are auto-numbered.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
714
# Converts the external links of one protocol into HTML.
#
# $s          - text to process
# $protocol   - protocol name without the colon, e.g. "http"
# $autonumber - true to label unlabelled [bracketed] links with a running
#               number; also required for inline images to be produced
#
# First pass: free-standing URLs (not preceded by "[") become links or, when
# allowed, images. Second pass: [url] and [url text] bracketed links.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Temporary stand-in for the protocol, so that URLs converted by the
	# first pass are not matched again
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$urlPattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
	}

	$escapedUrl = wfEscapeHTML( "{$unique}:\\3" );
	$linkAttribs = $sk->getExternalLinkAttributes( "{$unique}:\\3", $escapedUrl );
	$s = preg_replace( $urlPattern, "\\1" . "<a href=\"{$unique}:\\3\"" .
		$linkAttribs . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		"</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Second pass: split at every "[protocol:". The leading blank guarantees
	# a non-empty first piece and is removed again straight away.
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $pieces );
	$s = substr( $s, 1 );

	$bareForm = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$labelledForm = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $line ) {
		if ( preg_match( $bareForm, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $labelledForm, $line, $m ) ) {
			# [url text]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a link after all: put the text back unchanged
			$s .= "[{$protocol}:" . $line;
			continue;
		}

		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
			$paren = "";
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
781
782
# Converts [[internal links]] into HTML.
#
# $s - text to process
#
# Handles plain links, piped links, subpage links, and the special
# treatment of interlanguage, image, category, media and special-page
# links. Interlanguage and category links are recorded on $this->mOutput
# instead of being rendered inline. Returns the converted text.
/* private */ function replaceInternalLinks( $s )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = "Parser::replaceInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
	$sk =& $this->mOptions->getSkin();

	# Split at every "[[". The leading blank guarantees a non-empty first
	# piece and is removed again straight away.
	$a = explode( "[[", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
		$new_prefix = $m[2];
		$s = $m[1];
	} else {
		$new_prefix="";
	}

	wfProfileOut( "$fname-setup" );

	# NOTE: wfProfileIn( $fname ) is called exactly once above, so
	# wfProfileOut( $fname ) must be called exactly once too -- at the end.
	# The loop used to call it before every "continue" as well, leaving the
	# profiling calls unbalanced.
	foreach ( $a as $line ) {
		$prefix = $new_prefix;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . "[[" . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ":");
		if( $c == "/" ) { # subpage
			if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
				if( "" == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( "" == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a valid title; output directly
			$s .= $prefix . "[[" . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Interlanguage link: recorded on the output, not rendered here
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
				$wgLinkCache->resume();

				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 for a "#" in first position, which the loose
		# "== FALSE" test mistook for "no # present"; compare strictly.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
			( strpos( $link, "#" ) === FALSE ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
938
939 # Some functions here used by doBlockLevels()
940 #
# Returns the closing tag (followed by a newline) for the element named in
# $this->mLastSection, or "" when none is recorded, and resets the
# mLastSection / mInPre members.
/* private */ function closeParagraph()
{
	if ( '' != $this->mLastSection ) {
		$closing = "</" . $this->mLastSection . ">\n";
	} else {
		$closing = "";
	}
	$this->mLastSection = "";
	$this->mInPre = false;
	return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters (bytes) they share.
#
# $st1, $st2 - the strings to compare
# Returns an integer between 0 and the length of the shorter string.
#
/* private */ function getCommon( $st1, $st2 )
{
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	# Square-bracket offsets are used because the curly-brace form
	# ($st1{$i}) was deprecated in PHP 7.4 and removed in PHP 8.0.
	for ( $i = 0; $i < $limit; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
965 # These next three functions open, continue, and close the list
966 # element appropriate to the prefix character passed into them.
967 #
# Open a new list for the given prefix character and start its first item.
# Any open paragraph section is closed first; ";" additionally marks a
# definition term as open.
/* private */ function openList( $char )
{
	$html = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$html .= "<ul><li>";
			break;
		case "#":
			$html .= "<ol><li>";
			break;
		case ":":
			$html .= "<dl><dd>";
			break;
		case ";":
			$html .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix: the marker replaces (rather than extends)
			# whatever closeParagraph() returned.
			$html = "<!-- ERR 1 -->";
	}

	return $html;
}
983
# Close the current list item and open the next one at the same level.
# For definition lists the closing tag depends on whether a term (<dt>)
# is currently open, and the open-term flag is updated to match the new
# item.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$closing = $this->mDTopen ? "</dt>" : "</dd>";
	$startsTerm = ( ";" == $char );
	$this->mDTopen = $startsTerm;

	return $closing . ( $startsTerm ? "<dt>" : "<dd>" );
}
1000
# Close the last item and the list belonging to the given prefix
# character. Only "*", "#" and ":" are recognised; anything else
# (including ";") yields an error marker without a trailing newline.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$closing = "</li></ul>";
			break;
		case "#":
			$closing = "</li></ol>";
			break;
		case ":":
			if ( $this->mDTopen ) {
				# A definition term was still open; close it instead of a <dd>
				$this->mDTopen = false;
				$closing = "</dt></dl>";
			} else {
				$closing = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $closing . "\n";
}
1016
# Convert line-oriented wiki markup into block-level HTML.
#
# The text is walked line by line. Lines starting with any mix of
# * # : ; become (possibly nested) lists. Outside lists and outside
# recognised block-level HTML, non-blank lines starting with a space go
# into a <pre> section and other lines are grouped into <p> paragraphs.
#
# $text      - text to process
# $linestart - when false, the first line is copied to the output as is
# Returns the processed text.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	# Patterns for lines that open or close a block-level element; they do
	# not depend on the line, so they are built once.
	// XXX: use a stack for nestable elements like span, table and div
	$openPattern = "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i";
	$closePattern =
		"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
		"<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".UNIQ_PREFIX."-pre|<\\/li|<\\/ul)/i";

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# false, or the paragraph markup held back until the next line is seen
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match( "/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match( "/<pre/i", $oLine );
		if ( !$this->mInPre ) {
			$this->mInPre = !empty( $preOpenMatch );
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 treats ";" as ":" so that term and definition lines
			# count as the same list level when prefixes are compared.
			$pref2 = str_replace( ";", ":", $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				$this->splitDefinitionTerm( $output, $t );
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					$this->splitDefinitionTerm( $output, $t );
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$openmatch = preg_match( $openPattern, $t );
			$closematch = preg_match( $closePattern, $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				# closeParagraph() cleared the pre flag; restore it when
				# this line opens a <pre> without closing it again.
				if( $preOpenMatch and !$preCloseMatch ) {
					$this->mInPre = true;
				}
				$inBlockElem = !$closematch;
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# The blank-line test comes first so that the first
				# character is only read from a non-empty string.
				if ( trim( $t ) != '' and " " == $t[0] ) {
					// pre
					if ( $this->mLastSection != 'pre' ) {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim( $t ) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ( $this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ( $this->mLastSection != 'p' ) {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While paragraph markup is being held back the (blank) line itself
		# is not emitted.
		if ( $paragraphStack === false ) {
			$output .= $t."\n";
		}
	}
	# Close every list level left open by the final line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $output;
}

# Split a definition-list line of the form "; term : definition".
# When $t contains a colon preceded by whitespace (or &nbsp;), the term is
# appended to $output, the list moves on to the definition item via
# nextItem( ":" ), and $t is reduced to the definition text. Otherwise
# both arguments are left untouched.
# FIXME: This is not foolproof. Something better in Tokenizer might help.
/* private */ function splitDefinitionTerm( &$output, &$t )
{
	if ( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
		$output .= $match[1] . $this->nextItem( ":" );
		$t = $match[2];
	}
}
1175
# Return the current value of the magic variable identified by $index
# (one of the MAG_* ids handled below), or NULL for any other id.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	if ( $index == MAG_CURRENTMONTH ) {
		return date( "m" );
	}
	if ( $index == MAG_CURRENTMONTHNAME ) {
		return $wgLang->getMonthName( date("n") );
	}
	if ( $index == MAG_CURRENTMONTHNAMEGEN ) {
		return $wgLang->getMonthNameGen( date("n") );
	}
	if ( $index == MAG_CURRENTDAY ) {
		return date("j");
	}
	if ( $index == MAG_PAGENAME ) {
		return $this->mTitle->getText();
	}
	if ( $index == MAG_NAMESPACE ) {
		# Localised namespace name (patch by Dori)
		return $wgLang->getNsText($this->mTitle->getNamespace());
	}
	if ( $index == MAG_CURRENTDAYNAME ) {
		# date("w") counts from 0; the weekday lookup is given that value plus one
		return $wgLang->getWeekdayName( date("w")+1 );
	}
	if ( $index == MAG_CURRENTYEAR ) {
		return date( "Y" );
	}
	if ( $index == MAG_CURRENTTIME ) {
		return $wgLang->time( wfTimestampNow(), false );
	}
	if ( $index == MAG_NUMBEROFARTICLES ) {
		return wfNumberOfArticles();
	}
	if ( $index == MAG_SITENAME ) {
		return $wgSitename;
	}
	if ( $index == MAG_SERVER ) {
		return $wgServer;
	}
	return NULL;
}
1209
# (Re)build $this->mVariables: every id listed in $wgVariableIDs has its
# magic word add itself to the array together with the variable's
# current value.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1219
# Replace {{variables}}, {{{arguments}}} and {{templates}} in $text.
#
# $text - text to process
# $args - template arguments made available to {{{argument}}} substitution
#         for the duration of this call (pushed on $this->mArgStack)
# Returns the text with the substitutions applied.
#
# Variable and argument substitution only run for HTML output; the
# template pass runs for every output type.
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# The regex callbacks named below are passed as plain function names,
	# so the current parser is published through a global here.
	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

		# Argument substitution
		$text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
	}
	# Template substitution
	$regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
	$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1257
# Regex callback for {{variable}}: returns the variable's value when the
# name is known (flagging the output as containing old-style magic),
# otherwise the matched text unchanged.
function variableSubstitution( $matches )
{
	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$value = $this->mVariables[$name];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
1268
# Regex callback for double-brace constructs, matched by the template
# pattern built in replaceVariables().
#
# $matches - [0] whole match, [1] optional newline before the braces,
#            [2] the part before the first |, [3] "" or "|arg|arg..."
# Returns the replacement text, or the whole match when nothing applies.
#
# The handlers are tried in order: triple braces, SUBST, MSGNW/MSG/INT,
# NS, LOCALURL/LOCALURLE, internal variables and finally loading the page
# from the database (template namespace by default).
#
# NOTE(review): on every path that produces new text without going
# through stripParse() (escaped MSGNW text, and all non-HTML output
# types) the captured $newline is not put back in front of the result.
# Confirm whether that is intended.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse = true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs =& MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			# NOTE(review): a numeric argument of 0 is falsy here and is
			# therefore looked up as a canonical name instead.
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal =& MagicWord::get( MAG_LOCALURL );
		$mwLocalE =& MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, if any, is handed to the URL method
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
		# Clean up argument array: "name=value" arguments are stored by
		# (trimmed) name, all others by position starting at 1.
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always yields a string, so no extra checks are needed
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1467
# Triple brace replacement -- used for template arguments.
# Regex callback: $matches[1] is the optional leading newline and
# $matches[2] the argument name. When the innermost entry of the argument
# stack has that name, its value is run through stripParse(); otherwise
# the matched text is returned unchanged.
function argSubstitution( $matches )
{
	$argName = trim( $matches[2] );
	$callerArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $callerArgs ) ) {
		return $matches[0];
	}
	return $this->stripParse( $callerArgs[$argName], $matches[1], array() );
}
1482
# Count one more inclusion of the entity identified by $dbk and report
# whether it is still within the MAX_INCLUDE_REPEAT limit.
# Returns true if the function is allowed to include this entity.
function incrementIncludeCount( $dbk )
{
	if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk]++;
	} else {
		$this->mIncludeCount[$dbk] = 1;
	}
	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1495
1496
# Cleans up HTML: removes HTML comments, escapes every tag that is not on
# the whitelist and filters the attributes of the remaining tags through
# fixTagAttributes().
#
# $text - text to clean
# Returns the cleaned text.
#
# The whitelists are empty unless $wgUserHtml is set. Without $wgUseTidy
# the nesting of tags is also checked with a stack, and tags left open
# are closed at the end of the text.
/* private */ function removeHTMLtags( $text )
{
	global $wgUseTidy, $wgUserHtml;
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
			"strike", "strong", "tt", "var", "div", "center",
			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
			"ruby", "rt" , "rb" , "rp", "p"
		);
		$htmlsingle = array(
			"br", "hr", "li", "dt", "dd"
		);
		$htmlnest = array( # Tags that can be nested--??
			"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
			"dl", "font", "big", "small", "sub", "sup"
		);
		$tabletags = array( # Can only appear inside table
			"td", "th", "tr"
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function; the call is
	# kept in case getHTMLattrs() has side effects. Confirm and remove.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments. The pattern has a single capture group, so the
	# former replacement "$2" always expanded to the empty string.
	$text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

	# After splitting on "<", each piece is expected to look like
	# [/]name[attributes][/]>trailing text
	$tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			if ( !preg_match( $tagPattern, $x, $regs ) ) {
				# Not a well-formed tag: show it as literal text
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
				continue;
			}
			$slash = $regs[1];
			$t = strtolower( $regs[2] );
			$params = $regs[3];
			$brace = $regs[4];
			$rest = $regs[5];

			$badtag = 0 ;
			if ( in_array( $t, $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( ! in_array( $t, $htmlsingle ) &&
					  ( $ot = @array_pop( $tagstack ) ) != $t ) {
						# Does not match the innermost open tag: put that
						# one back and reject the closing tag.
						@array_push( $tagstack, $ot );
						$badtag = 1;
					} else {
						if ( $t == "table" ) {
							# Leaving a table restores the enclosing stack
							$tagstack = array_pop( $tablestack );
						}
						$newparams = "";
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					  ! in_array( "table", $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					  ! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == "table" ) {
							# Each table gets a fresh stack of its own
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);
				}
				if ( ! $badtag ) {
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			if ( preg_match( $tagPattern, $x, $regs )
			  && in_array( strtolower( $regs[2] ), $htmlelements ) ) {
				$slash = $regs[1];
				$t = strtolower( $regs[2] );
				$brace = $regs[4];
				$newparams = $this->fixTagAttributes( $regs[3] );
				$rest = str_replace( ">", "&gt;", $regs[5] );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1613
1614
1615 /*
1616 *
1617 * This function accomplishes several tasks:
1618 * 1) Auto-number headings if that option is enabled
1619 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1620 * 3) Add a Table of contents on the top for users who have enabled the option
1621 * 4) Auto-anchor headings
1622 *
1623 * It loops through all headlines, collects the necessary data, then splits up the
1624 * string and re-inserts the newly formatted headlines.
1625 *
1626 */
1627
# $text   - HTML in which the <h1>..<h6> headings are to be formatted
# $isMain - when false, no table of contents is inserted into the text
# Returns the text with the rebuilt headings (and the TOC, if shown).
/* private */ function formatHeadings( $text, $isMain=true )
{
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level, [2] = rest of the opening tag, [3] = heading text
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();
	$sublevelCount = array();
	# how often each canonized headline occurred, to keep anchors unique
	$refers = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = "";
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if ( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Join the non-zero counters of all levels up to this one: "2.1.3"
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections.
		# The second call must continue from the result of the first one.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
		$canonized_headline = str_replace('%','.',$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$dupeCount = $refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if( $dupeCount > 1 ) {
			$anchor .= "_" . $dupeCount;
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}

		$headHtml = "";
		if( $showEditLink ) {
			$headHtml .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$headHtml .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
		$head[$headlineCount] = $headHtml;

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text used to be added here when
		# section editing is enabled. It is disabled because it broke block
		# formatting, for example a bullet point in the top line.
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full .= $toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1813
# Turn every "ISBN <number>" in $text into a link to Special:Booksources.
#
# The number is the run of digits, dashes and capital letters following
# "ISBN " and any further spaces. When that run contains nothing but
# dashes (or is empty) the text is left exactly as it was.
#
# $text - text to process
# Returns the text with the ISBN links inserted.
/* private */ function magicISBN( $text )
{
	# explode() replaces the removed split(); the separator is a plain
	# string. The leading space keeps the first piece non-empty.
	$a = explode( "ISBN ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# strspn() measures the leading runs without reading past the end
		# of an empty remainder.
		$blankLength = strspn( $x, " " );
		$blank = (string)substr( $x, 0, $blankLength );
		$x = (string)substr( $x, $blankLength );

		$isbnLength = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLength );
		$x = (string)substr( $x, $isbnLength );

		$num = str_replace( "-", "", $isbn );
		$num = str_replace( " ", "", $num );

		if ( "" == $num ) {
			# Nothing to link: put back everything that was consumed,
			# including a dash-only run.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
# Turn every "RFC <digits>" in $text into an external link. The target is
# the "rfcurl" message with $1 replaced by the number. Occurrences that
# are not followed by a digit are left as they were.
#
# $text - text to process
# Returns the text with the RFC links inserted.
/* private */ function magicRFC( $text )
{
	# explode() replaces the removed split(); the separator is a plain
	# string. The leading space keeps the first piece non-empty.
	$a = explode( "RFC ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# strspn() measures the leading runs without reading past the end
		# of an empty remainder.
		$blankLength = strspn( $x, " " );
		$blank = (string)substr( $x, 0, $blankLength );
		$x = (string)substr( $x, $blankLength );

		$rfcLength = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLength );
		$x = (string)substr( $x, $rfcLength );

		if ( "" == $rfc ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfMsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
1880
# Entry point producing altered wiki markup (output type OT_WIKI).
# Windows line endings are normalised to "\n"; the text is then stripped,
# run through pstPass2() and unstripped again.
#
# $text       - wiki text to transform
# $title      - stored by reference as the current title
# $user       - handed on to pstPass2()
# $options    - stored as the current parser options
# $clearState - when true (the default), clearState() is called first
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	$normalized = str_replace( "\r\n", "\n", $text );

	# The strip state is local to this call
	$stripState = false;
	$stripped = $this->strip( $normalized, $stripState, false );
	$transformed = $this->pstPass2( $stripped, $user );
	$unstripped = $this->unstrip( $transformed, $stripState );
	return $this->unstripNoWiki( $unstripped, $stripState );
}
1910
# Second pass of the pre-save transform: variable replacement, signature
# expansion (~~~, ~~~~, ~~~~~), completion of context links such as
# [[|name]] and [[name (context)|]], and trimming of trailing whitespace.
#
# $text - wiki text, already stripped by the caller
# $user - user whose name and "nickname" option form the signature
# Returns the transformed wiki text.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# Plain string replacement is used on purpose: with preg_replace() a
	# "$1" or "\1" inside the user-chosen nickname would be interpreted as
	# a backreference. Longest tilde run first, as before.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~~", $d, $text );
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	  # [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1981
1982 # Set up some variables which are usually set up in parse()
1983 # so that an external function can call some class members with confidence
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	# Prime the members that parse() would normally initialise, so that
	# code outside the class can invoke individual methods afterwards.
	#   $title       - title object, stored by reference in mTitle
	#   $options     - stored in mOptions
	#   $outputType  - stored in mOutputType (one of the OT_* constants)
	#   $clearState  - when true (the default), clearState() is run as well
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	# Caller asked to keep the current parser state: nothing more to do.
	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
1993
function transformMsg( $text, $options )
{
	# Run variable replacement over $text with the output type set to
	# OT_MSG, using the global $wgTitle as the current title and
	# $options as the parser options. Returns the transformed text.
	global $wgTitle;

	# Function-static flag marking that a transformMsg() call is already
	# in progress; a nested call hands $text back untouched, which
	# protects against infinite recursion.
	static $executing = false;

	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
2013 }
2014
# Holder for the result of a parse: the produced text together with the
# language links, category links and "old magic" flag gathered with it.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor.
	#   $text             - initial value for mText
	#   $languageLinks    - initial array for mLanguageLinks
	#   $categoryLinks    - initial array for mCategoryLinks
	#   $containsOldMagic - initial value for mContainsOldMagic
	# mCacheTime always starts out as the empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Plain accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators. Each one delegates to wfSetVar() and returns whatever that
	# helper returns (presumably the previous value -- confirm against the
	# helper's definition, which is not part of this file section).
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold the link lists and the old-magic flag of another ParserOutput
	# into this one. mText and mCacheTime are left untouched.
	#   $other - the ParserOutput whose links/flag are appended
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links have to come from $other. Merging this object's
		# own language links here would lose $other's categories and mix
		# language links into the category list.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2048
# Bundle of settings consulted by the parser. Values are copied from site
# globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# --- Read accessors -------------------------------------------------

	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	# --- Write accessors ------------------------------------------------
	# Each setter hands the member and the new value to wfSetVar() (or
	# wfSetRef() for the skin) and passes that helper's result back.

	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setSkin( $x ) {
		return wfSetRef( $this->mSkin, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# Factory: build a ParserOptions object and fill it in from $user.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions();
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Populate every option of this object.
	#   $userInput - user object to read preferences from; when it is
	#                falsy a fresh User is created and marked as loaded
	#                and its values are used instead.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates;
		global $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide switches, taken straight from the configuration globals
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user values: the skin object and the individual preferences
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}

}
2121
2122 # Regex callbacks, used in Parser::replaceVariables
function wfBraceSubstitution( $matches ) {
	# Plain-function callback: hands the match array to the
	# braceSubstitution() method of the parser held in the $wgCurParser
	# global and returns that method's result.
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2128
function wfArgSubstitution( $matches ) {
	# Plain-function callback: hands the match array to the
	# argSubstitution() method of the parser held in the $wgCurParser
	# global and returns that method's result.
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2134
function wfVariableSubstitution( $matches ) {
	# Plain-function callback: hands the match array to the
	# variableSubstitution() method of the parser held in the
	# $wgCurParser global and returns that method's result.
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2140
2141 ?>