nowiki handling
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
7 }
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
10 }
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Upper bound on how often any single page may be included (see the
# O(N^2) attack description above).
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): when passed as $tag, HTML comments
# are extracted instead of a regular <XML>-style tag. It must never
# collide with anything usable in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Common prefix of the placeholder markers generated while stripping
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor recognised by PHP 5 and later: every new parser starts
# with a clean state.
function __construct()
{
    $this->clearState();
}

# PHP 4 style constructor. PHP 8 no longer treats a method named after
# its class as a constructor, so the real work lives in __construct();
# this method is kept for PHP 4 and for code that calls it explicitly.
function Parser()
{
    $this->__construct();
}
69
# Reset all per-parse members (the ones listed under "Cleared with
# clearState()" in the class header) to their initial values.
function clearState()
{
    # Counters and flags
    $this->mAutonumber = 0;
    $this->mLastSection = "";
    $this->mDTopen = $this->mInPre = $this->mVariables = false;

    # Collections
    $this->mIncludeCount = array();
    $this->mArgStack = array();
    $this->mStripState = array();

    # Fresh container for the result of the next parse
    $this->mOutput = new ParserOutput;
}
82
83 # First pass--just handle <nowiki> sections, pass the rest off
84 # to internalParse() which does all the real work.
85 #
86 # Returns a ParserOutput
87 #
# Render wiki markup as HTML.
#
#   $text       wiki markup to convert
#   $title      title object of the page; kept by reference in $this->mTitle
#   $options    parser options; kept in $this->mOptions
#   $linestart  handed on unchanged to internalParse() and doBlockLevels()
#   $clearState when true the parser is reset with clearState() first
#
# Returns $this->mOutput after the generated text has been stored in it.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
    global $wgUseTidy;
    $fname = "Parser::parse";
    wfProfileIn( $fname );

    if ( $clearState ) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_HTML;

    $text = $this->strip( $text, $this->mStripState );
    $text = $this->internalParse( $text, $linestart );
    $text = $this->unstrip( $text, $this->mStripState );

    # Clean up special characters, only run once, next-to-last before doBlockLevels
    if ( !$wgUseTidy ) {
        $fixtags = array(
            # french spaces, last one Guillemet-left
            # only if there is something before the space
            "/(.) (\\?|:|!|\\302\\273)/i"=>"\\1&nbsp;\\2",
            # french spaces, Guillemet-right
            "/(\\302\\253) /i"=>"\\1&nbsp;",
            "/<hr *>/i" => '<hr />',
            "/<br *>/i" => '<br />',
            "/<center *>/i"=>'<div class="center">',
            "/<\\/center *>/i" => '</div>',
            # Escape every ampersand that does not start a named, decimal
            # or hexadecimal entity; note that we probably ought to be
            # more careful about named entities.
            '/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
        );
    } else {
        # With tidy enabled only the french-space and <center> rules
        # are applied at this point.
        $fixtags = array(
            # french spaces, last one Guillemet-left
            "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1",
            # french spaces, Guillemet-right
            "/(\\302\\253) /i"=>"\\1&nbsp;",
            "/<center *>/i"=>'<div class="center">',
            "/<\\/center *>/i" => '</div>'
        );
    }
    $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

    # only once and last
    $text = $this->doBlockLevels( $text, $linestart );
    # <nowiki> content goes back in last (see unstrip()/unstripNoWiki())
    $text = $this->unstripNoWiki( $text, $this->mStripState );
    if ( $wgUseTidy ) {
        $text = $this->tidy( $text );
    }
    $this->mOutput->setText( $text );
    wfProfileOut( $fname );
    return $this->mOutput;
}
144
# Returns the hexadecimal forms of two mt_rand() draws from the range
# 0..0x7fffffff, concatenated into one string.
/* static */ function getRandomString()
{
    $first = mt_rand( 0, 0x7fffffff );
    $second = mt_rand( 0, 0x7fffffff );
    return dechex( $first ) . dechex( $second );
}
149
150 # Replaces all occurrences of <$tag>content</$tag> in the text
151 # with a random marker and returns the new text. the output parameter
152 # $content will be an associative array filled with data on the form
153 # $unique_marker => content.
154
155 # If $content is already set, the additional entries will be appended
156
157 # If $tag is set to STRIP_COMMENTS, the function will extract
158 # <!-- HTML comments -->
159
# Cut every <$tag>...</$tag> section (or every HTML comment when $tag is
# STRIP_COMMENTS) out of $text and put a unique marker in its place.
#
#   $tag          tag name, matched case-insensitively
#   $text         text to scan
#   $content      by reference; receives marker => inner text, existing
#                 entries are kept
#   $uniq_prefix  string every generated marker starts with
#
# Returns $text with the sections replaced by their markers. A section
# whose closing tag is missing extends to the end of the text.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
    $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
    if ( !$content ) {
        $content = array( );
    }
    $n = 1;
    $stripped = "";

    # The two patterns never change inside the loop, build them once
    if ( $tag == STRIP_COMMENTS ) {
        $openPattern = "/<!--/i";
        $closePattern = "/-->/i";
    } else {
        $quotedTag = preg_quote( $tag, "/" );
        $openPattern = "/<\\s*$quotedTag\\s*>/i";
        $closePattern = "/<\\/\\s*$quotedTag\\s*>/i";
    }

    while ( "" != $text ) {
        $p = preg_split( $openPattern, $text, 2 );
        $stripped .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
            # No further opening tag, or nothing follows it
            $text = "";
        } else {
            $q = preg_split( $closePattern, $p[1], 2 );
            $marker = $rnd . sprintf("%08X", $n++);
            $content[$marker] = $q[0];
            $stripped .= $marker;
            # Without a closing tag there is no remainder left to scan
            $text = ( count( $q ) > 1 ) ? $q[1] : "";
        }
    }
    return $stripped;
}
191
192 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
193 # If $render is set, performs necessary rendering operations on plugins
194 # Returns the text, and fills an array with data needed in unstrip()
195 # If the $state is already a valid strip state, it adds to the state
196
197 # When $stripcomments is set, HTML comments <!-- like this -->
198 # will be stripped in addition to other tags. This is important
199 # for section editing, where these comments cause confusion when
200 # counting the sections in the wikisource
# Replace <nowiki>, <hiero>, <timeline>, <math>, <pre> sections (and HTML
# comments when $stripcomments is set) by unique markers.
#
#   $text           text to process
#   $state          by reference; strip state that receives the removed
#                   sections, grouped by kind. An existing state is
#                   extended, an empty one is initialised.
#   $stripcomments  also strip <!-- comments -->
#
# When the output type is OT_HTML the stored replacement is the rendered
# form, otherwise it is the original tag with its content.
# Returns the text containing the markers.
function strip( $text, &$state, $stripcomments = false )
{
    $render = ($this->mOutputType == OT_HTML);
    $nowiki_content = array();
    $hiero_content = array();
    $timeline_content = array();
    $math_content = array();
    $pre_content = array();
    $comment_content = array();

    $uniq_prefix = UNIQ_PREFIX;

    $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
    foreach( $nowiki_content as $marker => $content ){
        if( $render ){
            $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
        } else {
            $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
        }
    }

    # NOTE(review): when rendering with the hiero or timeline extension
    # disabled, the tag and its content are put back unescaped -- confirm
    # that this is intended.
    $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
    foreach( $hiero_content as $marker => $content ){
        if( $render && $GLOBALS['wgUseWikiHiero']){
            $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
        } else {
            $hiero_content[$marker] = "<hiero>$content</hiero>";
        }
    }

    $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
    foreach( $timeline_content as $marker => $content ){
        if( $render && $GLOBALS['wgUseTimeline']){
            $timeline_content[$marker] = renderTimeline( $content );
        } else {
            $timeline_content[$marker] = "<timeline>$content</timeline>";
        }
    }

    $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
    foreach( $math_content as $marker => $content ){
        if( $render ) {
            if( $this->mOptions->getUseTeX() ) {
                $math_content[$marker] = renderMath( $content );
            } else {
                # TeX disabled: show the source literally. The content is
                # escaped because it is put back into the page after
                # removeHTMLtags() has already run, and the closing tag
                # needs its slash.
                $math_content[$marker] = "&lt;math&gt;" .
                    wfEscapeHTMLTagsOnly( $content ) . "&lt;/math&gt;";
            }
        } else {
            $math_content[$marker] = "<math>$content</math>";
        }
    }

    $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
    foreach( $pre_content as $marker => $content ){
        if( $render ){
            $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
        } else {
            $pre_content[$marker] = "<pre>$content</pre>";
        }
    }

    if( $stripcomments ) {
        $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
        foreach( $comment_content as $marker => $content ){
            $comment_content[$marker] = "<!--$content-->";
        }
    }

    # The key order matters: unstrip() expands the kinds in reverse order
    $found = array(
        'nowiki' => $nowiki_content,
        'hiero' => $hiero_content,
        'timeline' => $timeline_content,
        'math' => $math_content,
        'pre' => $pre_content,
        'comment' => $comment_content
    );

    # Merge state with the pre-existing state, if there is one
    if ( is_array( $state ) && $state ) {
        $merged = array();
        foreach ( $found as $kind => $items ) {
            # A state started by insertStripItem() may lack some kinds
            if ( isset( $state[$kind] ) && is_array( $state[$kind] ) ) {
                $merged[$kind] = $state[$kind] + $items;
            } else {
                $merged[$kind] = $items;
            }
        }
        # Keep any other kinds (e.g. 'item') after the standard ones
        foreach ( $state as $kind => $items ) {
            if ( !isset( $merged[$kind] ) ) {
                $merged[$kind] = $items;
            }
        }
        $state = $merged;
    } else {
        $state = $found;
    }
    return $text;
}
290
291 # always call unstripNoWiki() after this one
# Put back everything recorded in the strip state except the 'nowiki'
# entries, which are left for unstripNoWiki().
#
#   $text   text containing markers
#   $state  strip state: kind => array( marker => replacement )
#
# Returns the text with the markers replaced. The state is only read.
function unstrip( $text, &$state )
{
    if ( !is_array( $state ) ) {
        # Nothing was ever stripped
        return $text;
    }

    # Must expand in reverse order, otherwise nested tags will be corrupted
    foreach ( array_reverse( $state, true ) as $kind => $contentDict ) {
        if ( $kind === 'nowiki' || !is_array( $contentDict ) ) {
            continue;
        }
        foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
            $text = str_replace( (string)$marker, $content, $text );
        }
    }

    return $text;
}
306 # always call this after unstrip() to preserve the order
# Put back the 'nowiki' entries of the strip state.
#
#   $text   text containing markers
#   $state  strip state; only $state['nowiki'] is read
#
# Returns the text with the nowiki markers replaced; the text is
# returned unchanged when the state holds no nowiki entries.
function unstripNoWiki( $text, &$state )
{
    if ( !isset( $state['nowiki'] ) || !is_array( $state['nowiki'] ) ) {
        return $text;
    }

    # Must expand in reverse order, otherwise nested tags will be corrupted
    foreach ( array_reverse( $state['nowiki'], true ) as $marker => $content ) {
        $text = str_replace( (string)$marker, $content, $text );
    }

    return $text;
}
316
317 # Add an item to the strip state
318 # Returns the unique tag which must be inserted into the stripped text
319 # The tag will be replaced with the original text in unstrip()
320
# Store $text in the strip state under a freshly generated marker.
#
#   $text   text to store
#   $state  by reference; strip state, initialised here when empty
#
# Returns the marker that stands for the stored text.
function insertStripItem( $text, &$state )
{
    $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
    if ( !$state ) {
        # Same kinds, in the same order, as strip() creates; strip()
        # merges into every one of them when it is given this state.
        $state = array(
            'nowiki' => array(),
            'hiero' => array(),
            'timeline' => array(),
            'math' => array(),
            'pre' => array(),
            'comment' => array()
        );
    }
    $state['item'][$rnd] = $text;
    return $rnd;
}
335
336 # This method generates the list of subcategories and pages for a category
# Build the HTML listing of subcategories and pages for a category page.
# Returns nothing when category magic is switched off in the options, ""
# when the current title is not in the category namespace, and the
# generated HTML otherwise.
function categoryMagic ()
{
    global $wgLang , $wgUser ;

    # Doesn't use categories at all
    if ( !$this->mOptions->getUseCategoryMagic() ) {
        return ;
    }

    # This ain't a category page
    $categoryNs = Namespace::getCategory() ;
    if ( $this->mTitle->getNamespace() != $categoryNs ) {
        return "" ;
    }

    $html = "<br style=\"clear:both;\"/>\n";
    $skin =& $wgUser->getSkin() ;

    $pageLinks = array() ;
    $subcatLinks = array() ;
    $rows = array () ;
    $id = $this->mTitle->getArticleID() ; # result is not used below

    # FIXME: add limits
    $dbKey = wfStrencode( $this->mTitle->getDBKey() );
    $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$dbKey' AND cl_from=cur_id ORDER BY cl_sortkey" ;
    $res = wfQuery ( $sql, DB_READ ) ;
    while ( $row = wfFetchObject ( $res ) ) {
        $rows[] = $row ;
    }

    # Turn every page that links to this category into a link, sorted
    # into subcategories and ordinary pages
    foreach ( $rows as $row ) {
        $name = $wgLang->getNsText ( $row->cur_namespace ) ;
        if ( $name != "" ) {
            $name .= ":" ;
        }
        $name .= $row->cur_title ;

        $link = $skin->makeLink ( $name ) ;
        if ( $row->cur_namespace == $categoryNs ) {
            $subcatLinks[] = $link ;
        } else {
            $pageLinks[] = $link ;
        }
    }
    wfFreeResult ( $res ) ;

    # Showing subcategories
    if ( count ( $subcatLinks ) > 0 ) {
        $html .= "<h2>" . wfMsg( "subcategories" ) . "</h2>\n" ;
        $html .= implode ( ", " , $subcatLinks ) ;
    }

    # Showing pages in this category
    if ( count ( $pageLinks ) > 0 ) {
        $heading = wfMsg( "category_header", $this->mTitle->getText() );
        $html .= "<h2>{$heading}</h2>\n" ;
        $html .= implode ( ", " , $pageLinks ) ;
    }

    return $html ;
}
393
# Returns the list of attribute names that fixTagAttributes() lets
# through (allowed attributes--no scripting, etc.).
function getHTMLattrs ()
{
    return array(
        "title", "align", "lang", "dir", "width", "height",
        /* BR */
        "bgcolor", "clear",
        /* HR */
        "noshade",
        /* BLOCKQUOTE, Q */
        "cite",
        /* FONT */
        "size", "face", "color",
        /* For various lists, mostly deprecated but safe */
        "type", "start", "value", "compact",
        /* Tables */
        "summary", "width", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan",
        /* For CSS */
        "id", "class", "name", "style"
    );
}
410
# Clean the attribute part of an HTML tag.
#
#   $t  attribute text, e.g. 'border="1" onclick="..."'
#
# Attributes not listed by getHTMLattrs() are dropped. If the remaining
# text contains a style attribute with an offensive construct, all
# attributes are dropped. Returns the cleaned, trimmed attribute text.
function fixTagAttributes ( $t )
{
    if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

    # Strip non-approved attributes from the tag. A callback is used
    # instead of the /e modifier so that attribute text is never
    # evaluated as PHP code.
    $t = preg_replace_callback(
        "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
        array( $this, 'fixTagAttributesCallback' ),
        $t );

    # Strip javascript "expression" from stylesheets. Brute force approach:
    # If anything offensive is found, all attributes of the HTML tag are dropped
    if( preg_match(
        "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
        wfMungeToUtf8( $t ) ) )
    {
        $t="";
    }

    return trim ( $t ) ;
}

# Callback for fixTagAttributes(): $m[1] is the attribute name and $m[3],
# when present, its value. Returns the attribute as name or name=value if
# the name is allowed, and an empty string otherwise.
/* private */ function fixTagAttributesCallback( $m )
{
    $name = $m[1];
    if ( !in_array( strtolower( $name ), $this->getHTMLattrs(), true ) ) {
        return '';
    }
    $value = isset( $m[3] ) ? $m[3] : '';
    return ( $value !== '' ) ? "{$name}={$value}" : $name;
}
433
434 /* interface with html tidy, used if $wgUseTidy = true */
# Interface with html tidy, used if $wgUseTidy = true.
#
#   $text  HTML fragment to clean up
#
# The fragment is wrapped in a minimal XHTML document and piped through
# the program named by $wgTidyBin. Returns what tidy printed, or the
# original text plus an HTML comment when tidy produced no output for
# non-empty input.
function tidy ( $text ) {
    global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
    global $wgInputEncoding, $wgOutputEncoding;
    $fname = "Parser::tidy";
    wfProfileIn( $fname );

    $cleansource = '';

    # Work on a local copy: appending to the global itself would make the
    # encoding flag pile up with every call.
    $opts = $wgTidyOpts;
    $sameEncoding = ( $wgInputEncoding == $wgOutputEncoding );
    switch(strtoupper($wgOutputEncoding)) {
        case 'ISO-8859-1':
            $opts .= $sameEncoding ? ' -latin1' : ' -raw';
            break;
        case 'UTF-8':
            $opts .= $sameEncoding ? ' -utf8' : ' -raw';
            break;
        default:
            $opts .= ' -raw';
    }

    $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
        '<head><title>test</title></head><body>'.$text.'</body></html>';
    $descriptorspec = array(
        0 => array("pipe", "r"),
        1 => array("pipe", "w"),
        2 => array("file", "/dev/null", "a")
    );
    $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
    if (is_resource($process)) {
        fwrite($pipes[0], $wrappedtext);
        fclose($pipes[0]);
        while (!feof($pipes[1])) {
            $chunk = fgets($pipes[1], 1024);
            if ( $chunk === false ) {
                # Read error or end of data: stop instead of spinning
                break;
            }
            $cleansource .= $chunk;
        }
        fclose($pipes[1]);
        proc_close($process);
    }

    wfProfileOut( $fname );

    if( $cleansource == '' && $text != '') {
        wfDebug( "Tidy error detected!\n" );
        return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
    } else {
        return $cleansource;
    }
}
481
# Convert wiki table markup to HTML tables, line by line.
#
#   {|    opens a table, the rest of the line holds its attributes
#   |}    closes the innermost table
#   |-    starts a new row, the rest of the line holds its attributes
#   |+    caption
#   |     data cell(s), several on one line separated by ||
#   !     header cell(s), several on one line separated by !! or ||
#
# Lines outside of any table are left alone. Tables still open at the
# end of the text are closed. Returns the converted text.
function doTableStuff ( $t )
{
    $t = explode ( "\n" , $t ) ;
    # One entry per open table in each of these stacks
    $td = array () ; # Is currently a td tag open?
    $ltd = array () ; # Was it TD or TH?
    $tr = array () ; # Is currently a tr tag open?
    $ltr = array () ; # tr attributes
    foreach ( $t AS $k => $x )
    {
        $x = trim ( $x ) ;
        $fc = substr ( $x , 0 , 1 ) ;
        $first2 = substr ( $x , 0 , 2 ) ;
        if ( "{|" == $first2 )
        {
            # Attributes start right after the two marker characters
            $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $tr , false ) ;
            array_push ( $ltr , "" ) ;
        }
        else if ( count ( $td ) == 0 ) { } # Don't do any of the following
        else if ( "|}" == $first2 )
        {
            $z = "</table>\n" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
        }
        else if ( "|-" == $first2 ) # Allows for |---------------
        {
            # Whatever follows the dashes is the attribute text of the row
            $x = ltrim ( substr ( $x , 1 ) , "-" ) ;
            $z = "" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
            array_push ( $tr , false ) ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
        }
        else if ( "|" == $fc || "!" == $fc )
        {
            if ( "|+" == $first2 ) # Caption
            {
                $fc = "+" ;
                $x = substr ( $x , 1 ) ;
            }
            $after = substr ( $x , 1 ) ;
            if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
            $after = explode ( "||" , $after ) ;
            $t[$k] = "" ;
            foreach ( $after AS $theline )
            {
                $z = "" ;
                if ( $fc != "+" )
                {
                    # Cells need an open row; captions do not
                    $tra = array_pop ( $ltr ) ;
                    if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                    array_push ( $tr , true ) ;
                    array_push ( $ltr , "" ) ;
                }

                # Close the previous cell of this table, if any
                $l = array_pop ( $ltd ) ;
                if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                if ( $fc == "|" ) $l = "td" ;
                else if ( $fc == "!" ) $l = "th" ;
                else if ( $fc == "+" ) $l = "caption" ;
                else $l = "" ;
                array_push ( $ltd , $l ) ;

                # "attributes|content" or just "content"
                $y = explode ( "|" , $theline , 2 ) ;
                if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                $t[$k] .= $y ;
                array_push ( $td , true ) ;
            }
        }
    }

    # Closing open td, tr && table
    while ( count ( $td ) > 0 )
    {
        # Close the cell with the tag it was opened with (td, th or caption)
        $l = array_pop ( $ltd ) ;
        array_pop ( $ltr ) ;
        if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
        if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
        $t[] = "</table>" ;
    }

    $t = implode ( "\n" , $t ) ;
    return $t ;
}
581
582 # Parses the text and adds the result to the strip state
583 # Returns the strip tag
# Strip and parse $text, store the result in the parser's strip state
# and return $newline followed by the marker standing for it.
#
#   $text     wiki markup to parse
#   $newline  text put in front of the returned marker; its boolean
#             value is passed to internalParse() as $linestart
#   $args     passed on to internalParse()
function stripParse( $text, $newline, $args )
{
    $stripped = $this->strip( $text, $this->mStripState );
    $startsLine = (bool)$newline;
    $parsed = $this->internalParse( $stripped, $startsLine, $args, false );
    $marker = $this->insertStripItem( $parsed, $this->mStripState );
    return $newline . $marker;
}
590
# Run the individual markup passes over already stripped text.
#
#   $text       stripped wiki markup
#   $linestart  not used inside this method
#   $args       passed to replaceVariables()
#   $isMain     passed to formatHeadings()
#
# The order of the passes matters. Returns the processed text; the first
# call on a parser object also appends the result of categoryMagic().
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
    $fname = "Parser::internalParse";
    wfProfileIn( $fname );

    $text = $this->replaceVariables( $this->removeHTMLtags( $text ), $args );

    # Four or more dashes at the start of a line become a horizontal rule
    $text = preg_replace( "/(^|\n)-----*/", "\\1<hr />", $text );

    $text = $this->doHeadings( $text );
    if ( $this->mOptions->getUseDynamicDates() ) {
        global $wgDateFormatter;
        $dateFormat = $this->mOptions->getDateFormat();
        $text = $wgDateFormatter->reformat( $dateFormat, $text );
    }
    $text = $this->replaceExternalLinks( $this->doAllQuotes( $text ) );

    # NOTE(review): internal links are replaced in two passes --
    # presumably on purpose (nested links); confirm before merging them.
    for ( $pass = 0; $pass < 2; $pass++ ) {
        $text = $this->replaceInternalLinks ( $text );
    }

    $text = $this->magicRFC( $this->magicISBN( $this->doTableStuff ( $text ) ) );
    $text = $this->formatHeadings( $text, $isMain );
    $skin =& $this->mOptions->getSkin();
    $text = $skin->transformContent( $text );

    # The flag is not among the members reset by clearState(), so this
    # runs once per parser object.
    if ( !isset ( $this->categoryMagicDone ) ) {
        $text .= $this->categoryMagic () ;
        $this->categoryMagicDone = true ;
    }

    wfProfileOut( $fname );
    return $text;
}
626
627
# Turn lines of the form ==Title== into <h2>Title</h2>, for one to six
# equals signs. The longest markers are handled first so that, for
# example, ===x=== is not taken for a level 1 heading.
/* private */ function doHeadings( $text )
{
    $level = 6;
    while ( $level >= 1 ) {
        $marks = str_repeat( "=", $level );
        $pattern = "/^" . $marks . "(.+)" . $marks . "(\\s|$)/m";
        $replacement = "<h" . $level . ">\\1</h" . $level . ">\\2";
        $text = preg_replace( $pattern, $replacement, $text );
        --$level;
    }
    return $text;
}
637
# Apply doQuotes() to every line of $text separately and return the
# lines joined with newlines again.
/* private */ function doAllQuotes( $text )
{
    $rendered = array();
    foreach ( explode( "\n", $text ) as $line ) {
        $rendered[] = $this->doQuotes( "", $line, "" );
    }
    return implode( "\n", $rendered );
}
647
# Convert '' and ''' quote markup in one line to <em> and <strong>.
#
#   $pre   text collected so far that still has to be wrapped
#   $text  remaining text to scan
#   $mode  what is currently open: "" (nothing), "em", "strong",
#          "emstrong", "strongem" or "both"
#
# The method calls itself for the text after each marker it finds.
# Returns the converted text.
/* private */ function doQuotes( $pre, $text, $mode )
{
    if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
        # No marker left: close whatever is still open
        $asStrong = ($text == "") ? "" : "<strong>{$text}</strong>";
        $asEm = ($text == "") ? "" : "<em>{$text}</em>";
        $nothing = ($pre == "") && ($text == "");
        switch ( $mode ) {
            case "":
                return $pre . $text;
            case "em":
                return $pre . $asEm;
            case "strong":
                return $pre . $asStrong;
            case "strongem":
                return $nothing ? "" : "<strong>{$pre}{$asEm}</strong>";
            default:
                return $nothing ? "" : "<em>{$pre}{$asStrong}</em>";
        }
    }

    # $head is the text before the first marker, $tail everything after it
    $head = $m[1];
    $tail = $m[2];
    $headStrong = ($head == "") ? "" : "<strong>{$head}</strong>";
    $headEm = ($head == "") ? "" : "<em>{$head}</em>";

    if ( substr( $tail, 0, 1 ) == "'" ) {
        # Three quotes: strong marker
        $tail = substr( $tail, 1 );
        switch ( $mode ) {
            case "em":
                return $this->doQuotes( $head, $tail, ($head == "") ? "both" : "emstrong" );
            case "strong":
                return $headStrong . $this->doQuotes( "", $tail, "" );
            case "emstrong":
            case "both":
                return $this->doQuotes( "", $pre . $headStrong . $tail, "em" );
            case "strongem":
                return "<strong>{$pre}{$headEm}</strong>" . $this->doQuotes( "", $tail, "em" );
            default:
                return $head . $this->doQuotes( "", $tail, "strong" );
        }
    }

    # Two quotes: em marker
    switch ( $mode ) {
        case "strong":
            return $this->doQuotes( $head, $tail, ($head == "") ? "both" : "strongem" );
        case "em":
            return $headEm . $this->doQuotes( "", $tail, "" );
        case "emstrong":
            return "<em>{$pre}{$headStrong}</em>" . $this->doQuotes( "", $tail, "strong" );
        case "strongem":
        case "both":
            return $this->doQuotes( "", $pre . $headEm . $tail, "strong" );
        default:
            return $head . $this->doQuotes( "", $tail, "em" );
    }
}
695
696 # Note: we have to do external links before the internal ones,
697 # and otherwise take great care in the order of things here, so
698 # that we don't end up interpreting some URLs twice.
699
# Run subReplaceExternalLinks() once for every supported protocol, in a
# fixed order. The value given with each protocol is the $autonumber
# argument of that call.
/* private */ function replaceExternalLinks( $text )
{
    $fname = "Parser::replaceExternalLinks";
    wfProfileIn( $fname );

    $protocols = array(
        "http" => true,
        "https" => true,
        "ftp" => false,
        "irc" => false,
        "gopher" => false,
        "news" => false,
        "mailto" => false
    );
    foreach ( $protocols as $protocol => $autonumber ) {
        $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
    }

    wfProfileOut( $fname );
    return $text;
}
714
# Replace the external links of one protocol.
#
#   $s           text to process
#   $protocol    protocol name without the colon, e.g. "http"
#   $autonumber  true: bracketed links without a description are shown
#                as [1], [2], ... and image URLs may become images if the
#                options allow it; false: they are shown as their URL
#
# Bare URLs are handled first, then the [url] and [url text] forms.
# Returns the processed text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
    # Stands in for the protocol inside generated markup until both
    # regular expression passes are done; swapped back further down.
    $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
    $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

    # this is the list of separators that should be ignored if they
    # are the last character of an URL but that should be included
    # if they occur within the URL, e.g. "go to www.foo.com, where .."
    # in this case, the last comma should not become part of the URL,
    # but in "www.foo.com/123,2342,32.htm" it should.
    $sep = ",;\.:";
    $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
    $images = "gif|png|jpg|jpeg";

    # PLEASE NOTE: The curly braces { } are not part of the regex,
    # they are interpreted as part of the string (used to tell PHP
    # that the content of the string should be inserted there).
    $imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
        "((?i){$images})([^{$uc}]|$)/";
    $barePattern = "/(^|[^\\[])({$protocol}:)(([{$uc}]|[{$sep}][{$uc}])+)([^{$uc}{$sep}]|[{$sep}]|$)/";
    $sk =& $this->mOptions->getSkin();

    if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
        $imgTag = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
        $s = preg_replace( $imagePattern, "\\1" . $imgTag . "\\6", $s );
    }

    # Bare URLs that are not preceded by "["
    $placeholder = "{$unique}:\\3";
    $attrs = $sk->getExternalLinkAttributes( $placeholder, wfEscapeHTML( $placeholder ) );
    $label = wfEscapeHTML( $placeholder );
    $anchor = "\\1" . "<a href=\"{$placeholder}\"" . $attrs . ">" . $label . "</a>\\5";
    $s = preg_replace( $barePattern, $anchor, $s );
    $s = str_replace( $unique, $protocol, $s );

    # Bracketed links: split at every "[protocol:"
    $pieces = explode( "[{$protocol}:", " " . $s );
    $s = array_shift( $pieces );
    $s = substr( $s, 1 );

    $plainForm = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
    $describedForm = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

    foreach ( $pieces as $line ) {
        if ( preg_match( $plainForm, $line, $m ) ) {
            # [url]
            $link = "{$protocol}:{$m[1]}";
            $trail = $m[2];
            if ( $autonumber ) {
                $text = "[" . ++$this->mAutonumber . "]";
            } else {
                $text = wfEscapeHTML( $link );
            }
        } else if ( preg_match( $describedForm, $line, $m ) ) {
            # [url description]
            $link = "{$protocol}:{$m[1]}";
            $text = $m[2];
            $trail = $m[3];
        } else {
            # Not a link after all, put the text back untouched
            $s .= "[{$protocol}:" . $line;
            continue;
        }

        $sameAsText = ( $link == $text ) ||
            preg_match( "!{$protocol}://" . preg_quote( $text, "/" ) . "/?$!", $link );
        if ( $sameAsText ) {
            $paren = "";
        } else {
            # Expand the URL for printable version
            $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
        }
        $la = $sk->getExternalLinkAttributes( $link, $text );
        $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
    }
    return $s;
}
781
782
# Replace [[internal links]] by their HTML form.
#
#   $s  text to process
#
# The text is split at every "[[". Each piece is either turned into a
# link (ordinary page, image, media, special page, self link), recorded
# in the parser output (language links, category links) or, when it is
# not a valid link, put back unchanged. Returns the processed text.
/* private */ function replaceInternalLinks( $s )
{
    global $wgLang, $wgLinkCache;
    global $wgNamespacesWithSubpages, $wgLanguageCode;
    static $fname = "Parser::replaceInternalLink" ;
    wfProfileIn( $fname );

    wfProfileIn( "$fname-setup" );
    static $tc = FALSE;
    # the % is needed to support urlencoded titles as well
    if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
    $sk =& $this->mOptions->getSkin();

    $a = explode( "[[", " " . $s );
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

    # Special and Media are pseudo-namespaces; no pages actually exist in them
    static $image = FALSE;
    static $special = FALSE;
    static $media = FALSE;
    static $category = FALSE;
    if ( !$image ) { $image = Namespace::getImage(); }
    if ( !$special ) { $special = Namespace::getSpecial(); }
    if ( !$media ) { $media = Namespace::getMedia(); }
    if ( !$category ) { $category = Namespace::getCategory(); }

    $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
    $useLinkPrefix = $wgLang->linkPrefixExtension();

    wfProfileOut( "$fname-setup" );

    foreach ( $a as $line ) {
        # The word directly in front of THIS link is its prefix. It is
        # taken off the text produced so far and handed to the link (or
        # put back when the piece turns out not to be a link). Doing this
        # per link keeps the first link's prefix from being repeated on
        # later links and leaves text without any link untouched.
        if ( $useLinkPrefix && preg_match( $e2, $s, $m ) ) {
            $prefix = $m[2];
            $s = $m[1];
        } else {
            $prefix = "";
        }

        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
            $text = $m[2];
            # fix up urlencoded title texts
            if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
            $trail = $m[3];
        } else { # Invalid form; output directly
            $s .= $prefix . "[[" . $line ;
            continue;
        }

        /* Valid link forms:
        Foobar -- normal
        :Foobar -- override special treatment of prefix (images, language links)
        /Foobar -- convert to CurrentPage/Foobar
        /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
        */
        $c = substr($m[1],0,1);
        $noforce = ($c != ":");
        if( $c == "/" ) { # subpage
            if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
                $m[1]=substr($m[1],1,strlen($m[1])-2);
                $noslash=$m[1];
            } else {
                $noslash=substr($m[1],1);
            }
            if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
                $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
                if( "" == $text ) {
                    $text= $m[1];
                } # this might be changed for ugliness reasons
            } else {
                $link = $noslash; # no subpage allowed, use standard link
            }
        } elseif( $noforce ) { # no subpage
            $link = $m[1];
        } else {
            $link = substr( $m[1], 1 );
        }
        $wasblank = ( "" == $text );
        if( $wasblank )
            $text = $link;

        $nt = Title::newFromText( $link );
        if( !$nt ) {
            $s .= $prefix . "[[" . $line;
            continue;
        }
        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();
        if( $noforce ) {
            if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                # Language link: recorded in the output, not shown inline
                array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
                $tmp = $prefix . $trail ;
                $s .= (trim($tmp) == '')? '': $tmp;
                continue;
            }
            if ( $ns == $image ) {
                $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                $wgLinkCache->addImageLinkObj( $nt );
                continue;
            }
            if ( $ns == $category ) {
                $t = $nt->getText() ;
                $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

                $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
                $wgLinkCache->resume();

                $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
                $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                $this->mOutput->mCategoryLinks[] = $t ;
                $s .= $prefix . $trail ;
                continue;
            }
        }
        if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
            ( strpos( $link, "#" ) === FALSE ) ) {
            # Self-links are handled specially; generally de-link and change to bold.
            $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
            continue;
        }

        if( $ns == $media ) {
            $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            continue;
        } elseif( $ns == $special ) {
            $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
            continue;
        }
        $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
    }

    # Exactly one wfProfileOut() for the wfProfileIn() at the top
    wfProfileOut( $fname );
    return $s;
}
938
939 # Some functions here used by doBlockLevels()
940 #
# Returns the closing tag for the section named in $this->mLastSection
# (or "" if none is open) and resets $mLastSection and $mInPre.
/* private */ function closeParagraph()
{
    $closing = ( '' != $this->mLastSection )
        ? "</" . $this->mLastSection . ">\n"
        : "";
    $this->mLastSection = "";
    $this->mInPre = false;
    return $closing;
}
# getCommon() returns the length of the longest common prefix
# of both arguments, i.e. the number of leading characters they share.
953 #
/* private */ function getCommon( $st1, $st2 )
{
	# Counts how many leading bytes $st1 and $st2 have in common, i.e. the
	# length of their shared prefix.  The scan never runs past the end of
	# the shorter argument.
	#
	# Uses square-bracket string offsets: the curly-brace form is deprecated
	# in PHP 7.4 and removed in PHP 8.0, while [] works on every version.
	$limit = min( strlen( $st1 ), strlen( $st2 ) );

	$i = 0;
	while ( $i < $limit && $st1[$i] === $st2[$i] ) {
		++$i;
	}
	return $i;
}
965 # These next three functions open, continue, and close the list
966 # element appropriate to the prefix character passed into them.
967 #
/* private */ function openList( $char )
{
	# Any open paragraph-level section is closed first; its closing tag
	# precedes the list markup in the returned string.
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			# Remember that a <dt> is open so it can be closed correctly.
			$this->mDTopen = true;
			break;
		default:
			# Unknown prefix character: the marker replaces (rather than
			# extends) whatever closeParagraph() returned.
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
983
# Returns the markup that ends the current list item and starts the next
# one for the given prefix character.  For definition lists (":" and ";")
# the closing tag depends on $this->mDTopen, which is then updated to
# reflect whether the newly opened element is a <dt>.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	# Close whichever definition-list element is currently open.
	$close = $this->mDTopen ? "</dt>" : "</dd>";

	$startsTerm = ( ";" == $char );
	$this->mDTopen = $startsTerm;

	return $close . ( $startsTerm ? "<dt>" : "<dd>" );
}
1000
# Returns the markup that closes the innermost list for the given prefix
# character, followed by a newline.  An unrecognised character yields an
# error comment without a trailing newline.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$text = "</li></ul>";
			break;
		case "#":
			$text = "</li></ol>";
			break;
		case ":":
			if ( $this->mDTopen ) {
				# The last element opened was a term, so close a <dt>.
				$this->mDTopen = false;
				$text = "</dt></dl>";
			} else {
				$text = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $text . "\n";
}
1016
# Converts line-oriented wiki markup into block-level HTML: lists built from
# the prefix characters * # : ; as well as <p> and <pre> sections.
#
# $text      - the text to process; it is split on "\n"
# $linestart - when false, the first line is copied to the output untouched
#              and block processing starts with the second line
#
# Returns the processed text.  Reads and updates $this->mInPre,
# $this->mLastSection and $this->mDTopen.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = $pref = $pref2 = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		if (!$this->mInPre) {
			$this->mInPre = !empty($preOpenMatch);
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is the prefix with every ";" mapped to ":"; it is the
			# form compared against and stored in $lastPrefix, so a term
			# line and a definition line count as the same list.
			$pref2 = str_replace( ";", ":", $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof.  Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ":" );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels of the previous line that this line does not share.
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			# Open the levels this line adds beyond the shared prefix.
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/td|<\\/th)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() rather than a direct offset read: $t is empty on
				# blank lines, and reading offset 0 of an empty string
				# raises an "uninitialized string offset" notice.
				if ( " " == substr( $t, 0, 1 ) and trim($t) != '' ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							# Hold the paragraph markup back in $paragraphStack;
							# it is only emitted once a later line is seen.
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While paragraph markup is being held back the (blank) line itself
		# is not copied to the output.
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close every list level still open after the last line.
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $output;
}
1175
# Returns the current value of the variable identified by the magic word
# ID $index, or NULL when $index is not one of the IDs handled here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# The namespace name comes from $wgLang rather than from
			# Namespace::getCanonicalName() (patch by Dori).
			$value = $wgLang->getNsText( $this->mTitle->getNamespace() );
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") is 0-based; getWeekdayName() is passed a 1-based day.
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1209
# Rebuilds $this->mVariables from scratch: for every ID in $wgVariableIDs
# the matching magic word adds itself to the array together with the value
# returned by getVariableValue().
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $id ) {
		$magic =& MagicWord::get( $id );
		$value = $this->getVariableValue( $id );
		$magic->addToArray( $this->mVariables, $value );
	}
}
1219
# Expands {{...}} constructs in $text.
#
# $text - the text to process
# $args - the arguments available to {{{name}}} substitutions; they are
#         pushed onto $this->mArgStack for the duration of the call
#
# For OT_HTML output, {{variable}} and {{{argument}}} are substituted first.
# In every output mode the text is then passed through the brace
# (template) substitution callback.  Returns the processed text.
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( "{", "}" ), array( "", "" ), $titleChars );

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# The regex callbacks are plain functions named by string, so the
	# current parser is published through a global.
	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", "wfVariableSubstitution", $text );

		# Argument substitution
		$text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
	}
	# Template substitution
	$regex = "/(\\n?){{([$nonBraceChars]*)(\\|.*?|)}}/s";
	$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1257
# Regex callback for {{variable}}: $matches[1] is the text between the
# braces.  Returns the value from $this->mVariables when the name is a
# known variable (and flags the output as containing old magic);
# otherwise returns the matched text unchanged.
function variableSubstitution( $matches )
{
	$name = $matches[1];
	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$value = $this->mVariables[$name];
	$this->mOutput->mContainsOldMagic = true;
	return $value;
}
1268
# Regex callback for {{...}} (see the template regex in replaceVariables()).
#
# $matches[1] - optional newline preceding the braces
# $matches[2] - the part before the first "|"
# $matches[3] - "" or the remainder, starting with "|"
#
# Tries, in order: literal {{{...}}}, SUBST, MSGNW/MSG/INT, NS,
# LOCALURL/LOCALURLE, internal variables, and finally loading the page from
# the template namespace.  Returns the replacement text, or the original
# match when nothing applied.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;

	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		# (assigned by reference, like every other MagicWord::get() call here)
		$mwNs =& MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal =& MagicWord::get( MAG_LOCALURL );
		$mwLocalE =& MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, when present, is handed to the URL method.
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}
	/*
	# Arguments input from the caller
	$inputArgs = end( $this->mArgStack );
	if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
		$text = $inputArgs[$part1];
		$found = true;
	}
	*/
	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored under
		# their trimmed name, all others under consecutive numbers from 1.
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always yields a string, so no false-checks are needed.
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1467
1468 # Triple brace replacement -- used for template arguments
function argSubstitution( $matches )
{
	# $matches[1] is the optional leading newline, $matches[2] the argument
	# name.  The arguments of the innermost template call are the top entry
	# of $this->mArgStack.
	$inputArgs = end( $this->mArgStack );
	$arg = trim( $matches[2] );

	# Unknown argument: leave the {{{...}}} text exactly as it was.
	if ( !array_key_exists( $arg, $inputArgs ) ) {
		return $matches[0];
	}

	return $this->stripParse( $inputArgs[$arg], $matches[1], array() );
}
1482
1483 # Returns true if the function is allowed to include this entity
function incrementIncludeCount( $dbk )
{
	# Count this inclusion of $dbk, then report whether the running total
	# is still within MAX_INCLUDE_REPEAT.  The counter is incremented even
	# when the limit has already been exceeded.
	if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk]++;
	} else {
		$this->mIncludeCount[$dbk] = 1;
	}

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1495
1496
1497 # Cleans up HTML, removes dangerous tags and attributes
/* private */ function removeHTMLtags( $text )
{
	# Strips HTML comments, then walks the text tag by tag.  Tags on the
	# whitelist are re-emitted with their attributes passed through
	# fixTagAttributes(); everything else has its "<" and ">" escaped.
	# Without $wgUseTidy the nesting of tags is also checked and any tags
	# left open are closed at the end.  With $wgUserHtml off the whitelist
	# is empty, so every tag is escaped.
	global $wgUseTidy, $wgUserHtml;
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
			"strike", "strong", "tt", "var", "div", "center",
			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
			"ruby", "rt" , "rb" , "rp", "p"
		);
		$htmlsingle = array(
			"br", "hr", "li", "dt", "dd"
		);
		$htmlnest = array( # Tags that can be nested--??
			"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
			"dl", "font", "big", "small", "sub", "sup"
		);
		$tabletags = array( # Can only appear inside table
			"td", "th", "tr"
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function; the call is
	# kept in case getHTMLattrs() has side effects -- confirm and drop.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

	# Splits a fragment following "<" into: optional slash, tag name,
	# attribute text, the closing "/>" or ">", and the text up to the next "<".
	$tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			# A fragment that does not look like a tag is escaped as-is.
			# This explicit check replaces toggling error_reporting()
			# around a list() assignment from a possibly empty match.
			if ( !preg_match( $tagPattern, $x, $regs ) ) {
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
				continue;
			}
			list( , $slash, $t, $params, $brace, $rest ) = $regs;
			$t = strtolower( $t );

			$badtag = 0 ;
			if ( in_array( $t, $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( ! in_array( $t, $htmlsingle ) &&
					  ( $ot = @array_pop( $tagstack ) ) != $t ) {
						# Not the tag that was opened last: put the popped
						# entry back and treat this closing tag as bad.
						@array_push( $tagstack, $ot );
						$badtag = 1;
					} else {
						if ( $t == "table" ) {
							# Back to the tag stack saved when the table opened.
							$tagstack = array_pop( $tablestack );
						}
						$newparams = "";
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					  ! in_array( "table", $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					  ! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == "table" ) {
							# A table starts with a fresh tag stack; the
							# current one is saved on $tablestack.
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);

				}
				if ( ! $badtag ) {
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				list( , $slash, $t, $params, $brace, $rest ) = $regs;
				$t = strtolower( $t );
				if ( in_array( $t, $htmlelements ) ) {
					$newparams = $this->fixTagAttributes($params);
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1613
1614
1615 /*
1616 *
1617 * This function accomplishes several tasks:
1618 * 1) Auto-number headings if that option is enabled
1619 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1620 * 3) Add a Table of contents on the top for users who have enabled the option
1621 * 4) Auto-anchor headings
1622 *
1623 * It loops through all headlines, collects the necessary data, then splits up the
1624 * string and re-inserts the newly formatted headlines.
1625 *
1626 */
1627
/* private */ function formatHeadings( $text, $isMain=true )
{
	# $text   - HTML containing <h1>..<h6> headings
	# $isMain - the table of contents is only inserted when this is true
	#
	# Returns $text with every heading replaced by its anchored (and
	# optionally numbered / edit-linked) version, and with the table of
	# contents inserted after the text that precedes the first heading.
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}


	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();
	$sublevelCount = array();
	$refer = array();     # headline index => canonized headline
	$refers = array();    # canonized headline => occurrences so far
	$refcount = array();  # headline index => occurrence number of its anchor
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = "";
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if ( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Join the non-empty counters of all levels up to this one with dots.
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		# Chain on the result of unstrip(); passing $headline again here
		# would throw that result away.
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
		# strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
		$canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
		$refer[$headlineCount] = $canonized_headline;

		# count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= "_" . $refcount[$headlineCount];
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if ( empty( $head[$headlineCount] ) ) {
			$head[$headlineCount] = "";
		}
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text (section 0) used to be
		# added here when section editing is enabled.  It is disabled
		# because it broke block formatting, for example a bullet point
		# in the top line:
		#   $full .= $sk->editSectionLink(0);
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1814
# Turns every "ISBN <number>" in $text into a link to Special:Booksources.
#
# After the literal "ISBN " any further spaces are skipped, then the longest
# run of digits, hyphens and upper-case letters is taken as the number.
# When that run is empty the text is left as it was.  Hyphens are removed
# from the number used in the link query, but kept in the link label.
/* private */ function magicISBN( $text )
{
	# explode() on the literal marker replaces split(), which treated the
	# marker as a regular expression and was removed in PHP 7.
	$a = explode( "ISBN ", $text );
	if ( count ( $a ) < 2 ) return $text;
	$text = array_shift( $a );
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# strspn() measures the runs without reading past the end of $x;
		# probing $x character by character raised notices and warnings
		# whenever "ISBN " was the last thing in the text.
		$blankLen = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLen );
		$x = (string)substr( $x, $blankLen );

		$isbnLen = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLen );
		$x = (string)substr( $x, $isbnLen );

		$num = str_replace( array( "-", " " ), "", $isbn );

		if ( "" === $num ) {
			# Nothing usable followed the marker: restore the original text.
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
			  $titleObj->escapeLocalUrl( "isbn={$num}" ) .
			  "\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
# Turns every "RFC <number>" in $text into an external link.
#
# After the literal "RFC " any further spaces are skipped, then the longest
# run of digits is taken as the RFC number.  When there are no digits the
# text is left as it was.  The link target is the "rfcurl" message with
# "$1" replaced by the number.
/* private */ function magicRFC( $text )
{
	# explode() on the literal marker replaces split(), which treated the
	# marker as a regular expression and was removed in PHP 7.
	$a = explode( "RFC ", $text );
	if ( count ( $a ) < 2 ) return $text;
	$text = array_shift( $a );
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# strspn() measures the runs without reading past the end of $x;
		# probing $x character by character raised notices and warnings
		# whenever "RFC " was the last thing in the text.
		$blankLen = strspn( $x, " " );
		$blank = str_repeat( " ", $blankLen );
		$x = (string)substr( $x, $blankLen );

		$rfcLen = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLen );
		$x = (string)substr( $x, $rfcLen );

		if ( "" === $rfc ) {
			# No number followed the marker: restore the original text.
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
1881
# Entry point for the pre-save pass: returns altered wiki markup.
#
# Sets the parser's options, title and output type (OT_WIKI), optionally
# clears the parser state, converts "\r\n" line endings to "\n", and runs
# pstPass2() on the text while the sections removed by strip() are held
# aside.  Those sections are put back before the text is returned.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	$text = str_replace( "\r\n", "\n", $text );

	# Regex-based normalisation of <br> tags used to follow here; it is
	# intentionally not performed.

	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $this->unstripNoWiki( $text, $stripState );
}
1911
# Second stage of the pre-save transform.
#
# $text - wiki markup
# $user - supplies the name and the "nickname" option used in signatures
#
# Performs, in order: variable replacement, expansion of the ~~~, ~~~~ and
# ~~~~~ signature markers, expansion of the [[|name]] / [[name (context)|]]
# link shortcuts, and trimming of trailing whitespace followed by removal
# of the MAG_END marker.  Returns the transformed markup.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# The markers are fixed strings and the replacements contain
	# user-supplied text, so plain str_replace() is used: preg_replace()
	# would interpret "$1" or "\1" inside a nickname as a back-reference.
	# Longest marker first, so ~~~~~ is not consumed by the shorter ones.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~~", $d, $text );
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
													# [[ns:page (cont)|]]
	# The context is the parenthesised suffix of the current page title, if any.
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1982
# Prime the parser state that parse() would normally establish, so that
# outside code can invoke individual Parser methods directly.
#
# $title      - title object, stored by reference in $this->mTitle
# $options    - stored in $this->mOptions
# $outputType - stored in $this->mOutputType (one of the OT_* constants)
# $clearState - when true (the default), clearState() is called afterwards
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	# Caller asked to keep whatever state is already there
	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
1994
# Run variable replacement over a message text with the output type set
# to OT_MSG. The parser is pointed at the global $wgTitle and its state is
# cleared before the replacement runs.
#
# $text    - message text to transform
# $options - stored in $this->mOptions for the duration of the call
#
# Returns the transformed text, or the text unchanged when this function
# is already running further up the call stack.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $inProgress = false;

	# A nested call while a transform is running would recurse without
	# end, so only do the work when no transform is in progress.
	if ( !$inProgress ) {
		$inProgress = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$inProgress = false;
	}

	return $text;
}
2014 }
2015
# Value holder for the result of a parser run: the generated text plus the
# language links, category links and "contains old magic" flag that were
# collected along the way.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor.
	#
	# $text             - output text
	# $languageLinks    - array of language links
	# $categoryLinks    - array of category links
	# $containsOldMagic - flag, false by default
	#
	# The cache time always starts out as an empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators. Each one hands the member and the new value to wfSetVar()
	# and returns whatever wfSetVar() returns (presumably the previous
	# value -- wfSetVar() is defined elsewhere, confirm there).
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold the link lists and the old-magic flag of another ParserOutput
	# into this one. The text and the cache time are left untouched.
	#
	# $other - the ParserOutput whose data is appended; it is not modified
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Append the *other* object's categories. Merging in our own language
		# links here would mix language links into the category list and
		# silently drop the categories collected by $other.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2049
# Bundle of settings that influence a parser run. Part of it is copied
# from site-wide globals, the rest is read from a user object; see
# initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# --- Read accessors: each returns the member of the same name ---

	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	# --- Write accessors: all delegate to wfSetVar(), except the skin,
	# --- which goes through wfSetRef(). The helper's result is returned.

	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setSkin( $x ) {
		return wfSetRef( $this->mSkin, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# Build a fresh ParserOptions and initialise it from the given user.
	# Intended to be called statically.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Fill in every option.
	#
	# $userInput - user object to read preferences from. When it evaluates
	#              to false, a new User object marked as loaded is used
	#              in its place.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			# Nothing usable was passed in: fall back to a blank user
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide switches, copied from the globals
		$this->mUseTeX              = $wgUseTeX;
		$this->mUseCategoryMagic    = $wgUseCategoryMagic;
		$this->mUseDynamicDates     = $wgUseDynamicDates;
		$this->mInterwikiMagic      = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user preferences
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat              = $user->getOption( "date" );
		$this->mEditSection             = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings          = $user->getOption( "numberheadings" );
		$this->mShowToc                 = $user->getOption( "showtoc" );
	}

}
2122
2123 # Regex callbacks, used in Parser::replaceVariables
# Plain-function callback that forwards the regex match array to
# braceSubstitution() on the parser held in the global $wgCurParser
# and returns its result.
function wfBraceSubstitution( $matches )
{
	$parser =& $GLOBALS['wgCurParser'];
	return $parser->braceSubstitution( $matches );
}
2129
# Plain-function callback that forwards the regex match array to
# argSubstitution() on the parser held in the global $wgCurParser
# and returns its result.
function wfArgSubstitution( $matches )
{
	$parser =& $GLOBALS['wgCurParser'];
	return $parser->argSubstitution( $matches );
}
2135
# Plain-function callback that forwards the regex match array to
# variableSubstitution() on the parser held in the global $wgCurParser
# and returns its result.
function wfVariableSubstitution( $matches )
{
	$parser =& $GLOBALS['wgCurParser'];
	return $parser->variableSubstitution( $matches );
}
2141
2142 ?>