* Changed inclusion syntax to allow e.g. {{stub}}
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Processes wiki markup
8 #
9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
11 #
12 # Globals used:
13 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
14 #
15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
16 #
17 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
18 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
19 # $wgLocaltimezone
20 #
21 # * only within ParserOptions
22 #
23 #
24 #----------------------------------------
25 # Variable substitution O(N^2) attack
26 #-----------------------------------------
27 # Without countermeasures, it would be possible to attack the parser by saving a page
28 # filled with a large number of inclusions of large pages. The size of the generated
29 # page would be proportional to the square of the input size. Hence, we limit the number
30 # of inclusions of any given page, thus bringing any attack back to O(N).
31 #
# Upper bound on how often any single page may be included (see the
# O(N^2) attack note above).
define( 'MAX_INCLUDE_REPEAT', 5 );

# Recursion depth of variable/inclusion evaluation
define( 'MAX_INCLUDE_PASSES', 3 );

# Allowed values for $mOutputType: render to HTML, or emit altered wiki markup
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
40
41 class Parser
42 {
43 # Cleared with clearState():
44 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
45 var $mVariables, $mIncludeCount;
46
47 # Temporary:
48 var $mOptions, $mTitle, $mOutputType;
49
# Constructor (PHP 4 style: same name as the class).
# A new parser starts from the blank state that clearState() sets up.
function Parser()
{
    $this->clearState();
}
54
# Reset every member listed under "Cleared with clearState()" to its
# initial value. parse() calls this unless told not to.
function clearState()
{
    # Scalars and flags
    $this->mAutonumber = 0;
    $this->mLastSection = "";
    $this->mDTopen = false;

    # Lazily (re)built structures: false means "not built yet"
    $this->mStripState = false;
    $this->mVariables = false;

    # Per-parse accumulators
    $this->mIncludeCount = array();
    $this->mOutput = new ParserOutput;
}
65
66 # First pass--just handle <nowiki> sections, pass the rest off
67 # to doWikiPass2() which does all the real work.
68 #
69 # Returns a ParserOutput
70 #
# Main HTML entry point.
#
# $text       wiki markup to render
# $title      title object of the page being parsed (kept by reference)
# $options    parser options object; stored in $this->mOptions
# $linestart  passed through to doWikiPass2()/doBlockLevels()
# $clearState when true, clearState() is called before parsing
#
# Returns $this->mOutput after its text has been set to the rendered result.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
    $fname = "Parser::parse";
    wfProfileIn( $fname );

    if ( $clearState ) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_HTML;

    # strip() fills $this->mStripState (passed by reference); the same
    # state is handed to unstrip() to put the protected sections back.
    $text = $this->strip( $text, $this->mStripState );
    $text = $this->doWikiPass2( $text, $linestart );
    $text = $this->unstrip( $text, $this->mStripState );

    $this->mOutput->setText( $text );
    wfProfileOut( $fname );
    return $this->mOutput;
}
93
# Returns two random 31-bit integers rendered in hexadecimal and glued
# together. strip() uses the result as a placeholder prefix.
/* static */ function getRandomString()
{
    $first = dechex( mt_rand( 0, 0x7fffffff ) );
    $second = dechex( mt_rand( 0, 0x7fffffff ) );
    return $first . $second;
}
98
99 # Strips <nowiki>, <pre> and <math>
100 # Returns the text, and fills an array with data needed in unstrip()
101 #
# Replaces every <nowiki>, <math> (only when TeX is enabled in the options)
# and <pre> section with a unique placeholder, in that order.
#
# $text   the markup to process
# $state  (by reference) overwritten with the lists, counters and
#         placeholder prefixes that unstrip() needs
#
# When $this->mOutputType is OT_HTML the stored sections are rendered
# (escaped or passed to renderMath()); otherwise they are stored wrapped in
# their original tags.
#
# Returns the text with placeholders in place of the stripped sections.
function strip( $text, &$state )
{
    $state = array(
        'nwlist' => array(),
        'nwsecs' => 0,
        'nwunq' => Parser::getRandomString(),
        'mathlist' => array(),
        'mathsecs' => 0,
        'mathunq' => Parser::getRandomString(),
        'prelist' => array(),
        'presecs' => 0,
        'preunq' => Parser::getRandomString()
    );
    $render = ($this->mOutputType == OT_HTML);
    $stripped = "";
    $stripped2 = "";
    $stripped3 = "";

    # Replace any instances of the placeholders
    $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
    $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
    $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

    while ( "" != $text ) {
        $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
        $stripped .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
            $text = "";
        } else {
            $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
            ++$state['nwsecs'];

            if ( $render ) {
                $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
            } else {
                $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
            }

            $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
            # No closing tag: the section runs to the end of the text and
            # preg_split() returned a single element.
            $text = isset( $q[1] ) ? $q[1] : "";
        }
    }

    if( $this->mOptions->getUseTeX() ) {
        while ( "" != $stripped ) {
            $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
            $stripped2 .= $p[0];
            if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                $stripped = "";
            } else {
                $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
                ++$state['mathsecs'];

                if ( $render ) {
                    $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
                } else {
                    $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
                }

                $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
                $stripped = isset( $q[1] ) ? $q[1] : "";
            }
        }
    } else {
        $stripped2 = $stripped;
    }

    while ( "" != $stripped2 ) {
        $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
        $stripped3 .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
            $stripped2 = "";
        } else {
            $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
            ++$state['presecs'];

            if ( $render ) {
                $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
            } else {
                $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
            }

            $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
            $stripped2 = isset( $q[1] ) ? $q[1] : "";
        }
    }
    return $stripped3;
}
190
# Puts back the sections that strip() replaced by placeholders.
#
# $text   text containing placeholders
# $state  the array filled in by strip()
#
# Sections are restored in the order pre, math, nowiki -- the reverse of
# the order in which strip() removed them. Returns the restored text.
function unstrip( $text, &$state )
{
    foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
        for ( $n = 1; $n <= $state[$kind . 'secs']; ++$n ) {
            $placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
            $text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
        }
    }
    return $text;
}
206
# Builds the HTML block that doWikiPass2() appends to the page text when
# the current title is in the category pseudo-namespace: a list of
# sub-categories and a list of articles, both taken from the link query below.
#
# Returns "" when category magic is disabled in the options or when the
# current title does not start with the (localised) category prefix.
function categoryMagic ()
{
    global $wgLang ;
    if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
    # The id is interpolated into SQL below; force it to an integer.
    $id = intval( $this->mTitle->getArticleID() ) ;
    $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ;
    $ti = $this->mTitle->getText() ;
    $ti = explode ( ":" , $ti , 2 ) ;
    if ( $cat != $ti[0] ) return "" ;
    # Part of the title after the prefix; empty when there is no colon.
    $catName = isset( $ti[1] ) ? $ti[1] : "" ;
    $r = "<br clear=all>\n" ;

    $articles = array() ;
    $children = array() ;

    # Use the skin from the parser options, like the rest of this class;
    # the file header forbids reaching for $wgUser here.
    $sk =& $this->mOptions->getSkin() ;

    # NOTE(review): $doesexist is hard-wired to false, so only the
    # brokenlinks query is ever issued -- confirm this is intended.
    $doesexist = false ;
    if ( $doesexist ) {
        $sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
    } else {
        $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
    }

    $res = wfQuery ( $sql, DB_READ ) ;
    while ( $x = wfFetchObject ( $res ) )
    {
        # Rebuild the prefixed title text from namespace and title
        $t = $wgLang->getNsText ( $x->cur_namespace ) ;
        if ( $t != "" ) $t .= ":" ;
        $t .= $x->cur_title ;

        # Pages that themselves carry the category prefix are sub-categories
        $y = explode ( ":" , $t , 2 ) ;
        if ( count ( $y ) == 2 && $y[0] == $cat ) {
            array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
        } else {
            array_push ( $articles , $sk->makeLink ( $t ) ) ;
        }
    }
    wfFreeResult ( $res ) ;

    # Children
    if ( count ( $children ) > 0 )
    {
        asort ( $children ) ;
        $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
        $r .= implode ( ", " , $children ) ;
    }

    # Articles
    if ( count ( $articles ) > 0 )
    {
        asort ( $articles ) ;
        $h = wfMsg( "category_header", $catName );
        $r .= "<h2>{$h}</h2>\n" ;
        $r .= implode ( ", " , $articles ) ;
    }

    return $r ;
}
272
# Returns the whitelist of attribute names that fixTagAttributes() lets
# through (no scripting hooks). Each name appears exactly once.
function getHTMLattrs ()
{
    $htmlattrs = array( # Allowed attributes--no scripting, etc.
        "title", "align", "lang", "dir", "width", "height",
        "bgcolor", "clear", /* BR */ "noshade", /* HR */
        "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
        /* FONT */ "type", "start", "value", "compact",
        /* For various lists, mostly deprecated but safe */
        "summary", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan", /* Tables */
        "id", "class", "name", "style" /* For CSS */
    );
    return $htmlattrs ;
}
289
# Filters the attribute part of a tag.
#
# $t  raw attribute text, e.g. ' border=1 onclick="x" '
#
# Attributes whose name is not in getHTMLattrs() are removed. If what is
# left contains a style attribute matching the pattern below, ALL
# attributes are dropped. Returns the trimmed result.
function fixTagAttributes ( $t )
{
    if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

    # Strip non-approved attributes from the tag. A callback is used rather
    # than the /e modifier so that attribute text is never evaluated as PHP.
    $t = preg_replace_callback(
        "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
        array( $this, "fixTagAttributesCallback" ),
        $t);

    # Strip javascript "expression" from stylesheets. Brute force approach:
    # If anything offensive is found, all attributes of the HTML tag are dropped
    if( preg_match(
        "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
        wfMungeToUtf8( $t ) ) )
    {
        $t="";
    }

    return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes(): keeps one
# name[=value] match when the name is whitelisted, otherwise drops it.
/* private */ function fixTagAttributesCallback ( $m )
{
    if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) return "" ;
    # Group 3 is absent for value-less attributes such as "noshade"
    $value = isset( $m[3] ) ? $m[3] : "" ;
    return ( $value !== "" ) ? $m[1] . "=" . $value : $m[1] ;
}
312
# Converts wiki table markup to HTML, line by line:
#   {|  opens a table      |}  closes it       |-  starts a new row
#   |   data cell(s)       !   header cell(s)  |+  caption
# Several cells may share a line, separated by || (or !! on header lines).
# Text before a single | inside a cell is treated as that cell's attributes.
#
# Four parallel stacks, one entry per open table, track nesting. Anything
# still open at the end of the text is closed. Returns the converted text.
function doTableStuff ( $t )
{
    $t = explode ( "\n" , $t ) ;
    $td = array () ; # Is currently a td tag open?
    $ltd = array () ; # Was it TD or TH?
    $tr = array () ; # Is currently a tr tag open?
    $ltr = array () ; # tr attributes
    foreach ( $t AS $k => $x )
    {
        $x = rtrim ( $x ) ;
        $fc = substr ( $x , 0 , 1 ) ;
        if ( "{|" == substr ( $x , 0 , 2 ) )
        {
            # Everything after "{|" is attribute text; fixTagAttributes()
            # trims it, so no separator needs to be skipped by hand.
            $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $tr , false ) ;
            array_push ( $ltr , "" ) ;
        }
        else if ( count ( $td ) == 0 ) { } # Don't do any of the following
        else if ( "|}" == substr ( $x , 0 , 2 ) )
        {
            $z = "</table>\n" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
        }
        else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
        {
            $x = substr ( $x , 1 ) ;
            while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
            $z = "" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
            # The <tr> itself is emitted lazily by the first cell of the row;
            # only its attributes are remembered here.
            array_push ( $tr , false ) ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
        }
        else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
        {
            if ( "|+" == substr ( $x , 0 , 2 ) )
            {
                $fc = "+" ;
                $x = substr ( $x , 1 ) ;
            }
            $after = substr ( $x , 1 ) ;
            if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
            $after = explode ( "||" , $after ) ;
            $t[$k] = "" ;
            foreach ( $after AS $theline )
            {
                $z = "" ;
                if ( $fc != "+" )
                {
                    # Open the row if this is its first cell
                    $tra = array_pop ( $ltr ) ;
                    if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                    array_push ( $tr , true ) ;
                    array_push ( $ltr , "" ) ;
                }

                # Close the previous cell, if any, with its own tag name
                $l = array_pop ( $ltd ) ;
                if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                if ( $fc == "|" ) $l = "TD" ;
                else if ( $fc == "!" ) $l = "TH" ;
                else if ( $fc == "+" ) $l = "CAPTION" ;
                else $l = "" ;
                array_push ( $ltd , $l ) ;
                $y = explode ( "|" , $theline , 2 ) ;
                if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                $t[$k] .= $y ;
                array_push ( $td , true ) ;
            }
        }
    }

    # Closing open td, tr && table
    while ( count ( $td ) > 0 )
    {
        # Close the cell with the tag that opened it (TD, TH or CAPTION)
        $l = array_pop ( $ltd ) ;
        if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
        if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
        array_pop ( $ltr ) ;
        $t[] = "</table>" ;
    }

    $t = implode ( "\n" , $t ) ;
    return $t ;
}
412
413 # Well, OK, it's actually about 14 passes. But since all the
414 # hard lifting is done inside PHP's regex code, it probably
415 # wouldn't speed things up much to add a real parser.
416 #
# Runs the rendering passes over text that has already been through
# strip(). The order of the passes matters and is kept exactly:
# HTML tag removal, variables, headings, block levels, optional date
# reformatting, external links, internal links, tables, heading formatting,
# skin transformation, and finally the category listing.
#
# $text       stripped wiki markup
# $linestart  forwarded to doBlockLevels()
#
# Returns the rendered text.
function doWikiPass2( $text, $linestart )
{
    global $wgDateFormatter;

    $profileKey = "Parser::doWikiPass2";
    wfProfileIn( $profileKey );

    $text = $this->replaceVariables( $this->removeHTMLtags( $text ) );
    $text = str_replace ( "<HR>", "<hr>", $text );

    $text = $this->doBlockLevels( $this->doHeadings( $text ), $linestart );

    if ( $this->mOptions->getUseDynamicDates() ) {
        $dateFormat = $this->mOptions->getDateFormat();
        $text = $wgDateFormatter->reformat( $dateFormat, $text );
    }

    # External links must be handled before internal ones
    $text = $this->replaceInternalLinks( $this->replaceExternalLinks( $text ) );
    $text = $this->doTableStuff( $text );
    $text = $this->formatHeadings( $text );

    $sk =& $this->mOptions->getSkin();
    $text = $sk->transformContent( $text ) . $this->categoryMagic();

    wfProfileOut( $profileKey );
    return $text;
}
449
450
# Turns "== Heading ==" style lines into <hN> elements.
# Levels are tried from 6 down to 1 so that the longest run of "=" wins.
# Returns the converted text.
/* private */ function doHeadings( $text )
{
    $level = 6;
    while ( $level >= 1 ) {
        $marks = str_repeat( "=", $level );
        $pattern = "/^" . $marks . "(.+)" . $marks . "(\\s|$)/m";
        $replacement = "<h" . $level . ">\\1</h" . $level . ">\\2";
        $text = preg_replace( $pattern, $replacement, $text );
        --$level;
    }
    return $text;
}
460
461 # Note: we have to do external links before the internal ones,
462 # and otherwise take great care in the order of things here, so
463 # that we don't end up interpreting some URLs twice.
464
# Applies subReplaceExternalLinks() once per supported protocol, in a
# fixed order. The flag says whether unlabelled bracketed links of that
# protocol are auto-numbered ("[1]", "[2]", ...).
/* private */ function replaceExternalLinks( $text )
{
    $profileKey = "Parser::replaceExternalLinks";
    wfProfileIn( $profileKey );

    $protocols = array(
        "http" => true,
        "https" => true,
        "ftp" => false,
        "irc" => false,
        "gopher" => false,
        "news" => false,
        "mailto" => false
    );
    foreach ( $protocols as $protocol => $autonumber ) {
        $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
    }

    wfProfileOut( $profileKey );
    return $text;
}
479
# Links every URL of one protocol in $s.
#
# $s           text to process
# $protocol    scheme without the colon, e.g. "http"
# $autonumber  when true, "[url]" links are labelled "[n]" using
#              $this->mAutonumber; the image replacement below is also only
#              attempted when this is true
#
# Two stages: (1) free-standing URLs not preceded by "[" become images
# (if allowed) or plain links; (2) bracketed "[url]" / "[url text]" forms
# are converted. Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
    # Stand-in for the protocol while stage 1 runs, so that URLs emitted
    # by the first replacement are not matched again by the second.
    $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
    $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

    # this is the list of separators that should be ignored if they
    # are the last character of an URL but that should be included
    # if they occur within the URL, e.g. "go to www.foo.com, where .."
    # in this case, the last comma should not become part of the URL,
    # but in "www.foo.com/123,2342,32.htm" it should.
    $sep = ",;\.:";
    $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
    $images = "gif|png|jpg|jpeg";

    # PLEASE NOTE: The curly braces { } are not part of the regex,
    # they are interpreted as part of the string (used to tell PHP
    # that the content of the string should be inserted there).
    $imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
        "((?i){$images})([^{$uc}]|$)/";
    $freeUrlPattern = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";

    $sk =& $this->mOptions->getSkin();

    if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
        $imageTag = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
        $s = preg_replace( $imagePattern, "\\1" . $imageTag . "\\6", $s );
    }

    $urlToken = "{$unique}:\\3";
    $linkAttribs = $sk->getExternalLinkAttributes( $urlToken, wfEscapeHTML( $urlToken ) );
    $anchor = "<a href=\"{$urlToken}\"" . $linkAttribs . ">" . wfEscapeHTML( $urlToken ) . "</a>";
    $s = preg_replace( $freeUrlPattern, "\\1" . $anchor . "\\5", $s );
    $s = str_replace( $unique, $protocol, $s );

    # Stage 2: split on "[protocol:"; the leading space guarantees that the
    # first piece is never empty and is removed again right after.
    $pieces = explode( "[{$protocol}:", " " . $s );
    $s = substr( array_shift( $pieces ), 1 );

    $bareForm = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
    $labelledForm = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

    foreach ( $pieces as $piece ) {
        $isBare = preg_match( $bareForm, $piece, $match );
        if ( !$isBare && !preg_match( $labelledForm, $piece, $match ) ) {
            # Not a well-formed bracketed link: put the text back untouched
            $s .= "[{$protocol}:" . $piece;
            continue;
        }

        $link = "{$protocol}:{$match[1]}";
        if ( $isBare ) {
            $trail = $match[2];
            if ( $autonumber ) {
                $label = "[" . ++$this->mAutonumber . "]";
            } else {
                $label = wfEscapeHTML( $link );
            }
        } else {
            $label = $match[2];
            $trail = $match[3];
        }

        $paren = $this->mOptions->getPrintable()
            ? " (<i>" . htmlspecialchars ( $link ) . "</i>)"
            : "";
        $la = $sk->getExternalLinkAttributes( $link, $label );
        $s .= "<a href='{$link}'{$la}>{$label}</a>{$paren}{$trail}";
    }
    return $s;
}
542
# Handles a ''' (bold) token.
#
# $state  (by reference) array with keys "em" and "strong"; each holds
#         FALSE or the position of the token that opened that element
# $token  the current token; only $token["pos"] is read
#
# Returns the HTML to emit. When an <em> was opened after the <strong>
# being closed, the <em> is closed and reopened to keep tags nested.
# NOTE(review): a stored position of 0 is indistinguishable from FALSE here.
/* private */ function handle3Quotes( &$state, $token )
{
    if ( !$state["strong"] ) {
        # Nothing open: this token starts bold text
        $state["strong"] = $token["pos"];
        return "<strong>";
    }

    $emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
    $state["strong"] = FALSE;
    # ''' lala ''lala '''
    return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
560
# Handles a '' (italic) token; mirror image of handle3Quotes().
#
# $state  (by reference) array with keys "em" and "strong"; each holds
#         FALSE or the position of the token that opened that element
# $token  the current token; only $token["pos"] is read
#
# Returns the HTML to emit. When a <strong> was opened after the <em>
# being closed, the <strong> is closed and reopened to keep tags nested.
# NOTE(review): a stored position of 0 is indistinguishable from FALSE here.
/* private */ function handle2Quotes( &$state, $token )
{
    if ( !$state["em"] ) {
        # Nothing open: this token starts italic text
        $state["em"] = $token["pos"];
        return "<em>";
    }

    $strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
    $state["em"] = FALSE;
    # ''lala'''lala'' ....'''
    return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
578
# Handles a ''''' (bold + italic) token.
#
# $state  (by reference) array with keys "em" and "strong"; each holds
#         FALSE or the position of the token that opened that element
# $token  the current token; only $token["pos"] is read
#
# Closes whatever is open (innermost first) and/or opens what is not, and
# returns the HTML to emit.
/* private */ function handle5Quotes( &$state, $token )
{
    if ( $state["em"] && $state["strong"] ) {
        # Both open: close the one opened last first
        if ( $state["em"] < $state["strong"] ) {
            $s = "</strong></em>";
        } else {
            $s = "</em></strong>";
        }
        $state["strong"] = $state["em"] = FALSE;
    } elseif ( $state["em"] ) {
        $s = "</em><strong>";
        $state["em"] = FALSE;
        $state["strong"] = $token["pos"];
    } elseif ( $state["strong"] ) {
        $s = "</strong><em>";
        $state["strong"] = FALSE;
        $state["em"] = $token["pos"];
    } else { # not $em and not $strong
        $s = "<strong><em>";
        $state["strong"] = $state["em"] = $token["pos"];
    }
    return $s;
}
602
# Token-driven pass that converts [[internal links]], quote markup
# ('' ''' '''''), "----" rules and the RFC / ISBN magic words.
#
# While a link is open, produced text is pushed onto $tokenStack instead of
# being appended to the output; on the closing token the pieces are glued
# together and handed to handleInternalLink(). Anything left on the stack
# at the end (an unclosed link) is emitted as plain text.
#
# Returns the converted text.
/* private */ function replaceInternalLinks( $str )
{
    global $wgLang; # for language specific parser hook

    $tokenizer=Tokenizer::newFromString( $str );
    $tokenStack = array();

    $s="";
    $state["em"] = FALSE;
    $state["strong"] = FALSE;
    $tagIsOpen = FALSE;

    # The tokenizer splits the text into tokens and returns them one by one.
    # Every call to the tokenizer returns a new token.
    while ( $token = $tokenizer->nextToken() )
    {
        switch ( $token["type"] )
        {
            case "text":
                # simple text with no further markup
                $txt = $token["text"];
                break;
            case "[[[":
                # Opened with 3 [. The token type stays on the stack, so the
                # closing branch can tell how the link was opened.
            case "[[":
                # link opening tag.
                # FIXME : Treat orphaned open tags (stack not empty when text is over)
                $tagIsOpen = TRUE;
                array_push( $tokenStack, $token );
                $txt="";
                break;

            case "]]]":
            case "]]":
                # link close tag.
                # get text from stack, glue it together, and call the code to handle a
                # link

                if ( count( $tokenStack ) == 0 )
                {
                    # stack empty. Found a ]] without an opening [[
                    $txt = "]]";
                } else {
                    $linkText = "";
                    $lastToken = array_pop( $tokenStack );
                    while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
                    {
                        if( !empty( $lastToken["text"] ) ) {
                            $linkText = $lastToken["text"] . $linkText;
                        }
                        $lastToken = array_pop( $tokenStack );
                    }

                    $txt = $linkText ."]]";

                    if( isset( $lastToken["text"] ) ) {
                        $prefix = $lastToken["text"];
                    } else {
                        $prefix = "";
                    }
                    $nextToken = $tokenizer->previewToken();
                    if ( $nextToken["type"] == "text" )
                    {
                        # Preview just looks at it. Now we have to fetch it.
                        $nextToken = $tokenizer->nextToken();
                        $txt .= $nextToken["text"];
                    }
                    $txt = $this->handleInternalLink( $txt, $prefix );

                    # did the tag start with 3 [ ?
                    # (Decided from the popped opening token: a flag set while
                    # reading "[[[" would not survive until the closing token.)
                    # NOTE(review): a third "]" of a "]]]" close is not
                    # re-emitted here -- confirm what Tokenizer produces.
                    if ( $lastToken["type"] == "[[[" ) {
                        # show the first as text
                        $txt = "[".$txt;
                    }

                }
                $tagIsOpen = (count( $tokenStack ) != 0);
                break;
            case "----":
                $txt = "\n<hr>\n";
                break;
            case "'''":
                # This and the three next ones handle quotes
                $txt = $this->handle3Quotes( $state, $token );
                break;
            case "''":
                $txt = $this->handle2Quotes( $state, $token );
                break;
            case "'''''":
                $txt = $this->handle5Quotes( $state, $token );
                break;
            case "":
                # empty token
                $txt="";
                break;
            case "RFC ":
                if ( $tagIsOpen ) {
                    $txt = "RFC ";
                } else {
                    $txt = $this->doMagicRFC( $tokenizer );
                }
                break;
            case "ISBN ":
                if ( $tagIsOpen ) {
                    $txt = "ISBN ";
                } else {
                    $txt = $this->doMagicISBN( $tokenizer );
                }
                break;
            default:
                # Call language specific Hook.
                $txt = $wgLang->processToken( $token, $tokenStack );
                if ( NULL == $txt ) {
                    # An unknown token. Highlight.
                    $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
                    $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
                }
                break;
        }
        # If we're parsing the interior of a link, don't append the interior to $s,
        # but push it to the stack so it can be processed when a ]] token is found.
        if ( $tagIsOpen && $txt != "" ) {
            $token["type"] = "text";
            $token["text"] = $txt;
            array_push( $tokenStack, $token );
        } else {
            $s .= $txt;
        }
    } #end while
    if ( count( $tokenStack ) != 0 )
    {
        # still objects on stack. opened [[ tag without closing ]] tag.
        $txt = "";
        while ( $lastToken = array_pop( $tokenStack ) )
        {
            if ( $lastToken["type"] == "text" )
            {
                $txt = $lastToken["text"] . $txt;
            } else {
                $txt = $lastToken["type"] . $txt;
            }
        }
        $s .= $txt;
    }
    return $s;
}
753
# Renders one internal link.
#
# $line    the link text starting right after "[[", i.e. "target|label]]trail"
# $prefix  text that preceded the opening brackets
#
# Depending on the target this produces a normal link, an image, a media or
# special-page link, bold text for a self-link, or nothing visible for
# interlanguage and category links (which are recorded on $this->mOutput).
# Input that does not look like a link is returned unchanged, with the
# "[[" put back. Every exit path closes the profiling section.
/* private */ function handleInternalLink( $line, $prefix )
{
    global $wgLang, $wgLinkCache;
    global $wgNamespacesWithSubpages, $wgLanguageCode;
    static $fname = "Parser::replaceInternalLinks" ;
    wfProfileIn( $fname );

    wfProfileIn( "$fname-setup" );
    static $tc = FALSE;
    if ( !$tc ) { $tc = Title::legalChars() . "#"; }
    $sk =& $this->mOptions->getSkin();

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

    # Special and Media are pseudo-namespaces; no pages actually exist in them
    static $image = FALSE;
    static $special = FALSE;
    static $media = FALSE;
    static $category = FALSE;
    if ( !$image ) { $image = Namespace::getImage(); }
    if ( !$special ) { $special = Namespace::getSpecial(); }
    if ( !$media ) { $media = Namespace::getMedia(); }
    if ( !$category ) { $category = wfMsg ( "category" ) ; }

    $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

    wfProfileOut( "$fname-setup" );
    $s = "";

    if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
        $text = $m[2];
        $trail = $m[3];
    } else { # Invalid form; output directly
        $s .= $prefix . "[[" . $line ;
        wfProfileOut( $fname );
        return $s;
    }

    /* Valid link forms:
    Foobar -- normal
    :Foobar -- override special treatment of prefix (images, language links)
    /Foobar -- convert to CurrentPage/Foobar
    /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
    */
    $c = substr($m[1],0,1);
    $noforce = ($c != ":");
    if( $c == "/" ) { # subpage
        if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
            $m[1]=substr($m[1],1,strlen($m[1])-2);
            $noslash=$m[1];
        } else {
            $noslash=substr($m[1],1);
        }
        # Namespaces missing from the table simply do not allow subpages
        if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
            $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
            if( "" == $text ) {
                $text= $m[1];
            } # this might be changed for ugliness reasons
        } else {
            $link = $noslash; # no subpage allowed, use standard link
        }
    } elseif( $noforce ) { # no subpage
        $link = $m[1];
    } else {
        $link = substr( $m[1], 1 );
    }
    if( "" == $text )
        $text = $link;

    $nt = Title::newFromText( $link );
    if( !$nt ) {
        $s .= $prefix . "[[" . $line;
        wfProfileOut( $fname );
        return $s;
    }
    $ns = $nt->getNamespace();
    $iw = $nt->getInterWiki();
    if( $noforce ) {
        if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
            array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
            $s .= $prefix . $trail;
            wfProfileOut( $fname );
            return $s;
        }
        if( $ns == $image ) {
            $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            wfProfileOut( $fname );
            return $s;
        }
    }
    # Self-link without a section anchor: show as bold text. The strict
    # comparison keeps a "#" at offset 0 from being read as "no anchor".
    if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
        ( strpos( $link, "#" ) === FALSE ) ) {
        $s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
        wfProfileOut( $fname );
        return $s;
    }

    # Category feature
    $catns = strtoupper ( $nt->getDBkey () ) ;
    $catns = explode ( ":" , $catns ) ;
    if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ;
    else $catns = "" ;
    if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) {
        $t = explode ( ":" , $nt->getText() ) ;
        array_shift ( $t ) ;
        $t = implode ( ":" , $t ) ;
        $t = $wgLang->ucFirst ( $t ) ;
        $nnt = Title::newFromText ( $category.":".$t ) ;
        $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
        $this->mOutput->mCategoryLinks[] = $t ;
        $s .= $prefix . $trail ;
        wfProfileOut( $fname );
        return $s ;
    }
    if( $ns == $media ) {
        $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
        $wgLinkCache->addImageLinkObj( $nt );
        wfProfileOut( $fname );
        return $s;
    } elseif( $ns == $special ) {
        $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
        wfProfileOut( $fname );
        return $s;
    }
    $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

    wfProfileOut( $fname );
    return $s;
}
884
885 # Some functions here used by doBlockLevels()
886 #
# Ends the section recorded in $this->mLastSection and forgets it.
# A closing tag is produced for any section other than "p" or "" (paragraphs
# are never explicitly closed). The result always ends with a newline.
/* private */ function closeParagraph()
{
    $section = $this->mLastSection;
    $this->mLastSection = "";

    $needsClosingTag = !( 0 == strcmp( "p", $section ) || 0 == strcmp( "", $section ) );
    if ( $needsClosingTag ) {
        return "</" . $section . ">" . "\n";
    }
    return "\n";
}
897 # getCommon() returns the length of the longest common substring
898 # of both arguments, starting at the beginning of both.
899 #
# Returns the length of the common prefix of $st1 and $st2, i.e. the
# number of leading bytes that are identical in both strings.
/* private */ function getCommon( $st1, $st2 )
{
    $fl = strlen( $st1 );
    $shorter = strlen( $st2 );
    if ( $fl < $shorter ) { $shorter = $fl; }

    # Square-bracket offsets: the curly-brace form is gone in current PHP
    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] != $st2[$i] ) { break; }
    }
    return $i;
}
911 # These next three functions open, continue, and close the list
912 # element appropriate to the prefix character passed into them.
913 #
# Opens the list element that belongs to a prefix character:
#   *  <ul><li>    #  <ol><li>    :  <dl><dd>    ;  <dl><dt>
# Any open paragraph is closed first. For ";" the definition-term flag
# ($this->mDTopen) is set. An unknown character yields only an error comment.
/* private */ function openList( $char )
{
    $closed = $this->closeParagraph();

    switch ( $char ) {
        case "*":
            return $closed . "<ul><li>";
        case "#":
            return $closed . "<ol><li>";
        case ":":
            return $closed . "<dl><dd>";
        case ";":
            $this->mDTopen = true;
            return $closed . "<dl><dt>";
    }
    return "<!-- ERR 1 -->";
}
929
# Moves on to the next item of the list identified by a prefix character.
# For * and # this is simply </li><li>. For : and ; the element that is
# currently open (dt if $this->mDTopen, else dd) is closed and a dd or dt
# is opened; $this->mDTopen is updated to match.
# An unknown character yields an error comment.
/* private */ function nextItem( $char )
{
    switch ( $char ) {
        case "*":
        case "#":
            return "</li><li>";
        case ":":
        case ";":
            $close = $this->mDTopen ? "</dt>" : "</dd>";
            $startsTerm = ( ";" == $char );
            $this->mDTopen = $startsTerm;
            return $close . ( $startsTerm ? "<dt>" : "<dd>" );
    }
    return "<!-- ERR 2 -->";
}
946
# Closes the list element that belongs to a prefix character and appends
# a newline. For ":" the closing tag depends on whether a definition term
# is still open ($this->mDTopen, which is then cleared).
# An unknown character yields an error comment without a newline.
/* private */function closeList( $char )
{
    switch ( $char ) {
        case "*":
            return "</li></ul>" . "\n";
        case "#":
            return "</li></ol>" . "\n";
        case ":":
            if ( !$this->mDTopen ) {
                return "</dd></dl>" . "\n";
            }
            $this->mDTopen = false;
            return "</dt></dl>" . "\n";
    }
    return "<!-- ERR 3 -->";
}
962
# Line-by-line pass that builds block-level structure: lists from the
# prefix characters * # : ; and p / pre sections from unprefixed lines.
#
# $text       text to process
# $linestart  when false, the first line is copied through untouched
#             (it is treated as the continuation of an earlier line)
#
# Returns the text with list and section tags inserted.
/* private */ function doBlockLevels( $text, $linestart )
{
    $fname = "Parser::doBlockLevels";
    wfProfileIn( $fname );
    # Parsing through the text line by line. The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    #
    $a = explode( "\n", $text );
    $text = $lastPref = "";
    $this->mDTopen = $inBlockElem = false;
    # Defined up front so the clean-up after the loop is safe even when
    # no line is iterated (single-line input with $linestart false).
    $npl = 0;
    $pref2 = "";

    if ( ! $linestart ) { $text .= array_shift( $a ); }
    foreach ( $a as $t ) {
        if ( "" != $text ) { $text .= "\n"; }

        $oLine = $t;
        $opl = strlen( $lastPref );
        $npl = strspn( $t, "*#:;" );
        $pref = substr( $t, 0, $npl );
        # ";" and ":" belong to the same list type, so compare prefixes
        # with ";" normalised to ":"
        $pref2 = str_replace( ";", ":", $pref );
        $t = substr( $t, $npl );

        if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
            # Same nesting as the previous line: just a new item
            $text .= $this->nextItem( substr( $pref, -1 ) );

            if ( ";" == substr( $pref, -1 ) ) {
                # "; term : definition" on one line
                $cpos = strpos( $t, ":" );
                if ( ! ( false === $cpos ) ) {
                    $term = substr( $t, 0, $cpos );
                    $text .= $term . $this->nextItem( ":" );
                    $t = substr( $t, $cpos + 1 );
                }
            }
        } else if (0 != $npl || 0 != $opl) {
            $cpl = $this->getCommon( $pref, $lastPref );

            # Close the levels that are no longer present ...
            while ( $cpl < $opl ) {
                $text .= $this->closeList( $lastPref[$opl-1] );
                --$opl;
            }
            if ( $npl <= $cpl && $cpl > 0 ) {
                $text .= $this->nextItem( $pref[$cpl-1] );
            }
            # ... and open the new ones
            while ( $npl > $cpl ) {
                $char = substr( $pref, $cpl, 1 );
                $text .= $this->openList( $char );

                if ( ";" == $char ) {
                    $cpos = strpos( $t, ":" );
                    if ( ! ( false === $cpos ) ) {
                        $term = substr( $t, 0, $cpos );
                        $text .= $term . $this->nextItem( ":" );
                        $t = substr( $t, $cpos + 1 );
                    }
                }
                ++$cpl;
            }
            $lastPref = $pref2;
        }
        if ( 0 == $npl ) { # No prefix--go to paragraph mode
            if ( preg_match(
                "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
                $text .= $this->closeParagraph();
                $inBlockElem = true;
            }
            if ( ! $inBlockElem ) {
                # substr() instead of a string offset: $t may be empty
                if ( " " == substr( $t, 0, 1 ) ) {
                    $newSection = "pre";
                }
                else { $newSection = "p"; }

                if ( 0 == strcmp( "", trim( $oLine ) ) ) {
                    $text .= $this->closeParagraph();
                    $text .= "<" . $newSection . ">";
                } else if ( 0 != strcmp( $this->mLastSection,
                    $newSection ) ) {
                    $text .= $this->closeParagraph();
                    if ( 0 != strcmp( "p", $newSection ) ) {
                        $text .= "<" . $newSection . ">";
                    }
                }
                $this->mLastSection = $newSection;
            }
            if ( $inBlockElem &&
                preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
                $inBlockElem = false;
            }
        }
        $text .= $t;
    }
    # Close whatever list levels the last line left open
    while ( $npl ) {
        $text .= $this->closeList( $pref2[$npl-1] );
        --$npl;
    }
    if ( "" != $this->mLastSection ) {
        if ( "p" != $this->mLastSection ) {
            $text .= "</" . $this->mLastSection . ">";
        }
        $this->mLastSection = "";
    }
    wfProfileOut( $fname );
    return $text;
}
1068
# Computes the current value of one of the built-in magic variables.
# $index is compared against the MAG_* identifiers handled below; any other
# identifier yields NULL.
function getVariableValue( $index ) {
	global $wgLang;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") is 0-based; the language object is handed a 1-based index
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
	}
	return $value;
}
1093
# (Re)builds $this->mVariables: starts from an empty array and lets the
# MagicWord object of every ID in $wgVariableIDs register itself, together
# with the value returned by getVariableValue(), via MagicWord::addToArray().
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $magicId ) {
		$magicWord =& MagicWord::get( $magicId );
		$currentValue = $this->getVariableValue( $magicId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1104
# Expands {{...}} constructs in $text and returns the result.
#
# Every {{...}} whose content consists of legal title characters is handed to
# wfBraceSubstitution(), which forwards it to braceSubstitution() on the
# parser stored in $wgCurParser. The substitution is repeated until a pass
# leaves the text unchanged or the pass limit is reached.
/* private */ function replaceVariables( $text )
{
	global $wgCurParser;

	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();
	$regex = "/{{([$titleChars]*?)}}/s";

	# "Recursive" variable expansion: run it through a couple of passes
	# NOTE(review): the pass limit is MAX_INCLUDE_REPEAT although a separate
	# MAX_INCLUDE_PASSES constant is defined for the recursion depth -- confirm
	# which one is intended before changing it.
	for ( $pass = 0; $pass < MAX_INCLUDE_REPEAT; $pass++ ) {
		$oldText = $text;

		# It's impossible to rebind a global in PHP
		# Instead, we run the substitution on a copy, then merge the changed fields back in
		$wgCurParser = $this->fork();
		$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
		$this->merge( $wgCurParser );

		# Nothing left to expand
		if ( $oldText === $text ) {
			break;
		}
	}

	# Balance the wfProfileIn() above
	wfProfileOut( $fname );
	return $text;
}
1137
# Produces a copy of this parser whose mOutput is replaced by a fresh
# ParserOutput; all other members are carried over unchanged.
# The copy can later be folded back into its parent with merge().
# NOTE(review): this relies on plain assignment copying the object
# (PHP 4 value semantics) -- confirm before running on a newer PHP.
function fork()
{
	$child = $this;
	$child->mOutput = new ParserOutput;
	return $child;
}
1146
# Folds a copy created by fork() back into this parser: merges the copy's
# ParserOutput into ours and adds its inclusion counters to ours.
function merge( &$copy )
{
	$this->mOutput->merge( $copy->mOutput );

	# Merge include throttling arrays: sum counts for keys we already track,
	# take over the copy's count for keys we have not seen yet
	foreach ( $copy->mIncludeCount as $pageKey => $extra ) {
		$this->mIncludeCount[$pageKey] = array_key_exists( $pageKey, $this->mIncludeCount )
			? $this->mIncludeCount[$pageKey] + $extra
			: $extra;
	}
}
1161
# Computes the replacement for a single {{...}} match.
#
# $matches is the match array from preg_replace_callback(): $matches[0] is the
# whole construct including braces, $matches[1] the text between them.
# Returns the replacement text, or $matches[0] unchanged when nothing applies.
#
# Resolution order: SUBST handling (depends on the output type), MSGNW:/MSG:
# prefixes, INT: interface messages, built-in variables, and finally the text
# of a page loaded from the "cur" table (throttled per page by
# MAX_INCLUDE_REPEAT).
function braceSubstitution( $matches )
{
	global $wgLinkCache;
	$fname = "Parser::braceSubstitution";
	$found = false;
	$nowiki = false;

	$text = $matches[1];

	# SUBST
	$mwSubst =& MagicWord::get( MAG_SUBST );
	if ( $mwSubst->matchStartAndRemove( $text ) ) {
		if ( $this->mOutputType == OT_HTML ) {
			# Invalid SUBST not replaced at PST time
			# Return without further processing
			$text = $matches[0];
			$found = true;
		}
	} elseif ( $this->mOutputType == OT_WIKI ) {
		# SUBST not found in PST pass, do nothing
		$text = $matches[0];
		$found = true;
	}

	# Various prefixes
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $text ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $text );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $text ) ) {
			$text = wfMsg( $text );
			$found = true;
		}
	}

	# Check for a match against internal variables
	if ( !$found && array_key_exists( $text, $this->mVariables ) ) {
		$text = $this->mVariables[$text];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $text, NS_TEMPLATE );
		# The null check must be on $title: $text is always a string here,
		# and calling isExternal() on a NULL title would be a fatal error.
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
				$this->mIncludeCount[$dbk] = 0;
			}
			if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
				$row = wfGetArray( "cur", array("cur_text"), array(
					"cur_namespace" => $title->getNamespace(),
					"cur_title" => $title->getDBkey() ), $fname );
				if ( $row !== false ) {
					$found = true;
					$text = $row->cur_text;

					# Escaping and link table handling
					# Not required for preSaveTransform()
					if ( $this->mOutputType == OT_HTML ) {
						if ( $nowiki ) {
							$text = wfEscapeWikiText( $text );
						} else {
							$text = $this->removeHTMLtags( $text );
						}
						$wgLinkCache->suspend();
						$text = $this->replaceInternalLinks( $text );
						$wgLinkCache->resume();
						$wgLinkCache->addLinkObj( $title );
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Leave the construct untouched when nothing matched
	return $found ? $text : $matches[0];
}
1261
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split at every "<". A piece that starts with a whitelisted tag
# which is legal at that point of the open-tag stack is kept (attributes are
# passed through fixTagAttributes()); any other piece has its "<" and ">"
# escaped. Tags still open at the end are closed.
/* private */ function removeHTMLtags( $text )
{
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	# Tags that must be closed
	$htmlpairs = array(
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	# Tags that can be nested--??
	$htmlnest = array(
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	# Can only appear inside table
	$tabletags = array(
		"td", "th", "tr"
	);

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	$htmlattrs = $this->getHTMLattrs();

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$pieces = explode( "<", $text );
	$text = array_shift( $pieces );
	$tagstack = array();
	$tablestack = array();

	foreach ( $pieces as $piece ) {
		# A piece that does not look like a tag leaves $regs empty; silence the
		# notices list() would raise in that case
		$savedLevel = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
		preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
			$piece, $regs );
		list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
		error_reporting( $savedLevel );

		$t = strtolower( $t );
		$rejected = true;

		if ( in_array( $t, $htmlelements ) ) {
			$rejected = false;
			$isSingle = in_array( $t, $htmlsingle );

			if ( $slash ) {
				# Closing a tag: it has to match the top of the stack
				$mismatch = false;
				if ( !$isSingle ) {
					$ot = array_pop( $tagstack );
					if ( $ot != $t ) {
						array_push( $tagstack, $ot );
						$mismatch = true;
					}
				}
				if ( $mismatch ) {
					$rejected = true;
				} else {
					if ( $t == "table" ) {
						# Back to the stack that was active outside the table
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				}
			} else {
				# Opening a tag: keep track for later
				$orphanCell = in_array( $t, $tabletags ) &&
					!in_array( "table", $tagstack );
				$illegalNest = in_array( $t, $tagstack ) &&
					!in_array( $t, $htmlnest );

				if ( $orphanCell || $illegalNest ) {
					$rejected = true;
				} else if ( !$isSingle ) {
					if ( $t == "table" ) {
						# A table starts with a fresh stack of its own
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes( $params );
			}
		}

		if ( $rejected ) {
			$text .= "&lt;" . str_replace( ">", "&gt;", $piece );
		} else {
			$rest = str_replace( ">", "&gt;", $rest );
			$text .= "<$slash$t $newparams$brace$rest";
		}
	}

	# Close off any remaining tags
	while ( $open = array_pop( $tagstack ) ) {
		$text .= "</$open>\n";
		if ( $open == "table" ) {
			$tagstack = array_pop( $tablestack );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1354
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text is HTML; the return value is the HTML with the rebuilt headlines
 * (and, where enabled, the table of contents and edit links) inserted.
 *
 */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$st = 0;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);

	# headline counter
	$c = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();

	# State carried from one headline to the next. Initialised explicitly so
	# the loop does not depend on undefined variables.
	$level = 0;          # level of the current headline (1-6)
	$prevlevel = 0;      # level of the previous headline, 0 before the first
	$h = array();        # per-level headline counters
	$numbering = "";     # dotted section number of the current headline
	$dot = 0;            # whether $numbering already holds a component
	$refer = array();    # canonized headline text per headline index
	$refers = array();   # occurrences per canonized headline (duplicate anchors)
	$refcount = array(); # occurrence number per headline index
	$toclines = 0;

	foreach ( $matches[3] as $headline ) {
		$head[$c] = "";
		if ( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		// count number of headlines for each level
		if ( !isset( $h[$level] ) ) {
			$h[$level] = 0;
		}
		$h[$level]++;

		if ( $nh || $st ) {
			for ( $i = 1; $i <= $level; $i++ ) {
				// levels that were skipped have no counter and add no component
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = Parser::unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace('"',"",$canonized_headline);
		$canonized_headline = str_replace(" ","_",trim($canonized_headline));
		$refer[$c] = $canonized_headline;
		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text
		if ( $nh || $st ) {
			$tocline = $numbering ." ". $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				// the two are different if the line contains a link
				$headline = $numbering . " " . $headline;
			}
		}

		// Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if ( $refcount[$c] > 1 ) {
			$anchor .= "_".$refcount[$c];
		}
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink( $c+1 );
		}

		// the headline might have a link
		if ( preg_match("/(.*)<a(.*)/",$headline, $headlinematches) ) {
			// if so give an anchor name to the already existent link
			$headline = $headlinematches[1]
				."<a name=\"".$anchor."\" ".$headlinematches[2];
		} else {
			// else create an anchor link for the headline
			$headline = "<a name=\"".$anchor."\">"
				.$headline
				."</a>";
		}

		// give headline the correct <h#> tag
		$head[$c] .= "<h".$level.$matches[2][$c] .$headline."</h".$level.">";

		// Add the edit section link
		if ( $esr ) {
			$head[$c] = $sk->editSectionScript( $c+1, $head[$c] );
		}

		// reset the per-headline state
		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines
	$blocks = preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>".$full.$toc;
		}

		if ( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1531
# Builds the replacement for an "ISBN" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed, and a leading run of digits, dashes and capital letters
# (after optional spaces) is turned into a link to Special:Booksources;
# the remainder of the token text is appended. Otherwise, or when no such
# run is present, the literal text is returned.
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string) $token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Split off the leading blanks, then the run of ISBN characters.
	# strspn() copes with an empty string, unlike indexing its first character.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string) substr( $x, $blankLen );

	$isbnLen = strspn( $x, $valid );
	$isbn = (string) substr( $x, 0, $isbnLen );
	$x = (string) substr( $x, $isbnLen );

	$num = str_replace( "-", "", $isbn );
	$num = str_replace( " ", "", $num );

	if ( "" == $num ) {
		# Nothing usable followed; give the text back untouched
		return "ISBN $blank$x";
	}

	$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
	$text = "<a href=\"" .
		$titleObj->escapeLocalUrl( "isbn={$num}" ) .
		"\" class=\"internal\">ISBN $isbn</a>";
	$text .= $x;
	return $text;
}
# Builds the replacement for an "RFC" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token it
# is consumed, and a leading run of digits (after optional spaces) is turned
# into an external link built from the "rfcurl" message; the remainder of the
# token text is appended. Otherwise, or when no digits are present, the
# literal text is returned.
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = (string) $token["text"];
	$valid = "0123456789";

	# Split off the leading blanks, then the run of digits.
	# strspn() copes with an empty string, unlike indexing its first character.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string) substr( $x, $blankLen );

	$rfcLen = strspn( $x, $valid );
	$rfc = (string) substr( $x, 0, $rfcLen );
	$x = (string) substr( $x, $rfcLen );

	if ( "" == $rfc ) {
		# No number followed; give the text back untouched.
		# (Plain assignment: there is no earlier $text to append to.)
		return "RFC $blank$x";
	}

	$url = wfMsg( "rfcurl" );
	$url = str_replace( "$1", $rfc, $url);
	$sk =& $this->mOptions->getSkin();
	$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
	return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1620
# Entry point for the transformation applied to wikitext when a page is saved.
# Records the options and title on the parser, switches it to OT_WIKI output,
# optionally resets its state, and returns the altered wiki markup.
# Line endings are normalised to "\n"; the text goes through strip(),
# pstPass2() and unstrip() in that order.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOutputType = OT_WIKI;
	$this->mTitle = $title;
	$this->mOptions = $options;

	if ( $clearState ) {
		$this->clearState();
	}

	$stripState = false;
	$normalised = str_replace("\r\n", "\n", $text);
	$stripped = $this->strip( $normalised, $stripState, false );
	$transformed = $this->pstPass2( $stripped, $user );
	return $this->unstrip( $transformed, $stripState );
}
1638
# Second pass of the pre-save transform. Returns the altered wiki markup.
#
# Performs, in order: {{subst:...}} expansion, signature expansion
# (~~~~ and ~~~), the "pipe trick" for context links, and trimming of
# trailing whitespace.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	if ( isset( $wgLocaltimezone ) ) putenv("TZ=$oldtz");

	# Plain string replacement on purpose: with preg_replace() a "$1" or "\1"
	# inside a user name or nickname would be read as a backreference and
	# silently dropped from the signature.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1708
1709
1710 }
1711
# Result container for a parse: the output text plus the language links,
# category links and "contains old magic" flag collected along the way.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each one returns whatever wfSetVar() returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

	# Folds the link lists and the old-magic flag of $other into this object.
	# mText is left untouched.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links come from $other's category links, not from our own
		# language links
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
1741
# Bundle of settings that control a parse. Values are taken from the site
# configuration globals and from a user's preferences (see
# initialiseFromUser()).
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors
	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	# Mutators; each one returns whatever wfSetVar()/wfSetRef() returns
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a ParserOptions object initialised from $user
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already declares its parameter by reference, so
		# no "&" is needed (or, on later PHP versions, allowed) at the call site
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills every option from the site configuration globals and from the
	# preferences of $userInput; a new User object is used when $userInput
	# is empty.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}


}
1817
# Regex callback used in Parser::replaceVariables: hands the match array on
# to braceSubstitution() of the parser currently stored in the global
# $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
	$parser =& $GLOBALS['wgCurParser'];
	return $parser->braceSubstitution( $matches );
}
1824
1825 ?>