Fix source forge bug 803693
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24
25 # Temporary:
26 var $mOptions, $mTitle;
27
# Constructor (PHP 4 style, named after the class). A new parser
# starts out in the same state clearState() produces between parses.
function Parser()
{
	$this->clearState();
}
32
# Resets all per-parse members: the strip bookkeeping, the block-level
# flags used by doBlockLevels(), the counter used for numbered
# external links, and a fresh ParserOutput to collect the result.
function clearState()
{
	$this->mStripState = false;
	$this->mDTopen = false;
	$this->mLastSection = "";
	$this->mAutonumber = 0;
	$this->mOutput = new ParserOutput;
}
41
# Entry point: converts wikitext to HTML.
#
# strip() swaps <nowiki>, <pre> and (if enabled) <math> sections for
# placeholders, doWikiPass2() does all the real work on what is left,
# and unstrip() puts the protected sections back.
#
# $text       - wikitext to convert
# $title      - taken by reference and kept in $this->mTitle
# $options    - kept in $this->mOptions; the other methods query it
#               (getUseTeX(), getSkin(), ...)
# $linestart  - handed through to doWikiPass2()
# $clearState - when true, clearState() is called before parsing
#
# Returns $this->mOutput after setText() has been called on it.
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	# The strip state lives in $this->mStripState; strip() fills it and
	# unstrip() reads it back.
	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
68
# Returns two random 31-bit integers rendered as lowercase hex and
# glued together (2 to 16 hex digits). strip() uses the result as a
# placeholder marker.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
73
# Strips <nowiki>, <math> and <pre> sections out of $text and leaves a
# unique placeholder (random marker + 8-digit hex counter) in their place.
# <math> is only handled when $this->mOptions->getUseTeX() is true.
#
# $text   - wikitext
# $state  - overwritten with the lists, counters and markers that
#           unstrip() needs to put the sections back
# $render - true: store the rendered/escaped form of each section;
#           false: store the section with its original tags
#
# Returns the text with placeholders.
#
# An opening tag without a matching closing tag swallows the rest of
# the text into that section, as it always did; it no longer reads an
# undefined array offset to get there.
#
function strip( $text, &$state, $render = true )
{
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);

	$stripped = "";
	$stripped2 = "";
	$stripped3 = "";

	# Replace any instances of the placeholders that are already
	# present in the input, so they cannot be mistaken for ours
	$text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
	$text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
	$text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

	# Pass 1: <nowiki>
	while ( "" != $text ) {
		$p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
			++$state['nwsecs'];

			if ( $render ) {
				$state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
			} else {
				$state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
			}

			$stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
			# No closing tag => nothing is left after the section
			$text = ( count( $q ) > 1 ) ? $q[1] : "";
		}
	}

	# Pass 2: <math>, only when TeX support is switched on
	if( $this->mOptions->getUseTeX() ) {
		while ( "" != $stripped ) {
			$p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
			$stripped2 .= $p[0];
			if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
				$stripped = "";
			} else {
				$q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
				++$state['mathsecs'];

				if ( $render ) {
					$state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
				} else {
					$state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
				}

				$stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
				$stripped = ( count( $q ) > 1 ) ? $q[1] : "";
			}
		}
	} else {
		$stripped2 = $stripped;
	}

	# Pass 3: <pre>
	while ( "" != $stripped2 ) {
		$p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
		$stripped3 .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$stripped2 = "";
		} else {
			$q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
			++$state['presecs'];

			if ( $render ) {
				$state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
			} else {
				$state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
			}

			$stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
			$stripped2 = ( count( $q ) > 1 ) ? $q[1] : "";
		}
	}
	return $stripped3;
}
165
# Puts the sections removed by strip() back into $text.
#
# $state is the array strip() filled in. For each kind of section the
# placeholders (marker + 8-digit uppercase hex index, counted from 1)
# are replaced by the stored content. The kinds are handled in the
# order pre, math, nowiki.
#
# Returns the restored text.
function unstrip( $text, &$state )
{
	$kinds = array( 'pre', 'math', 'nw' );

	foreach ( $kinds as $kind ) {
		for ( $n = 1; $n <= $state[$kind . 'secs']; ++$n ) {
			$placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
		}
	}

	return $text;
}
181
# Builds the HTML block that doWikiPass2() appends to a page whose
# title text starts with the (ucfirst'ed) "category" message followed
# by a colon: a list of sub-categories and a list of member articles,
# both taken from pages that link to this one.
#
# Always returns a string; "" when category magic is disabled in the
# parser options or the current title is not a category page.
#
# NOTE(review): the skin is taken from $wgUser although the file header
# asks to keep $wgUser out of the parser, and the other methods use
# $this->mOptions->getSkin() -- confirm which is intended.
# NOTE(review): "<br break=all>" looks like it was meant to be
# "clear=all"; left untouched here.
function categoryMagic ()
{
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;

	# Cast before interpolating into the SQL below
	$id = intval( $this->mTitle->getArticleID() ) ;
	$cat = ucfirst ( wfMsg ( "category" ) ) ;
	$ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	$r = "<br break=all>\n" ;

	$articles = array() ;
	$children = array() ;

	# $sk =& $this->mGetSkin();
	$sk =& $wgUser->getSkin() ;

	# Hard-wired to false, so only the brokenlinks query is ever used
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		# Rebuild the prefixed title: "Namespace:Title", or just "Title"
		# when the namespace text is empty
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		# A linking page that is itself a category is a sub-category
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		# The title may consist of the category word alone, with no colon
		$catname = isset ( $ti[1] ) ? $ti[1] : "" ;
		$h = wfMsg( "category_header", $catname );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
247
# Returns the whitelist of HTML attribute names that survive
# fixTagAttributes() -- no scripting attributes.
# The list is built group by group; order and content (including the
# second "width" in the table group) are exactly those of the single
# flat list it replaces.
function getHTMLattrs ()
{
	$general = array( "title", "align", "lang", "dir", "width", "height" ) ;
	# BR and HR
	$rules = array( "bgcolor", "clear", "noshade" ) ;
	# BLOCKQUOTE, Q and FONT
	$quoteAndFont = array( "cite", "size", "face", "color" ) ;
	# For various lists, mostly deprecated but safe
	$lists = array( "type", "start", "value", "compact" ) ;
	# Tables
	$tables = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	) ;
	# For CSS
	$css = array( "id", "class", "name", "style" ) ;

	return array_merge ( $general , $rules , $quoteAndFont , $lists , $tables , $css ) ;
}
264
# Filters the attribute part of a tag.
#
# $t - the raw attribute text, e.g. 'border=1 onclick="..."'
#
# Every word (optionally followed by =value) is looked up in
# getHTMLattrs(); attributes not on that list are removed. If a style
# attribute containing "expression", a "...tp://"/"...tps://" URL or
# "url(" survives, ALL attributes are dropped.
#
# Returns the cleaned, trimmed attribute text ("" if nothing is left).
#
# The filtering used to be done with the /e (eval) regex modifier,
# which interpolated the attribute value into evaluated PHP code; a
# value such as {${...}} could therefore run arbitrary code, and the
# modifier does not exist any more in PHP 7. A plain callback is used
# instead.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, "fixTagAttributesCallback" ),
		$t);

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# Callback for fixTagAttributes(): $m[1] is the attribute name, $m[3]
# (absent when there is no "=value" part) the value. Returns
# "name=value" or "name" for approved attributes, "" for all others.
/* private */ function fixTagAttributesCallback ( $m )
{
	if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() , true ) ) {
		return "" ;
	}
	$value = isset ( $m[3] ) ? $m[3] : "" ;
	if ( $value !== "" ) {
		return $m[1] . "=" . $value ;
	}
	return $m[1] ;
}
287
# Converts wiki table markup to HTML, line by line:
#   {| attrs    opens a table
#   |}          closes the table
#   |- attrs    starts a new row (any number of dashes)
#   |+ ...      caption
#   | ... || ...   data cells;   ! ... !! ...   header cells
# Within a cell, "attrs|content" puts the filtered attrs on the tag.
# Lines outside any table are left alone.
#
# Four parallel stacks, one entry per currently open (nested) table,
# remember what is open so that it can be closed at the right time.
#
# Returns the converted text. Tables still open at the end of the text
# are closed; the open cell is now closed with its own tag name (it
# used to be "</td>" even for a TH or CAPTION cell).
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# substr( $x, 3 ) skips "{|" plus the one character after it
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
/*		else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
		{
			$z = trim ( substr ( $x , 2 ) ) ;
			$t[$k] = "<caption>{$z}</caption>\n" ;
		}*/
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is only written when the first cell of the
			# row shows up; remember its attributes until then
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Cells need a row; open one if none is open yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, if any, with its own tag name
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		array_pop ( $ltr ) ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
387
# The second "pass" is really a chain of many passes, each of them a
# method of this class (or of the skin / date formatter) that takes
# the text and hands back the transformed text. The order of the
# steps is significant.
#
# $text      - wikitext with the <nowiki>/<math>/<pre> sections
#              already replaced by placeholders
# $linestart - handed through to doBlockLevels()
#
# Returns the HTML, with the categoryMagic() block appended.
#
function doWikiPass2( $text, $linestart )
{
	global $wgDateFormatter;

	$fname = "OutputPage::doWikiPass2";
	wfProfileIn( $fname );

	# HTML clean-up and variables
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text );

	# $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
	$text = str_replace ( "<HR>", "<hr>", $text );

	# Headings and block-level structure
	$text = $this->doHeadings( $text );
	$text = $this->doBlockLevels( $text, $linestart );

	if ( $this->mOptions->getUseDynamicDates() ) {
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	# Links (external first), then tables and the heading post-processing
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->formatHeadings( $text );

	# Skin-specific transformation, then the category listing
	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );
	$text .= $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
424
425
# Turns "== Heading ==" style lines into <hN> elements.
# Levels are tried from 6 down to 1 so that the longest run of "="
# wins; the closing run must be followed by whitespace or the end of
# the line, and that character is kept.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
435
# External links must be handled before the internal ones, and the
# order of the steps needs care, so that no URL is interpreted twice.
#
# Runs subReplaceExternalLinks() once per supported protocol, in the
# order listed. The flag tells whether unlabeled bracketed links of
# that protocol are shown as running numbers.

/* private */ function replaceExternalLinks( $text )
{
	$fname = "OutputPage::replaceExternalLinks";
	wfProfileIn( $fname );

	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
454
# Converts the external links of ONE protocol in $s to HTML.
#
# $s          - text to process
# $protocol   - protocol name without the colon, e.g. "http"
# $autonumber - true: a bracketed link without a label is shown as
#               "[n]" using the running counter $this->mAutonumber,
#               and (if the options allow external images) image URLs
#               are turned into images; false: such a link shows its
#               own escaped URL
#
# Two kinds of links are handled:
#  1. free URLs not preceded by "[" -- wrapped in <a href>, or handed
#     to the skin's makeImage() when they end in an image extension;
#  2. bracketed links "[proto:url]" and "[proto:url label]".
# Bracketed segments matching neither form are put back unchanged.
#
# Returns the processed text.
455 /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
456 {
# Stand-in written into the generated HTML instead of the protocol
# name and swapped back by the str_replace() below -- presumably so
# that the second regex cannot match a URL the first one has already
# converted (TODO confirm).
457 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
# Characters allowed anywhere in a URL
458 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
459
460 # this is the list of separators that should be ignored if they
461 # are the last character of an URL but that should be included
462 # if they occur within the URL, e.g. "go to www.foo.com, where .."
463 # in this case, the last comma should not become part of the URL,
464 # but in "www.foo.com/123,2342,32.htm" it should.
465 $sep = ",;\.:";
# Characters allowed in the file name part of an image URL
466 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
467 $images = "gif|png|jpg|jpeg";
468
469 # PLEASE NOTE: The curly braces { } are not part of the regex,
470 # they are interpreted as part of the string (used to tell PHP
471 # that the content of the string should be inserted there).
# $e1: free URL whose last path component is <name>.<image extension>
472 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
473 "((?i){$images})([^{$uc}]|$)/";
474
# $e2: any other free URL; a separator is only part of the URL when a
# URL character follows it
475 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
476 $sk =& $this->mOptions->getSkin();
477
478 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
479 $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
480 "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
481 }
482 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
483 $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
484 "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
485 "</a>\\5", $s );
# Put the real protocol name back in place of the stand-in
486 $s = str_replace( $unique, $protocol, $s );
487
# Bracketed links: split at every "[proto:". The first piece is the
# text before the first bracketed link; the blank prepended here is
# removed again right after the split.
488 $a = explode( "[{$protocol}:", " " . $s );
489 $s = array_shift( $a );
490 $s = substr( $s, 1 );
491
# From here on: $e1 = "url]rest", $e2 = "url label]rest"
492 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
493 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
494
495 foreach ( $a as $line ) {
496 if ( preg_match( $e1, $line, $m ) ) {
497 $link = "{$protocol}:{$m[1]}";
498 $trail = $m[2];
499 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
500 else { $text = wfEscapeHTML( $link ); }
501 } else if ( preg_match( $e2, $line, $m ) ) {
502 $link = "{$protocol}:{$m[1]}";
503 $text = $m[2];
504 $trail = $m[3];
505 } else {
# Not a well-formed bracketed link: restore it as it was
506 $s .= "[{$protocol}:" . $line;
507 continue;
508 }
# In printable mode the URL is shown in parentheses after the link
509 if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
510 else $paren = "";
511 $la = $sk->getExternalLinkAttributes( $link, $text );
512 $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
513
514 }
515 return $s;
516 }
517
# Handles a ''' (bold) token.
#
# $state["strong"] / $state["em"] hold FALSE or the position of the
# token that opened the element; $token["pos"] is the position of the
# current token. Updates $state and returns the HTML to emit.
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		# Nothing bold is open: open it and remember where
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# ''' lala ''lala ''' -- an <em> opened inside the <strong> has to
	# be closed first and reopened afterwards to keep the tags nested
	$emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;

	return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
535
# Handles a '' (italic) token; the mirror image of handle3Quotes().
#
# $state["em"] / $state["strong"] hold FALSE or the position of the
# token that opened the element; $token["pos"] is the position of the
# current token. Updates $state and returns the HTML to emit.
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		# Nothing italic is open: open it and remember where
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# ''lala'''lala'' ....''' -- a <strong> opened inside the <em> has
	# to be closed first and reopened afterwards
	$strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;

	return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
553
# Handles a ''''' (bold + italic) token.
#
# $state["em"] / $state["strong"] hold FALSE or the position of the
# token that opened the element; $token["pos"] is the position of the
# current token. Depending on what is open, the token closes both
# elements (innermost first), closes one and opens the other, or
# opens both. Updates $state and returns the HTML to emit.
#
# The result used to be appended (.=) to a variable that had never
# been set; it is now assigned.
/* private */ function handle5Quotes( &$state, $token )
{
	if ( $state["em"] && $state["strong"] ) {
		# Close whichever was opened later first
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
577
# Token-based pass that handles [[internal links]], quote markup
# ('', ''', '''''), "----" rules and the RFC / ISBN magic.
#
# The text is split into tokens by the Tokenizer. While a link is
# open ($tagIsOpen) the produced text is not appended to the output
# but pushed onto $tokenStack; when the closing ]] arrives, the stack
# is unwound down to the opening token and the collected text is
# passed to handleInternalLink(). Whatever is still on the stack at
# the end (unclosed links) is emitted as plain text.
#
# Returns the processed text.
#
# $threeopen is now initialised; it used to be read at the first
# closing ]] without ever having been set.
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s="";
	$state = array( "em" => FALSE, "strong" => FALSE );
	$tagIsOpen = FALSE;
	$threeopen = FALSE; # was the innermost link opened with [[[ ?

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;
			case "[[[":
				# remember the tag opened with 3 [
				$threeopen = true;
				# no break: continue as for "[["
			case "[[":
				# link opening tag.
				# FIXME : Treat orphaned open tags (stack not empty when text is over)
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt="";
				break;

			case "]]]":
			case "]]":
				# link close tag.
				# get text from stack, glue it together, and call the code to handle a
				# link

				if ( count( $tokenStack ) == 0 )
				{
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					$linkText = "";
					$lastToken = array_pop( $tokenStack );
					while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
					{
						if( !empty( $lastToken["text"] ) ) {
							$linkText = $lastToken["text"] . $linkText;
						}
						$lastToken = array_pop( $tokenStack );
					}

					$txt = $linkText ."]]";

					# Text carried by the opening token itself is handed
					# to handleInternalLink() as the link prefix
					if( isset( $lastToken["text"] ) ) {
						$prefix = $lastToken["text"];
					} else {
						$prefix = "";
					}
					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" )
					{
						# Preview just looks at it. Now we have to fetch it.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $txt, $prefix );

					# did the tag start with 3 [ ?
					if($threeopen) {
						# show the first as text
						$txt = "[".$txt;
						$threeopen=false;
					}

				}
				$tagIsOpen = (count( $tokenStack ) != 0);
				break;
			case "----":
				$txt = "\n<hr>\n";
				break;
			case "'''":
				# This and the three next ones handle quotes
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$txt="";
				break;
			case "RFC ":
				if ( $tagIsOpen ) {
					$txt = "RFC ";
				} else {
					$txt = $this->doMagicRFC( $tokenizer );
				}
				break;
			case "ISBN ":
				if ( $tagIsOpen ) {
					$txt = "ISBN ";
				} else {
					$txt = $this->doMagicISBN( $tokenizer );
				}
				break;
			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}
		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while
	if ( count( $tokenStack ) != 0 )
	{
		# still objects on stack. opened [[ tag without closing ]] tag.
		$txt = "";
		while ( $lastToken = array_pop( $tokenStack ) )
		{
			if ( $lastToken["type"] == "text" )
			{
				$txt = $lastToken["text"] . $txt;
			} else {
				$txt = $lastToken["type"] . $txt;
			}
		}
		$s .= $txt;
	}
	return $s;
}
726
# Renders one internal link.
#
# $line   - everything after the opening "[[": "target|label]]trail"
# $prefix - text handed in by replaceInternalLinks() that precedes
#           the link
#
# Depending on the target this yields an ordinary link, an image, a
# media or special-page link, bold text for a link to the current
# page, or nothing visible (language links and, with category magic,
# category links are only recorded). Text that does not look like a
# link is returned unchanged with "[[" put back in front.
#
# Returns the HTML for prefix + link + trail.
#
# Changes: every exit now calls wfProfileOut() (the early returns
# used to leave the profiling section open); the "#" test uses ===
# so that a "#" at offset 0 counts as found; the unused global
# $wgLanguageCode is gone; the subpage lookup tolerates namespaces
# missing from $wgNamespacesWithSubpages.
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages;
	static $fname = "OutputPage::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	#$e2 = "/^(.*)\\b(\\w+)\$/suD";
	#$e2 = "/^(.*\\s)(\\S+)\$/suD";
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';


	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );
	$s = "";

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		wfProfileOut( $fname );
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		wfProfileOut( $fname );
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		# Language link: recorded in the output, nothing shown inline
		if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			$s .= $prefix . $trail;
			wfProfileOut( $fname );
			return $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# A link to the page itself (without a section anchor) becomes bold text
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
	  ( strpos( $link, "#" ) === FALSE ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		wfProfileOut( $fname );
		return $s;
	}
	# NOTE(review): $category is the "category" message text while $ns
	# is a namespace value, so this is a loose comparison between two
	# different kinds of thing -- confirm that it selects what was
	# intended. Likewise the links are collected in $this->mCategoryLinks,
	# not in $this->mOutput -- confirm where they are read.
	if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
#		$t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		$this->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		wfProfileOut( $fname );
		return $s ;
	}
	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
852
# Helper for doBlockLevels().
#
# Forgets the currently open section ($this->mLastSection) and returns
# its closing tag followed by a newline. For "p" and for no section at
# all only the newline is returned.
/* private */ function closeParagraph()
{
	$open = $this->mLastSection;
	$this->mLastSection = "";

	if ( 0 == strcmp( "p", $open ) || 0 == strcmp( "", $open ) ) {
		return "\n";
	}
	return "</" . $open . ">" . "\n";
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters (bytes) in which
# $st1 and $st2 agree. 0 if either string is empty.
#
# Uses [] for string offsets: the {} form is deprecated since PHP 7.4
# and gone in PHP 8, while [] works in every PHP version.
#
/* private */ function getCommon( $st1, $st2 )
{
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
879 # These next three functions open, continue, and close the list
880 # element appropriate to the prefix character passed into them.
881 #
# Opens the list that belongs to the prefix character $char and its
# first item: "*" => <ul><li>, "#" => <ol><li>, ":" => <dl><dd>,
# ";" => <dl><dt> (which also sets $this->mDTopen).
# The paragraph is closed first in every case; its closing markup
# precedes the list markup in the result, except for an unknown
# character, where only an error comment is returned.
/* private */ function openList( $char )
{
	$closing = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			return $closing . "<ul><li>";
		case "#":
			return $closing . "<ol><li>";
		case ":":
			return $closing . "<dl><dd>";
		case ";":
			$this->mDTopen = true;
			return $closing . "<dl><dt>";
	}
	return "<!-- ERR 1 -->";
}
897
# Closes the current list item and opens the next one for the prefix
# character $char.
# "*" and "#" give </li><li>. For ":" and ";" the element being closed
# is </dt> if a term is open ($this->mDTopen), otherwise </dd>; ";"
# then opens a <dt> and ":" a <dd>, and $this->mDTopen is updated to
# match. Any other character yields an error comment.
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case "*":
		case "#":
			return "</li><li>";
		case ":":
		case ";":
			$close = $this->mDTopen ? "</dt>" : "</dd>";
			$isTerm = ( ";" == $char );
			$this->mDTopen = $isTerm;
			return $close . ( $isTerm ? "<dt>" : "<dd>" );
	}
	return "<!-- ERR 2 -->";
}
914
# Closes the last item and the list that belong to the prefix
# character $char and returns the markup followed by a newline.
# For ":" the item closed is </dt> if a term is open (the flag
# $this->mDTopen is then cleared), otherwise </dd>.
# Any other character, ";" included, yields an error comment without
# a newline.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$markup = "</li></ul>";
			break;
		case "#":
			$markup = "</li></ol>";
			break;
		case ":":
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$markup = "</dt></dl>";
			} else {
				$markup = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $markup . "\n";
}
930
# Line-by-line pass that creates the block-level structure: lists from
# lines starting with * # : ; and, for lines without such a prefix,
# <p> / <pre> sections (a leading blank selects <pre>).
#
# $text      - text to process
# $linestart - false: the first line is copied through untouched
#              (it does not start at the beginning of a line)
#
# Returns the text with list and paragraph markup added.
#
# Changes: string offsets use [] / substr() instead of the {} form
# (deprecated in PHP 7.4, removed in PHP 8), which also avoids reading
# offset 0 of an empty line; $npl and $pref2 are initialised because
# they are read after the loop even if the loop never ran.
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "OutputPage::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );       # length of the previous prefix
		$npl = strspn( $t, "*#:;" );      # length of this line's prefix
		$pref = substr( $t, 0, $npl );
		# ";" and ":" belong to the same kind of list
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list as the line before: just the next item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on a single line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# The prefix changed: close what is no longer common, then
			# open what is new
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( $lastPref[$opl-1] );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( $pref[$cpl-1] );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
			  "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				if ( " " == substr( $t, 0, 1 ) ) {
					$newSection = "pre";
					# $t = wfEscapeHTML( $t );
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
				  $newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
			  preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists the last line left open
	while ( $npl ) {
		$text .= $this->closeList( $pref2[$npl-1] );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
1036
# Replaces the magic-word variables in $text: the current-date words,
# the number of articles, and the {{MSG:...}} / {{MSGNW:...}} forms,
# which are substituted through global callback functions.
#
# Date values come from PHP's date() and wfTimestampNow(), i.e. the
# server's local time -- ensure this is appropriate for your audience!
# See Language.php for the definition of each magic word.
#
# Returns the text with the variables replaced.
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "OutputPage::replaceVariables";
	wfProfileIn( $fname );

	# Basic variables
	$magic = array(
		MAG_CURRENTMONTH => date( "m" ),
		MAG_CURRENTMONTHNAME => $wgLang->getMonthName( date("n") ),
		MAG_CURRENTMONTHNAMEGEN => $wgLang->getMonthNameGen( date("n") ),
		MAG_CURRENTDAY => date("j"),
		MAG_CURRENTDAYNAME => $wgLang->getWeekdayName( date("w")+1 ),
		MAG_CURRENTYEAR => date( "Y" ),
		MAG_CURRENTTIME => $wgLang->time( wfTimestampNow(), false )
	);

	$this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	# Number of articles: only fetched when the word actually occurs
	$countWord =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $countWord->match( $text ) ) {
		$text = $countWord->replace( wfNumberOfArticles(), $text );
		if( $countWord->getWasModified() ) { $this->mOutput->mContainsOldMagic++; }
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;
	$callbacks = array(
		MAG_MSG => "wfReplaceMsgVar",
		MAG_MSGNW => "wfReplaceMsgnwVar"
	);
	foreach ( $callbacks as $wordId => $callback ) {
		$word =& MagicWord::get( $wordId );
		$text = $word->substituteCallback( $text, $callback );
		if( $word->getWasModified() ) { $this->mContainsNewMagic++; }
	}

	wfProfileOut( $fname );
	return $text;
}
1081
1082 # Cleans up HTML, removes dangerous tags and attributes
/* private */ function removeHTMLtags( $text )
{
	# Whitelist-based HTML filter.
	#
	# $text is split on "<"; each piece that parses as a whitelisted tag and
	# passes the nesting checks is re-emitted with its attributes run through
	# fixTagAttributes(). Everything else is emitted with "<" and ">" escaped.
	# Tags still open at the end of the text are closed automatically.
	# Returns the filtered text.
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );
	$htmlpairs = array( # Tags that must be closed
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	$htmlnest = array( # Tags that can be nested--??
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$tabletags = array( # Can only appear inside table
		"td", "th", "tr"
	);

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function; the call is kept
	# in case getHTMLattrs() has side effects -- confirm and drop if it has none.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	# $tagstack holds the open tags of the current table scope;
	# $tablestack holds the saved stacks of the enclosing scopes.
	$tagstack = array(); $tablestack = array();
	$newparams = "";

	foreach ( $bits as $x ) {
		$regs = array();
		if ( ! preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
		  $x, $regs ) ) {
			# Not tag-shaped at all: show it literally
			$text .= "&lt;" . str_replace( ">", "&gt;", $x );
			continue;
		}
		list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
		$t = strtolower( $t );

		$badtag = 0 ;
		if ( in_array( $t, $htmlelements ) ) {
			# Check our stack
			if ( $slash ) {
				# Closing a tag...
				$ot = NULL;
				if ( ! in_array( $t, $htmlsingle ) &&
				  ( $ot = array_pop( $tagstack ) ) != $t ) {
					# Mismatch: put back what we popped. An empty stack
					# yields NULL, which must not be pushed as a tag.
					if ( ! is_null( $ot ) ) {
						array_push( $tagstack, $ot );
					}
					$badtag = 1;
				} else {
					if ( $t == "table" ) {
						# Leaving a table: restore the enclosing scope
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				}
			} else {
				# Keep track for later
				if ( in_array( $t, $tabletags ) &&
				  ! in_array( "table", $tagstack ) ) {
					$badtag = 1;
				} else if ( in_array( $t, $tagstack ) &&
				  ! in_array ( $t , $htmlnest ) ) {
					$badtag = 1 ;
				} else if ( ! in_array( $t, $htmlsingle ) ) {
					if ( $t == "table" ) {
						# Entering a table starts a fresh scope
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes($params);
			}
			if ( ! $badtag ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
				continue;
			}
		}
		$text .= "&lt;" . str_replace( ">", "&gt;", $x);
	}
	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
	}
	wfProfileOut( $fname );
	return $text;
}
1174
1175 /*
1176 *
1177 * This function accomplishes several tasks:
1178 * 1) Auto-number headings if that option is enabled
1179 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1180 * 3) Add a Table of contents on the top for users who have enabled the option
1181 * 4) Auto-anchor headings
1182 *
1183 * It loops through all headlines, collects the necessary data, then splits up the
1184 * string and re-inserts the newly formatted headlines.
1185 *
1186 * */
/* private */ function formatHeadings( $text )
{
	# Rewrites every <h1>..<h6> in $text: optional auto-numbering, an anchor
	# per heading, optional [edit] links, and an optional table of contents
	# inserted after the first block of text. Returns the rewritten text.
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$st = 0;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );

	# headline counter
	$c = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();      # finished replacement markup, indexed by headline number
	$h = array();         # running headline count per heading level
	$refers = array();    # occurrences of each canonized headline, for unique anchors
	$level = 0;
	$prevlevel = 0;
	$numbering = "";
	$dot = 0;
	$toclines = 0;

	foreach ( $matches[3] as $headline ) {
		if ( $level ) { $prevlevel = $level; }
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		// count number of headlines for each level
		if ( !isset( $h[$level] ) ) { $h[$level] = 0; }
		$h[$level]++;

		if ( $nh || $st ) {
			// Build the dotted number from every level that has a non-zero count
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) { $numbering .= "."; }
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace( '"', "", $canonized_headline );
		$canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );
		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) { $refers[$canonized_headline] = 0; }
		$refers[$canonized_headline]++;
		$refcount = $refers[$canonized_headline];

		// Prepend the number to the heading text
		if ( $nh || $st ) {
			$tocline = $numbering . " " . $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				// the two are different if the line contains a link
				$headline = $numbering . " " . $headline;
			}
		}

		// Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if ( $refcount > 1 ) { $anchor .= "_" . $refcount; }
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}
		$head[$c] = "";
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink( $c+1 );
		}

		// the headline might have a link
		if ( preg_match( "/(.*)<a(.*)/", $headline, $headlinematches ) ) {
			// if so give an anchor name to the already existent link
			$headline = $headlinematches[1]
				. "<a name=\"" . $anchor . "\" " . $headlinematches[2];
		} else {
			// else create an anchor link for the headline
			$headline = "<a name=\"" . $anchor . "\">"
				. $headline
				. "</a>";
		}

		// give headline the correct <h#> tag
		$head[$c] .= "<h" . $level . $matches[2][$c] . $headline . "</h" . $level . ">";

		// Add the edit section link
		if ( $esr ) {
			$head[$c] = $sk->editSectionScript( $c+1, $head[$c] );
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines
	$blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>" . $full . $toc;
		}

		if ( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1351
/* private */ function doMagicISBN( &$tokenizer )
{
	# Called after the word "ISBN" has been seen. If the next typed token is
	# a text token, it is consumed and a leading ISBN in it is turned into a
	# link to Special:Booksources. Returns the replacement text for "ISBN".
	#
	# $tokenizer must provide previewToken() and nextToken(), each returning
	# an array with a "type" key (and a "text" key for text tokens).

	# Skip tokens with an empty type
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" )
	{
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Measure the leading blanks and the ISBN candidate with strspn() so that
	# an empty or fully consumed string is never indexed.
	$nblank = strspn( $x, " " );
	$blank = str_repeat( " ", $nblank );
	$x = ( $nblank < strlen( $x ) ) ? substr( $x, $nblank ) : "";

	$nisbn = strspn( $x, $valid );
	$isbn = ( $nisbn > 0 ) ? substr( $x, 0, $nisbn ) : "";
	$x = ( $nisbn < strlen( $x ) ) ? substr( $x, $nisbn ) : "";

	$num = str_replace( "-", "", $isbn );

	if ( "" == $num ) {
		# Nothing but hyphens (or nothing at all): not an ISBN.
		# Put back everything that was consumed, hyphens included.
		$text = "ISBN $blank$isbn$x";
	} else {
		$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
		$text = "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
			"\" class=\"internal\">ISBN $isbn</a>";
		$text .= $x;
	}
	return $text;
}
/* private */ function doMagicRFC( &$tokenizer )
{
	# Called after the word "RFC" has been seen. If the next typed token is
	# a text token, it is consumed and a leading number in it is turned into
	# an external link built from the "rfcurl" message. Returns the
	# replacement text for "RFC".
	#
	# $tokenizer must provide previewToken() and nextToken(), each returning
	# an array with a "type" key (and a "text" key for text tokens).

	# Skip tokens with an empty type
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" )
	{
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789";

	# Measure the leading blanks and the digits with strspn() so that an
	# empty or fully consumed string is never indexed.
	$nblank = strspn( $x, " " );
	$blank = str_repeat( " ", $nblank );
	$x = ( $nblank < strlen( $x ) ) ? substr( $x, $nblank ) : "";

	$nrfc = strspn( $x, $valid );
	$rfc = ( $nrfc > 0 ) ? substr( $x, 0, $nrfc ) : "";
	$x = ( $nrfc < strlen( $x ) ) ? substr( $x, $nrfc ) : "";

	if ( "" == $rfc ) {
		# No number follows: give the text back unchanged
		$text = "RFC $blank$x";
	} else {
		# "$1" in the message is the placeholder for the RFC number
		$url = wfMsg( "rfcurl" );
		$url = str_replace( "$1", $rfc, $url);
		$sk =& $this->mOptions->getSkin();
		$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
		$text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
	}
	return $text;
}
1440
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	# Transformation applied to wikitext before it is saved.
	# Line endings are normalized to "\n", strip() is run with rendering
	# disabled, pstPass2() is applied to what remains, and the stripped
	# sections are put back afterwards.
	# Returns the transformed text.
	$this->mOptions = $options;
	$this->mTitle = $title;
	if ( $clearState ) {
		$this->clearState();
	}

	# Strip state local to this call; filled in by strip()
	$state = false;

	$unixText = str_replace( "\r\n", "\n", $text );
	$stripped = $this->strip( $unixText, $state, false );
	$transformed = $this->pstPass2( $stripped, $user );

	return $this->unstrip( $transformed, $state );
}
1456
/* private */ function pstPass2( $text, &$user )
{
	# Second pass of the pre-save transform. On $text it:
	#  - expands ~~~~ and ~~~ into the user's signature,
	#  - completes "pipe trick" links such as [[page (context)|]] and [[|page]],
	#  - runs the MAG_SUBST callback substitution,
	#  - trims trailing whitespace and removes the MAG_END marker.
	# Returns the transformed text.
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( "TZ" ); putenv( "TZ=$wgLocaltimezone" );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	# NOTE(review): if TZ was not set, getenv() gave false and this sets
	# TZ to an empty string rather than unsetting it -- confirm that is acceptable.
	if ( isset( $wgLocaltimezone ) ) putenv( "TZ=$oldtz" );

	# The tilde patterns are literal, so use str_replace(): with preg_replace()
	# a "$1" or "\1" inside a user name or nickname would be interpreted as a
	# backreference in the replacement string.
	$sig = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$sig $d", $text );
	$text = str_replace( "~~~", $sig, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
		# [[ns:page (cont)|]]

	# The context is the parenthesized suffix of the current page title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	# Most specific pattern first
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1520
1521
1522 }
1523
class ParserOutput
{
	# Holder for the text produced by the parser plus the language links,
	# category links and "old magic" indicator that go with it.
	var $mText;
	var $mLanguageLinks;
	var $mCategoryLinks;
	var $mContainsOldMagic;

	# Constructor: every argument is optional and is stored as given.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCategoryLinks = $categoryLinks;
		$this->mLanguageLinks = $languageLinks;
		$this->mText = $text;
	}

	# --- Read accessors ---

	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# --- Write accessors ---
	# Each one delegates to wfSetVar() and returns whatever wfSetVar() returns
	# (presumably the previous value -- verify against wfSetVar's definition).

	function setText( $text )
	{
		return wfSetVar( $this->mText, $text );
	}

	function setLanguageLinks( $ll )
	{
		return wfSetVar( $this->mLanguageLinks, $ll );
	}

	function setCategoryLinks( $cl )
	{
		return wfSetVar( $this->mCategoryLinks, $cl );
	}

	function setContainsOldMagic( $com )
	{
		return wfSetVar( $this->mContainsOldMagic, $com );
	}
}
1546
class ParserOptions
{
	# Settings that control a parser run. Values are filled in from the site
	# configuration globals and from a user's preferences by
	# initialiseFromUser(), or individually through the set*() methods.

	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	# The setters delegate to wfSetVar() / wfSetRef() and return their result
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Builds a new ParserOptions initialised from $user and returns it.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already declares its parameter by reference,
		# so no "&" is needed (or allowed by later PHP versions) at the call.
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills every option from the configuration globals and from the
	# preferences of $userInput. A false/empty $userInput is replaced by a
	# freshly constructed User.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}
}
1622
1623 # Regex callbacks, used in OutputPage::replaceVariables
1624
1625 # Just get rid of the dangerous stuff
1626 # Necessary because replaceVariables is called after removeHTMLtags,
1627 # and message text can come from any user
function wfReplaceMsgVar( $matches ) {
	# Callback: $matches[1] is the message key.
	# The message text is passed through removeHTMLtags() and then has its
	# internal links replaced, with the link cache suspended during that step.
	# Afterwards the message page itself is added to the link cache.
	global $wgCurOut, $wgLinkCache;

	$msgKey = $matches[1];
	$html = $wgCurOut->removeHTMLtags( wfMsg( $msgKey ) );

	$wgLinkCache->suspend();
	$html = $wgCurOut->replaceInternalLinks( $html );
	$wgLinkCache->resume();

	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $html;
}
1637
1638 # Effective <nowiki></nowiki>
1639 # Not real <nowiki> because this is called after nowiki sections are processed
function wfReplaceMsgnwVar( $matches ) {
	# Callback: $matches[1] is the message key.
	# Returns the message text run through wfEscapeWikiText(), and adds the
	# message page to the link cache.
	global $wgCurOut, $wgLinkCache;

	$msgKey = $matches[1];
	$escaped = wfEscapeWikiText( wfMsg( $msgKey ) );

	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgKey );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $escaped;
}
1646
1647
1648
1649 ?>