98c0d558280deae5c5a495991b52062af10aefd3
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24
25 # Temporary:
26 var $mOptions, $mTitle;
27
# Constructor (PHP 4 style, named after the class): every new parser
# starts out with freshly reset state.
function Parser()
{
    $this->clearState();
}
32
# Resets the members listed under "Cleared with clearState()" to their
# initial values and installs a new, empty ParserOutput.
function clearState()
{
    $this->mStripState = false;
    $this->mDTopen = false;
    $this->mLastSection = "";
    $this->mAutonumber = 0;
    $this->mOutput = new ParserOutput();
}
41
# First pass--strip <nowiki>, <math> and <pre> sections, then pass the rest
# off to doWikiPass2() which does all the real work.
44 #
45 # Returns a ParserOutput
46 #
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
    # $text        wikitext to convert
    # $title       title object of the page being parsed; kept by reference
    #              in $this->mTitle for the duration of the parse
    # $options     options object; kept in $this->mOptions and queried by
    #              the individual passes
    # $linestart   handed on to doWikiPass2()
    # $clearState  when true, clearState() is called before parsing
    #
    # Returns $this->mOutput after storing the generated HTML in it.
    $fname = "Parser::parse";
    wfProfileIn( $fname );

    if ( $clearState ) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle =& $title;

    # strip() fills $this->mStripState (passed by reference) with the data
    # that unstrip() needs to put the protected sections back afterwards.
    $text = $this->strip( $text, $this->mStripState, true );
    $text = $this->doWikiPass2( $text, $linestart );
    $text = $this->unstrip( $text, $this->mStripState );

    $this->mOutput->setText( $text );
    wfProfileOut( $fname );
    return $this->mOutput;
}
68
# Returns two random 31-bit numbers, each written in lower-case hex and
# joined into one string. strip() uses the result as a placeholder prefix.
/* static */ function getRandomString()
{
    $upperBound = 0x7fffffff;
    $firstHalf = dechex( mt_rand( 0, $upperBound ) );
    $secondHalf = dechex( mt_rand( 0, $upperBound ) );
    return $firstHalf . $secondHalf;
}
73
74 # Strips <nowiki>, <pre> and <math>
75 # Returns the text, and fills an array with data needed in unstrip()
76 #
function strip( $text, &$state, $render = true )
{
    # Replaces every <nowiki>, <math> and <pre> section of $text with a
    # placeholder (random prefix + 8-digit hex section number) so that the
    # later passes leave the section content alone.
    #
    # $text    wikitext to process
    # $state   (out) overwritten with the placeholder prefixes ('nwunq',
    #          'mathunq', 'preunq'), the section counters ('nwsecs', ...)
    #          and the saved section contents ('nwlist', ...) for unstrip()
    # $render  true: store the text to be output for each section;
    #          false: store the section source with its tags
    #
    # <math> is only stripped when the parser options enable TeX.
    # An opening tag without a closing tag takes the rest of the text as
    # its content.
    #
    # Returns the text with the placeholders inserted.
    $state = array(
        'nwlist' => array(),
        'nwsecs' => 0,
        'nwunq' => Parser::getRandomString(),
        'mathlist' => array(),
        'mathsecs' => 0,
        'mathunq' => Parser::getRandomString(),
        'prelist' => array(),
        'presecs' => 0,
        'preunq' => Parser::getRandomString()
    );

    $stripped = "";
    $stripped2 = "";
    $stripped3 = "";

    # Replace any instances of the placeholders
    $text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
    $text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
    $text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

    # Pass 1: <nowiki>
    while ( "" != $text ) {
        $p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
        $stripped .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
            $text = "";
        } else {
            $q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
            ++$state['nwsecs'];

            if ( $render ) {
                $state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
            } else {
                $state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
            }

            $stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
            # No closing tag: nothing is left to scan
            $text = isset( $q[1] ) ? $q[1] : "";
        }
    }

    # Pass 2: <math>, only when TeX is enabled
    if( $this->mOptions->getUseTeX() ) {
        while ( "" != $stripped ) {
            $p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
            $stripped2 .= $p[0];
            if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                $stripped = "";
            } else {
                $q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
                ++$state['mathsecs'];

                if ( $render ) {
                    $state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
                } else {
                    $state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
                }

                $stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
                $stripped = isset( $q[1] ) ? $q[1] : "";
            }
        }
    } else {
        $stripped2 = $stripped;
    }

    # Pass 3: <pre>
    while ( "" != $stripped2 ) {
        $p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
        $stripped3 .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
            $stripped2 = "";
        } else {
            $q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
            ++$state['presecs'];

            if ( $render ) {
                $state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
            } else {
                $state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
            }

            $stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
            $stripped2 = isset( $q[1] ) ? $q[1] : "";
        }
    }
    return $stripped3;
}
165
# Puts the sections saved by strip() back into $text by replacing each
# placeholder with its stored content. Sections are restored in the order
# pre, math, nowiki. Returns the resulting text.
function unstrip( $text, &$state )
{
    $kinds = array( 'pre', 'math', 'nw' );
    foreach ( $kinds as $kind ) {
        $countKey = $kind . 'secs';
        $prefixKey = $kind . 'unq';
        $listKey = $kind . 'list';
        $section = 1;
        while ( $section <= $state[$countKey] ) {
            $placeholder = $state[$prefixKey] . sprintf( "%08X", $section );
            $text = str_replace( $placeholder, $state[$listKey][$section], $text );
            ++$section;
        }
    }
    return $text;
}
181
# Builds the HTML that doWikiPass2() appends to the page text: a list of
# sub-categories and a list of articles, taken from the brokenlinks table
# rows that point at the current page's article id.
#
# Always returns a string. It is "" when category magic is disabled in the
# parser options or when the part of the title text before the first ":"
# is not the (ucfirst'ed) "category" message.
function categoryMagic ()
{
    global $wgLang , $wgUser ;
    if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
    # The id is interpolated into SQL below, so force it to an integer
    $id = intval ( $this->mTitle->getArticleID() ) ;
    $cat = ucfirst ( wfMsg ( "category" ) ) ;
    $ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
    if ( $cat != $ti[0] ) return "" ;
    # A title consisting of just the category word has no second part
    $catName = isset ( $ti[1] ) ? $ti[1] : "" ;
    $r = "<br clear=all>\n" ;

    $articles = array() ;
    $children = array() ;

    # NOTE(review): the file header asks to keep $wgUser out of the parser;
    # the skin is still taken from it here, as before.
    $sk =& $wgUser->getSkin() ;

    # Hard-wired switch: only the brokenlinks query is used at the moment
    $doesexist = false ;
    if ( $doesexist ) {
        $sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
    } else {
        $sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
    }

    $res = wfQuery ( $sql, DB_READ ) ;
    while ( $x = wfFetchObject ( $res ) )
    {
        if ( $doesexist ) {
            $t = $x->l_from ;
        } else {
            # Rebuild the prefixed title: "Namespace:Title", or just "Title"
            $t = $wgLang->getNsText ( $x->cur_namespace ) ;
            if ( $t != "" ) $t .= ":" ;
            $t .= $x->cur_title ;
        }

        # Linking pages that are categories themselves are sub-categories
        $y = explode ( ":" , $t , 2 ) ;
        if ( count ( $y ) == 2 && $y[0] == $cat ) {
            array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
        } else {
            array_push ( $articles , $sk->makeLink ( $t ) ) ;
        }
    }
    wfFreeResult ( $res ) ;

    # Children
    if ( count ( $children ) > 0 )
    {
        asort ( $children ) ;
        $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
        $r .= implode ( ", " , $children ) ;
    }

    # Articles
    if ( count ( $articles ) > 0 )
    {
        asort ( $articles ) ;
        $h = wfMsg( "category_header", $catName );
        $r .= "<h2>{$h}</h2>\n" ;
        $r .= implode ( ", " , $articles ) ;
    }

    return $r ;
}
251
# Returns the list of attribute names that may be kept on HTML tags
# (no scripting attributes). The order and the duplicate "width" entry of
# the original list are preserved.
function getHTMLattrs ()
{
    $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
    $br = array( "clear" );
    $hr = array( "noshade" );
    $quote = array( "cite" ); # BLOCKQUOTE, Q
    $font = array( "size", "face", "color" );
    # For various lists, mostly deprecated but safe
    $lists = array( "type", "start", "value", "compact" );
    $tables = array(
        "summary", "width", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan"
    );
    $css = array( "id", "class", "name", "style" );

    return array_merge( $general, $br, $hr, $quote, $font, $lists, $tables, $css );
}
268
# Cleans the attribute part $t of an HTML tag: every attribute whose name
# is not listed by getHTMLattrs() is removed, and if the result contains a
# style attribute with "expression", "tp://"-like URLs or "url(" all
# attributes are dropped. Returns the trimmed attribute string.
function fixTagAttributes ( $t )
{
    if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

    # Strip non-approved attributes from the tag.
    # A callback is used instead of the /e (eval) modifier: with /e the
    # attribute value ended up inside evaluated PHP code, where "{${...}}"
    # is executed, and PHP 7 removed the modifier altogether.
    $t = preg_replace_callback(
        "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
        array( $this, "fixTagAttributesCallback" ),
        $t );

    # Strip javascript "expression" from stylesheets. Brute force approach:
    # If anything offensive is found, all attributes of the HTML tag are dropped
    if( preg_match(
        "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
        wfMungeToUtf8( $t ) ) )
    {
        $t="";
    }

    return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
# $m[1] is the attribute name, $m[3] (when present) its value, quotes
# included. Returns "name=value" or "name" for approved attributes and ""
# for everything else.
/* private */ function fixTagAttributesCallback ( $m )
{
    if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) return "" ;
    if ( isset ( $m[3] ) && $m[3] !== "" ) return $m[1] . "=" . $m[3] ;
    return $m[1] ;
}
291
# Converts wiki table markup into HTML, line by line:
#   {| attrs     opens a table
#   |}           closes the innermost table
#   |- attrs     starts a new row (any number of dashes)
#   | or !       data / header cells, several per line separated by || (!!)
#   |+           caption
# Lines outside of any table are left untouched. Tables still open at the
# end of the text are closed. Returns the converted text.
function doTableStuff ( $t )
{
    $t = explode ( "\n" , $t ) ;
    # One entry per currently open (nested) table in each of these stacks
    $td = array () ; # Is currently a td tag open?
    $ltd = array () ; # Was it TD or TH?
    $tr = array () ; # Is currently a tr tag open?
    $ltr = array () ; # tr attributes
    foreach ( $t AS $k => $x )
    {
        $x = rtrim ( $x ) ;
        $fc = substr ( $x , 0 , 1 ) ;
        if ( "{|" == substr ( $x , 0 , 2 ) )
        {
            # Everything after the two-character "{|" is attribute text;
            # fixTagAttributes() trims it, so no separating blank is needed.
            $t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $tr , false ) ;
            array_push ( $ltr , "" ) ;
        }
        else if ( count ( $td ) == 0 ) { } # Don't do any of the following
        else if ( "|}" == substr ( $x , 0 , 2 ) )
        {
            $z = "</table>\n" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
        }
        /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
        {
            $z = trim ( substr ( $x , 2 ) ) ;
            $t[$k] = "<caption>{$z}</caption>\n" ;
        }*/
        else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
        {
            $x = substr ( $x , 1 ) ;
            while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
            $z = "" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
            # The <tr> itself is written when the first cell of the row
            # shows up; only its attributes are remembered here.
            array_push ( $tr , false ) ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
        }
        else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
        {
            if ( "|+" == substr ( $x , 0 , 2 ) )
            {
                $fc = "+" ;
                $x = substr ( $x , 1 ) ;
            }
            $after = substr ( $x , 1 ) ;
            if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
            $after = explode ( "||" , $after ) ;
            $t[$k] = "" ;
            foreach ( $after AS $theline )
            {
                $z = "" ;
                if ( $fc != "+" )
                {
                    # Open the row if this is its first cell
                    $tra = array_pop ( $ltr ) ;
                    if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                    array_push ( $tr , true ) ;
                    array_push ( $ltr , "" ) ;
                }

                # Close the previous cell, then open the new one
                $l = array_pop ( $ltd ) ;
                if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                if ( $fc == "|" ) $l = "TD" ;
                else if ( $fc == "!" ) $l = "TH" ;
                else if ( $fc == "+" ) $l = "CAPTION" ;
                else $l = "" ;
                array_push ( $ltd , $l ) ;
                # "attrs|content" or just "content"
                $y = explode ( "|" , $theline , 2 ) ;
                if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
                else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                $t[$k] .= $y ;
                array_push ( $td , true ) ;
            }
        }
    }

    # Closing open td, tr && table
    while ( count ( $td ) > 0 )
    {
        # Close the cell with the tag it was opened with (TD, TH or CAPTION)
        $l = array_pop ( $ltd ) ;
        if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
        if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
        $t[] = "</table>" ;
    }

    $t = implode ( "\n" , $t ) ;
    # $t = $this->removeHTMLtags( $t );
    return $t ;
}
391
392 # Well, OK, it's actually about 14 passes. But since all the
393 # hard lifting is done inside PHP's regex code, it probably
394 # wouldn't speed things up much to add a real parser.
395 #
function doWikiPass2( $text, $linestart )
{
    # Runs the conversion passes over $text (already stripped by strip())
    # and returns the resulting HTML. $linestart is handed to
    # doBlockLevels(). The passes depend on each other's output, so their
    # order must not be changed casually.
    $fname = "OutputPage::doWikiPass2";
    wfProfileIn( $fname );

    # Clean up raw HTML first, then substitute variables / magic words
    $text = $this->removeHTMLtags( $text );
    $text = $this->replaceVariables( $text );

    # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
    $text = str_replace ( "<HR>", "<hr>", $text );

    # Headings and block-level structure (paragraphs, pre, lists)
    $text = $this->doHeadings( $text );
    $text = $this->doBlockLevels( $text, $linestart );

    # Optional date reformatting, controlled by the parser options
    if($this->mOptions->getUseDynamicDates()) {
        global $wgDateFormatter;
        $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
    }

    # External links have to be done before internal ones (see the note
    # above replaceExternalLinks()), tables after both
    $text = $this->replaceExternalLinks( $text );
    $text = $this->replaceInternalLinks ( $text );
    $text = $this->doTableStuff ( $text ) ;

    $text = $this->formatHeadings( $text );

    # Let the skin post-process the text, then append the category listing
    $sk =& $this->mOptions->getSkin();
    $text = $sk->transformContent( $text );
    $text .= $this->categoryMagic () ;

    wfProfileOut( $fname );
    return $text;
}
428
429
# Turns "== Title ==" style lines into <hN> elements. Levels are handled
# from 6 down to 1 so that longer runs of "=" are consumed first.
# Returns the converted text.
/* private */ function doHeadings( $text )
{
    $level = 6;
    while ( $level >= 1 ) {
        $marks = str_repeat( "=", $level );
        $pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
        $replacement = "<h{$level}>\\1</h{$level}>\\2";
        $text = preg_replace( $pattern, $replacement, $text );
        --$level;
    }
    return $text;
}
439
440 # Note: we have to do external links before the internal ones,
441 # and otherwise take great care in the order of things here, so
442 # that we don't end up interpreting some URLs twice.
443
/* private */ function replaceExternalLinks( $text )
{
    # Converts external links for each supported protocol, one protocol
    # at a time. The flag is subReplaceExternalLinks()'s $autonumber
    # argument; it is set for http and https only.
    $fname = "OutputPage::replaceExternalLinks";
    wfProfileIn( $fname );

    $protocols = array(
        "http" => true,
        "https" => true,
        "ftp" => false,
        "irc" => false,
        "gopher" => false,
        "news" => false,
        "mailto" => false
    );
    foreach ( $protocols as $protocol => $autonumber ) {
        $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
    }

    wfProfileOut( $fname );
    return $text;
}
458
# Converts the external links of one protocol in $s to HTML.
#
# $s           text to convert
# $protocol    URL scheme without the colon, e.g. "http"
# $autonumber  true: bracketed links without a description get a running
#              number ("[1]", "[2]", ...) as link text, and image URLs may
#              be turned into images if the parser options allow external
#              images; false: the escaped URL is used as link text
#
# Free URLs are handled first, then "[url]" and "[url description]".
# Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
    # Stand-in for the protocol in freshly generated markup. The image pass
    # emits it instead of the real protocol, so the free-link pattern that
    # runs next does not match the URLs just written. It is swapped back
    # by the str_replace() below.
    $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
    # Characters allowed inside a URL
    $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

    # this is the list of separators that should be ignored if they
    # are the last character of an URL but that should be included
    # if they occur within the URL, e.g. "go to www.foo.com, where .."
    # in this case, the last comma should not become part of the URL,
    # but in "www.foo.com/123,2342,32.htm" it should.
    $sep = ",;\.:";
    # Characters allowed in the file name part of an image URL
    $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
    $images = "gif|png|jpg|jpeg";

    # PLEASE NOTE: The curly braces { } are not part of the regex,
    # they are interpreted as part of the string (used to tell PHP
    # that the content of the string should be inserted there).

    # $e1: free URL ending in an image file name, not preceded by "["
    $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
        "((?i){$images})([^{$uc}]|$)/";

    # $e2: any free URL not preceded by "["; a trailing separator is left out
    $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
    $sk =& $this->mOptions->getSkin();

    if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
        $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
            "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
    }
    $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
        $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
        "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
        "</a>\\5", $s );
    $s = str_replace( $unique, $protocol, $s );

    # Bracketed links: split at every "[protocol:". The leading blank makes
    # sure the first piece is never empty-by-position; it is cut off again.
    $a = explode( "[{$protocol}:", " " . $s );
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # $e1: "url]rest"   $e2: "url description]rest"
    $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
    $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

    foreach ( $a as $line ) {
        if ( preg_match( $e1, $line, $m ) ) {
            $link = "{$protocol}:{$m[1]}";
            $trail = $m[2];
            if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
            else { $text = wfEscapeHTML( $link ); }
        } else if ( preg_match( $e2, $line, $m ) ) {
            $link = "{$protocol}:{$m[1]}";
            $text = $m[2];
            $trail = $m[3];
        } else {
            # Not a well-formed bracketed link: put the text back unchanged
            $s .= "[{$protocol}:" . $line;
            continue;
        }
        # In printable mode the URL is shown in parentheses after the link
        if ( $this->mOptions->getPrintable() ) $paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
        else $paren = "";
        $la = $sk->getExternalLinkAttributes( $link, $text );
        $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

    }
    return $s;
}
521
# Handles a ''' token: opens <strong> if none is open, otherwise closes it.
# $state["strong"] / $state["em"] hold the token position at which the
# element was opened, or FALSE. If <em> was opened after <strong>, it is
# closed and reopened around the </strong>.
# Returns the HTML to emit for the token.
/* private */ function handle3Quotes( &$state, $token )
{
    if ( !$state["strong"] ) {
        $state["strong"] = $token["pos"];
        return "<strong>";
    }

    # ''' lala ''lala '''
    $emIsInner = ( $state["em"] && $state["em"] > $state["strong"] );
    $state["strong"] = FALSE;
    return $emIsInner ? "</em></strong><em>" : "</strong>";
}
539
# Handles a '' token: opens <em> if none is open, otherwise closes it.
# $state["em"] / $state["strong"] hold the token position at which the
# element was opened, or FALSE. If <strong> was opened after <em>, it is
# closed and reopened around the </em>.
# Returns the HTML to emit for the token.
/* private */ function handle2Quotes( &$state, $token )
{
    if ( !$state["em"] ) {
        $state["em"] = $token["pos"];
        return "<em>";
    }

    # ''lala'''lala'' ....'''
    $strongIsInner = ( $state["strong"] && $state["strong"] > $state["em"] );
    $state["em"] = FALSE;
    return $strongIsInner ? "</strong></em><strong>" : "</em>";
}
557
# Handles a ''''' token, i.e. bold and italic toggled at the same time.
# $state["em"] / $state["strong"] hold the token position at which the
# element was opened, or FALSE.
#   both open:    close both, innermost (opened later) first
#   only em:      close em, open strong
#   only strong:  close strong, open em
#   none open:    open both
# Returns the HTML to emit for the token.
/* private */ function handle5Quotes( &$state, $token )
{
    if ( $state["em"] && $state["strong"] ) {
        if ( $state["em"] < $state["strong"] ) {
            $s = "</strong></em>";
        } else {
            $s = "</em></strong>";
        }
        $state["strong"] = $state["em"] = FALSE;
    } elseif ( $state["em"] ) {
        $s = "</em><strong>";
        $state["em"] = FALSE;
        $state["strong"] = $token["pos"];
    } elseif ( $state["strong"] ) {
        $s = "</strong><em>";
        $state["strong"] = FALSE;
        $state["em"] = $token["pos"];
    } else { # not $em and not $strong
        $s = "<strong><em>";
        $state["strong"] = $state["em"] = $token["pos"];
    }
    return $s;
}
581
# Walks over the token stream of $str and converts [[links]], quote markup
# (''/'''/'''''), "----" rules and the RFC / ISBN magic into HTML.
# While a [[ is open, generated text is collected on a stack instead of
# being output, and is handed to handleInternalLink() when the matching ]]
# arrives. Returns the converted text.
/* private */ function replaceInternalLinks( $str )
{
    global $wgLang; # for language specific parser hook

    $tokenizer=Tokenizer::newFromString( $str );
    # Holds the "[[" tokens and the text collected inside open links
    $tokenStack = array();

    $s="";
    # Open/closed state of the quote markup, see handle2Quotes() etc.
    $state["em"] = FALSE;
    $state["strong"] = FALSE;
    # TRUE while at least one [[ is waiting for its ]]
    $tagIsOpen = FALSE;

    # The tokenizer splits the text into tokens and returns them one by one.
    # Every call to the tokenizer returns a new token.
    while ( $token = $tokenizer->nextToken() )
    {
        switch ( $token["type"] )
        {
            case "text":
                # simple text with no further markup
                $txt = $token["text"];
                break;
            case "[[":
                # link opening tag.
                # FIXME : Treat orphaned open tags (stack not empty when text is over)
                $tagIsOpen = TRUE;
                array_push( $tokenStack, $token );
                $txt="";
                break;
            case "]]":
                # link close tag.
                # get text from stack, glue it together, and call the code to handle a
                # link
                if ( count( $tokenStack ) == 0 )
                {
                    # stack empty. Found a ]] without an opening [[
                    $txt = "]]";
                } else {
                    $linkText = "";
                    $lastToken = array_pop( $tokenStack );
                    # Everything above the nearest "[[" is the link's content
                    while ( $lastToken["type"] != "[[" )
                    {
                        $linkText = $lastToken["text"] . $linkText;
                        $lastToken = array_pop( $tokenStack );
                    }
                    $txt = $linkText ."]]";
                    # Text carried by the "[[" token itself
                    $prefix = $lastToken["text"];
                    $nextToken = $tokenizer->previewToken();
                    if ( $nextToken["type"] == "text" )
                    {
                        # Preview just looks at it. Now we have to fetch it.
                        # The following text is appended so that
                        # handleInternalLink() can see the link trail.
                        $nextToken = $tokenizer->nextToken();
                        $txt .= $nextToken["text"];
                    }
                    $txt = $this->handleInternalLink( $txt, $prefix );
                }
                $tagIsOpen = (count( $tokenStack ) != 0);
                break;
            case "----":
                $txt = "\n<hr>\n";
                break;
            case "'''":
                # This and the three next ones handle quotes
                $txt = $this->handle3Quotes( $state, $token );
                break;
            case "''":
                $txt = $this->handle2Quotes( $state, $token );
                break;
            case "'''''":
                $txt = $this->handle5Quotes( $state, $token );
                break;
            case "":
                # empty token
                $txt="";
                break;
            case "RFC ":
                $txt = $this->doMagicRFC( $tokenizer );
                break;
            case "ISBN ":
                $txt = $this->doMagicISBN( $tokenizer );
                break;
            default:
                # Call language specific Hook.
                $txt = $wgLang->processToken( $token, $tokenStack );
                if ( NULL == $txt ) {
                    # An unknown token. Highlight.
                    $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
                    $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
                }
                break;
        }
        # If we're parsing the interior of a link, don't append the interior to $s,
        # but push it to the stack so it can be processed when a ]] token is found.
        if ( $tagIsOpen && $txt != "" ) {
            $token["type"] = "text";
            $token["text"] = $txt;
            array_push( $tokenStack, $token );
        } else {
            $s .= $txt;
        }
    } #end while
    if ( count( $tokenStack ) != 0 )
    {
        # still objects on stack. opened [[ tag without closing ]] tag.
        # Output what was collected as it is: text tokens contribute their
        # text, all others their type (e.g. the literal "[[").
        $txt = "";
        while ( $lastToken = array_pop( $tokenStack ) )
        {
            if ( $lastToken["type"] == "text" )
            {
                $txt = $lastToken["text"] . $txt;
            } else {
                $txt = $lastToken["type"] . $txt;
            }
        }
        $s .= $txt;
    }
    return $s;
}
700
# Converts one internal link to HTML.
#
# $line    the link without its opening brackets: "target|text]]trail"
# $prefix  text that precedes the link; it is output in front of the result
#
# Depending on the target this produces an image, a media link, a link to
# a special page, bold text (link to the current page), a normal article
# link, or nothing visible (language links are recorded in the parser
# output, category links in $this->mCategoryLinks). Input that does not
# look like a link, or whose target is not a valid title, is returned
# unchanged with the "[[" put back.
/* private */ function handleInternalLink( $line, $prefix )
{
    global $wgLang, $wgLinkCache;
    # NOTE(review): the file header lists $wgInterwikiMagic as a setting
    # that should come from ParserOptions; it was read here without any
    # declaration, so the language-link branch could never run. It is
    # declared global for now -- consider moving it into the options.
    global $wgNamespacesWithSubpages, $wgLanguageCode, $wgInterwikiMagic;
    static $fname = "OutputPage::replaceInternalLinks" ;
    wfProfileIn( $fname );

    wfProfileIn( "$fname-setup" );
    static $tc = FALSE;
    if ( !$tc ) { $tc = Title::legalChars() . "#"; }
    $sk =& $this->mOptions->getSkin();

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }

    # Special and Media are pseudo-namespaces; no pages actually exist in them
    static $image = FALSE;
    static $special = FALSE;
    static $media = FALSE;
    static $category = FALSE;
    if ( !$image ) { $image = Namespace::getImage(); }
    if ( !$special ) { $special = Namespace::getSpecial(); }
    if ( !$media ) { $media = Namespace::getMedia(); }
    if ( !$category ) { $category = wfMsg ( "category" ) ; }

    $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

    wfProfileOut( "$fname-setup" );

    # Every exit below closes the $fname profiling section opened above.

    if ( !preg_match( $e1, $line, $m ) ) { # Invalid form; output directly
        wfProfileOut( $fname );
        return $prefix . "[[" . $line ;
    }
    # page with normal text or alt
    $text = $m[2];
    $trail = $m[3];

    /* Valid link forms:
    Foobar -- normal
    :Foobar -- override special treatment of prefix (images, language links)
    /Foobar -- convert to CurrentPage/Foobar
    /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
    */
    $c = substr($m[1],0,1);
    $noforce = ($c != ":");
    if( $c == "/" ) { # subpage
        if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
            $m[1]=substr($m[1],1,strlen($m[1])-2);
            $noslash=$m[1];
        } else {
            $noslash=substr($m[1],1);
        }
        if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
            $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
            if( "" == $text ) {
                $text= $m[1];
            } # this might be changed for ugliness reasons
        } else {
            $link = $noslash; # no subpage allowed, use standard link
        }
    } elseif( $noforce ) { # no subpage
        $link = $m[1];
    } else {
        $link = substr( $m[1], 1 );
    }
    if( "" == $text )
        $text = $link;

    $nt = Title::newFromText( $link );
    if( !$nt ) {
        wfProfileOut( $fname );
        return $prefix . "[[" . $line;
    }
    $ns = $nt->getNamespace();
    $iw = $nt->getInterWiki();
    if( $noforce ) {
        if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
            # Language link: recorded in the output, not shown in the text
            array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
            wfProfileOut( $fname );
            return $prefix . $trail;
        }
        if( $ns == $image ) {
            $s = $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            wfProfileOut( $fname );
            return $s;
        }
    }
    # A link to the current page itself (without a #section) becomes bold
    # text. strpos() returns 0 for a leading "#", hence the === test.
    if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
        ( strpos( $link, "#" ) === FALSE ) ) {
        wfProfileOut( $fname );
        return $prefix . "<strong>" . $text . "</strong>" . $trail;
    }
    if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
        # Category link: remembered for later, not shown in the text
        $t = explode ( ":" , $nt->getText() ) ;
        array_shift ( $t ) ;
        $t = implode ( ":" , $t ) ;
        $t = $wgLang->ucFirst ( $t ) ;
        # $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
        $nnt = Title::newFromText ( $category.":".$t ) ;
        $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
        $this->mCategoryLinks[] = $t ;
        wfProfileOut( $fname );
        return $prefix . $trail ;
    }
    if( $ns == $media ) {
        $s = $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
        $wgLinkCache->addImageLinkObj( $nt );
        wfProfileOut( $fname );
        return $s;
    } elseif( $ns == $special ) {
        $s = $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
        wfProfileOut( $fname );
        return $s;
    }
    $s = $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

    wfProfileOut( $fname );
    return $s;
}
825
826 # Some functions here used by doBlockLevels()
827 #
# Ends the current section: returns the closing tag for $this->mLastSection
# followed by a newline, or just the newline when the section is "p" or
# empty (no </p> is written). The section is reset to "" in every case.
/* private */ function closeParagraph()
{
    $section = $this->mLastSection;
    $this->mLastSection = "";

    $needsNoTag = ( 0 == strcmp( "p", $section ) || 0 == strcmp( "", $section ) );
    if ( $needsNoTag ) {
        return "\n";
    }
    return "</" . $section . ">" . "\n";
}
# getCommon() returns the length of the longest common prefix
# of both arguments, i.e. the number of leading characters they share.
840 #
/* private */ function getCommon( $st1, $st2 )
{
    # Counts how many leading characters $st1 and $st2 have in common;
    # 0 when either string is empty or the first characters differ.
    $fl = strlen( $st1 );
    $shorter = strlen( $st2 );
    if ( $fl < $shorter ) { $shorter = $fl; }

    # Square brackets are used for the character access: the older
    # $str{$i} form is a parse error as of PHP 8.
    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] != $st2[$i] ) { break; }
    }
    return $i;
}
852 # These next three functions open, continue, and close the list
853 # element appropriate to the prefix character passed into them.
854 #
# Closes the current paragraph and opens the list element that belongs to
# the prefix character $char (* # : ;), including its first item.
# For ";" the definition-term flag mDTopen is set. For any other character
# only an error comment is returned (the paragraph is still closed).
/* private */ function openList( $char )
{
    $result = $this->closeParagraph();

    switch ( $char ) {
        case "*":
            $result .= "<ul><li>";
            break;
        case "#":
            $result .= "<ol><li>";
            break;
        case ":":
            $result .= "<dl><dd>";
            break;
        case ";":
            $result .= "<dl><dt>";
            $this->mDTopen = true;
            break;
        default:
            $result = "<!-- ERR 1 -->";
    }

    return $result;
}
870
# Returns the markup that ends the current list item and starts the next
# one for the prefix character $char. In definition lists the element to
# close (</dt> or </dd>) depends on mDTopen, which is then updated to tell
# whether a <dt> is open now. Unknown characters yield an error comment.
/* private */ function nextItem( $char )
{
    $isPlainList = ( "*" == $char || "#" == $char );
    if ( $isPlainList ) { return "</li><li>"; }

    $isDefinitionList = ( ":" == $char || ";" == $char );
    if ( !$isDefinitionList ) { return "<!-- ERR 2 -->"; }

    $closing = $this->mDTopen ? "</dt>" : "</dd>";
    $this->mDTopen = ( ";" == $char );
    $opening = $this->mDTopen ? "<dt>" : "<dd>";
    return $closing . $opening;
}
887
# Returns the markup (plus a newline) that closes the last item and the
# list itself for the prefix character $char. Only * # and : are known;
# anything else, ";" included, yields an error comment without a newline.
# For ":" an open definition term is closed and mDTopen is cleared.
/* private */function closeList( $char )
{
    switch ( $char ) {
        case "*":
            $closing = "</li></ul>";
            break;
        case "#":
            $closing = "</li></ol>";
            break;
        case ":":
            if ( $this->mDTopen ) {
                $this->mDTopen = false;
                $closing = "</dt></dl>";
            } else {
                $closing = "</dd></dl>";
            }
            break;
        default:
            return "<!-- ERR 3 -->";
    }
    return $closing . "\n";
}
903
# Builds the block-level structure of the text, line by line: paragraphs,
# <pre> for lines starting with a blank, and (nested) lists for lines
# starting with * # : ;
#
# $text       text to convert
# $linestart  false: the first line is the continuation of a line and is
#             copied to the output without any block processing
#
# Returns the converted text.
/* private */ function doBlockLevels( $text, $linestart )
{
    $fname = "OutputPage::doBlockLevels";
    wfProfileIn( $fname );
    # Parsing through the text line by line. The main thing
    # happening here is handling of block-level elements p, pre,
    # and making lists from lines starting with * # : etc.
    #
    $a = explode( "\n", $text );
    $text = $lastPref = "";
    $this->mDTopen = $inBlockElem = false;
    # Read by the clean-up after the loop; stay at these values when the
    # loop body never runs (single line with $linestart false).
    $npl = 0;
    $pref2 = "";

    if ( ! $linestart ) { $text .= array_shift( $a ); }
    foreach ( $a as $t ) {
        if ( "" != $text ) { $text .= "\n"; }

        $oLine = $t;
        $opl = strlen( $lastPref );         # length of the previous list prefix
        $npl = strspn( $t, "*#:;" );        # length of this line's list prefix
        $pref = substr( $t, 0, $npl );
        $pref2 = str_replace( ";", ":", $pref ); # ; and : share one list type
        $t = substr( $t, $npl );

        if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
            # Same list as on the previous line: just start a new item
            $text .= $this->nextItem( substr( $pref, -1 ) );

            if ( ";" == substr( $pref, -1 ) ) {
                # "; term : definition" on a single line
                $cpos = strpos( $t, ":" );
                if ( ! ( false === $cpos ) ) {
                    $term = substr( $t, 0, $cpos );
                    $text .= $term . $this->nextItem( ":" );
                    $t = substr( $t, $cpos + 1 );
                }
            }
        } else if (0 != $npl || 0 != $opl) {
            # The prefix changed: close the lists that ended, open the new ones
            $cpl = $this->getCommon( $pref, $lastPref );

            while ( $cpl < $opl ) {
                $text .= $this->closeList( $lastPref[$opl-1] );
                --$opl;
            }
            if ( $npl <= $cpl && $cpl > 0 ) {
                $text .= $this->nextItem( $pref[$cpl-1] );
            }
            while ( $npl > $cpl ) {
                $char = substr( $pref, $cpl, 1 );
                $text .= $this->openList( $char );

                if ( ";" == $char ) {
                    $cpos = strpos( $t, ":" );
                    if ( ! ( false === $cpos ) ) {
                        $term = substr( $t, 0, $cpos );
                        $text .= $term . $this->nextItem( ":" );
                        $t = substr( $t, $cpos + 1 );
                    }
                }
                ++$cpl;
            }
            $lastPref = $pref2;
        }
        if ( 0 == $npl ) { # No prefix--go to paragraph mode
            if ( preg_match(
                "/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
                $text .= $this->closeParagraph();
                $inBlockElem = true;
            }
            if ( ! $inBlockElem ) {
                # substr() instead of $t[0]: the line may be empty
                if ( " " == substr( $t, 0, 1 ) ) {
                    $newSection = "pre";
                    # $t = wfEscapeHTML( $t );
                }
                else { $newSection = "p"; }

                if ( 0 == strcmp( "", trim( $oLine ) ) ) {
                    # Blank line: paragraph break
                    $text .= $this->closeParagraph();
                    $text .= "<" . $newSection . ">";
                } else if ( 0 != strcmp( $this->mLastSection,
                    $newSection ) ) {
                    $text .= $this->closeParagraph();
                    if ( 0 != strcmp( "p", $newSection ) ) {
                        $text .= "<" . $newSection . ">";
                    }
                }
                $this->mLastSection = $newSection;
            }
            if ( $inBlockElem &&
                preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
                $inBlockElem = false;
            }
        }
        $text .= $t;
    }
    # Close the lists that are still open after the last line
    while ( $npl ) {
        $text .= $this->closeList( $pref2[$npl-1] );
        --$npl;
    }
    if ( "" != $this->mLastSection ) {
        if ( "p" != $this->mLastSection ) {
            $text .= "</" . $this->mLastSection . ">";
        }
        $this->mLastSection = "";
    }
    wfProfileOut( $fname );
    return $text;
}
1009
# Substitutes the magic variables in $text: the current date and time
# values, the number of articles, and the MSG / MSGNW forms that take a
# parameter. Counters for old and new style magic are updated on $this.
# Returns the text with the substitutions made.
/* private */ function replaceVariables( $text )
{
    global $wgLang, $wgCurOut;
    $fname = "OutputPage::replaceVariables";
    wfProfileIn( $fname );

    # Basic variables
    # See Language.php for the definition of each magic word
    # As with sigs, this uses the server's local time -- ensure
    # this is appropriate for your audience!
    $magic = array(
        MAG_CURRENTMONTH => date( "m" ),
        MAG_CURRENTMONTHNAME => $wgLang->getMonthName( date("n") ),
        MAG_CURRENTMONTHNAMEGEN => $wgLang->getMonthNameGen( date("n") ),
        MAG_CURRENTDAY => date("j"),
        MAG_CURRENTDAYNAME => $wgLang->getWeekdayName( date("w")+1 ),
        MAG_CURRENTYEAR => date( "Y" ),
        MAG_CURRENTTIME => $wgLang->time( wfTimestampNow(), false )
    );

    $this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

    # The article count is only computed when the text asks for it
    $articleCountWord =& MagicWord::get( MAG_NUMBEROFARTICLES );
    if ( $articleCountWord->match( $text ) ) {
        $articleCount = wfNumberOfArticles();
        $text = $articleCountWord->replace( $articleCount, $text );
        if( $articleCountWord->getWasModified() ) { $this->mContainsOldMagic++; }
    }

    # "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
    # The callbacks are at the bottom of this file
    $wgCurOut = $this;
    $msgWord =& MagicWord::get( MAG_MSG );
    $text = $msgWord->substituteCallback( $text, "wfReplaceMsgVar" );
    if( $msgWord->getWasModified() ) { $this->mContainsNewMagic++; }

    $msgnwWord =& MagicWord::get( MAG_MSGNW );
    $text = $msgnwWord->substituteCallback( $text, "wfReplaceMsgnwVar" );
    if( $msgnwWord->getWasModified() ) { $this->mContainsNewMagic++; }

    wfProfileOut( $fname );
    return $text;
}
1054
# Cleans up HTML, removes dangerous tags and attributes.
#
# HTML comments are dropped, then the text is split on "<" and every
# piece is examined:
#  - a well-formed, whitelisted tag that fits the current nesting is
#    kept, with its attributes filtered through fixTagAttributes();
#  - everything else has its "<" turned into "&lt;".
# A literal ">" in ordinary text becomes "&gt;" in both cases. Paired
# tags still open at the end of the text are closed automatically.
#
# Returns the sanitised text.
#
/* private */ function removeHTMLtags( $text )
{
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );
	$htmlpairs = array( # Tags that must be closed
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	$htmlnest = array( # Tags that can be nested--??
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$tabletags = array( # Can only appear inside table
		"td", "th", "tr"
	);

	# Table cells/rows are not tracked on the stack either
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	# $tagstack holds the open paired tags of the current table scope;
	# $tablestack holds the stacks that were active outside each open table.
	$tagstack = array(); $tablestack = array();

	foreach ( $bits as $x ) {
		# Pieces: 1 = leading slash, 2 = tag name, 3 = attributes,
		# 4 = closing brace, 5 = text following the tag
		if ( !preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
		  $x, $regs ) ) {
			# Not a tag at all: keep it as literal text
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
			continue;
		}
		$slash = $regs[1];
		$t = strtolower( $regs[2] );
		$params = $regs[3];
		$brace = $regs[4];
		$rest = $regs[5];

		$badtag = 0 ;
		if ( in_array( $t, $htmlelements ) ) {
			# Check our stack
			if ( $slash ) {
				# Closing a tag...
				$ot = NULL;
				if ( ! in_array( $t, $htmlsingle ) &&
				  ( $ot = array_pop( $tagstack ) ) != $t ) {
					# Does not close the innermost open tag: put back
					# whatever was popped (nothing if the stack was empty)
					if ( !is_null( $ot ) ) {
						array_push( $tagstack, $ot );
					}
					$badtag = 1;
				} else {
					if ( $t == "table" ) {
						# Leaving a table: resume the enclosing scope
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				}
			} else {
				# Keep track for later
				if ( in_array( $t, $tabletags ) &&
				  ! in_array( "table", $tagstack ) ) {
					$badtag = 1;
				} else if ( in_array( $t, $tagstack ) &&
				  ! in_array ( $t , $htmlnest ) ) {
					$badtag = 1 ;
				} else if ( ! in_array( $t, $htmlsingle ) ) {
					if ( $t == "table" ) {
						# Entering a table starts a fresh scope
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes($params);
			}
			if ( ! $badtag ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
				continue;
			}
		}
		# Unknown or misplaced tag: show it as text
		$text .= "&lt;" . str_replace( ">", "&gt;", $x);
	}
	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
	}
	wfProfileOut( $fname );
	return $text;
}
1147
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Takes the rendered HTML of the page and returns it with the headings
 * rewritten (and, where applicable, the TOC and edit links inserted).
 *
 * */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) { $st = 0; }

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	# $matches[1] = level digit, [2] = rest of the opening tag, [3] = contents
	preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);

	# Working state, initialised up front so nothing is read before it is set
	$c = 0;              # headline counter
	$level = 0;          # level of the current headline
	$prevlevel = 0;      # level of the previous headline (0 = none yet)
	$toc = "";           # TOC markup collected so far
	$toclevel = 0;       # current TOC indentation depth
	$toclines = 0;       # number of TOC entries
	$h = array();        # per-level headline counters
	$numbering = "";     # dotted number of the current headline
	$dot = 0;            # whether $numbering already has a component
	$refers = array();   # canonical headline => number of occurrences
	$head = array();     # rebuilt headline markup, by headline index
	$full = "";          # reassembled page

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	foreach ( $matches[3] as $headline ) {
		if ( $level ) { $prevlevel = $level; }
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		// count number of headlines for each level
		if ( !isset( $h[$level] ) ) { $h[$level] = 0; }
		$h[$level]++;

		if ( $nh || $st ) {
			// Build "1.2.3" from the non-zero counters up to this level
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) { $numbering .= "."; }
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace('"',"",$canonized_headline);
		$canonized_headline = str_replace(" ","_",trim($canonized_headline));

		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount = $refers[$canonized_headline];

		// Prepend the number to the heading text
		if ( $nh || $st ) {
			$tocline = $numbering ." ". $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				$headline = $numbering . " " . $headline; // the two are different if the line contains a link
			}
		}

		// Create the anchor for linking from the TOC to the section;
		// repeated headlines get a "_2", "_3", ... suffix
		$anchor = $canonized_headline;
		if ( $refcount > 1 ) { $anchor .= "_" . $refcount; }
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		$head[$c] = "";
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink( $c+1 );
		}

		// Put it all together
		$head[$c] .= "<h".$level.$matches[2][$c]
		  ."<a name=\"".$anchor."\">"
		  .$headline
		  ."</a>"
		  ."</h".$level.">";

		// Add the edit section link
		if ( $esr ) {
			$head[$c] = $sk->editSectionScript( $c+1, $head[$c] );
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines
	$blocks = preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>".$full.$toc;
		}

		# There is one more block than there are headlines, so the last
		# block has no headline following it
		if ( isset( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1310
# Handles the text following an "ISBN" magic token.
#
# Empty-typed tokens are skipped. If the next token is a text token it
# is consumed; when its text (after optional leading spaces) starts with
# a run of digits, hyphens or upper-case letters, that run is turned
# into a link to Special:Booksources and the remaining text is appended.
# Otherwise the literal "ISBN " is returned, followed by the consumed
# text if there was any.
#
# $tokenizer must provide previewToken() and nextToken(), each returning
# an array with a "type" key (and a "text" key for text tokens).
#
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" )
	{
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Split $x into leading blanks, the ISBN-looking run, and the rest.
	# substr() may return false at the end of the string, hence the casts.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$isbnLen = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $isbnLen );
	$x = (string)substr( $x, $isbnLen );

	# The query parameter carries the number without hyphens
	$num = str_replace( "-", "", $isbn );
	$num = str_replace( " ", "", $num );

	if ( "" === $num ) {
		# Nothing usable followed: give the text back unchanged
		return "ISBN $blank$x";
	}

	$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
	$text = "<a href=\"" .
	  $titleObj->getUrl( "isbn={$num}", false, true ) .
	  "\" class=\"internal\">ISBN $isbn</a>";
	$text .= $x;
	return $text;
}
# Handles the text following an "RFC" magic token.
#
# Empty-typed tokens are skipped. If the next token is a text token it
# is consumed; when its text (after optional leading spaces) starts with
# digits, those digits are substituted for $1 in the "rfcurl" message
# and wrapped in an external link, with the remaining text appended.
# Otherwise the literal "RFC " is returned, followed by the consumed
# text if there was any.
#
# $tokenizer must provide previewToken() and nextToken(), each returning
# an array with a "type" key (and a "text" key for text tokens).
#
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" )
	{
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789";

	# Split $x into leading blanks, the RFC number, and the rest.
	# substr() may return false at the end of the string, hence the casts.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$rfcLen = strspn( $x, $valid );
	$rfc = (string)substr( $x, 0, $rfcLen );
	$x = (string)substr( $x, $rfcLen );

	if ( "" === $rfc ) {
		# No number followed: give the text back unchanged
		return "RFC $blank$x";
	}

	$url = wfmsg( "rfcurl" );
	$url = str_replace( '$1', $rfc, $url);
	$sk =& $this->mOptions->getSkin();
	$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
	return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1399
# Transforms wikitext before it is saved.
#
# The text is stripped with rendering disabled, run through pstPass2()
# (signatures, context links, {{SUBST:}}), and then the stripped
# sections are put back.
#
# $text       wikitext to transform
# $title      title of the page being saved
# $user       user performing the save (used for signatures)
# $options    ParserOptions to use
# $clearState whether to reset the parser state first
#
# Returns the transformed wikitext.
#
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle = $title;
	if ( $clearState ) {
		# Must be a call: a bare "$this->clearState" is only a property
		# read and leaves the previous state in place.
		$this->clearState();
	}

	# A local strip state is used here, not $this->mStripState
	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $text;
}
1414
# Second pass of the pre-save transform. Performs, in order:
#  - signature expansion: ~~~~ becomes a link to the user's page plus a
#    timestamp, ~~~ becomes the link alone;
#  - the "pipe trick" for context links such as [[name (context)|]],
#    [[namespace:page|]] and [[|name]];
#  - {{SUBST:xxx}} substitution via the wfReplaceSubstVar callback;
#  - trimming of trailing whitespace and removal of the MAG_END marker.
#
# Returns the transformed wikitext.
#
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# Plain string replacement on purpose: with preg_replace() a "$1" or
	# "\1" inside the user name or nickname would be taken as a
	# backreference. The four-tilde form must be handled first.
	$userLink = "[[" . $wgLang->getNsText(
	  Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
									# [[ns:page (cont)|]]

	# If the title being saved is itself "page (context)", that context
	# is applied to [[|page]] links below.
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	# $p4 goes before $p3, which would otherwise also match its input
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1478
1479
1480 }
1481
# Holds the result of a parse: the output text, the language and
# category links that go with it, and the old-magic indicator.
#
# Each setter hands the member and the new value to wfSetVar() and
# returns its result (presumably the previous value -- confirm against
# wfSetVar's definition).
class ParserOutput
{
	var $mText;
	var $mLanguageLinks;
	var $mCategoryLinks;
	var $mContainsOldMagic;

	# Constructor; every argument is optional.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCategoryLinks = $categoryLinks;
		$this->mLanguageLinks = $languageLinks;
		$this->mText = $text;
	}

	# Accessors
	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# Mutators
	function setText( $text )
	{
		return wfSetVar( $this->mText, $text );
	}

	function setLanguageLinks( $ll )
	{
		return wfSetVar( $this->mLanguageLinks, $ll );
	}

	function setCategoryLinks( $cl )
	{
		return wfSetVar( $this->mCategoryLinks, $cl );
	}

	function setContainsOldMagic( $com )
	{
		return wfSetVar( $this->mContainsOldMagic, $com );
	}
}
1504
# Bundle of settings that control a parse. Values come partly from
# site-wide globals and partly from a user's preferences; see
# initialiseFromUser().
#
# The setters hand the member and the new value to wfSetVar() (wfSetRef()
# for the skin) and return its result (presumably the previous value --
# confirm against their definitions).
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	function getUseTeX()                  { return $this->mUseTeX; }
	function getUseCategoryMagic()        { return $this->mUseCategoryMagic; }
	function getUseDynamicDates()         { return $this->mUseDynamicDates; }
	function getInterwikiMagic()          { return $this->mInterwikiMagic; }
	function getAllowExternalImages()     { return $this->mAllowExternalImages; }
	function getSkin()                    { return $this->mSkin; }
	function getDateFormat()              { return $this->mDateFormat; }
	function getEditSection()             { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable()               { return $this->mPrintable; }
	function getNumberHeadings()          { return $this->mNumberHeadings; }
	function getShowToc()                 { return $this->mShowToc; }

	function setUseTeX( $x )                  { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x )        { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x )         { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x )          { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x )     { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x )                    { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x )              { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x )             { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x )               { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x )          { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x )                 { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a ParserOptions object initialised from $user.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already declares its parameter by
		# reference, so no "&" is needed (or allowed) at the call site.
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills in every option: the site-wide switches from their globals,
	# the rest from the user's skin and preferences. A false/empty
	# $userInput stands for a freshly constructed User. Printable output
	# is always switched off here.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}
}
1580
# Regex callbacks, used in Parser::replaceVariables
1582
# Callback for {{MSG:xxx}}: $matches[1] is the message name.
#
# Only the dangerous stuff is removed from the message text. This is
# necessary because replaceVariables is called after removeHTMLtags,
# and message text can come from any user. Internal links in the message
# are rendered while the link cache is suspended; afterwards the message
# page itself is registered with the link cache.
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;

	$msgName = $matches[1];
	$cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

	$wgLinkCache->suspend();
	$rendered = $wgCurOut->replaceInternalLinks( $cleaned );
	$wgLinkCache->resume();

	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $rendered;
}
1595
# Callback for {{MSGNW:xxx}}: $matches[1] is the message name.
#
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed,
# so the message text is passed through wfEscapeWikiText() instead. The
# message page is registered with the link cache.
function wfReplaceMsgnwVar( $matches ) {
	global $wgLinkCache;
	$text = wfEscapeWikiText( wfMsg( $matches[1] ) );
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
	return $text;
}
1604
1605
1606
1607 ?>