ISBN links are now disabled inside [[link]]s, too, at Brion's request
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24
25 # Temporary:
26 var $mOptions, $mTitle;
27
# Constructor (PHP 4 style: a method named after the class).
# A new parser starts out in the same state that clearState() produces.
function Parser()
{
	# All per-parse state is initialised in one place, clearState().
	$this->clearState();
}
32
# Resets every member listed under "Cleared with clearState()" to its
# initial value, including a fresh ParserOutput. mOptions and mTitle are
# not touched here.
function clearState()
{
	# Scalar bookkeeping first...
	$this->mStripState = false;
	$this->mDTopen = false;
	$this->mLastSection = "";
	$this->mAutonumber = 0;

	# ...then the object that collects the result of the next parse.
	$this->mOutput = new ParserOutput;
}
41
42 # First pass--just handle <nowiki> sections, pass the rest off
43 # to doWikiPass2() which does all the real work.
44 #
45 # Returns a ParserOutput
46 #
# Parameters:
#   $text       - wikitext to convert
#   $title      - title object of the page being parsed (kept by reference in mTitle)
#   $options    - options object; stored in mOptions and queried by the other passes
#   $linestart  - passed through to doWikiPass2()
#   $clearState - when true (default) the parser state is reset before parsing
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	# strip() fills $this->mStripState with what unstrip() needs to put the
	# protected sections back once the main pass has finished.
	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
68
# Returns a lowercase hexadecimal string built from two independent
# mt_rand() draws in the range 0..0x7fffffff (so 2 to 16 hex digits).
/* static */ function getRandomString()
{
	$firstHalf = dechex( mt_rand( 0, 0x7fffffff ) );
	$secondHalf = dechex( mt_rand( 0, 0x7fffffff ) );
	return $firstHalf . $secondHalf;
}
73
74 # Strips <nowiki>, <pre> and <math>
75 # Returns the text, and fills an array with data needed in unstrip()
76 #
# Parameters:
#   $text   - wikitext to process
#   $state  - (out) overwritten with the placeholder tables for unstrip()
#   $render - true: store the final HTML for each section;
#             false: store the original markup wrapped in its tag again
#
# Each stripped section is replaced by "<unique marker><8 hex digits>".
# A section whose closing tag is missing extends to the end of the text.
#
function strip( $text, &$state, $render = true )
{
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);

	$stripped = "";
	$stripped2 = "";
	$stripped3 = "";

	# Replace any instances of the placeholders that already occur in the
	# input, so they cannot be mistaken for real markers later.
	$text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
	$text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
	$text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

	# Pass 1: <nowiki>
	while ( "" != $text ) {
		$p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
			++$state['nwsecs'];

			if ( $render ) {
				$state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
			} else {
				$state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
			}

			$stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
			# No closing tag => nothing is left after the section.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}

	# Pass 2: <math>, only when TeX support is switched on
	if( $this->mOptions->getUseTeX() ) {
		while ( "" != $stripped ) {
			$p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
			$stripped2 .= $p[0];
			if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
				$stripped = "";
			} else {
				$q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
				++$state['mathsecs'];

				if ( $render ) {
					$state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
				} else {
					$state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
				}

				$stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
				$stripped = isset( $q[1] ) ? $q[1] : "";
			}
		}
	} else {
		$stripped2 = $stripped;
	}

	# Pass 3: <pre>
	while ( "" != $stripped2 ) {
		$p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
		$stripped3 .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$stripped2 = "";
		} else {
			$q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
			++$state['presecs'];

			if ( $render ) {
				$state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
			} else {
				$state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
			}

			$stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
			$stripped2 = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped3;
}
165
# Puts the sections removed by strip() back into $text.
#
#   $text  - text containing "<marker><8 hex digits>" placeholders
#   $state - the table filled in by strip()
#
# Placeholders are restored in the order <pre>, <math>, <nowiki>.
# Returns the text with every known placeholder replaced.
#
function unstrip( $text, &$state )
{
	foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
		for ( $n = 1; $n <= $state[$kind . 'secs']; $n++ ) {
			$placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
		}
	}
	return $text;
}
181
# Builds the HTML listing appended to a category page: a "subcategories"
# section and an articles section, each a comma separated list of links.
#
# Returns "" when category magic is disabled or when the current title's
# text does not start with the localised category prefix.
#
# NOTE(review): this reads $wgUser for the skin although the file header
# asks to keep $wgUser out of the parser -- confirm whether the skin from
# mOptions should be used instead.
function categoryMagic ()
{
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
	$id = $this->mTitle->getArticleID() ;
	$cat = ucfirst ( wfMsg ( "category" ) ) ;
	$ti = $this->mTitle->getText() ;
	$ti = explode ( ":" , $ti , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	# Part after the "Category:" prefix; empty if the title has no colon.
	$catName = isset ( $ti[1] ) ? $ti[1] : "" ;

	# "clear" is the BR attribute that moves the listing below floated content.
	$r = "<br clear=all>\n" ;

	$articles = array() ;
	$children = array() ;

	$sk =& $wgUser->getSkin() ;

	# Hard-wired to false: pages linking here are looked up through the
	# brokenlinks table; the l_from variant is kept but currently unused.
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		if ( $doesexist ) {
			$t = $x->l_from ;
		} else {
			# Rebuild "Namespace:Title" from the row.
			$t = $wgLang->getNsText ( $x->cur_namespace ) ;
			if ( $t != "" ) $t .= ":" ;
			$t .= $x->cur_title ;
		}

		# Pages that are themselves categories become subcategories.
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		$h = wfMsg( "category_header", $catName );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
251
# Returns the whitelist of attribute names that may appear on user
# supplied HTML tags (lowercase, no scripting attributes). Each name is
# listed once; callers are expected to test membership with in_array().
function getHTMLattrs ()
{
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", /* BR */ "noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
		/* FONT */ "type", "start", "value", "compact",
		/* For various lists, mostly deprecated but safe */
		"summary", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan", /* Tables */
		"id", "class", "name", "style" /* For CSS */
	);
	return $htmlattrs ;
}
268
# Filters the attribute part of an HTML tag.
#
#   $t - raw attribute text, e.g. ' border=1 onclick="x" '
#
# Attributes whose name is not in getHTMLattrs() are removed. If what is
# left contains a style attribute with something that looks like a script
# expression or a URL, ALL attributes are dropped.
# Returns the cleaned, trimmed attribute string (possibly "").
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# A callback is used instead of the /e modifier: /e interpolates the
	# matched (user supplied) text into evaluated PHP code and is no longer
	# available in current PHP versions.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes(): keeps one
# name[=value] pair when the name is whitelisted, otherwise returns "".
#   $m[1] - attribute name, $m[3] - value (only set when "=value" matched)
/* private */ function fixTagAttributesCallback ( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return "" ;
	}
	$value = isset( $m[3] ) ? $m[3] : "" ;
	if ( $value !== "" ) {
		return $m[1] . "=" . $value ;
	}
	return $m[1] ;
}
291
# Converts wiki table markup into HTML table tags, line by line.
#
#   {| attrs   opens a table          |}        closes it
#   |-  attrs  starts a new row       |+ text   caption
#   | a || b   data cells             ! a !! b  header cells
#
# Tables may nest; one entry per open table is kept on each of the four
# stacks below. Anything still open at the end of the text is closed.
# Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Everything after "{|" is attribute text; fixTagAttributes()
			# trims it, so no separating blank is required.
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is only emitted with the row's first cell.
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row lazily, using the attributes saved by "|-"
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, then remember the new cell type
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;

				# "attrs|content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag it was opened with (TD, TH or CAPTION)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	return $t ;
}
391
392 # Well, OK, it's actually about 14 passes. But since all the
393 # hard lifting is done inside PHP's regex code, it probably
394 # wouldn't speed things up much to add a real parser.
395 #
# Runs the individual conversion passes over already-stripped wikitext,
# in a fixed order, and returns the resulting HTML.
#
#   $text      - wikitext with nowiki/math/pre sections replaced by placeholders
#   $linestart - handed to doBlockLevels()
function doWikiPass2( $text, $linestart )
{
	$fname = "OutputPage::doWikiPass2";
	wfProfileIn( $fname );

	# HTML clean-up comes first, then the magic variables.
	$text = $this->replaceVariables( $this->removeHTMLtags( $text ) );

	$text = str_replace ( "<HR>", "<hr>", $text );

	# Headings, then paragraphs and lists.
	$text = $this->doBlockLevels( $this->doHeadings( $text ), $linestart );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
	}

	# External links must be handled before internal ones.
	$text = $this->replaceInternalLinks( $this->replaceExternalLinks( $text ) );
	$text = $this->formatHeadings( $this->doTableStuff( $text ) );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# The category listing (possibly empty) goes at the very end.
	$text .= $this->categoryMagic();

	wfProfileOut( $fname );
	return $text;
}
428
429
# Turns "== Heading ==" style lines into <hN> elements.
# Levels are tried from 6 down to 1 so that longer runs of "=" are
# consumed before shorter ones. The heading text may not contain "=".
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
439
440 # Note: we have to do external links before the internal ones,
441 # and otherwise take great care in the order of things here, so
442 # that we don't end up interpreting some URLs twice.
443
# Converts external links for every supported protocol, one protocol at
# a time and always in the same order.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "OutputPage::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => value for the $autonumber argument of subReplaceExternalLinks()
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $proto => $numbered ) {
		$text = $this->subReplaceExternalLinks( $text, $proto, $numbered );
	}

	wfProfileOut( $fname );
	return $text;
}
458
# Converts the external links of ONE protocol.
#
#   $s          - text to process
#   $protocol   - scheme without the colon, e.g. "http"
#   $autonumber - true: "[url]" is shown as "[1]", "[2]", ... and inline
#                 image URLs may become images (if the options allow it)
#
# Free URLs are handled first with the protocol temporarily swapped for a
# unique token so they are not matched twice; bracketed links follow.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# Separators: dropped when they END a URL ("go to www.foo.com, where"),
	# kept when they occur INSIDE one ("www.foo.com/123,2342,32.htm").
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# The curly braces below are PHP string interpolation, not regex syntax.
	$imageUrl = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";
	$freeUrl = "/(^|[^\\[])({$protocol}:)(([{$uc}]|[{$sep}][{$uc}])+)([^{$uc}{$sep}]|[{$sep}]|$)/";

	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageTag = $sk->makeImage( "{$unique}:\\3/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imageUrl, "\\1" . $imageTag . "\\6", $s );
	}

	$tokenUrl = "{$unique}:\\3";
	$linkAttribs = $sk->getExternalLinkAttributes( $tokenUrl, wfEscapeHTML( $tokenUrl ) );
	$linkLabel = wfEscapeHTML( $tokenUrl );
	$s = preg_replace( $freeUrl,
		"\\1<a href=\"{$tokenUrl}\"" . $linkAttribs . ">" . $linkLabel . "</a>\\5", $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split on "[protocol:"; the blank guarantees a first chunk.
	$chunks = explode( "[{$protocol}:", " " . $s );
	$s = substr( array_shift( $chunks ), 1 );

	$bareForm = "/^([{$uc}{$sep}]+)](.*)\$/sD";
	$labelledForm = "/^([{$uc}{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $chunks as $chunk ) {
		if ( preg_match( $bareForm, $chunk, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} elseif ( preg_match( $labelledForm, $chunk, $m ) ) {
			# [url label]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a link after all; put the text back untouched.
			$s .= "[{$protocol}:" . $chunk;
			continue;
		}

		# Printable output shows the target next to the label.
		$paren = $this->mOptions->getPrintable()
			? ( " (<i>" . htmlspecialchars ( $link ) . "</i>)" )
			: "";
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
521
# Handles a ''' token (bold toggle).
#
#   $state - (in/out) "strong"/"em" hold the position of the token that
#            opened the element, or FALSE when it is not open
#   $token - current token; only its "pos" entry is used
#
# Returns the HTML to emit.
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		# Bold is not open yet: open it and remember where.
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# Bold is open. If italics were opened after it (''' lala ''lala '''),
	# they have to be closed first and re-opened afterwards.
	$emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
539
# Handles a '' token (italic toggle).
#
#   $state - (in/out) "strong"/"em" hold the position of the token that
#            opened the element, or FALSE when it is not open
#   $token - current token; only its "pos" entry is used
#
# Returns the HTML to emit.
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		# Italics are not open yet: open them and remember where.
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# Italics are open. If bold was opened after them (''lala'''lala''),
	# it has to be closed first and re-opened afterwards.
	$strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
557
# Handles a ''''' token (bold and italic toggled together).
#
#   $state - (in/out) "strong"/"em" hold the position of the token that
#            opened the element, or FALSE when it is not open
#   $token - current token; only its "pos" entry is used
#
# Returns the HTML to emit.
/* private */ function handle5Quotes( &$state, $token )
{
	if ( $state["em"] && $state["strong"] ) {
		# Both open: close them, innermost (opened last) first.
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		# Only italics open: swap to bold.
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		# Only bold open: swap to italics.
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
581
# Token-driven pass that handles [[internal links]], quote markup
# ('' ''' '''''), ---- rules and the "RFC " / "ISBN " magic prefixes.
#
#   $str - text to process
#
# Output is collected in $s. While a [[ is open, produced text is pushed
# onto $tokenStack instead, so the whole link body can be handed to
# handleInternalLink() once the matching ]] arrives.
# Returns the converted text.
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s="";
	# Open/closed bookkeeping for the quote handlers (FALSE = not open)
	$state["em"] = FALSE;
	$state["strong"] = FALSE;
	# TRUE while at least one [[ is waiting for its ]]
	$tagIsOpen = FALSE;

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;
			case "[[":
				# link opening tag.
				# FIXME : Treat orphaned open tags (stack not empty when text is over)
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt="";
				break;
			case "]]":
				# link close tag.
				# get text from stack, glue it together, and call the code to handle a
				# link
				if ( count( $tokenStack ) == 0 )
				{
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					$linkText = "";
					# Pop back to the nearest [[, rebuilding the link body in order
					$lastToken = array_pop( $tokenStack );
					while ( $lastToken["type"] != "[[" )
					{
						$linkText = $lastToken["text"] . $linkText;
						$lastToken = array_pop( $tokenStack );
					}
					$txt = $linkText ."]]";
					# The "text" entry of the [[ token is passed on as the link prefix
					$prefix = $lastToken["text"];
					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" )
					{
						# Preview just looks at it. Now we have to fetch it.
						# The following text is appended so handleInternalLink()
						# can treat it as the link trail.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $txt, $prefix );
				}
				# Still inside an outer link if something is left on the stack
				$tagIsOpen = (count( $tokenStack ) != 0);
				break;
			case "----":
				$txt = "\n<hr>\n";
				break;
			case "'''":
				# This and the three next ones handle quotes
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$txt="";
				break;
			case "RFC ":
				# No magic RFC links inside of [[link]]s
				if ( $tagIsOpen ) {
					$txt = "RFC ";
				} else {
					$txt = $this->doMagicRFC( $tokenizer );
				}
				break;
			case "ISBN ":
				# No magic ISBN links inside of [[link]]s either
				if ( $tagIsOpen ) {
					$txt = "ISBN ";
				} else {
					$txt = $this->doMagicISBN( $tokenizer );
				}
				break;
			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				# NOTE(review): loose comparison -- an empty string returned by
				# the hook is treated like NULL here; confirm that is intended.
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}
		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while
	if ( count( $tokenStack ) != 0 )
	{
		# still objects on stack. opened [[ tag without closing ]] tag.
		# Emit everything literally: text tokens as their text, all other
		# tokens as their type string (e.g. "[[").
		$txt = "";
		while ( $lastToken = array_pop( $tokenStack ) )
		{
			if ( $lastToken["type"] == "text" )
			{
				$txt = $lastToken["text"] . $txt;
			} else {
				$txt = $lastToken["type"] . $txt;
			}
		}
		$s .= $txt;
	}
	return $s;
}
708
# Converts the body of one internal link into HTML.
#
#   $line   - link body up to and including "]]" plus any following text,
#             i.e. "target|label]]trail"
#   $prefix - text to emit in front of the link
#
# Returns the HTML for the link. A body that does not look like a valid
# link is returned literally, prefixed with "[[".
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	# $wgInterwikiMagic must be declared here, otherwise the language-link
	# branch below can never be taken.
	# NOTE(review): the file header says this setting lives in ParserOptions;
	# switch to the options accessor if one exists.
	global $wgNamespacesWithSubpages, $wgLanguageCode, $wgInterwikiMagic;
	static $fname = "OutputPage::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );

	$s = "";

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		wfProfileOut( $fname );
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		# !empty() also covers namespaces that have no entry at all
		if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		wfProfileOut( $fname );
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		# Interlanguage link: recorded on the output, nothing shown inline
		if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			$s .= $prefix . $trail;
			wfProfileOut( $fname );
			return $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# Link to the page itself (without a #fragment) is shown in bold.
	# strpos() returns 0 for a leading "#", so the test must be strict.
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		wfProfileOut( $fname );
		return $s;
	}
	# NOTE(review): $category is the localised message text while $ns comes
	# from getNamespace(); verify this comparison matches only category links.
	if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		$this->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		wfProfileOut( $fname );
		return $s ;
	}
	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
833
834 # Some functions here used by doBlockLevels()
835 #
# Ends the currently open block section and forgets it.
# Returns the closing tag followed by a newline; for a "p" section or when
# nothing is open, only the newline is returned.
/* private */ function closeParagraph()
{
	$open = $this->mLastSection;
	$this->mLastSection = "";

	$needsClosingTag = ( 0 != strcmp( "p", $open ) ) && ( 0 != strcmp( "", $open ) );
	if ( $needsClosingTag ) {
		return "</" . $open . ">" . "\n";
	}
	return "\n";
}
846 # getCommon() returns the length of the longest common substring
847 # of both arguments, starting at the beginning of both.
848 #
# Returns the length of the common PREFIX of $st1 and $st2, i.e. the
# number of leading characters (bytes) the two strings share.
# 0 is returned when either string is empty.
/* private */ function getCommon( $st1, $st2 )
{
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square brackets are used for string offsets; the curly-brace form is
	# no longer accepted by current PHP versions.
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
860 # These next three functions open, continue, and close the list
861 # element appropriate to the prefix character passed into them.
862 #
# Opens a new list for the prefix character $char ("*", "#", ":" or ";").
# Any open paragraph is closed first. Returns the HTML to emit; for an
# unknown character only an error comment is returned.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			# Remember that a definition TERM is open
			$this->mDTopen = true;
			break;
		default:
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
878
# Closes the current list item and opens the next one for prefix
# character $char. For definition lists the closing tag depends on
# whether a term (<dt>) or a definition (<dd>) is currently open.
# Returns the HTML to emit, or an error comment for an unknown character.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$close = $this->mDTopen ? "</dt>" : "</dd>";
	if ( ";" == $char ) {
		$this->mDTopen = true;
		$open = "<dt>";
	} else {
		$this->mDTopen = false;
		$open = "<dd>";
	}
	return $close . $open;
}
895
# Closes the list belonging to prefix character $char ("*", "#" or ":").
# Returns the closing tags followed by a newline, or an error comment
# (without newline) for any other character.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			return "</li></ul>" . "\n";
		case "#":
			return "</li></ol>" . "\n";
		case ":":
			# A definition list ends with whichever of dt/dd is open
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				return "</dt></dl>" . "\n";
			}
			return "</dd></dl>" . "\n";
	}
	return "<!-- ERR 3 -->";
}
911
# Line-by-line pass that creates block level structure: paragraphs,
# preformatted sections (lines starting with a blank) and lists built
# from the prefix characters * # : ;
#
#   $text      - text to process
#   $linestart - false: the first line is copied through unprocessed
#
# Returns the converted text.
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "OutputPage::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;

	# Both are read after the loop, so give them a value even if the
	# loop body never runs.
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );         # old prefix length
		$npl = strspn( $t, "*#:;" );        # new prefix length
		$pref = substr( $t, 0, $npl );
		$pref2 = str_replace( ";", ":", $pref ); # ";" and ":" share one list type
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list nesting as the previous line: just a new item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on a single line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Nesting changed: close what no longer matches, open what is new
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( substr( $lastPref, $opl - 1, 1 ) );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( substr( $pref, $cpl - 1, 1 ) );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
				"/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				# substr() instead of a string offset: also safe for an empty line
				if ( " " == substr( $t, 0, 1 ) ) {
					$newSection = "pre";
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					# Blank line: paragraph break
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
					$newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
				preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists left open by the last line
	while ( $npl ) {
		$text .= $this->closeList( substr( $pref2, $npl - 1, 1 ) );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
1017
# Replaces magic words in $text: the date/time variables, the article
# count, and the parameterised MSG / MSGNW forms (handled by callbacks).
# Counters for "old" and "new" magic usage are updated on $this.
# Returns the text with the replacements applied.
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "OutputPage::replaceVariables";
	wfProfileIn( $fname );

	# Basic variables
	# See Language.php for the definition of each magic word
	# As with sigs, this uses the server's local time -- ensure
	# this is appropriate for your audience!
	$magic = array(
		MAG_CURRENTMONTH => date( "m" ),
		MAG_CURRENTMONTHNAME => $wgLang->getMonthName( date("n") ),
		MAG_CURRENTMONTHNAMEGEN => $wgLang->getMonthNameGen( date("n") ),
		MAG_CURRENTDAY => date("j"),
		# date("w") starts at 0; the name lookup is given the value plus one
		MAG_CURRENTDAYNAME => $wgLang->getWeekdayName( date("w")+1 ),
		MAG_CURRENTYEAR => date( "Y" ),
		MAG_CURRENTTIME => $wgLang->time( wfTimestampNow(), false )
	);

	$this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	# The article count is only computed when the word actually occurs
	$mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $mw->match( $text ) ) {
		$articleCount = wfNumberOfArticles();
		$text = $mw->replace( $articleCount, $text );
		if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;

	$mw =& MagicWord::get( MAG_MSG );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
	if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

	$mw =& MagicWord::get( MAG_MSGNW );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
	if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

	wfProfileOut( $fname );
	return $text;
}
1062
/*
 * Cleans up HTML, removes dangerous tags and attributes.
 *
 * The text is split on "<". Each piece that starts with a whitelisted tag
 * name is re-emitted as a tag, with its attributes passed through
 * fixTagAttributes(); every other piece has its "<" and ">" escaped.
 * A stack of open paired tags is kept so that mismatched closing tags are
 * escaped and tags still open at the end of the text are closed.
 * HTML comments are dropped before any of this happens.
 *
 * Returns the cleaned text.
 */
/* private */ function removeHTMLtags( $text )
{
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	$htmlpairs = array( # Tags that must be closed
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	$htmlnest = array( # Tags that can be nested--??
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$tabletags = array( # Can only appear inside table
		"td", "th", "tr"
	);

	# Table cell/row tags are treated as "single": they are never pushed
	# onto the tag stack and need no matching close tag.
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	$tagstack = array();
	$tablestack = array();

	foreach ( $bits as $x ) {
		# Pieces that do not look like a tag keep empty fields and fall
		# through to the escaping branch at the bottom of the loop.
		$slash = $t = $params = $brace = $rest = "";
		$regs = array();
		if ( preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", $x, $regs ) ) {
			list( , $slash, $t, $params, $brace, $rest ) = $regs;
		}
		$t = strtolower( $t );

		$badtag = 0;
		$newparams = "";
		if ( in_array( $t, $htmlelements ) ) {
			# Check our stack
			if ( $slash ) {
				# Closing a tag...
				if ( !in_array( $t, $htmlsingle ) ) {
					$ot = array_pop( $tagstack );
					if ( $ot != $t ) {
						# Not the innermost open tag: restore the stack.
						# An empty stack pops NULL, which must not be pushed.
						if ( !is_null( $ot ) ) {
							array_push( $tagstack, $ot );
						}
						$badtag = 1;
					}
				}
				if ( !$badtag ) {
					if ( $t == "table" ) {
						# Leaving a table: resume the stack saved on entry
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				}
			} else {
				# Keep track for later
				if ( in_array( $t, $tabletags ) &&
				  !in_array( "table", $tagstack ) ) {
					$badtag = 1;
				} else if ( in_array( $t, $tagstack ) &&
				  !in_array( $t, $htmlnest ) ) {
					$badtag = 1;
				} else if ( !in_array( $t, $htmlsingle ) ) {
					if ( $t == "table" ) {
						# Each table gets a fresh stack; the outer one is saved
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes( $params );
			}
			if ( !$badtag ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
				continue;
			}
		}
		$text .= "&lt;" . str_replace( ">", "&gt;", $x );
	}

	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) {
			$tagstack = array_pop( $tablestack );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1155
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Takes the page HTML in $text and returns the rebuilt HTML. The table of
 * contents is only inserted when more than three headings were found.
 *
 * */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$st = 0;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );

	# headline counter
	$c = 0;

	# Every accumulator starts from an explicit empty value so the loops
	# below never read an undefined variable or array index.
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$toclines = 0;
	$toc = "";
	$numbering = "";
	$dot = 0;
	$full = "";
	$h = array();        # heading count per level
	$refers = array();   # anchor text => number of times seen so far
	$refcount = array(); # heading index => occurrence number of its anchor
	$head = array();     # heading index => rebuilt heading HTML

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	foreach ( $matches[3] as $headline ) {
		if ( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		if ( !isset( $h[$level] ) ) {
			$h[$level] = 0;
		}
		$h[$level]++; // count number of headlines for each level

		if ( $nh || $st ) {
			# Build the dotted number from every level that has a count
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace( '"', "", $canonized_headline );
		$canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );

		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text

		if ( $nh || $st ) {
			$tocline = $numbering . " " . $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				$headline = $numbering . " " . $headline; // the two are different if the line contains a link
			}
		}

		// Create the anchor for linking from the TOC to the section

		$anchor = $canonized_headline;
		if ( $refcount[$c] > 1 ) {
			$anchor .= "_" . $refcount[$c];
		}
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		$head[$c] = "";
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink( $c + 1 );
		}

		// Put it all together

		$head[$c] .= "<h" . $level . $matches[2][$c]
			. "<a name=\"" . $anchor . "\">"
			. $headline
			. "</a>"
			. "</h" . $level . ">";

		// Add the edit section link

		if ( $esr ) {
			$head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( $es && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>" . $full . $toc;
		}

		# There is one more block than there are headings, so the final
		# block has no heading to append.
		if ( isset( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1318
/*
 * Handles the text following an "ISBN" magic token.
 *
 * Tokens with an empty type are skipped. If the next token is a text token
 * it is consumed: leading blanks are set aside, then the longest run of
 * digits, dashes and capital letters is taken as the ISBN. When that run
 * contains something besides dashes, a link to Special:Booksources is
 * returned followed by the rest of the token text; otherwise the text is
 * returned unchanged behind a plain "ISBN ". If the next token is not a
 * text token, it is left in the tokenizer and "ISBN " is returned.
 */
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# strspn() measures each leading run in one step and is well defined
	# for an empty string, so the scan cannot run past the end of $x.
	$len = strspn( $x, " " );
	$blank = (string)substr( $x, 0, $len );
	$x = (string)substr( $x, $len );

	$len = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $len );
	$x = (string)substr( $x, $len );

	$num = str_replace( "-", "", $isbn );

	if ( "" == $num ) {
		# Nothing usable as a number. $isbn can still hold dashes that were
		# consumed above, so put them back instead of dropping them.
		return "ISBN $blank$isbn$x";
	}

	$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
	return "<a href=\"" .
		$titleObj->getUrl( "isbn={$num}", false, true ) .
		"\" class=\"internal\">ISBN $isbn</a>" . $x;
}
/*
 * Handles the text following an "RFC" magic token.
 *
 * Tokens with an empty type are skipped. If the next token is a text token
 * it is consumed: leading blanks are set aside, then the leading run of
 * digits is taken as the RFC number. With a number, an external link built
 * from the "rfcurl" message ("$1" replaced by the number) is returned,
 * followed by the rest of the token text; without one the text is returned
 * unchanged behind a plain "RFC ". If the next token is not a text token,
 * it is left in the tokenizer and "RFC " is returned.
 */
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789";

	# strspn() measures each leading run in one step and is well defined
	# for an empty string, so the scan cannot run past the end of $x.
	$len = strspn( $x, " " );
	$blank = (string)substr( $x, 0, $len );
	$x = (string)substr( $x, $len );

	$len = strspn( $x, $valid );
	$rfc = (string)substr( $x, 0, $len );
	$x = (string)substr( $x, $len );

	if ( "" == $rfc ) {
		return "RFC $blank$x";
	}

	$url = wfMsg( "rfcurl" );
	$url = str_replace( '$1', $rfc, $url );
	$sk =& $this->mOptions->getSkin();
	$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
	return "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
}
1407
/*
 * Transforms wikitext before it is saved.
 *
 * The text is stripped with rendering turned off, run through pstPass2()
 * (signatures, context links, {{SUBST:}} and trailing whitespace), and then
 * unstripped again using a strip state local to this call.
 *
 * $clearState resets the parser state first, as it does in parse().
 * Returns the transformed wikitext.
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle = $title;
	if ( $clearState ) {
		# This has to be a call; a bare "$this->clearState" only reads a
		# nonexistent property and resets nothing.
		$this->clearState();
	}

	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $text;
}
1422
/*
 * Second pass of the pre-save transform. Works on text that has already
 * been stripped, and performs, in order:
 *   - signature expansion: "~~~~" becomes a user link plus a timestamp,
 *     "~~~" becomes the user link alone
 *   - context ("pipe trick") links: [[|name]], [[name (context)|]],
 *     [[namespace:page|]] and [[namespace:page (context)|]]
 *   - {{SUBST:xxx}} substitution through wfReplaceSubstVar
 *   - removal of trailing whitespace and of the MAG_END marker
 *
 * Returns the transformed text.
 */
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) {
		$k = $n;
	}
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( "TZ" );
		putenv( "TZ=$wgLocaltimezone" );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	if ( isset( $wgLocaltimezone ) ) {
		putenv( "TZ=$oldtz" );
	}

	# The tilde markers are literal strings, so plain str_replace() is used.
	# With preg_replace() a "$1" or "\1" inside the user name or nickname
	# would be read as a backreference and vanish from the signature.
	$sigLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$sigLink $d", $text );
	$text = str_replace( "~~~", $sigLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current page title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1486
1487
1488 }
1489
# Result of a parse: the rendered text together with the language links,
# the category links and the old-magic flag gathered along the way.
class ParserOutput
{
	var $mText;
	var $mLanguageLinks;
	var $mCategoryLinks;
	var $mContainsOldMagic;

	# Constructor; every argument is optional.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCategoryLinks = $categoryLinks;
		$this->mLanguageLinks = $languageLinks;
		$this->mText = $text;
	}

	# Accessors

	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# Mutators; each hands the field and the new value to wfSetVar() and
	# returns whatever wfSetVar() returns.

	function setText( $text )
	{
		return wfSetVar( $this->mText, $text );
	}

	function setLanguageLinks( $ll )
	{
		return wfSetVar( $this->mLanguageLinks, $ll );
	}

	function setCategoryLinks( $cl )
	{
		return wfSetVar( $this->mCategoryLinks, $cl );
	}

	function setContainsOldMagic( $com )
	{
		return wfSetVar( $this->mContainsOldMagic, $com );
	}
}
1512
# Settings that control a single parse. Values come partly from site-wide
# globals and partly from a user's preferences; see initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Builds a new ParserOptions initialised from $user.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already declares its parameter by reference,
		# so no "&" is needed (or, on later PHP versions, allowed) here.
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills every option: site-wide switches come from the $wg* globals,
	# per-user ones from $userInput's options and skin. A false/empty
	# $userInput is replaced by a fresh User object. Printable output
	# always starts off disabled.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}
}
1588
# Regex callbacks, used in Parser::replaceVariables
1590
# Callback for the MSG magic word: $matches[1] is the message key.
# The message text is sanitised first, since replaceVariables runs after
# removeHTMLtags and message text can come from any user. Its internal
# links are then expanded while the link cache is suspended, and finally
# the MediaWiki-namespace page holding the message is added to the cache.
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;

	$key = $matches[1];
	$html = $wgCurOut->removeHTMLtags( wfMsg( $key ) );

	$wgLinkCache->suspend();
	$html = $wgCurOut->replaceInternalLinks( $html );
	$wgLinkCache->resume();

	$messagePage = Title::makeTitle( NS_MEDIAWIKI, $key );
	$wgLinkCache->addLinkObj( $messagePage );

	return $html;
}
1603
# Callback for the MSGNW magic word: $matches[1] is the message key.
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed,
# so the message text is escaped with wfEscapeWikiText() instead.
# The MediaWiki-namespace page holding the message is added to the link cache.
function wfReplaceMsgnwVar( $matches ) {
	# Only the link cache is needed here; the parser global is not used.
	global $wgLinkCache;
	$text = wfEscapeWikiText( wfMsg( $matches[1] ) );
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
	return $text;
}
1612
1613
1614
1615 ?>