Some changes to the link tables. They now all use a key on cur_id for the *_from...
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24
25 # Temporary:
26 var $mOptions, $mTitle;
27
# Constructor (PHP 4 style: a method named after the class).
# Brings every per-parse member to its initial value via clearState().
function Parser()
{
	$this->clearState();
}
32
# Resets the members listed under "Cleared with clearState()":
# a fresh ParserOutput, the external-link autonumber counter back to 0,
# no open section, no open definition term and no strip state.
function clearState()
{
	$this->mStripState = false;
	$this->mDTopen = false;
	$this->mLastSection = "";
	$this->mAutonumber = 0;
	$this->mOutput = new ParserOutput;
}
41
# First pass--strip out <nowiki>, <pre> and (when TeX is enabled) <math>
# sections, pass the rest off to doWikiPass2() which does all the real
# work, then put the stripped sections back with unstrip().
#
# Returns a ParserOutput
#
# Parameters:
#   $text        wikitext to convert
#   $title       title object of the page being parsed; kept by reference
#                in $this->mTitle for the duration of the parse
#   $options     parser options object; kept in $this->mOptions
#   $linestart   handed to doWikiPass2(); when false the first line of
#                $text is not treated as the start of a line
#   $clearState  when true (the default) clearState() is called first
#
# The rendered HTML is stored in the ParserOutput with setText() and the
# ParserOutput itself is returned.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	# strip() fills $this->mStripState, which unstrip() needs afterwards
	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
68
# Returns two random integers in the range 0..0x7fffffff, each written in
# lowercase hexadecimal and glued together. strip() uses the result as a
# placeholder prefix.
/* static */ function getRandomString()
{
	$first = mt_rand( 0, 0x7fffffff );
	$second = mt_rand( 0, 0x7fffffff );
	return dechex( $first ) . dechex( $second );
}
73
74 # Strips <nowiki>, <pre> and <math>
75 # Returns the text, and fills an array with data needed in unstrip()
76 #
# Parameters:
#   $text    wikitext
#   $state   (out) overwritten with the array unstrip() needs: for each of
#            nowiki ("nw"), math and pre a 1-based list of replacement
#            texts, a section count and the random placeholder prefix
#   $render  true: store the HTML each section should become;
#            false: store the section wrapped in its original tag again
#
# Each section found is replaced in the returned text by
# <placeholder prefix><8-digit uppercase hex index>.
# <math> is only processed when the parser options enable TeX.
function strip( $text, &$state, $render = true )
{
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);

	# Replace any instances of the placeholders already present in the
	# input, so they cannot be mistaken for real ones in unstrip()
	foreach ( array( 'nwunq', 'mathunq', 'preunq' ) as $key ) {
		$text = str_replace( $state[$key], wfHtmlEscapeFirst( $state[$key] ), $text );
	}

	# <nowiki>
	$text = $this->stripTagSections( $text, 'nowiki', $state['nwunq'], $state['nwlist'] );
	$state['nwsecs'] = count( $state['nwlist'] );
	for ( $i = 1; $i <= $state['nwsecs']; ++$i ) {
		$raw = $state['nwlist'][$i];
		if ( $render ) {
			$state['nwlist'][$i] = wfEscapeHTMLTagsOnly( $raw );
		} else {
			$state['nwlist'][$i] = "<nowiki>{$raw}</nowiki>";
		}
	}

	# <math>, only when TeX is switched on
	if( $this->mOptions->getUseTeX() ) {
		$text = $this->stripTagSections( $text, 'math', $state['mathunq'], $state['mathlist'] );
		$state['mathsecs'] = count( $state['mathlist'] );
		for ( $i = 1; $i <= $state['mathsecs']; ++$i ) {
			$raw = $state['mathlist'][$i];
			if ( $render ) {
				$state['mathlist'][$i] = renderMath( $raw );
			} else {
				$state['mathlist'][$i] = "<math>{$raw}</math>";
			}
		}
	}

	# <pre>
	$text = $this->stripTagSections( $text, 'pre', $state['preunq'], $state['prelist'] );
	$state['presecs'] = count( $state['prelist'] );
	for ( $i = 1; $i <= $state['presecs']; ++$i ) {
		$raw = $state['prelist'][$i];
		if ( $render ) {
			$state['prelist'][$i] = "<pre>". wfEscapeHTMLTagsOnly( $raw ). "</pre>\n";
		} else {
			$state['prelist'][$i] = "<pre>{$raw}</pre>";
		}
	}

	return $text;
}

# Helper for strip(): cuts every <$tag>...</$tag> section out of $text.
# The raw inner text of section number n (counting from 1) is stored in
# $contents[n] and the section is replaced by $unique followed by n as
# eight uppercase hex digits. Returns the text with placeholders.
#
# An opening tag with nothing after it is dropped; an opening tag that is
# never closed swallows the rest of the text.
/* private */ function stripTagSections( $text, $tag, $unique, &$contents )
{
	$openRe = "/<\\s*{$tag}\\s*>/i";
	$closeRe = "/<\\/\\s*{$tag}\\s*>/i";
	$out = "";
	$n = 0;

	while ( "" != $text ) {
		$p = preg_split( $openRe, $text, 2 );
		$out .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			break;
		}
		$q = preg_split( $closeRe, $p[1], 2 );
		++$n;
		$contents[$n] = $q[0];
		$out .= $unique . sprintf( "%08X", $n );
		# No closing tag: $q has a single element, nothing is left over
		$text = isset( $q[1] ) ? $q[1] : "";
	}
	return $out;
}
165
# Reverses strip(): every placeholder (prefix + index as eight uppercase
# hex digits) in $text is replaced by the text stored for it in $state.
# The three kinds are restored in the order pre, math, nowiki.
# Returns the restored text.
function unstrip( $text, &$state )
{
	$kinds = array(
		array( 'presecs', 'preunq', 'prelist' ),
		array( 'mathsecs', 'mathunq', 'mathlist' ),
		array( 'nwsecs', 'nwunq', 'nwlist' )
	);

	foreach ( $kinds as $keys ) {
		list( $countKey, $prefixKey, $listKey ) = $keys;
		$total = $state[$countKey];
		for ( $n = 1; $n <= $total; ++$n ) {
			$placeholder = $state[$prefixKey] . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$listKey][$n], $text );
		}
	}
	return $text;
}
181
# Builds the HTML listing appended to a category page.
#
# Returns "" when category magic is disabled in the parser options or when
# the text of the current title does not start with the (ucfirst'ed)
# "category" message followed by ":". Otherwise returns a line break
# followed by up to two sections: sub-categories and articles, each a
# comma separated list of links built from the rows the query returns.
function categoryMagic ()
{
	global $wgLang ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;

	$cat = ucfirst ( wfMsg ( "category" ) ) ;
	$ti = explode ( ":" , $this->mTitle->getText() , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	# A title consisting of the category word alone has no name part
	$catName = isset ( $ti[1] ) ? $ti[1] : "" ;

	# Forced to an integer because it is interpolated into the SQL below
	$id = intval ( $this->mTitle->getArticleID() ) ;
	$r = "<br break=all>\n" ;

	$articles = array() ;
	$children = array() ;

	# Skin comes from the parser options like everywhere else in this
	# class; $wgUser must not be used here (see the file header)
	$sk =& $this->mOptions->getSkin() ;

	# Hard-wired to false, so the brokenlinks query is the one that runs
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		# Rebuild the prefixed title text: "Namespace:Title" or "Title"
		$t = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != "" ) $t .= ":" ;
		$t .= $x->cur_title ;

		# Pages whose own prefix is the category word are sub-categories
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		$h = wfMsg( "category_header", $catName );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
247
# Returns the whitelist of attribute names allowed on HTML tags in
# wikitext (no scripting attributes). fixTagAttributes() drops every
# attribute whose lowercased name is not in this list.
function getHTMLattrs ()
{
	$general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
	$breaksAndRules = array( "clear", "noshade" );      # BR, HR
	$quotes = array( "cite" );                           # BLOCKQUOTE, Q
	$font = array( "size", "face", "color" );            # FONT
	# For various lists, mostly deprecated but safe
	$lists = array( "type", "start", "value", "compact" );
	$tables = array(
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan"
	);
	$css = array( "id", "class", "name", "style" );      # For CSS

	return array_merge( $general, $breaksAndRules, $quotes, $font, $lists, $tables, $css );
}
264
# Sanitises the attribute part of an HTML tag.
#
#   $t  the text between the tag name and the closing ">"
#
# Every name or name=value pair whose name is not in getHTMLattrs() is
# removed. If what is left contains a style attribute with "expression",
# something matching "tps*://" or "url(", ALL attributes are dropped.
# Returns the trimmed result, "" when nothing survives.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag.
	# This is done with a callback rather than the /e modifier: /e pastes
	# the matched (user supplied) attribute value into PHP code that is
	# then evaluated.
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# preg_replace_callback() helper for fixTagAttributes().
#   $m[1]  attribute name
#   $m[3]  attribute value including any quotes; missing or "" when the
#          attribute was given without a value
# Returns name, or name=value, for whitelisted names and "" otherwise.
/* private */ function fixTagAttributesCallback ( $m )
{
	if ( !in_array ( strtolower ( $m[1] ) , $this->getHTMLattrs() ) ) {
		return "" ;
	}
	if ( isset ( $m[3] ) && $m[3] !== "" ) {
		return $m[1] . "=" . $m[3] ;
	}
	return $m[1] ;
}
287
# Converts wiki table markup to HTML, line by line.
#
#   {| attrs    opens a table (tables may nest)
#   |}          closes the innermost table
#   |- attrs    starts a new row; any number of extra dashes is allowed
#   |+ text     caption
#   | a || b    data cells,   ! a !! b   header cells
#   attrs | text   inside a cell gives the cell attributes
#
# Lines outside any table are left untouched. Tables still open at the
# end of the text are closed. Attributes go through fixTagAttributes().
# Returns the converted text.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	# One stack entry per currently open table:
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		$fc2 = substr ( $x , 0 , 2 ) ;
		if ( "{|" == $fc2 )
		{
			# Everything after the two marker characters is attribute text;
			# fixTagAttributes() trims it, so "{|border=1" and "{| border=1"
			# give the same result
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == $fc2 )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == $fc2 ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is only written when the first cell arrives;
			# remember its attributes until then
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == $fc2 ) # Cell, header or caption
		{
			if ( "|+" == $fc2 )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row if this is its first cell
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;

				$parts = explode ( "|" , $theline , 2 ) ;
				if ( count ( $parts ) == 1 ) {
					$cell = "{$z}<{$l}>{$parts[0]}" ;
				} else {
					$cell = "{$z}<{$l} ".$this->fixTagAttributes($parts[0]).">{$parts[1]}" ;
				}
				$t[$k] .= $cell ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close with the tag that is really open (TD, TH or CAPTION)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	return $t ;
}
387
388 # Well, OK, it's actually about 14 passes. But since all the
389 # hard lifting is done inside PHP's regex code, it probably
390 # wouldn't speed things up much to add a real parser.
391 #
# Runs the conversion passes over already stripped text, in this order:
# HTML tag cleanup, variables, <HR> normalisation, headings, block
# levels, dynamic dates (if enabled in the options), external links,
# internal links, tables, heading formatting, the skin's
# transformContent() and finally the category listing.
#
#   $text       stripped wikitext
#   $linestart  passed on to doBlockLevels()
#
# Returns the resulting HTML.
function doWikiPass2( $text, $linestart )
{
	$fname = "OutputPage::doWikiPass2";
	wfProfileIn( $fname );

	$text = $this->replaceVariables( $this->removeHTMLtags( $text ) );

	# Horizontal rules written as "----" are handled by the tokenizer in
	# replaceInternalLinks(); here only the tag's case is normalised
	$text = str_replace ( "<HR>", "<hr>", $text );

	$text = $this->doBlockLevels( $this->doHeadings( $text ), $linestart );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
	}

	# External links must be done before internal ones
	$text = $this->replaceInternalLinks ( $this->replaceExternalLinks( $text ) );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->formatHeadings( $text );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text ) . $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
424
425
# Turns "== Heading ==" style lines into <hN> elements.
# Levels are tried from six "=" down to one, so longer markers win. A
# heading must start its line, contain no "=" inside, and its closing
# marker must be followed by whitespace or the end of the line.
# Returns the converted text.
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
		$text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>\\2", $text );
		--$level;
	}
	return $text;
}
435
436 # Note: we have to do external links before the internal ones,
437 # and otherwise take great care in the order of things here, so
438 # that we don't end up interpreting some URLs twice.
439
# Converts external links for every supported protocol by calling
# subReplaceExternalLinks() once per protocol, in a fixed order.
# Only http and https are passed the autonumber flag.
# Returns the converted text.
/* private */ function replaceExternalLinks( $text )
{
	$fname = "OutputPage::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => autonumber flag
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
454
# Converts the external links of one protocol.
#
#   $s           text to convert
#   $protocol    protocol name without the colon, e.g. "http"
#   $autonumber  true: bracketed links without a description are shown as
#                "[n]" using $this->mAutonumber, and (if the options allow
#                external images) bare image URLs become images
#
# Two steps: first bare URLs not preceded by "[" are linked, then the
# text is split at "[protocol:" to handle the bracketed forms
# [url] and [url description]. Returns the converted text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stands in for the protocol inside generated markup so that a URL
	# that was already converted is not matched a second time
	$marker = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$urlChars = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$trailSep = ",;\.:";
	$fileChars = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$imageExt = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imageRe = "/(^|[^\\[])({$protocol}:)([{$urlChars}{$trailSep}]+)\\/([{$fileChars}]+)\\." .
		"((?i){$imageExt})([^{$urlChars}]|$)/";

	$bareRe = "/(^|[^\\[])({$protocol}:)(([".$urlChars."]|[".$trailSep."][".$urlChars."])+)([^". $urlChars . $trailSep. "]|[".$trailSep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber && $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$marker}:\\3" . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imageRe, "\\1" . $imageHtml . "\\6", $s );
	}

	# Bare URLs
	$target = "{$marker}:\\3";
	$attrs = $sk->getExternalLinkAttributes( $target, wfEscapeHTML( $target ) );
	$label = wfEscapeHTML( $target );
	$s = preg_replace( $bareRe, "\\1" . "<a href=\"{$target}\"" . $attrs . ">" . $label . "</a>\\5", $s );
	$s = str_replace( $marker, $protocol, $s );

	# Bracketed links. The leading space guarantees a non-empty first
	# piece; it is removed again right after the split
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = substr( array_shift( $pieces ), 1 );

	$plainRe = "/^([{$urlChars}"."{$trailSep}]+)](.*)\$/sD";
	$describedRe = "/^([{$urlChars}"."{$trailSep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $piece ) {
		if ( preg_match( $plainRe, $piece, $hit ) ) {
			# [url]
			$link = "{$protocol}:{$hit[1]}";
			$trail = $hit[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $describedRe, $piece, $hit ) ) {
			# [url description]
			$link = "{$protocol}:{$hit[1]}";
			$text = $hit[2];
			$trail = $hit[3];
		} else {
			# Not a link after all: put the split text back unchanged
			$s .= "[{$protocol}:" . $piece;
			continue;
		}

		$paren = "";
		if ( $this->mOptions->getPrintable() ) {
			$paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
517
# Handles a ''' (bold) token.
#
#   $state  (in/out) array with keys "em" and "strong"; each holds FALSE
#           or the "pos" of the token that opened it
#   $token  the current token; only $token["pos"] is read
#
# Opens <strong> when none is open, otherwise closes it. If <em> was
# opened after <strong>, <em> is closed first and reopened afterwards so
# the tags stay properly nested. Returns the HTML to emit.
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# ''' lala ''lala '''
	$emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
535
# Handles a '' (italic) token; the mirror image of handle3Quotes().
#
#   $state  (in/out) array with keys "em" and "strong"; each holds FALSE
#           or the "pos" of the token that opened it
#   $token  the current token; only $token["pos"] is read
#
# Opens <em> when none is open, otherwise closes it. If <strong> was
# opened after <em>, <strong> is closed first and reopened afterwards.
# Returns the HTML to emit.
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# ''lala'''lala'' ....'''
	$strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
553
# Handles a ''''' (bold + italic) token.
#
#   $state  (in/out) array with keys "em" and "strong"; each holds FALSE
#           or the "pos" of the token that opened it
#   $token  the current token; only $token["pos"] is read
#
# Both open:    close both, the one opened last first.
# Only em open: close it and open <strong>.
# Only strong:  close it and open <em>.
# None open:    open both.
# Returns the HTML to emit.
/* private */ function handle5Quotes( &$state, $token )
{
	$s = "";
	if ( $state["em"] && $state["strong"] ) {
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
577
# Token driven pass that handles [[internal links]], quote markup
# ('', ''', '''''), "----" rules and the RFC / ISBN magic words.
#
#   $str  text to convert
#
# While a [[ is open, everything produced is collected on a stack instead
# of being written out; at the matching ]] the collected text, plus a
# directly following text token, is handed to handleInternalLink().
# A [[ that is never closed is written back as plain text at the end.
# Returns the converted text.
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s="";
	$state = array( "em" => FALSE, "strong" => FALSE );
	$tagIsOpen = FALSE;

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;
			case "[[":
				# link opening tag.
				# Orphaned open tags (stack not empty when the text is over)
				# are dealt with after the loop.
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt="";
				break;
			case "]]":
				# link close tag.
				# get text from stack, glue it together, and call the code to handle a
				# link
				if ( count( $tokenStack ) == 0 )
				{
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					$linkText = "";
					$lastToken = array_pop( $tokenStack );
					while ( $lastToken["type"] != "[[" )
					{
						# Not empty(): that would also throw away the text "0"
						if( isset( $lastToken["text"] ) && $lastToken["text"] !== "" ) {
							$linkText = $lastToken["text"] . $linkText;
						}
						$lastToken = array_pop( $tokenStack );
					}
					$txt = $linkText ."]]";
					# Text carried by the [[ token itself is the link prefix
					if( isset( $lastToken["text"] ) ) {
						$prefix = $lastToken["text"];
					} else {
						$prefix = "";
					}
					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" )
					{
						# Preview just looks at it. Now we have to fetch it.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $txt, $prefix );
				}
				$tagIsOpen = (count( $tokenStack ) != 0);
				break;
			case "----":
				$txt = "\n<hr>\n";
				break;
			case "'''":
				# This and the three next ones handle quotes
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$txt="";
				break;
			case "RFC ":
				if ( $tagIsOpen ) {
					$txt = "RFC ";
				} else {
					$txt = $this->doMagicRFC( $tokenizer );
				}
				break;
			case "ISBN ":
				if ( $tagIsOpen ) {
					$txt = "ISBN ";
				} else {
					$txt = $this->doMagicISBN( $tokenizer );
				}
				break;
			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}
		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while
	if ( count( $tokenStack ) != 0 )
	{
		# still objects on stack. opened [[ tag without closing ]] tag.
		$txt = "";
		while ( $lastToken = array_pop( $tokenStack ) )
		{
			if ( $lastToken["type"] == "text" )
			{
				$txt = $lastToken["text"] . $txt;
			} else {
				$txt = $lastToken["type"] . $txt;
			}
		}
		$s .= $txt;
	}
	return $s;
}
710
# Renders one internal link.
#
#   $line    the text following "[[": "target]]trail" or
#            "target|label]]trail"
#   $prefix  text that belongs in front of the link
#
# Returns the HTML for prefix + link + trail. Text that does not have a
# valid link form, or whose target is not a valid title, is returned
# unchanged with "[[" put back in front.
#
# Special cases, in the order they are tested:
#   - interlanguage links (unless forced with a leading ":") are recorded
#     in the ParserOutput and produce no link text
#   - image links (unless forced with a leading ":") become inline images
#   - links to the current page without a "#" are shown in bold
#   - category links (when category magic is on) are recorded, not shown
#   - media and special links use their own skin methods
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	# NOTE(review): the file header lists $wgInterwikiMagic among the
	# settings meant to be read through ParserOptions; no such getter is
	# visible from here, so the global is used -- confirm.
	global $wgInterwikiMagic;
	static $fname = "OutputPage::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );
	$s = "";

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		wfProfileOut( $fname );
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		# !empty() also covers namespaces that have no entry at all
		if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		wfProfileOut( $fname );
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			$s .= $prefix . $trail;
			wfProfileOut( $fname );
			return $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# Self link. Identity check: a "#" at offset 0 must count as present
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		wfProfileOut( $fname );
		return $s;
	}
	if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		# NOTE(review): mCategoryLinks is not among the members declared
		# at the top of this class; possibly meant to live on
		# $this->mOutput like mLanguageLinks -- confirm.
		$this->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		wfProfileOut( $fname );
		return $s ;
	}
	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
836
837 # Some functions here used by doBlockLevels()
838 #
# Ends the section recorded in $this->mLastSection and forgets it.
# Returns the closing tag followed by a newline; for "p" and for no
# section at all only the newline is returned.
/* private */ function closeParagraph()
{
	$section = $this->mLastSection;
	$this->mLastSection = "";

	$needsNoTag = ( 0 == strcmp( "p", $section ) || 0 == strcmp( "", $section ) );
	if ( $needsNoTag ) {
		return "\n";
	}
	return "</{$section}>" . "\n";
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading characters they share.
#
#   $st1, $st2  the two strings to compare
# Returns the number of leading characters that are identical in both;
# 0 when either string is empty.
/* private */ function getCommon( $st1, $st2 )
{
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square brackets instead of $st1{$i}: the brace form of string
	# offsets no longer exists in current PHP
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
863 # These next three functions open, continue, and close the list
864 # element appropriate to the prefix character passed into them.
865 #
#   $char  list prefix character: "*", "#", ":" or ";"
# Closes the current paragraph, then opens the list for $char together
# with its first item. ";" additionally marks a definition term as open.
# Any other character yields only the "ERR 1" comment.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			$result = "<!-- ERR 1 -->";
	}
	return $result;
}
881
#   $char  list prefix character: "*", "#", ":" or ";"
# Returns the markup that ends the current item and starts the next one.
# For definition lists the closing tag depends on whether a term is
# currently open ($this->mDTopen), and mDTopen is updated to say whether
# the new item is a term (";") or a definition (":").
# Any other character yields the "ERR 2" comment.
/* private */ function nextItem( $char )
{
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	$close = $this->mDTopen ? "</dt>" : "</dd>";
	$this->mDTopen = ( ";" == $char );
	return $close . ( $this->mDTopen ? "<dt>" : "<dd>" );
}
898
#   $char  list prefix character: "*", "#" or ":"
# Returns the markup that ends the last item and the list itself,
# followed by a newline. For ":" a still open definition term is closed
# (and $this->mDTopen reset) instead of a definition.
# Any other character yields the "ERR 3" comment, without a newline.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$text = "</li></ul>";
			break;
		case "#":
			$text = "</li></ol>";
			break;
		case ":":
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$text = "</dt></dl>";
			} else {
				$text = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $text . "\n";
}
914
# Line by line handling of block level structure: lists built from the
# prefix characters * # : ; and paragraphs / preformatted sections for
# lines without a prefix.
#
#   $text       text to convert
#   $linestart  false: the first line is copied through untouched
#
# Returns the converted text with all lists and sections closed.
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "OutputPage::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;
	# Defined up front: the closing loop after the foreach reads them even
	# when no line was iterated
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );
		$npl = strspn( $t, "*#:;" );
		$pref = substr( $t, 0, $npl );
		# ";" and ":" belong to the same kind of list
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list as the previous line: just another item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on a single line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Prefix changed: close what no longer applies, open what is new
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( substr( $lastPref, $opl - 1, 1 ) );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( substr( $pref, $cpl - 1, 1 ) );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
				"/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				# substr() rather than a string offset: $t may be empty
				if ( " " == substr( $t, 0, 1 ) ) {
					$newSection = "pre";
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					# Blank line: paragraph break
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
					$newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
				preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists still open after the last line
	while ( $npl ) {
		$text .= $this->closeList( substr( $pref2, $npl - 1, 1 ) );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
1020
# Substitutes magic words ("variables") in the text.
#
# Date and time variables and the article count are replaced directly;
# what they add is accumulated in the ParserOutput's mContainsOldMagic.
# The MSG and MSGNW variables, which take a parameter, are handed to the
# callbacks wfReplaceMsgVar / wfReplaceMsgnwVar; $wgCurOut is pointed at
# this parser beforehand.
# Returns the text with the variables replaced.
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "OutputPage::replaceVariables";
	wfProfileIn( $fname );

	# Basic variables
	# See Language.php for the definition of each magic word
	# As with sigs, this uses the server's local time -- ensure
	# this is appropriate for your audience!
	$magic = array(
		MAG_CURRENTMONTH => date( "m" ),
		MAG_CURRENTMONTHNAME => $wgLang->getMonthName( date("n") ),
		MAG_CURRENTMONTHNAMEGEN => $wgLang->getMonthNameGen( date("n") ),
		MAG_CURRENTDAY => date("j"),
		MAG_CURRENTDAYNAME => $wgLang->getWeekdayName( date("w")+1 ),
		MAG_CURRENTYEAR => date( "Y" ),
		MAG_CURRENTTIME => $wgLang->time( wfTimestampNow(), false )
	);

	$this->mOutput->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	$mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $mw->match( $text ) ) {
		$text = $mw->replace( wfNumberOfArticles(), $text );
		if( $mw->getWasModified() ) {
			$this->mOutput->mContainsOldMagic++;
		}
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;

	# NOTE(review): the counter below is $this->mContainsNewMagic, a
	# member not declared at the top of this class, while the "old magic"
	# counter lives on $this->mOutput -- confirm which object is meant.
	$mw =& MagicWord::get( MAG_MSG );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
	if( $mw->getWasModified() ) {
		$this->mContainsNewMagic++;
	}

	$mw =& MagicWord::get( MAG_MSGNW );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
	if( $mw->getWasModified() ) {
		$this->mContainsNewMagic++;
	}

	wfProfileOut( $fname );
	return $text;
}
1065
# Cleans up HTML, removes dangerous tags and attributes.
#
# HTML comments are deleted. The text is then split on "<" and every piece is
# examined: a piece that starts with a whitelisted tag which is legal at that
# point is re-emitted with its attributes passed through fixTagAttributes();
# every other piece has its "<" and ">" turned into &lt; / &gt;.
# Paired tags still open at the end of the text are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
	# Profile under this class's name, as Parser::parse does.
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );
	$htmlpairs = array( # Tags that must be closed
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	$htmlnest = array( # Tags that can be nested--??
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$tabletags = array( # Can only appear inside table
		"td", "th", "tr"
	);

	# Table cells and rows are not tracked on the stack either, so they are
	# treated like the other "single" tags from here on.
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used below; presumably
	# fixTagAttributes() obtains the attribute list itself -- confirm
	# before removing this call.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	# Everything before the first "<" is plain text; each later element of
	# $bits is what followed one "<".
	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	$tagstack = array(); $tablestack = array();

	foreach ( $bits as $x ) {
		# Split the piece into: optional "/", tag name, attributes,
		# closing bracket, trailing text. A piece that does not look like
		# a tag leaves every part empty, so it falls through to the
		# escaping branch at the bottom of the loop.
		if ( preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
		  $x, $regs ) ) {
			list( , $slash, $t, $params, $brace, $rest ) = $regs;
		} else {
			$slash = $t = $params = $brace = $rest = "";
		}

		$badtag = 0 ;
		$t = strtolower( $t );
		if ( in_array( $t, $htmlelements ) ) {
			# Check our stack
			if ( $slash ) {
				# Closing a tag...
				$closes = true;
				if ( ! in_array( $t, $htmlsingle ) ) {
					# A paired tag may only close the innermost open tag.
					if ( count( $tagstack ) == 0 ) {
						$closes = false;
					} else {
						$ot = array_pop( $tagstack );
						if ( $ot != $t ) {
							# Mismatch: put the open tag back.
							array_push( $tagstack, $ot );
							$closes = false;
						}
					}
				}
				if ( $closes ) {
					if ( $t == "table" ) {
						# Resume the stack that was set aside when
						# this table was opened.
						$tagstack = array_pop( $tablestack );
					}
					$newparams = "";
				} else {
					$badtag = 1;
				}
			} else {
				# Keep track for later
				if ( in_array( $t, $tabletags ) &&
				  ! in_array( "table", $tagstack ) ) {
					$badtag = 1;
				} else if ( in_array( $t, $tagstack ) &&
				  ! in_array ( $t , $htmlnest ) ) {
					$badtag = 1 ;
				} else if ( ! in_array( $t, $htmlsingle ) ) {
					if ( $t == "table" ) {
						# Each table gets a fresh stack; the outer one
						# is restored when the table is closed.
						array_push( $tablestack, $tagstack );
						$tagstack = array();
					}
					array_push( $tagstack, $t );
				}
				# Strip non-approved attributes from the tag
				$newparams = $this->fixTagAttributes($params);

			}
			if ( ! $badtag ) {
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
				continue;
			}
		}
		# Not an acceptable tag: show it literally.
		$text .= "&lt;" . str_replace( ">", "&gt;", $x);
	}
	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
	}
	wfProfileOut( $fname );
	return $text;
}
1158
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title is editable by the user
 *    and the option is enabled
 * 3) Add a table of contents after the first block of text when the option
 *    is enabled and there are more than three headings
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text is the HTML to process; the reassembled HTML is returned.
 * __NOEDITSECTION__ and __NOTOC__ are removed from the text and switch off
 * the edit links and the TOC respectively.
 *
 * */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if ( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) {
		$st = 0;
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	# $matches[1] = level digit, [2] = rest of the opening tag including
	# ">", [3] = headline contents
	preg_match_all("/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i",$text,$matches);

	# headline counter
	$c = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();

	# Loop state, set up explicitly so that nothing is read before it
	# has been written.
	$level = 0;           # level of the current headline
	$prevlevel = 0;       # level of the previous headline, 0 before the first
	$h = array();         # per-level headline counters
	$numbering = "";      # dotted number of the current headline, e.g. "2.1"
	$dot = 0;             # whether $numbering already has a component
	$refers = array();    # canonized headline => number of occurrences
	$refcount = array();  # headline index => occurrence number of its anchor
	$toclines = 0;

	foreach ( $matches[3] as $headline ) {
		if ( $level ) { $prevlevel = $level; }
		$level = $matches[1][$c];
		if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if ( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		// count number of headlines for each level
		if ( !isset( $h[$level] ) ) {
			$h[$level] = 0;
		}
		$h[$level]++;

		if ( $nh || $st ) {
			// Join the non-zero counters of all levels up to this one
			for ( $i = 1; $i <= $level; $i++ ) {
				if ( !empty( $h[$i] ) ) {
					if ( $dot ) { $numbering .= "."; }
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace("/<.*?>/","",$canonized_headline); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace('"',"",$canonized_headline);
		$canonized_headline = str_replace(" ","_",trim($canonized_headline));
		// count how many in assoc. array so we can track dupes in anchors
		if ( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text

		if ( $nh || $st ) {
			$tocline = $numbering ." ". $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if ( $nh && count( $matches[3] ) > 1 ) {
				$headline = $numbering . " " . $headline; // the two are different if the line contains a link
			}
		}

		// Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if ( $refcount[$c] > 1 ) { $anchor .= "_".$refcount[$c]; }
		if ( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		// The edit link, when enabled, goes in front of the heading
		$head[$c] = "";
		if ( $es ) {
			$head[$c] .= $sk->editSectionLink( $c+1 );
		}

		// Put it all together

		$head[$c] .= "<h".$level.$matches[2][$c]
			."<a name=\"".$anchor."\">"
			.$headline
			."</a>"
			."</h".$level.">";

		// Add the edit section link

		if ( $esr ) {
			$head[$c] = $sk->editSectionScript( $c+1, $head[$c] );
		}

		$numbering = "";
		$c++;
		$dot = 0;
	}

	if ( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines

	$blocks = preg_split("/<H[1-6].*?>.*?<\/H[1-6]>/i",$text);
	$i = 0;

	foreach ( $blocks as $block ) {
		if ( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if ( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>".$full.$toc;
		}

		if ( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1326
# Handles the text that follows an "ISBN" magic token.
#
# Empty-typed tokens are skipped. If the next token is a text token it is
# consumed: leading spaces are set aside, then the longest run of digits,
# hyphens and upper-case letters is taken as the ISBN. When such a run
# exists, a link to Special:Booksources (query "isbn=<run without hyphens>")
# is returned, followed by the rest of the token text. Otherwise the token
# text is returned unchanged behind "ISBN ".
#
# If the next token is not a text token it is left in the tokenizer and
# plain "ISBN " is returned.
#
# $tokenizer must provide previewToken() and nextToken(); tokens are arrays
# with a "type" key and, for text tokens, a "text" key.
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	# The is_array() guard stops the loop if the tokenizer hands back a
	# non-array value -- presumably false at the end of input; a non-array
	# would otherwise compare equal to "" forever.
	while ( is_array( $token ) && $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( !is_array( $token ) || $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = $token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Measure the runs with strspn() instead of peeling off one character
	# at a time: this also terminates cleanly when the text is exhausted.
	$spaces = strspn( $x, " " );
	$blank = str_repeat( " ", $spaces );
	$x = (string)substr( $x, $spaces );

	$len = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $len );
	$x = (string)substr( $x, $len );

	# Hyphens are kept in the link text but not in the query.
	$num = str_replace( "-", "", $isbn );

	if ( "" == $num ) {
		$text = "ISBN $blank$x";
	} else {
		$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
		$text = "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
			"\" class=\"internal\">ISBN $isbn</a>";
		$text .= $x;
	}
	return $text;
}
# Handles the text that follows an "RFC" magic token.
#
# Empty-typed tokens are skipped. If the next token is a text token it is
# consumed: leading spaces are set aside, then the longest run of digits is
# taken as the RFC number. When there is a number, an external link is
# returned whose URL is the "rfcurl" message with "$1" replaced by the
# number, followed by the rest of the token text. Otherwise the token text
# is returned unchanged behind "RFC ".
#
# If the next token is not a text token it is left in the tokenizer and
# plain "RFC " is returned.
#
# $tokenizer must provide previewToken() and nextToken(); tokens are arrays
# with a "type" key and, for text tokens, a "text" key.
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	$token = $tokenizer->previewToken();
	# The is_array() guard stops the loop if the tokenizer hands back a
	# non-array value -- presumably false at the end of input; a non-array
	# would otherwise compare equal to "" forever.
	while ( is_array( $token ) && $token["type"] == "" ) {
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( !is_array( $token ) || $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = $token["text"];
	$valid = "0123456789";

	# Measure the runs with strspn() instead of peeling off one character
	# at a time: this also terminates cleanly when the text is exhausted.
	$spaces = strspn( $x, " " );
	$blank = str_repeat( " ", $spaces );
	$x = (string)substr( $x, $spaces );

	$len = strspn( $x, $valid );
	$rfc = (string)substr( $x, 0, $len );
	$x = (string)substr( $x, $len );

	if ( "" == $rfc ) {
		# Plain assignment: there is nothing to append to yet.
		$text = "RFC $blank$x";
	} else {
		$url = wfmsg( "rfcurl" );
		$url = str_replace( "$1", $rfc, $url);
		$sk =& $this->mOptions->getSkin();
		$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
		$text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
	}
	return $text;
}
1415
# Transforms wikitext before it is saved.
#
# Stores $options and $title on the parser, optionally resets the parser
# state, then runs three steps: strip() with rendering switched off,
# pstPass2() for the actual substitutions, and unstrip() to put the
# stripped sections back. Returns the transformed text.
#
# The strip state used here is local to this call; $this->mStripState is
# not involved.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	if ( $clearState ) {
		$this->clearState();
	}
	$this->mTitle = $title;
	$this->mOptions = $options;

	$state = false;
	$stripped = $this->strip( $text, $state, false );
	$transformed = $this->pstPass2( $stripped, $user );
	return $this->unstrip( $transformed, $state );
}
1430
# Second pass of the pre-save transformation; returns the transformed text.
#
# Performed in this order:
#  - signatures: "~~~~" becomes a link to the user's page plus a timestamp,
#    "~~~" the link alone
#  - context links: [[page (context)|]], [[ns:page|]], [[ns:page (cont)|]]
#    and [[|page]] are expanded to full piped links
#  - {{SUBST:xxx}} is expanded through the wfReplaceSubstVar callback
#  - trailing whitespace is trimmed, then the MAG_END marker is removed
#
# $user supplies the name and the "nickname" option used in signatures.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
		" (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# The tilde markers are literal strings, so plain string replacement
	# is used: with preg_replace() a "$1" or "\1" inside the user name or
	# nickname would be interpreted as a backreference and vanish.
	$sigLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", "$sigLink $d", $text );
	$text = str_replace( "~~~", $sigLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
		# [[ns:page (cont)|]]

	# The context is the parenthesised suffix of the current title, if any.
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	# [[|page]] borrows the context of the page being saved.
	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1494
1495
1496 }
1497
# Holds the result of a parse: the output text, the language links, the
# category links, and the "old magic" marker.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

	# Constructor; every argument is optional.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCategoryLinks = $categoryLinks;
		$this->mLanguageLinks = $languageLinks;
		$this->mText = $text;
	}

	# Accessors

	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# Mutators; each one assigns through wfSetVar() and returns its result.

	function setText( $text )
	{
		return wfSetVar( $this->mText, $text );
	}

	function setLanguageLinks( $ll )
	{
		return wfSetVar( $this->mLanguageLinks, $ll );
	}

	function setCategoryLinks( $cl )
	{
		return wfSetVar( $this->mCategoryLinks, $cl );
	}

	function setContainsOldMagic( $com )
	{
		return wfSetVar( $this->mContainsOldMagic, $com );
	}
}
1520
# Bundle of settings that control a parse. The values are filled in by
# initialiseFromUser() from a handful of site globals and from the
# preferences of a user object; each has a plain getter and a setter.
class ParserOptions
{
	# All variables are private
	var $mUseTeX; # Use texvc to expand <math> tags
	var $mUseCategoryMagic; # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates; # Use $wgDateFormatter to format dates
	var $mInterwikiMagic; # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages; # Allow external images inline
	var $mSkin; # Reference to the preferred skin
	var $mDateFormat; # Date format index
	var $mEditSection; # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable; # Generate printable output
	var $mNumberHeadings; # Automatically number headings
	var $mShowToc; # Show table of contents

	# Getters
	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	# Setters; each one assigns through wfSetVar() (wfSetRef() for the
	# skin) and returns that helper's result.
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a ParserOptions object initialised from $user and returns it.
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already declares its parameter by reference,
		# so no "&" is needed (or, on later PHP versions, allowed) here.
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills in every option: the site-wide ones from the $wg* globals, the
	# rest from the skin and preferences of $userInput. A false/empty
	# $userInput stands for a freshly constructed User. mPrintable always
	# starts out false.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}


}
1596
# Regex callbacks, used in Parser::replaceVariables
1598
# Callback for the MSG magic word: returns the message named by
# $matches[1] with dangerous HTML removed and internal links expanded.
# The cleaning is necessary because replaceVariables is called after
# removeHTMLtags, and message text can come from any user.
# The link cache is suspended while the message's links are expanded;
# afterwards the MediaWiki-namespace page of the message is added to it.
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;

	$key = $matches[1];
	$message = wfMsg( $key );
	$clean = $wgCurOut->removeHTMLtags( $message );

	$wgLinkCache->suspend();
	$expanded = $wgCurOut->replaceInternalLinks( $clean );
	$wgLinkCache->resume();

	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $matches[1] );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $expanded;
}
1611
# Callback for the MSGNW magic word: returns the message named by
# $matches[1] passed through wfEscapeWikiText().
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed
# The MediaWiki-namespace page of the message is added to the link cache.
function wfReplaceMsgnwVar( $matches ) {
	# Only the link cache is needed here; the current parser is not used.
	global $wgLinkCache;
	$text = wfEscapeWikiText( wfMsg( $matches[1] ) );
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
	return $text;
}
1620
1621
1622
1623 ?>